#include "kmp_wait_release.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration
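
// ----------------------------------------------------------------------------
// Linear barrier. Gather: each worker bumps its own b_arrived flag with the
// master registered as waiter; the master polls every worker's flag in turn
// and applies the reduction callback as the values come in.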
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread. After this write, a worker thread may not
    // assume that the team is valid any more - it could be deallocated by the
    // master thread at any time.
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                       new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
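
// Release phase of the linear barrier: the master (optionally) pushes ICVs to
// every worker's implicit task and then bumps each worker's b_go flag in turn;
// workers simply wait on their own b_go flag.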
static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

#if KMP_BARRIER_ICV_PUSH
    {
      KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
      if (propagate_icvs) {
        ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
        for (i = 1; i < nproc; ++i) {
          __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                   team, i, FALSE);
          ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                         &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
        ngo_sync();
      }
    }
#endif // KMP_BARRIER_ICV_PUSH

    // Now, release all of the worker threads
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go flag
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(
          20,
          ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
           "go(%p): %u => %u\n",
           gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
           team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
           other_threads[i]->th.th_bar[bt].bb.b_go,
           other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
      ANNOTATE_BARRIER_BEGIN(other_threads[i]);
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                       other_threads[i]);
      flag.release();
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
// The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
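
// ----------------------------------------------------------------------------
// Tree barrier: threads form a tree with branching factor 2^branch_bits.
// A thread's first child is (tid << branch_bits) + 1 and its parent is
// (tid - 1) >> branch_bits. Illustrative mapping for branch_bits == 2
// (branch_factor == 4): thread 0 gathers children 1-4, thread 1 gathers
// children 5-8, and thread 5's parent is (5 - 1) >> 2 == 1.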
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                                       USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread. After this write, a worker thread may not
    // assume that the team is valid any more - it could be deallocated by the
    // master thread at any time.
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
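
// Release phase of the tree barrier: starting from the master, each parent
// (optionally) copies ICVs into its children's implicit tasks and bumps their
// b_go flags; released children then repeat the process for their own
// subtrees.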
static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
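
// ----------------------------------------------------------------------------
// Hypercube-embedded tree barrier with maximum branching factor
// 2^branch_bits. At each level a thread either signals its parent
// (tid & ~((1 << (level + branch_bits)) - 1)) and drops out, or waits for its
// children at tid + k * (1 << level). Illustrative gather schedule for 8
// threads and branch_bits == 1: round 0: 1->0, 3->2, 5->4, 7->6;
// round 1: 2->0, 6->4; round 2: 4->0.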
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                           int tid, void (*reduce)(void *, void *)
                                        USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread. After this write (in the last iteration
      // of the enclosing for loop), a worker thread may not assume that the
      // team is valid any more - it could be deallocated at any time.
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
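
// Release phase of the hypercube barrier: workers wait on their own b_go
// flag, then each thread walks back down its portion of the hypercube bumping
// its children's b_go flags, pushing fixed ICVs along the way when requested.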
// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default), the threads
     are released in the reverse order of the corresponding gather; otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final destination
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
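
// ----------------------------------------------------------------------------
// Hierarchical (machine-topology) barrier. Threads are grouped according to
// the machine hierarchy (__kmp_get_hierarchy). Leaf children that share a
// core check in on designated bytes of their parent's 64-bit b_arrived flag
// (leaf_state / offset), which keeps the hot traffic on-core when blocktime
// is infinite. The helper below (re-)initializes the per-thread hierarchy
// data whenever the team, team size, or tid changes.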
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(
            tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in
        // hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
          // thread is not a subtree root at next level, so this is max
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
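
// Gather phase of the hierarchical barrier: leaf children check in on the
// parent's b_arrived bytes (on-core path) or on their own b_arrived flags;
// inner nodes then gather each level of their subtree before signaling their
// own parent.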
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state;

  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // First, wait for leaf children to check-in on my b_arrived flag
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          ANNOTATE_REDUCE_AFTER(reduce);
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_BARRIER_END(other_threads[child_tid]);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          ANNOTATE_REDUCE_BEFORE(reduce);
          ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
        }
        // clear leaf_state bits
        KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
      }
      // Next, wait for higher level children on each child's b_arrived flag
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) { // gather lowest level threads first, but skip 0
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    } else { // Blocktime is not infinite
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) { // Gather lowest level threads first
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    }
  }
  // All subordinates are gathered; now release parent if not master thread

  if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing "
                  "T#%d(%d:%d) arrived(%p): %llu => %llu\n",
                  gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
                  thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: After performing this write, a worker thread may
       not assume that the team is valid any more - it could be deallocated by
       the master thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) {
      // Parent is waiting on my b_arrived flag; release it
      ANNOTATE_BARRIER_BEGIN(this_thr);
      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
      flag.release();
    } else {
      // Leaf does special release on "offset" bits of parent's b_arrived flag
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Master thread needs to update the team's b_arrived value
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
static void __kmp_hierarchical_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  bool team_change = false; // indicates on-core barrier shouldn't be used

  if (KMP_MASTER_TID(tid)) {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
                  "entered barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  } else { // Worker threads
    // Wait for parent thread to release me
    if (!thr_bar->use_oncore_barrier ||
        __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
        thr_bar->team == NULL) {
      // Use traditional method of waiting on my own b_go flag
      thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
      kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(this_thr);
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
    } else { // Thread barrier data is initialized, this is a leaf, blocktime is
      // infinite, not nested: wait on my "offset" bits of parent's b_go flag
      thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
                           thr_bar->offset, bt,
                           this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
      flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag ==
          KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
        TCW_8(thr_bar->b_go,
              KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      } else { // Reset my bits on parent's b_go flag
        (RCAST(volatile char *,
               &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
      }
    }
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    KA_TRACE(
        20,
        ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
         gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }

  nproc = this_thr->th.th_team_nproc;
  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (team->t.t_threads[0]
          ->th.th_teams_microtask) { // are we inside the teams construct?
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  // If the team size has increased, we still communicate with old leaves via
  // oncore barrier.
  unsigned short int old_leaf_kids = thr_bar->leaf_kids;
  kmp_uint64 old_leaf_state = thr_bar->leaf_state;
  team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
                                                       tid, team);
  // But if the entire team changes, we won't use oncore barrier at all
  if (team_change)
    old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs) {
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    if (KMP_MASTER_TID(
            tid)) { // master already has copy in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
               thr_bar->use_oncore_barrier) { // optimization for inf blocktime
      if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
        // leaves (on-core children) pull parent's fixed ICVs directly to local
        // ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
      // non-leaves will get ICVs piggybacked with b_go via NGO store
    } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
      if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs so my
        // children can access them
        copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
      else // leaves copy parent's fixed ICVs directly to local ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PUSH

  // Now, release my children
  if (thr_bar->my_level) { // not a leaf
    kmp_int32 child_tid;
    kmp_uint32 last;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (KMP_MASTER_TID(tid)) { // do a flat release
        // Set local b_go to bump children via NGO store of the cache line
        // containing ICVs and b_go.
        thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
        // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
        // the cache line
        ngo_load(&thr_bar->th_fixed_icvs);
        // This loops over all the threads skipping only the leaf nodes in the
        // hierarchy
        for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
             child_tid += thr_bar->skip_per_level[1]) {
          kmp_bstate_t *child_bar =
              &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d) go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Use ngo store (if available) to both store ICVs and release child
          // via child's b_go
          ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
        }
        ngo_sync();
      }
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      // Now, release leaf children
      if (thr_bar->leaf_kids) { // if there are any
        // We test team_change on the off-chance that the level 1 team changed.
        if (team_change ||
            old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
          if (old_leaf_kids) { // release old leaf kids
            thr_bar->b_go |= old_leaf_state;
          }
          // Release new leaf kids
          last = tid + thr_bar->skip_per_level[1];
          if (last > nproc)
            last = nproc;
          for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
               ++child_tid) { // skip_per_level[0]=1
            kmp_info_t *child_thr = team->t.t_threads[child_tid];
            kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
            KA_TRACE(
                20,
                ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                 " T#%d(%d:%d) go(%p): %u => %u\n",
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child using child's b_go flag
            ANNOTATE_BARRIER_BEGIN(child_thr);
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
          }
        } else { // Release all children at once with leaf_state bits on my own
          // b_go flag
          thr_bar->b_go |= thr_bar->leaf_state;
        }
      }
    } else { // Blocktime is not infinite; do a simple hierarchical release
      for (int d = thr_bar->my_level - 1; d >= 0;
           --d) { // Release highest level threads first
        last = tid + thr_bar->skip_per_level[d + 1];
        kmp_uint32 skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = team->t.t_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d) go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Release child using child's b_go flag
          ANNOTATE_BARRIER_BEGIN(child_thr);
          kmp_flag_64 flag(&child_bar->b_go, child_thr);
          flag.release();
        }
      }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid))
      // non-leaves copy ICVs from fixed ICVs to local dest
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
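
// End of Barrier Algorithms

// Internal function to do a barrier.
/* If is_split is true, do a split barrier, otherwise, do a plain barrier.
   If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
   barrier.
   Returns 0 if master thread, 1 if worker thread. */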
int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                  size_t reduce_size, void *reduce_data,
                  void (*reduce)(void *, void *)) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  int status = 0;
  ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  void *return_address;
#endif

  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
                __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
  }
#endif

  if (!team->t.t_serialized) {
#if USE_ITT_BUILD
    // This value will be used in itt notify events below.
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
#endif
#endif /* USE_ITT_BUILD */
    if (__kmp_tasking_mode == tskm_extra_barrier) {
      __kmp_tasking_barrier(team, this_thr, gtid);
      KA_TRACE(15,
               ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
                __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
    }

    /* Copy the blocktime info to the thread, where __kmp_wait_template() can
       access it when the team struct is not guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_DEBUGGER
    // Let the debugger know: the thread arrived to the barrier and waiting.
    if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
      team->t.t_bar[bt].b_master_arrived += 1;
    } else {
      this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
    }
#endif /* USE_DEBUGGER */
    if (reduce != NULL) {
      this_thr->th.th_local.reduce_data = reduce_data;
    }

    if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
      __kmp_task_team_setup(
          this_thr, team,
          0); // use 0 to only setup the current team if nthreads > 1

    switch (__kmp_barrier_gather_pattern[bt]) {
    case bp_hyper_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
                                 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hierarchical_bar: {
      __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
                                        reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_tree_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
                                reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    default: {
      __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
                                  reduce USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }

    KMP_MB();

    if (KMP_MASTER_TID(tid)) {
      status = 0;
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_DEBUGGER
      // Let the debugger know: All threads are arrived and starting leaving
      // the barrier.
      team->t.t_bar[bt].b_team_arrived += 1;
#endif

#if OMP_40_ENABLED
      // Reset cancellation flag for worksharing constructs
      if (team->t.t_cancel_request == cancel_loop ||
          team->t.t_cancel_request == cancel_sections) {
        team->t.t_cancel_request = cancel_noreq;
      }
#endif
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier - report frame end (only if active_level == 1)
      if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
          this_thr->th.th_teams_microtask == NULL &&
#endif
          team->t.t_active_level == 1) {
        kmp_uint64 cur_time = __itt_get_timestamp();
        kmp_info_t **other_threads = team->t.t_threads;
        int nproc = this_thr->th.th_team_nproc;
        int i;
        switch (__kmp_forkjoin_frames_mode) {
        case 1:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        case 2:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
                                 1, loc, nproc);
          break;
        case 3:
          if (__itt_metadata_add_ptr) {
            // Initialize with master's wait time
            kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
            // Set arrive time to zero to be able to check it in
            // __kmp_invoke_task(); the same is done inside the loop below
            this_thr->th.th_bar_arrive_time = 0;
            for (i = 1; i < nproc; ++i) {
              delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
              other_threads[i]->th.th_bar_arrive_time = 0;
            }
            __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                         cur_time, delta,
                                         (kmp_uint64)(reduce != NULL));
          }
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        }
      }
#endif /* USE_ITT_BUILD */
    } else {
      status = 1;
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    }
    if (status == 1 || !is_split) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(
            bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
  } else { // Team is serialized.
    status = 0;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
#if OMP_45_ENABLED
      if (this_thr->th.th_task_team != NULL) {
        void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
          __kmp_itt_barrier_starting(gtid, itt_sync_obj);
        }
#endif

        KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
                         TRUE);
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
        __kmp_task_team_setup(this_thr, team, 0);

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
      }
#else
      // The task team should be NULL for serialized code (tasks will be
      // executed immediately)
      KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
#endif
    }
  }
  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
                gtid, __kmp_team_from_gtid(gtid)->t.t_id,
                __kmp_tid_from_gtid(gtid), status));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
          my_task_data, return_address);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
          my_task_data, return_address);
    }
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
  }
#endif
  ANNOTATE_BARRIER_END(&team->t.t_bar);

  return status;
}
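
// Finish the release phase of a split (reduction) barrier: only the master
// calls this, after completing the reduction, to release the workers that are
// still waiting on their b_go flags.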
void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
  if (!team->t.t_serialized) {
    if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
                                           FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(NULL));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }
  }
  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
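
// The join barrier is executed at the end of a parallel region: all threads
// gather on the fork/join barrier, the master waits for outstanding tasks,
// and workers stay parked until the next fork barrier releases them.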
void __kmp_join_barrier(int gtid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team;
  kmp_uint nproc;
  kmp_info_t *master_thread;
  int tid;
#ifdef KMP_DEBUG
  int team_id;
#endif /* KMP_DEBUG */
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
  // Get object created at fork_barrier
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
  KMP_MB();

  // Get current info
  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
  tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
  team_id = team->t.t_id;
#endif /* KMP_DEBUG */
  master_thread = this_thr->th.th_team_master;
#ifdef KMP_DEBUG
  if (master_thread != team->t.t_threads[0]) {
    __kmp_print_structure();
  }
#endif /* KMP_DEBUG */
  KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
  KMP_MB();

  // Verify state
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
  KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                gtid, team_id, tid));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;
    void *codeptr = NULL;
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid))
      this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
  }
#endif

  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                     team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif /* KMP_DEBUG */

  /* Copy the blocktime info to the thread, where __kmp_wait_template() can
     access it when the team struct is not guaranteed to exist. */
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
  }

#if USE_ITT_BUILD
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

  /* From this point on, the team data structure may be deallocated at any time
     by the master thread - it is unsafe to reference it in any of the worker
     threads. Any per-team data items that need to be referenced before the
     end of the barrier should be moved to the kmp_task_team_t structs. */
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if KMP_STATS_ENABLED
    // Have master thread flag the workers to indicate they are now waiting for
    // next parallel region, and wake them up so they switch their timers to
    // idle.
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
                                  team_thread->th.th_sleep_loc);
    }
#endif
#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Join barrier - report frame end
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
        this_thr->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1) {
      kmp_uint64 cur_time = __itt_get_timestamp();
      ident_t *loc = team->t.t_ident;
      kmp_info_t **other_threads = team->t.t_threads;
      int nproc = this_thr->th.th_team_nproc;
      int i;
      switch (__kmp_forkjoin_frames_mode) {
      case 1:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        break;
      case 2:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
                               loc, nproc);
        break;
      case 3:
        if (__itt_metadata_add_ptr) {
          // Initialize with master's wait time
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Set arrive time to zero to be able to check it in
          // __kmp_invoke_task(); the same is done inside the loop below
          this_thr->th.th_bar_arrive_time = 0;
          for (i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                       cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
    }
#endif /* USE_ITT_BUILD */
  }
#if USE_ITT_BUILD
  else {
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  }
#endif /* USE_ITT_BUILD */

#if KMP_DEBUG
  if (KMP_MASTER_TID(tid)) {
    KA_TRACE(
        15,
        ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
         gtid, team_id, tid, nproc));
  }
#endif /* KMP_DEBUG */

  KMP_MB(); // Flush all pending memory write invalidates.
  KA_TRACE(10,
           ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));

  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
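
// The fork barrier is executed at the start of a parallel region and at the
// end of the join barrier: the master sets up the team and task team, the
// release pattern wakes the workers, and the workers then sync task teams,
// (optionally) pull ICVs, and apply affinity settings before running.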
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
  if (team)
    ANNOTATE_BARRIER_END(&team->t.t_bar);

  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                (team != NULL) ? team->t.t_id : -1, tid));

  // th_team pointer only valid for master thread here
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify state
    KMP_MB();

    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // 0 indicates setup current task team if nthreads > 1
      __kmp_task_team_setup(this_thr, team, 0);
    }

    /* The master thread may have changed its blocktime between the join
       barrier and the fork barrier. Copy the blocktime info to the thread,
       where __kmp_wait_template() can access it when the team struct is not
       guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }
  } // master

  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid);
    }
  }
#endif

  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  /* We can now assume that a valid team structure has been allocated by the
     master and propagated to all worker threads. The current thread, however,
     may not be part of the team, so we can't blindly assume that the team
     pointer is non-null. */
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
  /* Master thread's copy of the ICVs was set up on the implicit taskdata in
     __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
     implicit task has this data before this function is called. */
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
      // Copy the initial ICVs from the master's thread struct to the implicit
      // task for this tid.
      KA_TRACE(10,
               ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
#endif
#if KMP_AFFINITY_SUPPORTED
    // Call dynamic affinity settings
    if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(tid, team->t.t_nproc);
    }
#endif // KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_set_place(gtid);
    }
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  ANNOTATE_BARRIER_END(&team->t.t_bar);
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}
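
// Propagate the ICVs for a new team: with KMP_BARRIER_ICV_PULL the values are
// staged in the master's th_fixed_icvs for workers to copy in the fork
// barrier; with KMP_BARRIER_ICV_PUSH they travel with the fork-barrier
// release; otherwise they are copied linearly to every implicit task here.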
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

/* Master thread's copy of the ICVs was set up on the implicit taskdata in
   __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
   implicit task has this data before this function is called. */
#if KMP_BARRIER_ICV_PULL
  /* Copy ICVs into the master's thread struct (th_fixed_icvs), where all of
     the worker threads can access them and make their own copies after the
     barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be
  // done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
  // time.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}