#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "ompt-specific.h"
#include "tsan_annotations.h"

/* forward declarations */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
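// -----------------------------------------------------------------------------
// Tied-task stack support. The routines below are compiled only when
// BUILD_TIED_TASK_STACK is defined; they maintain the per-thread stack of
// suspended tied tasks (td_susp_tied_tasks) used for debugging tied-task
// scheduling.
// -----------------------------------------------------------------------------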
#ifdef BUILD_TIED_TASK_STACK

//  __kmp_trace_task_stack: print the tied tasks from the task stack in order.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
//  __kmp_free_task_stack: free the tied-task stack when a thread_data
//  structure is destroyed; the stack must be empty at that point.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack onwards
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // reset the stack to empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
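//  __kmp_push_task_stack: push a tied task onto the thread's tied-task stack,
//  growing the stack by another block when the current block fills up.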
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next != NULL) {
      // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // task stack built correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif /* BUILD_TIED_TASK_STACK */
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Lock the deque for the task push operation
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  // Recheck under the lock: the deque may have filled in the meantime
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
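//  __kmp_pop_current_task_from_thread: restore the thread's current task to
//  the parent task when a team ends.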
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
//  __kmp_push_current_task_to_thread: set up the current task in the called
//  thread for a new team.
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // the thread's current task is the parent of the new team's implicit tasks
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
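//  __kmp_task_start: bookkeeping when a task begins executing: mark the
//  parent task as suspended and install the new task as the thread's current
//  task.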
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark the currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as the current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = NULL;
  task->ompt_task_info.frame.enter_frame = NULL;
  task->ompt_task_info.ndeps = 0;
  task->ompt_task_info.deps = NULL;
}
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_others;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  /* let OMPT know that we're about to run this task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_complete;
  if (taskdata->td_flags.tiedness == TASK_UNTIED &&
      KMP_TEST_THEN_ADD32(&(taskdata->td_untied_count), 0) > 1)
    status = ompt_task_others;
  if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
      taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
    status = ompt_task_cancel;
  }

  /* let OMPT know that we're returning to the callee task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        &((resumed_task ? resumed_task
                        : (taskdata->ompt_task_info.scheduling_parent
                               ? taskdata->ompt_task_info.scheduling_parent
                               : taskdata->td_parent))
              ->ompt_task_info.task_data));
  }
}
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame == NULL) {
      current_task->ompt_task_info.frame.enter_frame =
          taskdata->ompt_task_info.frame.exit_frame = frame_address;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}
#if OMPT_SUPPORT
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// (deprecated; not generated by the compiler)
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);

  taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);
// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
#if OMP_45_ENABLED
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
#else
  kmp_int32 team_serial =
      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children =
      KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // Stop checking ancestors at implicit task instead of walking up ancestor
    // tree to avoid premature deallocation of ancestors.
    if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
      return;

    // Predecrement simulated by "- 1" calculation
    children = KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
// __kmp_task_finish: bookkeeping to do when a task finishes execution
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done; it may be continued by another thread later
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the
        // resumed task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  taskdata->td_flags.complete = 1; // mark the task as completed
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // Only need to keep track of counts if team parallel and tasking not
  // serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    // Predecrement simulated by "- 1" calculation
    children =
        KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) -
        1;
    KMP_DEBUG_ASSERT(children >= 0);
    if (taskdata->td_taskgroup)
      KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
  }
  // if we found proxy tasks there could exist a dependency chain
  // with the proxy task as origin
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
      (task_team && task_team->tt.tt_found_proxy_tasks)) {
    __kmp_release_deps(gtid, taskdata);
  }

  // td_flags.executing must be cleared only after __kmp_release_deps has been
  // called; otherwise a task executed directly from the release_deps code
  // would reset the flag to 1 again.
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
  taskdata->td_flags.executing = 0; // suspend the finishing task

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

#if OMP_40_ENABLED
  // If the task's destructor thunk flag has been set, invoke the destructor
  // thunk generated by the compiler.
  if (taskdata->td_flags.destructors_thunk) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }
#endif // OMP_40_ENABLED

  // bookkeeping for resuming task (note: tasking_ser implies task_serial)
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verified that input matches internal structures
  }

  // Free this task and then ancestor tasks if they have no children. Restore
  // th_current_task first, so an asynchronous inquiry into the runtime does
  // not see the freed task as the current task.
  thread->th.th_current_task = resumed_task;
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
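//  __kmpc_omp_task_complete_if0: report that a serialized (if(0)) task has
//  completed execution; the template below is shared by the OMPT and
//  non-OMPT entry points.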
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  // this routine will provide the task to resume
  __kmp_task_finish(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    __ompt_task_finish(task, NULL);
    omp_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = NULL;
  }
#endif
}

#if OMPT_SUPPORT
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_complete_if0: report that a task has completed execution
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
#endif
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// (deprecated; not generated by the compiler)
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_init_implicit_task: initialize the appropriate fields in the implicit
// task for a given thread of a team.
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

  task->td_depnode = NULL;
  task->td_last_tied = task;

  if (set_curr_task) { // only do this initialization the first time the thread
    // is created
    task->td_incomplete_child_tasks = 0;
    // Not used: implicit tasks are never deallocated
    task->td_allocated_child_tasks = 0;
    task->td_taskgroup = NULL; // An implicit task does not have a taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash)
    __kmp_dephash_free_entries(thread, task->td_dephash);
}

void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
// Round up a size to a power of two specified by val: used to insert padding
// between structures co-allocated using a single malloc() call
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
} // __kmp_round_up_to_val
// __kmp_task_alloc: allocate the taskdata and task data structures for a task
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }
  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // An untied task makes the TSC algorithm check the victim's entire deque;
    // otherwise checking the head of the deque is enough.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY) {
    flags->tiedness = TASK_UNTIED;
    flags->merged_if0 = 1;

    /* are we running in a sequential parallel or tskm_immediate_exec... we need
       tasking support enabled */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized;
         set up a task team and propagate it to the thread */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team, 1);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* tasking must be enabled now as the task might not be pushed */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only owner can allocate
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if (task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  }
#endif

  // Calculate the shared structure offset, including padding after the
  // kmp_task_t struct to align pointers in the shared struct
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

// Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid double allocation here by combining shareds with taskdata
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  taskdata->td_untied_count = 0;
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // avoid copying icvs for proxy tasks
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags.tiedness = flags->tiedness;
  taskdata->td_flags.final = flags->final;
  taskdata->td_flags.merged_if0 = flags->merged_if0;
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
  taskdata->td_flags.proxy = flags->proxy;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized, so implicit parallel region
  // tasks are not left until program termination to execute; it also helps
  // locality to execute immediately.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  taskdata->td_flags.native = flags->native;

  taskdata->td_incomplete_child_tasks = 0;
  taskdata->td_allocated_child_tasks = 1; // start at one because it counts the
  // current task and its children
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;

// Only need to keep track of child task counts if the team is parallel and
// tasking is not serialized, or if it is a proxy task
#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
  {
    KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
    // Only need to keep track of allocated child tasks for explicit tasks
    // since implicit ones are not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

  return task;
}
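//  __kmpc_omp_task_alloc: wrapper around __kmp_task_alloc called by the
//  compiler; it interprets the packed flags word as kmp_tasking_flags_t and
//  returns the newly allocated task.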
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  input_flags->native = FALSE;
// __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
#else
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                sizeof_kmp_task_t, sizeof_shareds, task_entry));
#endif

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_uint64 cur_time;

  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (taskdata->td_flags.proxy == TASK_PROXY &&
      taskdata->td_flags.complete == 1) {
    // This is a proxy task that was already completed but it needs to run
    // its bottom-half finish
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__kmp_forkjoin_frames_mode == 3) {
    // Get the current time stamp to measure task execution time
    cur_time = __itt_get_timestamp();
  }
#endif

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    ANNOTATE_HAPPENS_AFTER(task);
    __kmp_task_start(gtid, task, current_task);
  }

  ompt_thread_info_t oldInfo;
  kmp_info_t *thread;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? omp_state_work_serial
                                            : omp_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
  }

  // if cancellation has been enabled for this run, check whether this task's
  // taskgroup or parallel region has been cancelled
  if (__kmp_omp_cancellation) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *this_team = this_thr->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif
    }
  }

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // an untied task inherits the scheduling constraint of the encountering
    // task
    taskdata->td_last_tied = current_task->td_last_tied;
    KMP_DEBUG_ASSERT(taskdata->td_last_tied);
  }

#if KMP_STATS_ENABLED
  switch (KMP_GET_THREAD_STATE()) {
  case FORK_JOIN_BARRIER:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
    break;
  case PLAIN_BARRIER:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
    break;
  case TASKYIELD:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
    break;
  case TASKWAIT:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
    break;
  case TASKGROUP:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
    break;
  default:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
    break;
  }
#endif // KMP_STATS_ENABLED

  // OMPT task begin
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_start(task, current_task, gtid);

  // Invoke the task routine and pass in relevant data.
  // Thunks generated by gcc take a different argument list.
#ifdef KMP_GOMP_COMPAT
  if (taskdata->td_flags.native) {
    ((void (*)(void *))(*(task->routine)))(task->shareds);
  } else
#endif /* KMP_GOMP_COMPAT */
  {
    (*(task->routine))(gtid, task);
  }
  KMP_POP_PARTITIONED_TIMER();

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_finish(task, current_task);

  if (UNLIKELY(ompt_enabled.enabled)) {
    thread->th.ompt_thread_info = oldInfo;
    taskdata->ompt_task_info.frame.exit_frame = NULL;
  }

  // Proxy tasks are not handled by the runtime
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
    __kmp_task_finish(gtid, task, current_task);
  }

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance: correct the arrive time after the task finished
  if (__kmp_forkjoin_frames_mode == 3) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    if (this_thr->th.th_bar_arrive_time) {
      this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
  }
#endif

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  return;
}
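//  __kmpc_omp_task_parts: schedule a thread-switchable task for execution;
//  the task is queued if possible and executed immediately otherwise.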
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          parent ? &(parent->ompt_task_info.task_data) : &task_data,
          parent ? &(parent->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it.  */

  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(10,
           ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
            "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
            gtid, loc_ref, new_taskdata));

  ANNOTATE_HAPPENS_BEFORE(new_task);
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = NULL;
  }
  return TASK_CURRENT_NOT_QUEUED;
}
// __kmp_omp_task: schedule a non-thread-switchable task for execution
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  /* Should we execute the new task or queue it? For now, let's just always try
     to queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_45_ENABLED
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
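//  __kmpc_omp_task: compiler entry point for task creation; queues the task
//  via __kmp_omp_task and reports task creation to OMPT when a tool is
//  present.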
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame)
      parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          parent ? &(parent->ompt_task_info.task_data) : &task_data,
          parent ? &(parent->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          OMPT_LOAD_RETURN_ADDRESS(gtid));
    }
  }
#endif

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = NULL;
  }
#endif
  return res;
}
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }

      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    // Debugger: the taskwait is active; store location and the encountering
    // thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    if (must_wait) {
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &taskdata->td_incomplete_child_tasks),
          0U);
      while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);

    // Debugger: the taskwait is completed; location remains but the thread id
    // is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = NULL;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    ANNOTATE_HAPPENS_AFTER(taskdata);
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_taskwait: wait until all tasks generated by the current task are
// complete.
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task for the calling thread if
// other tasks are available.
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
    // Debugger: the taskwait is active; store location and the encountering
    // thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);

    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
          __kmp_execute_tasks_32(
              thread, gtid, NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
        }
      }
    }
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);

    // Debugger: the taskwait is completed; location remains but the thread id
    // is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
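// -----------------------------------------------------------------------------
// Task reduction support. __kmpc_task_reduction_init allocates per-thread
// copies of each reduction item for the current taskgroup,
// __kmpc_task_reduction_get_th_data returns the calling thread's copy of an
// item, and __kmp_task_reduction_fini combines the per-thread copies back into
// the shared items when the taskgroup ends.
// -----------------------------------------------------------------------------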
// flags for a task reduction item
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: allocate private copies lazily
  unsigned reserved31 : 31;
} kmp_task_red_flags_t;

// internal structure holding reduction-item data and per-thread copies
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_priv; // thread-specific data
  void *reduce_pend; // end of private data for comparison op
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // additional info from the compiler
} kmp_task_red_data_t;

// structure passed by the compiler to __kmpc_task_reduction_init
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // additional info from the compiler
} kmp_task_red_input_t;
// __kmpc_task_reduction_init: initialize task reduction for the current
// taskgroup. num is the number of reduction items, data is an array of
// kmp_task_red_input_t. Returns a pointer (to the taskgroup) that is later
// passed to __kmpc_task_reduction_get_th_data.
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_int32 nth = thread->th.th_team_nproc;
  kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
  kmp_task_red_data_t *arr;

  // check input data just in case
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_task_red_data_t));
  for (int i = 0; i < num; ++i) {
    void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
    size_t size = input[i].reduce_size - 1;
    // round the size up to a cache line per thread-specific item
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = input[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].reduce_init = input[i].reduce_init;
    arr[i].reduce_fini = input[i].reduce_fini;
    arr[i].reduce_comb = input[i].reduce_comb;
    arr[i].flags = input[i].flags;
    if (!input[i].flags.lazy_priv) {
      // allocate a cache-line aligned block and fill it with zeros
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (f_init != NULL) {
        // initialize thread-specific items
        for (int j = 0; j < nth; ++j) {
          f_init((char *)(arr[i].reduce_priv) + j * size);
        }
      }
    } else {
      // only allocate space for pointers now; objects are lazily
      // allocated/initialized once requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
// __kmpc_task_reduction_get_th_data: return the calling thread's copy of a
// reduction item. tskgrp is the taskgroup returned by
// __kmpc_task_reduction_init (NULL means the current taskgroup); data is the
// shared item or an address inside some thread's private copy.
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;

  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
  kmp_int32 num = tg->reduce_num_data;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if we get some thread-specific location as parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate the thread-specific object lazily
          void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (f_init != NULL) {
            f_init(p_priv[tid]);
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
    arr = (kmp_task_red_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
// Finalize task reduction: combine per-thread copies into the shared items.
// Called from __kmpc_end_taskgroup().
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
// __kmpc_taskgroup: start a new (nested) taskgroup for the current task.
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  tg_new->count = 0;
  tg_new->cancel_request = cancel_noreq;
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
// __kmpc_end_taskgroup: wait until all tasks generated within the taskgroup
// have completed, then finalize any task reductions and pop the taskgroup.
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // mark task as waiting not on a barrier
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

#if OMP_45_ENABLED
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
    if (!taskdata->td_flags.team_serial)
#endif
    {
      kmp_flag_32 flag(RCAST(kmp_uint32 *, &taskgroup->count), 0U);
      while (TCR_4(taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    if (itt_sync_obj != NULL)
      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);

  if (taskgroup->reduce_data != NULL) // need to reduce?
    __kmp_task_reduction_fini(thread, taskgroup);
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
  ANNOTATE_HAPPENS_AFTER(taskdata);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // Caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // Wrap index.
  taskdata = thread_data->td.td_deque[tail];

  if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
    // we need to check if the candidate obeys the task scheduling constraint
    // (TSC): only a descendant of all deferred tied tasks can be scheduled;
    // checking the last one is enough, as it is the descendant of all others
    kmp_taskdata_t *current = thread->th.th_current_task->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the last tied task is not suspended on a barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = taskdata->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current) {
        // The TSC does not allow removing this candidate from the own deque
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(10,
                 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
                  "ntasks=%d head=%u tail=%u\n",
                  gtid, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head,
                  thread_data->td.td_deque_tail));
        return NULL;
      }
    }
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
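//  __kmp_steal_task: steal a task from the head of the victim thread's deque;
//  when the task scheduling constraint forbids the head task, the victim's
//  whole deque may be scanned for a legal candidate.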
2206 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
2207 kmp_task_team_t *task_team,
2208 volatile kmp_int32 *unfinished_threads,
2209 int *thread_finished,
2210 kmp_int32 is_constrained) {
2212 kmp_taskdata_t *taskdata;
2213 kmp_taskdata_t *current;
2214 kmp_thread_data_t *victim_td, *threads_data;
2215 kmp_int32 level, target;
2216 kmp_int32 victim_tid;
2218 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2220 threads_data = task_team->tt.tt_threads_data;
2221 KMP_DEBUG_ASSERT(threads_data != NULL);
2223 victim_tid = victim_thr->th.th_info.ds.ds_tid;
2224 victim_td = &threads_data[victim_tid];
2226 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: " 2227 "task_team=%p ntasks=%d head=%u tail=%u\n",
2228 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2229 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2230 victim_td->td.td_deque_tail));
2232 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
2233 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " 2234 "task_team=%p ntasks=%d head=%u tail=%u\n",
2235 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
2236 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2237 victim_td->td.td_deque_tail));
2241 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2243 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
2246 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2247 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " 2248 "task_team=%p ntasks=%d head=%u tail=%u\n",
2249 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2250 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2254 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2256 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2257 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
2261 current = __kmp_threads[gtid]->th.th_current_task->td_last_tied;
2262 KMP_DEBUG_ASSERT(current != NULL);
2264 if (current->td_flags.tasktype == TASK_EXPLICIT ||
2265 current->td_taskwait_thread > 0) {
2266 level = current->td_level;
2267 kmp_taskdata_t *parent = taskdata->td_parent;
2268 while (parent != current && parent->td_level > level) {
2269 parent = parent->td_parent;
2271 KMP_DEBUG_ASSERT(parent != NULL);
2273 if (parent != current) {
2274 if (!task_team->tt.tt_untied_task_encountered) {
2276 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2278 (
"__kmp_steal_task(exit #3): T#%d could not steal from " 2279 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
2280 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
2281 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
  if (taskdata != NULL) {
    // Bump head pointer and wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    int i;
    // walk through the victim's deque trying to steal any task
    target = victim_td->td.td_deque_head;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        // check if the candidate obeys the TSC
        kmp_taskdata_t *parent = taskdata->td_parent;
        // check generation up to the level of the current task
        while (parent != current && parent->td_level > level) {
          parent = parent->td_parent;
          KMP_DEBUG_ASSERT(parent != NULL);
        }
        if (parent != current) {
          // The TSC does not allow stealing this candidate
          taskdata = NULL;
          continue;
        }
      }
      // found a candidate (tied task obeying the TSC, or untied task)
      break;
    }
    if (taskdata == NULL) {
      // No appropriate candidate to steal found
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                    ntasks, victim_td->td.td_deque_head,
                    victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    // shift the remaining tasks in the deque left by one
    for (i = i + 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(victim_td->td.td_deque_tail ==
                     ((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim as finished before releasing the lock, or other
    // threads might be prematurely released from the barrier.
    kmp_int32 count;
    count = KMP_TEST_THEN_INC32(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
2357 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
  KA_TRACE(10,
           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
            "task_team=%p ntasks=%d head=%u tail=%u\n",
            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
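// Illustration of the circular-deque index arithmetic used above (assuming
// TASK_DEQUE_MASK(td) is td_deque_size - 1 with a power-of-two size, which is
// how the mask is used throughout this file): with size 8 and head == 7,
//   (head + 1) & 7 == 0
// so bumping the head past the last slot wraps back to slot 0. Stealing takes
// the task at the head (the oldest entry), while the owning thread pushes and
// pops at the tail, keeping owner/thief contention on opposite ends of the
// deque.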
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
2384 kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
2387 kmp_info_t *other_thread;
2388 kmp_taskdata_t *current_task = thread->th.th_current_task;
2389 volatile kmp_int32 *unfinished_threads;
2390 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
2391 tid = thread->th.th_info.ds.ds_tid;
2393 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2394 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
  if (task_team == NULL)
    return FALSE;
  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));
2403 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2404 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2405 KMP_DEBUG_ASSERT(threads_data != NULL);
2407 nthreads = task_team->tt.tt_nproc;
2408 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2410 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
2412 KMP_DEBUG_ASSERT(nthreads > 1);
2414 KMP_DEBUG_ASSERT(TCR_4(*unfinished_threads) >= 0);
2420 if (use_own_tasks) {
2421 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2423 if ((task == NULL) && (nthreads > 1)) {
2427 if (victim_tid == -2) {
2428 victim_tid = threads_data[tid].td.td_deque_last_stolen;
2431 other_thread = threads_data[victim_tid].td.td_thr;
2433 if (victim_tid != -1) {
2435 }
else if (!new_victim) {
2441 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
          if (victim_tid >= tid) {
            ++victim_tid; // Adjusts random distribution to exclude self
          }
          other_thread = threads_data[victim_tid].td.td_thr;
        if ((__kmp_tasking_mode == tskm_task_teams) &&
            (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
            (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) != NULL)) {
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
                                    other_thread->th.th_sleep_loc);
2475 task = __kmp_steal_task(other_thread, gtid, task_team,
2476 unfinished_threads, thread_finished,
2480 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
2481 threads_data[tid].td.td_deque_last_stolen = victim_tid;
2488 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at the fork barrier where we
          // could not get the object reliably
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(15,
                 ("__kmp_execute_tasks_template: T#%d spin condition "
                  "satisfied\n", gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    // Task source exhausted. If in the final spin loop of the barrier, check
    // whether the termination condition is satisfied.
    if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0) {
      // Decrement the #unfinished threads if that has not already been done.
      if (!*thread_finished) {
        kmp_int32 count;
        count = KMP_TEST_THEN_DEC32(unfinished_threads) - 1;
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition "
                      "satisfied\n", gtid));
        return TRUE;
      }
    }
    // Master has recognized that there are no more tasks; bail out.
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }
  }
  KA_TRACE(15,
           ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
  return FALSE;
}
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
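// The three wrappers above differ only in the flag type they pass to the
// template; each one instantiates __kmp_execute_tasks_template for the
// corresponding barrier flag (32-bit, 64-bit, on-core), so the scheduling
// logic itself lives in a single place.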
2629 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
2630 kmp_info_t *this_thr) {
2631 kmp_thread_data_t *threads_data;
2632 int nthreads, i, is_init_thread;
  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
2637 KMP_DEBUG_ASSERT(task_team != NULL);
2638 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
2640 nthreads = task_team->tt.tt_nproc;
2641 KMP_DEBUG_ASSERT(nthreads > 0);
2642 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
2645 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }
2655 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2656 KMP_DEBUG_ASSERT(threads_data != NULL);
2658 if ((__kmp_tasking_mode == tskm_task_teams) &&
2659 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
2663 for (i = 0; i < nthreads; i++) {
2664 volatile void *sleep_loc;
2665 kmp_info_t *thread = threads_data[i].td.td_thr;
      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue; // do not wake up ourselves
      }
      // Since we haven't locked the thread's suspend mutex lock at this
      // point, there is a small window where a thread might be putting
      // itself to sleep but hasn't set the th_sleep_loc field yet.
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list for task_team data structures
2731 kmp_bootstrap_lock_t __kmp_task_team_lock =
2732 KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
2739 static void __kmp_alloc_task_deque(kmp_info_t *thread,
2740 kmp_thread_data_t *thread_data) {
2741 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
2742 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
2745 thread_data->td.td_deque_last_stolen = -1;
2747 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
2748 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
2749 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque; cannot use __kmp_thread_calloc()
  // because threads are not around for kmp_reap_task_team().
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
2767 static void __kmp_realloc_task_deque(kmp_info_t *thread,
2768 kmp_thread_data_t *thread_data) {
2769 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
2770 kmp_int32 new_size = 2 * size;
  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];
2784 __kmp_free(thread_data->td.td_deque);
2786 thread_data->td.td_deque_head = 0;
2787 thread_data->td.td_deque_tail = size;
2788 thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
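// Worked example of the copy loop above (sizes illustrative, not
// prescriptive): with size == 4, head == 3 and a full deque, the loop reads
// slots 3, 0, 1, 2 (wrapping with TASK_DEQUE_MASK) and writes them to slots
// 0..3 of the doubled deque, after which the head is reset to 0 and the tail
// to the old size. Doubling keeps the size a power of two, so the mask
// (size - 1) stays valid for the wrap-around arithmetic.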
2795 static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
2796 if (thread_data->td.td_deque != NULL) {
2797 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2798 TCW_4(thread_data->td.td_deque_ntasks, 0);
2799 __kmp_free(thread_data->td.td_deque);
2800 thread_data->td.td_deque = NULL;
2801 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}

static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
2820 kmp_task_team_t *task_team) {
2821 kmp_thread_data_t **threads_data_p;
2822 kmp_int32 nthreads, maxthreads;
2823 int is_init_thread = FALSE;
  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }
2830 threads_data_p = &task_team->tt.tt_threads_data;
2831 nthreads = task_team->tt.tt_nproc;
2832 maxthreads = task_team->tt.tt_max_threads;
2837 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
2839 if (!TCR_4(task_team->tt.tt_found_tasks)) {
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
2845 if (maxthreads < nthreads) {
2847 if (*threads_data_p != NULL) {
2848 kmp_thread_data_t *old_data = *threads_data_p;
2849 kmp_thread_data_t *new_data = NULL;
      KE_TRACE(10,
               ("__kmp_realloc_task_threads_data: T#%d reallocating "
                "threads data for task_team %p, new_size = %d, old_size = %d\n",
                __kmp_gtid_from_thread(thread), task_team, nthreads,
                maxthreads));
      // Reallocate threads_data to have more elements than the current array.
      // Cannot use __kmp_thread_realloc() because threads are not around for
      // kmp_reap_task_team().  All new array entries are initialized to zero
      // by __kmp_allocate().
      new_data = (kmp_thread_data_t *)__kmp_allocate(
          nthreads * sizeof(kmp_thread_data_t));
      // copy old data to new data
      KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                   (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
#ifdef BUILD_TIED_TASK_STACK
      // GEH: Figure out if this is the right thing to do
      for (i = maxthreads; i < nthreads; i++) {
        kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
        __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
      }
#endif // BUILD_TIED_TASK_STACK
      // Install the new data and free the old data
      (*threads_data_p) = new_data;
      __kmp_free(old_data);
    } else {
      KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                    "threads data for task_team %p, size = %d\n",
                    __kmp_gtid_from_thread(thread), task_team, nthreads));
      ANNOTATE_IGNORE_WRITES_BEGIN();
      *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
          nthreads * sizeof(kmp_thread_data_t));
      ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
      // GEH: Figure out if this is the right thing to do
      for (i = 0; i < nthreads; i++) {
        kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
        __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
      }
#endif // BUILD_TIED_TASK_STACK
    }
    task_team->tt.tt_max_threads = nthreads;
  } else {
    // If the array already has enough elements, go ahead and use it
    KMP_DEBUG_ASSERT(*threads_data_p != NULL);
  }
2902 for (i = 0; i < nthreads; i++) {
2903 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
2904 thread_data->td.td_thr = team->t.t_threads[i];
      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last-stolen field survives across regions; the thread count may
        // have changed, so reset it.
        thread_data->td.td_deque_last_stolen = -1;
      }
    }
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
2925 static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
2926 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
2927 if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;
  int nthreads;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));
2950 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2952 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
2953 if (__kmp_free_task_teams != NULL) {
2954 task_team = __kmp_free_task_teams;
2955 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
2961 if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team if one is not available. Cannot use
    // __kmp_thread_malloc() because threads are not around for
    // kmp_reap_task_team().
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
    // __kmp_allocate zeroes the returned memory, so the remaining fields of
    // the task_team struct start out cleared.
  }
2976 TCW_4(task_team->tt.tt_found_tasks, FALSE);
2978 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
2980 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
2982 TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
2983 TCW_4(task_team->tt.tt_active, TRUE);
  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                task_team->tt.tt_unfinished_threads));
  return task_team;
}
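// Design note on the allocation path above: task_team structures are recycled
// through the __kmp_free_task_teams list (guarded by __kmp_task_team_lock)
// instead of being freed immediately, and are only truly released later in
// __kmp_reap_task_teams(), so a task team can be handed out again cheaply for
// the next parallel region.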
2995 void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3000 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3002 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3003 task_team->tt.tt_next = __kmp_free_task_teams;
3004 TCW_PTR(__kmp_free_task_teams, task_team);
  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;
3017 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3019 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3020 while ((task_team = __kmp_free_task_teams) != NULL) {
3021 __kmp_free_task_teams = task_team->tt.tt_next;
3022 task_team->tt.tt_next = NULL;
3025 if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;

  KMP_INIT_YIELD(spins);
3050 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3051 thread = thread->th.th_next_pool) {
3055 if (TCR_PTR(thread->th.th_task_team) == NULL) {
      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                    __kmp_gtid_from_thread(thread)));
      continue;
    }
3062 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3063 thread->th.th_task_team = NULL;
      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));
3074 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3075 volatile void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
3093 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
3094 KMP_YIELD_SPIN(spins);
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
                           int always) {
3101 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3107 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3108 (always || team->t.t_nproc > 1)) {
3109 team->t.t_task_team[this_thr->th.th_task_state] =
3110 __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }
3126 if (team->t.t_nproc > 1) {
3127 int other_team = 1 - this_thr->th.th_task_state;
3128 if (team->t.t_task_team[other_team] == NULL) {
3129 team->t.t_task_team[other_team] =
3130 __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // Leave the old task team struct in place for the upcoming region
3138 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3139 if (!task_team->tt.tt_active ||
3140 team->t.t_nproc != task_team->tt.tt_nproc) {
3141 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3142 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3144 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3146 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
3147 TCW_4(task_team->tt.tt_active, TRUE);
      }
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
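// The two-element t_task_team[] array above implements a double-buffering
// scheme: th_task_state selects the "current" task team (parity 0 or 1) and
// flips at each barrier (see __kmp_task_team_sync below), so the master can
// set up the task team for the next region while workers may still be
// draining tasks from the previous one.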
3163 void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
3164 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3168 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
3171 TCW_PTR(this_thr->th.th_task_team,
3172 team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
3190 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
3192 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3193 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
3195 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may have dropped through to the release phase, but
      // could still be executing tasks. Wait here for tasks to complete.
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    KA_TRACE(20,
             ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
              "setting active to false, setting local and team's pointer to "
              "NULL\n",
              __kmp_gtid_from_thread(this_thr), task_team));
3216 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
3217 task_team->tt.tt_found_proxy_tasks == TRUE);
3218 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3220 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
3222 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
3223 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
3236 volatile kmp_uint32 *spin = RCAST(
3237 volatile kmp_uint32 *,
3238 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
3243 KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
3245 kmp_flag_32 spin_flag(spin, 0U);
3246 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
3247 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
3250 KMP_FSYNC_SPIN_PREPARE(CCAST(kmp_uint32 *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(kmp_uint32 *, spin));
}
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
3274 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3275 kmp_task_team_t *task_team = taskdata->td_task_team;
  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));
  KMP_DEBUG_ASSERT(task_team != NULL);
  bool result = false;
3284 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
  if (thread_data->td.td_deque == NULL) {
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));
    // if this deque is bigger than the pass ratio, give another thread a
    // chance to take the task instead
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    __kmp_realloc_task_deque(thread, thread_data);
  } else {
3312 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));
      // if this deque is bigger than the pass ratio, give another thread a
      // chance to take the task instead
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
3331 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
3333 thread_data->td.td_deque_tail =
3334 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
3335 TCW_4(thread_data->td.td_deque_ntasks,
3336 TCR_4(thread_data->td.td_deque_ntasks) + 1);
  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));
release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  return result;
}
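// Illustration of the pass-ratio back-off used above (sizes illustrative):
// if INITIAL_TASK_DEQUE_SIZE were 256, a deque already grown to 512 entries
// has a ratio of 2, so for pass values of 1 or 2 the give-up branch is taken
// and the task is offered to another thread; only when the caller's pass
// value exceeds the ratio is this thread's deque doubled again. The caller
// (see __kmpc_proxy_task_completed_ooo) doubles pass each time it has tried
// every thread in the team, so reallocation becomes progressively easier as
// the search keeps failing.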
3364 static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
3365 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
3366 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
3367 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
3368 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
3370 taskdata->td_flags.complete = 1;
3372 if (taskdata->td_taskgroup)
3373 KMP_TEST_THEN_DEC32(&taskdata->td_taskgroup->count);
  TCI_4(taskdata->td_incomplete_child_tasks);
}
3380 static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;
  // Predecrement simulated by the "- 1" calculation
  children =
      KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);
  // Remove the imaginary child
  TCD_4(taskdata->td_incomplete_child_tasks);
}
3392 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
3393 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
3394 kmp_info_t *thread = __kmp_threads[gtid];
3396 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1); // top half must have run

  // Spin until the second top half has removed the imaginary child
  while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
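// Proxy-task completion is split into the three helpers above so that it can
// be driven either from inside the team (__kmpc_proxy_task_completed) or from
// an unrelated thread (__kmpc_proxy_task_completed_ooo): the first top half
// marks the task complete and parks an "imaginary" child reference, the
// second top half updates the parent's child counts and drops that reference,
// and the bottom half, which is expected to run on a thread of the owning
// team, releases dependences and frees the task once the reference is gone.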
3417 void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
3418 KMP_DEBUG_ASSERT(ptask != NULL);
3419 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));
3426 __kmp_first_top_half_finish_proxy(taskdata);
3427 __kmp_second_top_half_finish_proxy(taskdata);
3428 __kmp_bottom_half_finish_proxy(gtid, ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
           gtid, taskdata));
}
3442 void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
3443 KMP_DEBUG_ASSERT(ptask != NULL);
3444 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));
3453 __kmp_first_top_half_finish_proxy(taskdata);
3457 kmp_team_t *team = taskdata->td_team;
3458 kmp_int32 nthreads = team->t.t_nproc;
  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;
  kmp_info_t *thread;
  do {
    // Linearly try to find a thread that accepts the task
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;
    // we did a full pass over all the threads
    if (k == start_k)
      pass = pass << 1;
  } while (!__kmp_give_task(thread, k, ptask, pass));
3478 __kmp_second_top_half_finish_proxy(taskdata);
  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
3502 taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be a proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
3506 task_size = taskdata_src->td_size_alloc;
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  KMP_MEMCPY(taskdata, taskdata_src, task_size);
3518 task = KMP_TASKDATA_TO_TASK(taskdata);
3521 taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need to set up the shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_taskgroup =
      parent_task
          ->td_taskgroup; // task inherits the taskgroup from the parent task
3536 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
3537 KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
3538 if (parent_task->td_taskgroup)
3539 KMP_TEST_THEN_INC32(&parent_task->td_taskgroup->count);
3542 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}
3560 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
// class to encapsulate manipulating loop bounds in a taskloop task.
// this abstracts the encoding of the loop bounds from the actual task
// execution routine
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the lower bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + lower_offset);
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
        retval = (kmp_int64)*lb;
      } else {
        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
        retval = (kmp_int64)*lb;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + lower_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  kmp_uint64 get_ub() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the upper bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + upper_offset);
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      } else {
        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + upper_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  void set_lb(kmp_uint64 lb) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the lower bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + lower_offset) = lb;
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
        *lower = (kmp_uint32)lb;
      } else {
        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
        *lower = (kmp_uint64)lb;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
#endif // defined(KMP_GOMP_COMPAT)
  }
  void set_ub(kmp_uint64 ub) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the upper bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + upper_offset) = ub;
    } else {
      // GOMP task has to take sizeof(long) into account
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
        *upper = (kmp_uint32)ub;
      } else {
        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
        *upper = (kmp_uint64)ub;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
#endif // defined(KMP_GOMP_COMPAT)
  }
};

// __kmp_taskloop_linear: start the tasks of the taskloop linearly
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_uint64 tc, void *task_dup) {
3688 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
3689 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3691 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
3692 kmp_uint64 lower = task_bounds.get_lb();
3693 kmp_uint64 upper = task_bounds.get_ub();
3695 kmp_info_t *thread = __kmp_threads[gtid];
3696 kmp_taskdata_t *current_task = thread->th.th_current_task;
3697 kmp_task_t *next_task;
3698 kmp_int32 lastpriv = 0;
3700 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3701 KMP_DEBUG_ASSERT(num_tasks > extras);
3702 KMP_DEBUG_ASSERT(num_tasks > 0);
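  // Worked example of the invariant asserted above (numbers illustrative):
  // for tc == 10 iterations split into num_tasks == 3 tasks, grainsize is
  // 10 / 3 == 3 and extras is 10 % 3 == 1, so tc == 3 * 3 + 1 holds and the
  // chunk sizes handed out below are 4, 3 and 3 (the first `extras` tasks get
  // one extra iteration each).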
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
                task_dup));
3709 for (i = 0; i < num_tasks; ++i) {
3710 kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get a bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // schedule the last task, set the lastprivate flag if needed
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }
3734 next_task = __kmp_task_dup_alloc(thread, task);
3735 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
3736 kmp_taskloop_bounds_t next_task_bounds =
3737 kmp_taskloop_bounds_t(next_task, task_bounds);
3740 next_task_bounds.set_lb(lower);
3741 if (next_taskdata->td_flags.native) {
3742 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
3744 next_task_bounds.set_ub(upper);
3746 if (ptask_dup != NULL)
3747 ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
    __kmp_omp_task(gtid, next_task, true); // schedule new task
    lower = upper + st; // adjust the lower bound for the next iteration
  }
  // free the pattern task and exit
  __kmp_task_start(gtid, task, current_task);
  __kmp_task_finish(gtid, task, current_task);
}
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb;
  kmp_uint64 *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob;
  kmp_uint64 num_tasks;
  kmp_uint64 grainsize;
  kmp_uint64 extras;
  kmp_uint64 tc;
  kmp_uint64 num_t_min;
} __taskloop_params_t;
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
                          void *);

// Execute part of the taskloop submitted as a task.
int __kmp_taskloop_task(int gtid, void *ptask) {
3786 __taskloop_params_t *p =
3787 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
3788 kmp_task_t *task = p->task;
3789 kmp_uint64 *lb = p->lb;
3790 kmp_uint64 *ub = p->ub;
3791 void *task_dup = p->task_dup;
3793 kmp_int64 st = p->st;
3794 kmp_uint64 ub_glob = p->ub_glob;
3795 kmp_uint64 num_tasks = p->num_tasks;
3796 kmp_uint64 grainsize = p->grainsize;
3797 kmp_uint64 extras = p->extras;
3798 kmp_uint64 tc = p->tc;
3799 kmp_uint64 num_t_min = p->num_t_min;
3801 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3802 KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
3808 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
3809 if (num_tasks > num_t_min)
3810 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
3811 grainsize, extras, tc, num_t_min, task_dup);
3813 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
3814 grainsize, extras, tc, task_dup);
  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
// Recursively split the taskloop: half of the remaining range is dispatched
// as a separate task (see __kmp_taskloop_task) and the other half is handled
// here, until the task count drops to num_t_min.
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_uint64 tc, kmp_uint64 num_t_min,
                          void *task_dup) {
3842 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3843 KMP_DEBUG_ASSERT(task != NULL);
3844 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
3850 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3851 kmp_uint64 lower = *lb;
3852 kmp_uint64 upper = *ub;
3853 kmp_info_t *thread = __kmp_threads[gtid];
3855 kmp_task_t *next_task;
3856 kmp_int32 lastpriv = 0;
  size_t lower_offset = (char *)lb - (char *)task;
  size_t upper_offset = (char *)ub - (char *)task;
3862 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3863 KMP_DEBUG_ASSERT(num_tasks > extras);
3864 KMP_DEBUG_ASSERT(num_tasks > 0);
3867 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
3868 kmp_uint64 gr_size0 = grainsize;
3869 kmp_uint64 n_tsk0 = num_tasks >> 1;
3870 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
  if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0;
    ext1 = extras - n_tsk0;
    tc0 = gr_size0 * n_tsk0;
    tc1 = tc - tc0;
  } else { // n_tsk0 > extras
    ext1 = 0;
    ext0 = extras;
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st; // lower bound for the 2nd half
3887 next_task = __kmp_task_dup_alloc(thread, task);
  // adjust the lower bound (the upper bound is unchanged) for the 2nd half
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);
  // create an auxiliary task for the 2nd half of the range
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
  __kmp_omp_task(gtid, new_task, true); // schedule new task
3913 if (n_tsk0 > num_t_min)
3914 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
3915 ext0, tc0, num_t_min, task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, tc0, task_dup);
  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
}
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
3942 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
3943 KMP_DEBUG_ASSERT(task != NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
3968 kmp_uint64 upper = task_bounds.get_ub();
3969 kmp_uint64 ub_glob = upper;
3970 kmp_uint64 num_tasks = 0, extras = 0;
3971 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
3972 kmp_info_t *thread = __kmp_threads[gtid];
3973 kmp_taskdata_t *current_task = thread->th.th_current_task;
  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, task_dup));
  // compute the trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for a zero-trip loop
    __kmp_task_finish(gtid, task, current_task);
    return;
  }
  if (num_tasks_min == 0)
    // TODO: can we choose a better default heuristic?
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, choose the default
    grainsize = thread->th.th_team_nproc * 10;
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big grainsize requested, adjust values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      // adjust grainsize for balanced distribution of iterations
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
4031 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
4032 KMP_DEBUG_ASSERT(num_tasks > extras);
4033 KMP_DEBUG_ASSERT(num_tasks > 0);
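  // Illustration of the sched encodings handled above (values as used by this
  // entry point): sched == 1 means the grainsize argument is a grain size,
  // sched == 2 means it is a task count, and sched == 0 lets the runtime pick
  // a default. For example, with tc == 1000 iterations and sched == 2,
  // grainsize == 8 requests 8 tasks, giving grainsize 1000 / 8 == 125 and
  // extras 1000 % 8 == 0; with sched == 1 and grainsize == 300, num_tasks is
  // 1000 / 300 == 3 and the grain is rebalanced to 1000 / 3 == 333 with
  // extras == 1.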
  if (if_val == 0) { // if(0) specified, mark the task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // a serial task cannot be untied
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, task_dup);
4049 }
else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
4056 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
4057 grainsize, extras, tc, num_tasks_min, task_dup);
  } else {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, task_dup);
  }

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}