LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (it can cause
64  // library shut down).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80 
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
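/* A minimal sketch of the kind of code a compiler front end typically emits
   for "#pragma omp parallel num_threads(4)". The ident_t initializer and the
   names loc_example, outlined_parallel and run_parallel_region are
   assumptions for illustration only; later sketches in this listing reuse
   them.

     #include "kmp.h"
     #include <stdio.h>

     static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0,
                                   ";example.c;run_parallel_region;10;1;;"};

     // microtask signature: (global_tid, bound_tid, shared args...)
     static void outlined_parallel(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
       printf("hello from T#%d, n = %d\n", *gtid, *n);
     }

     void run_parallel_region(int n) {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
       __kmpc_push_num_threads(&loc_example, gtid, 4); // num_threads(4) clause
       __kmpc_fork_call(&loc_example, 1, // argc == 1 shared argument
                        (kmpc_micro)outlined_parallel, &n);
     }
*/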
332 
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345  kmp_int32 num_teams, kmp_int32 num_threads) {
346  KA_TRACE(20,
347  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348  global_tid, num_teams, num_threads));
349  __kmp_assert_valid_gtid(global_tid);
350  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352 
369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371  kmp_int32 num_threads) {
372  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373  " num_teams_ub=%d num_threads=%d\n",
374  global_tid, num_teams_lb, num_teams_ub, num_threads));
375  __kmp_assert_valid_gtid(global_tid);
376  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377  num_threads);
378 }
379 
390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391  ...) {
392  int gtid = __kmp_entry_gtid();
393  kmp_info_t *this_thr = __kmp_threads[gtid];
394  va_list ap;
395  va_start(ap, microtask);
396 
397 #if KMP_STATS_ENABLED
398  KMP_COUNT_BLOCK(OMP_TEAMS);
399  stats_state_e previous_state = KMP_GET_THREAD_STATE();
400  if (previous_state == stats_state_e::SERIAL_REGION) {
401  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402  } else {
403  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404  }
405 #endif
406 
407  // remember teams entry point and nesting level
408  this_thr->th.th_teams_microtask = microtask;
409  this_thr->th.th_teams_level =
410  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411 
412 #if OMPT_SUPPORT
413  kmp_team_t *parent_team = this_thr->th.th_team;
414  int tid = __kmp_tid_from_gtid(gtid);
415  if (ompt_enabled.enabled) {
416  parent_team->t.t_implicit_task_taskdata[tid]
417  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418  }
419  OMPT_STORE_RETURN_ADDRESS(gtid);
420 #endif
421 
422  // check if __kmpc_push_num_teams was called; if not, set the default
423  // number of teams
424  if (this_thr->th.th_teams_size.nteams == 0) {
425  __kmp_push_num_teams(loc, gtid, 0, 0);
426  }
427  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430 
431  __kmp_fork_call(
432  loc, gtid, fork_context_intel, argc,
433  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435  __kmp_join_call(loc, gtid
436 #if OMPT_SUPPORT
437  ,
438  fork_context_intel
439 #endif
440  );
441 
442  // Pop current CG root off list
443  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445  this_thr->th.th_cg_roots = tmp->up;
446  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447  " to node %p. cg_nthreads was %d\n",
448  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450  int i = tmp->cg_nthreads--;
451  if (i == 1) { // check if we are the last thread in the CG (not always the case)
452  __kmp_free(tmp);
453  }
454  // Restore current task's thread_limit from CG root
455  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456  this_thr->th.th_current_task->td_icvs.thread_limit =
457  this_thr->th.th_cg_roots->cg_thread_limit;
458 
459  this_thr->th.th_teams_microtask = NULL;
460  this_thr->th.th_teams_level = 0;
461  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462  va_end(ap);
463 #if KMP_STATS_ENABLED
464  if (previous_state == stats_state_e::SERIAL_REGION) {
465  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466  KMP_SET_THREAD_STATE(previous_state);
467  } else {
468  KMP_POP_PARTITIONED_TIMER();
469  }
470 #endif // KMP_STATS_ENABLED
471 }
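/* A minimal sketch of the pairing a front end typically emits for
   "#pragma omp teams num_teams(4) thread_limit(8)", reusing loc_example from
   the sketch after __kmpc_fork_call above. The name outlined_teams is an
   illustrative assumption.

     static void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid) {
       // body of the teams region
     }

     void run_teams_region(void) {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
       __kmpc_push_num_teams(&loc_example, gtid, 4, 8); // num_teams, thread_limit
       __kmpc_fork_teams(&loc_example, 0, (kmpc_micro)outlined_teams); // argc == 0
     }
*/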
472 
473 // I don't think this function should ever have been exported.
474 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475 // openmp code ever called it, but it's been exported from the RTL for so
476 // long that I'm afraid to remove the definition.
477 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478 
491 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492  // The implementation is now in kmp_runtime.cpp so that it can share static
493  // functions with kmp_fork_call since the tasks to be done are similar in
494  // each case.
495  __kmp_assert_valid_gtid(global_tid);
496 #if OMPT_SUPPORT
497  OMPT_STORE_RETURN_ADDRESS(global_tid);
498 #endif
499  __kmp_serialized_parallel(loc, global_tid);
500 }
501 
509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510  kmp_internal_control_t *top;
511  kmp_info_t *this_thr;
512  kmp_team_t *serial_team;
513 
514  KC_TRACE(10,
515  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516 
517  /* skip all this code for autopar serialized loops since it results in
518  unacceptable overhead */
519  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520  return;
521 
522  // Not autopar code
523  __kmp_assert_valid_gtid(global_tid);
524  if (!TCR_4(__kmp_init_parallel))
525  __kmp_parallel_initialize();
526 
527  __kmp_resume_if_soft_paused();
528 
529  this_thr = __kmp_threads[global_tid];
530  serial_team = this_thr->th.th_serial_team;
531 
532  kmp_task_team_t *task_team = this_thr->th.th_task_team;
533  // we need to wait for the proxy tasks before finishing the thread
534  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535  task_team->tt.tt_hidden_helper_task_encountered))
536  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537 
538  KMP_MB();
539  KMP_DEBUG_ASSERT(serial_team);
540  KMP_ASSERT(serial_team->t.t_serialized);
541  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545 
546 #if OMPT_SUPPORT
547  if (ompt_enabled.enabled &&
548  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550  if (ompt_enabled.ompt_callback_implicit_task) {
551  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554  }
555 
556  // reset/clear the task id only after unlinking the task
557  ompt_data_t *parent_task_data;
558  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559 
560  if (ompt_enabled.ompt_callback_parallel_end) {
561  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563  ompt_parallel_invoker_program | ompt_parallel_team,
564  OMPT_LOAD_RETURN_ADDRESS(global_tid));
565  }
566  __ompt_lw_taskteam_unlink(this_thr);
567  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568  }
569 #endif
570 
571  /* If necessary, pop the internal control stack values and replace the team
572  * values */
573  top = serial_team->t.t_control_stack_top;
574  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576  serial_team->t.t_control_stack_top = top->next;
577  __kmp_free(top);
578  }
579 
580  /* pop dispatch buffers stack */
581  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582  {
583  dispatch_private_info_t *disp_buffer =
584  serial_team->t.t_dispatch->th_disp_buffer;
585  serial_team->t.t_dispatch->th_disp_buffer =
586  serial_team->t.t_dispatch->th_disp_buffer->next;
587  __kmp_free(disp_buffer);
588  }
589  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590 
591  --serial_team->t.t_serialized;
592  if (serial_team->t.t_serialized == 0) {
593 
594  /* return to the parallel section */
595 
596 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
597  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598  __kmp_clear_x87_fpu_status_word();
599  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601  }
602 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603 
604  __kmp_pop_current_task_from_thread(this_thr);
605 #if OMPD_SUPPORT
606  if (ompd_state & OMPD_ENABLE_BP)
607  ompd_bp_parallel_end();
608 #endif
609 
610  this_thr->th.th_team = serial_team->t.t_parent;
611  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612 
613  /* restore values cached in the thread */
614  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615  this_thr->th.th_team_master =
616  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618 
619  /* TODO the below shouldn't need to be adjusted for serialized teams */
620  this_thr->th.th_dispatch =
621  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622 
623  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624  this_thr->th.th_current_task->td_flags.executing = 1;
625 
626  if (__kmp_tasking_mode != tskm_immediate_exec) {
627  // Copy the task team from the new child / old parent team to the thread.
628  this_thr->th.th_task_team =
629  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630  KA_TRACE(20,
631  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632  "team %p\n",
633  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634  }
635 #if KMP_AFFINITY_SUPPORTED
636  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
637  __kmp_reset_root_init_mask(global_tid);
638  }
639 #endif
640  } else {
641  if (__kmp_tasking_mode != tskm_immediate_exec) {
642  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643  "depth of serial team %p to %d\n",
644  global_tid, serial_team, serial_team->t.t_serialized));
645  }
646  }
647 
648  serial_team->t.t_level--;
649  if (__kmp_env_consistency_check)
650  __kmp_pop_parallel(global_tid, NULL);
651 #if OMPT_SUPPORT
652  if (ompt_enabled.enabled)
653  this_thr->th.ompt_thread_info.state =
654  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
655  : ompt_state_work_parallel);
656 #endif
657 }
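/* A minimal sketch, assuming the names from the earlier sketches: when the
   front end can prove a region runs serialized (e.g. "#pragma omp parallel
   if(0)"), it may bracket a direct call to the outlined body with this pair
   instead of going through __kmpc_fork_call.

     void run_serialized_region(int n) {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
       kmp_int32 bound_tid = 0;
       __kmpc_serialized_parallel(&loc_example, gtid);
       outlined_parallel(&gtid, &bound_tid, &n); // body runs on this thread
       __kmpc_end_serialized_parallel(&loc_example, gtid);
     }
*/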
658 
667 void __kmpc_flush(ident_t *loc) {
668  KC_TRACE(10, ("__kmpc_flush: called\n"));
669 
670  /* need an explicit __mf() here since the library uses volatile instead */
671  KMP_MB(); /* Flush all pending memory write invalidates. */
672 
673 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
674 #if KMP_MIC
675 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
676 // We shouldn't need it, though, since the ABI rules require that
677 // * If the compiler generates NGO stores it also generates the fence
678 // * If users hand-code NGO stores they should insert the fence
679 // therefore no incomplete unordered stores should be visible.
680 #else
681  // C74404
682  // This is to address non-temporal store instructions (sfence needed).
683  // The clflush instruction is also addressed (mfence needed).
684  // Probably the non-temporal load movntdqa instruction should also be
685  // addressed.
686  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
687  if (!__kmp_cpuinfo.initialized) {
688  __kmp_query_cpuid(&__kmp_cpuinfo);
689  }
690  if (!__kmp_cpuinfo.flags.sse2) {
691  // CPU cannot execute SSE2 instructions.
692  } else {
693 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
694  _mm_mfence();
695 #elif KMP_COMPILER_MSVC
696  MemoryBarrier();
697 #else
698  __sync_synchronize();
699 #endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
700  }
701 #endif // KMP_MIC
702 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
703  KMP_ARCH_RISCV64)
704 // Nothing to see here move along
705 #elif KMP_ARCH_PPC64
706 // Nothing needed here (we have a real MB above).
707 #else
708 #error Unknown or unsupported architecture
709 #endif
710 
711 #if OMPT_SUPPORT && OMPT_OPTIONAL
712  if (ompt_enabled.ompt_callback_flush) {
713  ompt_callbacks.ompt_callback(ompt_callback_flush)(
714  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
715  }
716 #endif
717 }
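/* A minimal sketch: "#pragma omp flush" lowers to a single runtime call; the
   fence selection above is internal to the runtime. loc_example is the
   illustrative ident_t from the earlier sketches.

     void publish_flag(volatile int *flag) {
       *flag = 1;
       __kmpc_flush(&loc_example); // equivalent of: #pragma omp flush
     }
*/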
718 
719 /* -------------------------------------------------------------------------- */
727 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
728  KMP_COUNT_BLOCK(OMP_BARRIER);
729  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
730  __kmp_assert_valid_gtid(global_tid);
731 
732  if (!TCR_4(__kmp_init_parallel))
733  __kmp_parallel_initialize();
734 
735  __kmp_resume_if_soft_paused();
736 
737  if (__kmp_env_consistency_check) {
738  if (loc == 0) {
739  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
740  }
741  __kmp_check_barrier(global_tid, ct_barrier, loc);
742  }
743 
744 #if OMPT_SUPPORT
745  ompt_frame_t *ompt_frame;
746  if (ompt_enabled.enabled) {
747  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
748  if (ompt_frame->enter_frame.ptr == NULL)
749  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
750  }
751  OMPT_STORE_RETURN_ADDRESS(global_tid);
752 #endif
753  __kmp_threads[global_tid]->th.th_ident = loc;
754  // TODO: explicit barrier_wait_id:
755  // this function is called when 'barrier' directive is present or
756  // implicit barrier at the end of a worksharing construct.
757  // 1) better to add a per-thread barrier counter to a thread data structure
758  // 2) set to 0 when a new team is created
759  // 3) no sync is required
760 
761  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
762 #if OMPT_SUPPORT && OMPT_OPTIONAL
763  if (ompt_enabled.enabled) {
764  ompt_frame->enter_frame = ompt_data_none;
765  }
766 #endif
767 }
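/* A minimal sketch: an explicit "#pragma omp barrier" inside a parallel
   region lowers to a single call, with loc_example and gtid as in the
   earlier sketches.

     void body_with_barrier(kmp_int32 gtid) {
       // ... first phase ...
       __kmpc_barrier(&loc_example, gtid); // #pragma omp barrier
       // ... second phase ...
     }
*/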
768 
769 /* The BARRIER for a MASTER section is always explicit */
776 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
777  int status = 0;
778 
779  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
780  __kmp_assert_valid_gtid(global_tid);
781 
782  if (!TCR_4(__kmp_init_parallel))
783  __kmp_parallel_initialize();
784 
785  __kmp_resume_if_soft_paused();
786 
787  if (KMP_MASTER_GTID(global_tid)) {
788  KMP_COUNT_BLOCK(OMP_MASTER);
789  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
790  status = 1;
791  }
792 
793 #if OMPT_SUPPORT && OMPT_OPTIONAL
794  if (status) {
795  if (ompt_enabled.ompt_callback_masked) {
796  kmp_info_t *this_thr = __kmp_threads[global_tid];
797  kmp_team_t *team = this_thr->th.th_team;
798 
799  int tid = __kmp_tid_from_gtid(global_tid);
800  ompt_callbacks.ompt_callback(ompt_callback_masked)(
801  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
802  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
803  OMPT_GET_RETURN_ADDRESS(0));
804  }
805  }
806 #endif
807 
808  if (__kmp_env_consistency_check) {
809 #if KMP_USE_DYNAMIC_LOCK
810  if (status)
811  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
812  else
813  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
814 #else
815  if (status)
816  __kmp_push_sync(global_tid, ct_master, loc, NULL);
817  else
818  __kmp_check_sync(global_tid, ct_master, loc, NULL);
819 #endif
820  }
821 
822  return status;
823 }
824 
833 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
834  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
835  __kmp_assert_valid_gtid(global_tid);
836  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
837  KMP_POP_PARTITIONED_TIMER();
838 
839 #if OMPT_SUPPORT && OMPT_OPTIONAL
840  kmp_info_t *this_thr = __kmp_threads[global_tid];
841  kmp_team_t *team = this_thr->th.th_team;
842  if (ompt_enabled.ompt_callback_masked) {
843  int tid = __kmp_tid_from_gtid(global_tid);
844  ompt_callbacks.ompt_callback(ompt_callback_masked)(
845  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
846  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
847  OMPT_GET_RETURN_ADDRESS(0));
848  }
849 #endif
850 
851  if (__kmp_env_consistency_check) {
852  if (KMP_MASTER_GTID(global_tid))
853  __kmp_pop_sync(global_tid, ct_master, loc);
854  }
855 }
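/* A minimal sketch of the usual lowering of "#pragma omp master": only the
   thread for which __kmpc_master returns 1 executes the block and then calls
   __kmpc_end_master. loc_example as in the earlier sketches.

     void body_with_master(kmp_int32 gtid) {
       if (__kmpc_master(&loc_example, gtid)) {
         // ... work executed by the primary thread only ...
         __kmpc_end_master(&loc_example, gtid);
       }
     }
*/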
856 
865 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
866  int status = 0;
867  int tid;
868  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
869  __kmp_assert_valid_gtid(global_tid);
870 
871  if (!TCR_4(__kmp_init_parallel))
872  __kmp_parallel_initialize();
873 
874  __kmp_resume_if_soft_paused();
875 
876  tid = __kmp_tid_from_gtid(global_tid);
877  if (tid == filter) {
878  KMP_COUNT_BLOCK(OMP_MASKED);
879  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
880  status = 1;
881  }
882 
883 #if OMPT_SUPPORT && OMPT_OPTIONAL
884  if (status) {
885  if (ompt_enabled.ompt_callback_masked) {
886  kmp_info_t *this_thr = __kmp_threads[global_tid];
887  kmp_team_t *team = this_thr->th.th_team;
888  ompt_callbacks.ompt_callback(ompt_callback_masked)(
889  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
890  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
891  OMPT_GET_RETURN_ADDRESS(0));
892  }
893  }
894 #endif
895 
896  if (__kmp_env_consistency_check) {
897 #if KMP_USE_DYNAMIC_LOCK
898  if (status)
899  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
900  else
901  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
902 #else
903  if (status)
904  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
905  else
906  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
907 #endif
908  }
909 
910  return status;
911 }
912 
921 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
922  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
923  __kmp_assert_valid_gtid(global_tid);
924  KMP_POP_PARTITIONED_TIMER();
925 
926 #if OMPT_SUPPORT && OMPT_OPTIONAL
927  kmp_info_t *this_thr = __kmp_threads[global_tid];
928  kmp_team_t *team = this_thr->th.th_team;
929  if (ompt_enabled.ompt_callback_masked) {
930  int tid = __kmp_tid_from_gtid(global_tid);
931  ompt_callbacks.ompt_callback(ompt_callback_masked)(
932  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
933  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
934  OMPT_GET_RETURN_ADDRESS(0));
935  }
936 #endif
937 
938  if (__kmp_env_consistency_check) {
939  __kmp_pop_sync(global_tid, ct_masked, loc);
940  }
941 }
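/* A minimal sketch: "#pragma omp masked filter(2)" follows the same pattern
   as master, with the filter thread id passed through. loc_example as in the
   earlier sketches.

     void body_with_masked(kmp_int32 gtid) {
       if (__kmpc_masked(&loc_example, gtid, 2)) { // filter(2)
         // ... work executed only by the thread whose team-local id is 2 ...
         __kmpc_end_masked(&loc_example, gtid);
       }
     }
*/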
942 
950 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
951  int cid = 0;
952  kmp_info_t *th;
953  KMP_DEBUG_ASSERT(__kmp_init_serial);
954 
955  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
956  __kmp_assert_valid_gtid(gtid);
957 
958  if (!TCR_4(__kmp_init_parallel))
959  __kmp_parallel_initialize();
960 
961  __kmp_resume_if_soft_paused();
962 
963 #if USE_ITT_BUILD
964  __kmp_itt_ordered_prep(gtid);
965 // TODO: ordered_wait_id
966 #endif /* USE_ITT_BUILD */
967 
968  th = __kmp_threads[gtid];
969 
970 #if OMPT_SUPPORT && OMPT_OPTIONAL
971  kmp_team_t *team;
972  ompt_wait_id_t lck;
973  void *codeptr_ra;
974  OMPT_STORE_RETURN_ADDRESS(gtid);
975  if (ompt_enabled.enabled) {
976  team = __kmp_team_from_gtid(gtid);
977  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
978  /* OMPT state update */
979  th->th.ompt_thread_info.wait_id = lck;
980  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
981 
982  /* OMPT event callback */
983  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
984  if (ompt_enabled.ompt_callback_mutex_acquire) {
985  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
986  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
987  codeptr_ra);
988  }
989  }
990 #endif
991 
992  if (th->th.th_dispatch->th_deo_fcn != 0)
993  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
994  else
995  __kmp_parallel_deo(&gtid, &cid, loc);
996 
997 #if OMPT_SUPPORT && OMPT_OPTIONAL
998  if (ompt_enabled.enabled) {
999  /* OMPT state update */
1000  th->th.ompt_thread_info.state = ompt_state_work_parallel;
1001  th->th.ompt_thread_info.wait_id = 0;
1002 
1003  /* OMPT event callback */
1004  if (ompt_enabled.ompt_callback_mutex_acquired) {
1005  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1006  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1007  }
1008  }
1009 #endif
1010 
1011 #if USE_ITT_BUILD
1012  __kmp_itt_ordered_start(gtid);
1013 #endif /* USE_ITT_BUILD */
1014 }
1015 
1023 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1024  int cid = 0;
1025  kmp_info_t *th;
1026 
1027  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1028  __kmp_assert_valid_gtid(gtid);
1029 
1030 #if USE_ITT_BUILD
1031  __kmp_itt_ordered_end(gtid);
1032 // TODO: ordered_wait_id
1033 #endif /* USE_ITT_BUILD */
1034 
1035  th = __kmp_threads[gtid];
1036 
1037  if (th->th.th_dispatch->th_dxo_fcn != 0)
1038  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1039  else
1040  __kmp_parallel_dxo(&gtid, &cid, loc);
1041 
1042 #if OMPT_SUPPORT && OMPT_OPTIONAL
1043  OMPT_STORE_RETURN_ADDRESS(gtid);
1044  if (ompt_enabled.ompt_callback_mutex_released) {
1045  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1046  ompt_mutex_ordered,
1047  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1048  ->t.t_ordered.dt.t_value,
1049  OMPT_LOAD_RETURN_ADDRESS(gtid));
1050  }
1051 #endif
1052 }
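/* A minimal sketch: inside a loop compiled with an ordered clause, the body
   of "#pragma omp ordered" is bracketed by this pair (the surrounding loop
   dispatch setup is omitted). loc_example as in the earlier sketches.

     void ordered_part_of_iteration(kmp_int32 gtid, int i) {
       __kmpc_ordered(&loc_example, gtid);
       // ... code that must execute in iteration order ...
       __kmpc_end_ordered(&loc_example, gtid);
     }
*/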
1053 
1054 #if KMP_USE_DYNAMIC_LOCK
1055 
1056 static __forceinline void
1057 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1058  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1059  // Pointer to the allocated indirect lock is written to crit, while indexing
1060  // is ignored.
1061  void *idx;
1062  kmp_indirect_lock_t **lck;
1063  lck = (kmp_indirect_lock_t **)crit;
1064  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1065  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1066  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1067  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1068  KA_TRACE(20,
1069  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1070 #if USE_ITT_BUILD
1071  __kmp_itt_critical_creating(ilk->lock, loc);
1072 #endif
1073  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1074  if (status == 0) {
1075 #if USE_ITT_BUILD
1076  __kmp_itt_critical_destroyed(ilk->lock);
1077 #endif
1078  // We don't really need to destroy the unclaimed lock here since it will be
1079  // cleaned up at program exit.
1080  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1081  }
1082  KMP_DEBUG_ASSERT(*lck != NULL);
1083 }
1084 
1085 // Fast-path acquire tas lock
1086 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1087  { \
1088  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1089  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1090  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1091  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1092  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1093  kmp_uint32 spins; \
1094  KMP_FSYNC_PREPARE(l); \
1095  KMP_INIT_YIELD(spins); \
1096  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1097  do { \
1098  if (TCR_4(__kmp_nth) > \
1099  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1100  KMP_YIELD(TRUE); \
1101  } else { \
1102  KMP_YIELD_SPIN(spins); \
1103  } \
1104  __kmp_spin_backoff(&backoff); \
1105  } while ( \
1106  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1107  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1108  } \
1109  KMP_FSYNC_ACQUIRED(l); \
1110  }
1111 
1112 // Fast-path test tas lock
1113 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1114  { \
1115  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1116  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1117  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1118  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1119  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1120  }
1121 
1122 // Fast-path release tas lock
1123 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1124  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1125 
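/* A minimal sketch: the three TAS fast-path macros above implement a
   test-and-test-and-set spinlock. A stand-alone C++ analogue, ignoring the
   runtime's yield/backoff policy and its encoding of the free/busy values:

     #include <atomic>

     struct tas_like_lock {
       std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1
       void acquire(int gtid) {
         int expected = 0;
         while (poll.load(std::memory_order_relaxed) != 0 ||
                !poll.compare_exchange_strong(expected, gtid + 1,
                                              std::memory_order_acquire)) {
           expected = 0; // compare_exchange overwrites expected on failure
         }
       }
       bool try_acquire(int gtid) {
         int expected = 0;
         return poll.load(std::memory_order_relaxed) == 0 &&
                poll.compare_exchange_strong(expected, gtid + 1,
                                             std::memory_order_acquire);
       }
       void release() { poll.store(0, std::memory_order_release); }
     };
*/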
1126 #if KMP_USE_FUTEX
1127 
1128 #include <sys/syscall.h>
1129 #include <unistd.h>
1130 #ifndef FUTEX_WAIT
1131 #define FUTEX_WAIT 0
1132 #endif
1133 #ifndef FUTEX_WAKE
1134 #define FUTEX_WAKE 1
1135 #endif
1136 
1137 // Fast-path acquire futex lock
1138 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1139  { \
1140  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1141  kmp_int32 gtid_code = (gtid + 1) << 1; \
1142  KMP_MB(); \
1143  KMP_FSYNC_PREPARE(ftx); \
1144  kmp_int32 poll_val; \
1145  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1146  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1147  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1148  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1149  if (!cond) { \
1150  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1151  poll_val | \
1152  KMP_LOCK_BUSY(1, futex))) { \
1153  continue; \
1154  } \
1155  poll_val |= KMP_LOCK_BUSY(1, futex); \
1156  } \
1157  kmp_int32 rc; \
1158  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1159  NULL, NULL, 0)) != 0) { \
1160  continue; \
1161  } \
1162  gtid_code |= 1; \
1163  } \
1164  KMP_FSYNC_ACQUIRED(ftx); \
1165  }
1166 
1167 // Fast-path test futex lock
1168 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1169  { \
1170  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1171  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1172  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1173  KMP_FSYNC_ACQUIRED(ftx); \
1174  rc = TRUE; \
1175  } else { \
1176  rc = FALSE; \
1177  } \
1178  }
1179 
1180 // Fast-path release futex lock
1181 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1182  { \
1183  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1184  KMP_MB(); \
1185  KMP_FSYNC_RELEASING(ftx); \
1186  kmp_int32 poll_val = \
1187  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1188  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1189  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1190  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1191  } \
1192  KMP_MB(); \
1193  KMP_YIELD_OVERSUB(); \
1194  }
1195 
1196 #endif // KMP_USE_FUTEX
1197 
1198 #else // KMP_USE_DYNAMIC_LOCK
1199 
1200 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1201  ident_t const *loc,
1202  kmp_int32 gtid) {
1203  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1204 
1205  // Because of the double-check, the following load doesn't need to be volatile
1206  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1207 
1208  if (lck == NULL) {
1209  void *idx;
1210 
1211  // Allocate & initialize the lock.
1212  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1213  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1214  __kmp_init_user_lock_with_checks(lck);
1215  __kmp_set_user_lock_location(lck, loc);
1216 #if USE_ITT_BUILD
1217  __kmp_itt_critical_creating(lck);
1218 // __kmp_itt_critical_creating() should be called *before* the first usage
1219 // of the underlying lock. It is the only place where we can guarantee it. There
1220 // is a chance the lock will be destroyed with no usage, but it is not a
1221 // problem, because this is not a real event seen by the user but rather the
1222 // setting of a name for the object (lock). See more details in kmp_itt.h.
1223 #endif /* USE_ITT_BUILD */
1224 
1225  // Use a cmpxchg instruction to slam the start of the critical section with
1226  // the lock pointer. If another thread beat us to it, deallocate the lock,
1227  // and use the lock that the other thread allocated.
1228  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1229 
1230  if (status == 0) {
1231 // Deallocate the lock and reload the value.
1232 #if USE_ITT_BUILD
1233  __kmp_itt_critical_destroyed(lck);
1234 // Let ITT know the lock is destroyed and the same memory location may be reused
1235 // for another purpose.
1236 #endif /* USE_ITT_BUILD */
1237  __kmp_destroy_user_lock_with_checks(lck);
1238  __kmp_user_lock_free(&idx, gtid, lck);
1239  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1240  KMP_DEBUG_ASSERT(lck != NULL);
1241  }
1242  }
1243  return lck;
1244 }
1245 
1246 #endif // KMP_USE_DYNAMIC_LOCK
1247 
1258 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1259  kmp_critical_name *crit) {
1260 #if KMP_USE_DYNAMIC_LOCK
1261 #if OMPT_SUPPORT && OMPT_OPTIONAL
1262  OMPT_STORE_RETURN_ADDRESS(global_tid);
1263 #endif // OMPT_SUPPORT
1264  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1265 #else
1266  KMP_COUNT_BLOCK(OMP_CRITICAL);
1267 #if OMPT_SUPPORT && OMPT_OPTIONAL
1268  ompt_state_t prev_state = ompt_state_undefined;
1269  ompt_thread_info_t ti;
1270 #endif
1271  kmp_user_lock_p lck;
1272 
1273  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1274  __kmp_assert_valid_gtid(global_tid);
1275 
1276  // TODO: add THR_OVHD_STATE
1277 
1278  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1279  KMP_CHECK_USER_LOCK_INIT();
1280 
1281  if ((__kmp_user_lock_kind == lk_tas) &&
1282  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1283  lck = (kmp_user_lock_p)crit;
1284  }
1285 #if KMP_USE_FUTEX
1286  else if ((__kmp_user_lock_kind == lk_futex) &&
1287  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1288  lck = (kmp_user_lock_p)crit;
1289  }
1290 #endif
1291  else { // ticket, queuing or drdpa
1292  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1293  }
1294 
1295  if (__kmp_env_consistency_check)
1296  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1297 
1298  // since the critical directive binds to all threads, not just the current
1299  // team, we have to check this even if we are in a serialized team.
1300  // also, even if we are the uber thread, we still have to acquire the lock,
1301  // as we have to contend with sibling threads.
1302 
1303 #if USE_ITT_BUILD
1304  __kmp_itt_critical_acquiring(lck);
1305 #endif /* USE_ITT_BUILD */
1306 #if OMPT_SUPPORT && OMPT_OPTIONAL
1307  OMPT_STORE_RETURN_ADDRESS(global_tid);
1308  void *codeptr_ra = NULL;
1309  if (ompt_enabled.enabled) {
1310  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1311  /* OMPT state update */
1312  prev_state = ti.state;
1313  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1314  ti.state = ompt_state_wait_critical;
1315 
1316  /* OMPT event callback */
1317  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1318  if (ompt_enabled.ompt_callback_mutex_acquire) {
1319  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1320  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1321  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1322  }
1323  }
1324 #endif
1325  // Value of 'crit' should be good for using as a critical_id of the critical
1326  // section directive.
1327  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1328 
1329 #if USE_ITT_BUILD
1330  __kmp_itt_critical_acquired(lck);
1331 #endif /* USE_ITT_BUILD */
1332 #if OMPT_SUPPORT && OMPT_OPTIONAL
1333  if (ompt_enabled.enabled) {
1334  /* OMPT state update */
1335  ti.state = prev_state;
1336  ti.wait_id = 0;
1337 
1338  /* OMPT event callback */
1339  if (ompt_enabled.ompt_callback_mutex_acquired) {
1340  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1341  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1342  }
1343  }
1344 #endif
1345  KMP_POP_PARTITIONED_TIMER();
1346 
1347  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1348  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1349 #endif // KMP_USE_DYNAMIC_LOCK
1350 }
1351 
1352 #if KMP_USE_DYNAMIC_LOCK
1353 
1354 // Converts the given hint to an internal lock implementation
1355 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1356 #if KMP_USE_TSX
1357 #define KMP_TSX_LOCK(seq) lockseq_##seq
1358 #else
1359 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1360 #endif
1361 
1362 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1363 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1364 #else
1365 #define KMP_CPUINFO_RTM 0
1366 #endif
1367 
1368  // Hints that do not require further logic
1369  if (hint & kmp_lock_hint_hle)
1370  return KMP_TSX_LOCK(hle);
1371  if (hint & kmp_lock_hint_rtm)
1372  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1373  if (hint & kmp_lock_hint_adaptive)
1374  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1375 
1376  // Rule out conflicting hints first by returning the default lock
1377  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1378  return __kmp_user_lock_seq;
1379  if ((hint & omp_lock_hint_speculative) &&
1380  (hint & omp_lock_hint_nonspeculative))
1381  return __kmp_user_lock_seq;
1382 
1383  // Do not even consider speculation when it appears to be contended
1384  if (hint & omp_lock_hint_contended)
1385  return lockseq_queuing;
1386 
1387  // Uncontended lock without speculation
1388  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1389  return lockseq_tas;
1390 
1391  // Use RTM lock for speculation
1392  if (hint & omp_lock_hint_speculative)
1393  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1394 
1395  return __kmp_user_lock_seq;
1396 }
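/* A minimal sketch, assuming the same hint mapping is reached through the
   lock-with-hint entry points and that TSX is unavailable, so the mapping
   above falls back to the non-speculative choices:

     #include <omp.h>

     void make_hinted_locks(void) {
       omp_lock_t l1, l2;
       omp_init_lock_with_hint(&l1, omp_lock_hint_contended);   // queuing lock
       omp_init_lock_with_hint(&l2, omp_lock_hint_uncontended); // TAS lock
       omp_destroy_lock(&l1);
       omp_destroy_lock(&l2);
     }
*/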
1397 
1398 #if OMPT_SUPPORT && OMPT_OPTIONAL
1399 #if KMP_USE_DYNAMIC_LOCK
1400 static kmp_mutex_impl_t
1401 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1402  if (user_lock) {
1403  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1404  case 0:
1405  break;
1406 #if KMP_USE_FUTEX
1407  case locktag_futex:
1408  return kmp_mutex_impl_queuing;
1409 #endif
1410  case locktag_tas:
1411  return kmp_mutex_impl_spin;
1412 #if KMP_USE_TSX
1413  case locktag_hle:
1414  case locktag_rtm_spin:
1415  return kmp_mutex_impl_speculative;
1416 #endif
1417  default:
1418  return kmp_mutex_impl_none;
1419  }
1420  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1421  }
1422  KMP_ASSERT(ilock);
1423  switch (ilock->type) {
1424 #if KMP_USE_TSX
1425  case locktag_adaptive:
1426  case locktag_rtm_queuing:
1427  return kmp_mutex_impl_speculative;
1428 #endif
1429  case locktag_nested_tas:
1430  return kmp_mutex_impl_spin;
1431 #if KMP_USE_FUTEX
1432  case locktag_nested_futex:
1433 #endif
1434  case locktag_ticket:
1435  case locktag_queuing:
1436  case locktag_drdpa:
1437  case locktag_nested_ticket:
1438  case locktag_nested_queuing:
1439  case locktag_nested_drdpa:
1440  return kmp_mutex_impl_queuing;
1441  default:
1442  return kmp_mutex_impl_none;
1443  }
1444 }
1445 #else
1446 // For locks without dynamic binding
1447 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1448  switch (__kmp_user_lock_kind) {
1449  case lk_tas:
1450  return kmp_mutex_impl_spin;
1451 #if KMP_USE_FUTEX
1452  case lk_futex:
1453 #endif
1454  case lk_ticket:
1455  case lk_queuing:
1456  case lk_drdpa:
1457  return kmp_mutex_impl_queuing;
1458 #if KMP_USE_TSX
1459  case lk_hle:
1460  case lk_rtm_queuing:
1461  case lk_rtm_spin:
1462  case lk_adaptive:
1463  return kmp_mutex_impl_speculative;
1464 #endif
1465  default:
1466  return kmp_mutex_impl_none;
1467  }
1468 }
1469 #endif // KMP_USE_DYNAMIC_LOCK
1470 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1471 
1485 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1486  kmp_critical_name *crit, uint32_t hint) {
1487  KMP_COUNT_BLOCK(OMP_CRITICAL);
1488  kmp_user_lock_p lck;
1489 #if OMPT_SUPPORT && OMPT_OPTIONAL
1490  ompt_state_t prev_state = ompt_state_undefined;
1491  ompt_thread_info_t ti;
1492  // This is the case if called from __kmpc_critical:
1493  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1494  if (!codeptr)
1495  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1496 #endif
1497 
1498  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1499  __kmp_assert_valid_gtid(global_tid);
1500 
1501  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1502  // Check if it is initialized.
1503  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1504  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1505  if (*lk == 0) {
1506  if (KMP_IS_D_LOCK(lockseq)) {
1507  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1508  KMP_GET_D_TAG(lockseq));
1509  } else {
1510  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1511  }
1512  }
1513  // Branch for accessing the actual lock object and set operation. This
1514  // branching is inevitable since this lock initialization does not follow the
1515  // normal dispatch path (lock table is not used).
1516  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1517  lck = (kmp_user_lock_p)lk;
1518  if (__kmp_env_consistency_check) {
1519  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1520  __kmp_map_hint_to_lock(hint));
1521  }
1522 #if USE_ITT_BUILD
1523  __kmp_itt_critical_acquiring(lck);
1524 #endif
1525 #if OMPT_SUPPORT && OMPT_OPTIONAL
1526  if (ompt_enabled.enabled) {
1527  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1528  /* OMPT state update */
1529  prev_state = ti.state;
1530  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1531  ti.state = ompt_state_wait_critical;
1532 
1533  /* OMPT event callback */
1534  if (ompt_enabled.ompt_callback_mutex_acquire) {
1535  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1536  ompt_mutex_critical, (unsigned int)hint,
1537  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1538  codeptr);
1539  }
1540  }
1541 #endif
1542 #if KMP_USE_INLINED_TAS
1543  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1544  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1545  } else
1546 #elif KMP_USE_INLINED_FUTEX
1547  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1548  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1549  } else
1550 #endif
1551  {
1552  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1553  }
1554  } else {
1555  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1556  lck = ilk->lock;
1557  if (__kmp_env_consistency_check) {
1558  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1559  __kmp_map_hint_to_lock(hint));
1560  }
1561 #if USE_ITT_BUILD
1562  __kmp_itt_critical_acquiring(lck);
1563 #endif
1564 #if OMPT_SUPPORT && OMPT_OPTIONAL
1565  if (ompt_enabled.enabled) {
1566  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1567  /* OMPT state update */
1568  prev_state = ti.state;
1569  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1570  ti.state = ompt_state_wait_critical;
1571 
1572  /* OMPT event callback */
1573  if (ompt_enabled.ompt_callback_mutex_acquire) {
1574  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1575  ompt_mutex_critical, (unsigned int)hint,
1576  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1577  codeptr);
1578  }
1579  }
1580 #endif
1581  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1582  }
1583  KMP_POP_PARTITIONED_TIMER();
1584 
1585 #if USE_ITT_BUILD
1586  __kmp_itt_critical_acquired(lck);
1587 #endif /* USE_ITT_BUILD */
1588 #if OMPT_SUPPORT && OMPT_OPTIONAL
1589  if (ompt_enabled.enabled) {
1590  /* OMPT state update */
1591  ti.state = prev_state;
1592  ti.wait_id = 0;
1593 
1594  /* OMPT event callback */
1595  if (ompt_enabled.ompt_callback_mutex_acquired) {
1596  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1597  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1598  }
1599  }
1600 #endif
1601 
1602  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1603  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1604 } // __kmpc_critical_with_hint
1605 
1606 #endif // KMP_USE_DYNAMIC_LOCK
1607 
1617 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1618  kmp_critical_name *crit) {
1619  kmp_user_lock_p lck;
1620 
1621  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1622 
1623 #if KMP_USE_DYNAMIC_LOCK
1624  int locktag = KMP_EXTRACT_D_TAG(crit);
1625  if (locktag) {
1626  lck = (kmp_user_lock_p)crit;
1627  KMP_ASSERT(lck != NULL);
1628  if (__kmp_env_consistency_check) {
1629  __kmp_pop_sync(global_tid, ct_critical, loc);
1630  }
1631 #if USE_ITT_BUILD
1632  __kmp_itt_critical_releasing(lck);
1633 #endif
1634 #if KMP_USE_INLINED_TAS
1635  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1636  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1637  } else
1638 #elif KMP_USE_INLINED_FUTEX
1639  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1640  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1641  } else
1642 #endif
1643  {
1644  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1645  }
1646  } else {
1647  kmp_indirect_lock_t *ilk =
1648  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1649  KMP_ASSERT(ilk != NULL);
1650  lck = ilk->lock;
1651  if (__kmp_env_consistency_check) {
1652  __kmp_pop_sync(global_tid, ct_critical, loc);
1653  }
1654 #if USE_ITT_BUILD
1655  __kmp_itt_critical_releasing(lck);
1656 #endif
1657  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1658  }
1659 
1660 #else // KMP_USE_DYNAMIC_LOCK
1661 
1662  if ((__kmp_user_lock_kind == lk_tas) &&
1663  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1664  lck = (kmp_user_lock_p)crit;
1665  }
1666 #if KMP_USE_FUTEX
1667  else if ((__kmp_user_lock_kind == lk_futex) &&
1668  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1669  lck = (kmp_user_lock_p)crit;
1670  }
1671 #endif
1672  else { // ticket, queuing or drdpa
1673  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1674  }
1675 
1676  KMP_ASSERT(lck != NULL);
1677 
1678  if (__kmp_env_consistency_check)
1679  __kmp_pop_sync(global_tid, ct_critical, loc);
1680 
1681 #if USE_ITT_BUILD
1682  __kmp_itt_critical_releasing(lck);
1683 #endif /* USE_ITT_BUILD */
1684  // Value of 'crit' should be good for using as a critical_id of the critical
1685  // section directive.
1686  __kmp_release_user_lock_with_checks(lck, global_tid);
1687 
1688 #endif // KMP_USE_DYNAMIC_LOCK
1689 
1690 #if OMPT_SUPPORT && OMPT_OPTIONAL
1691  /* OMPT release event triggers after lock is released; place here to trigger
1692  * for all #if branches */
1693  OMPT_STORE_RETURN_ADDRESS(global_tid);
1694  if (ompt_enabled.ompt_callback_mutex_released) {
1695  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1696  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1697  OMPT_LOAD_RETURN_ADDRESS(0));
1698  }
1699 #endif
1700 
1701  KMP_POP_PARTITIONED_TIMER();
1702  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1703 }
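/* A minimal sketch: "#pragma omp critical (tag)" lowers to a matched pair of
   calls sharing one statically allocated kmp_critical_name; with a hint
   clause the front end calls __kmpc_critical_with_hint instead, and the end
   call is unchanged. crit_tag is an illustrative name, loc_example as in the
   earlier sketches.

     static kmp_critical_name crit_tag = {0};

     void add_to_counter(kmp_int32 gtid, long *counter) {
       __kmpc_critical(&loc_example, gtid, &crit_tag);
       ++*counter; // body of the critical construct
       __kmpc_end_critical(&loc_example, gtid, &crit_tag);
     }
*/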
1704 
1714 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1715  int status;
1716  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1717  __kmp_assert_valid_gtid(global_tid);
1718 
1719  if (!TCR_4(__kmp_init_parallel))
1720  __kmp_parallel_initialize();
1721 
1722  __kmp_resume_if_soft_paused();
1723 
1724  if (__kmp_env_consistency_check)
1725  __kmp_check_barrier(global_tid, ct_barrier, loc);
1726 
1727 #if OMPT_SUPPORT
1728  ompt_frame_t *ompt_frame;
1729  if (ompt_enabled.enabled) {
1730  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1731  if (ompt_frame->enter_frame.ptr == NULL)
1732  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1733  }
1734  OMPT_STORE_RETURN_ADDRESS(global_tid);
1735 #endif
1736 #if USE_ITT_NOTIFY
1737  __kmp_threads[global_tid]->th.th_ident = loc;
1738 #endif
1739  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1740 #if OMPT_SUPPORT && OMPT_OPTIONAL
1741  if (ompt_enabled.enabled) {
1742  ompt_frame->enter_frame = ompt_data_none;
1743  }
1744 #endif
1745 
1746  return (status != 0) ? 0 : 1;
1747 }
1748 
1758 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1759  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1760  __kmp_assert_valid_gtid(global_tid);
1761  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1762 }
1763 
1774 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1775  kmp_int32 ret;
1776  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1777  __kmp_assert_valid_gtid(global_tid);
1778 
1779  if (!TCR_4(__kmp_init_parallel))
1780  __kmp_parallel_initialize();
1781 
1782  __kmp_resume_if_soft_paused();
1783 
1784  if (__kmp_env_consistency_check) {
1785  if (loc == 0) {
1786  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1787  }
1788  __kmp_check_barrier(global_tid, ct_barrier, loc);
1789  }
1790 
1791 #if OMPT_SUPPORT
1792  ompt_frame_t *ompt_frame;
1793  if (ompt_enabled.enabled) {
1794  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1795  if (ompt_frame->enter_frame.ptr == NULL)
1796  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1797  }
1798  OMPT_STORE_RETURN_ADDRESS(global_tid);
1799 #endif
1800 #if USE_ITT_NOTIFY
1801  __kmp_threads[global_tid]->th.th_ident = loc;
1802 #endif
1803  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1804 #if OMPT_SUPPORT && OMPT_OPTIONAL
1805  if (ompt_enabled.enabled) {
1806  ompt_frame->enter_frame = ompt_data_none;
1807  }
1808 #endif
1809 
1810  ret = __kmpc_master(loc, global_tid);
1811 
1812  if (__kmp_env_consistency_check) {
1813  /* there's no __kmpc_end_master called; so the (stats) */
1814  /* actions of __kmpc_end_master are done here */
1815  if (ret) {
1816  /* only one thread should do the pop since only */
1817  /* one did the push (see __kmpc_master()) */
1818  __kmp_pop_sync(global_tid, ct_master, loc);
1819  }
1820  }
1821 
1822  return (ret);
1823 }
1824 
1825 /* The BARRIER for a SINGLE process section is always explicit */
1837 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1838  __kmp_assert_valid_gtid(global_tid);
1839  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1840 
1841  if (rc) {
1842  // We are going to execute the single statement, so we should count it.
1843  KMP_COUNT_BLOCK(OMP_SINGLE);
1844  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1845  }
1846 
1847 #if OMPT_SUPPORT && OMPT_OPTIONAL
1848  kmp_info_t *this_thr = __kmp_threads[global_tid];
1849  kmp_team_t *team = this_thr->th.th_team;
1850  int tid = __kmp_tid_from_gtid(global_tid);
1851 
1852  if (ompt_enabled.enabled) {
1853  if (rc) {
1854  if (ompt_enabled.ompt_callback_work) {
1855  ompt_callbacks.ompt_callback(ompt_callback_work)(
1856  ompt_work_single_executor, ompt_scope_begin,
1857  &(team->t.ompt_team_info.parallel_data),
1858  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1859  1, OMPT_GET_RETURN_ADDRESS(0));
1860  }
1861  } else {
1862  if (ompt_enabled.ompt_callback_work) {
1863  ompt_callbacks.ompt_callback(ompt_callback_work)(
1864  ompt_work_single_other, ompt_scope_begin,
1865  &(team->t.ompt_team_info.parallel_data),
1866  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1867  1, OMPT_GET_RETURN_ADDRESS(0));
1868  ompt_callbacks.ompt_callback(ompt_callback_work)(
1869  ompt_work_single_other, ompt_scope_end,
1870  &(team->t.ompt_team_info.parallel_data),
1871  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1872  1, OMPT_GET_RETURN_ADDRESS(0));
1873  }
1874  }
1875  }
1876 #endif
1877 
1878  return rc;
1879 }
1880 
1890 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1891  __kmp_assert_valid_gtid(global_tid);
1892  __kmp_exit_single(global_tid);
1893  KMP_POP_PARTITIONED_TIMER();
1894 
1895 #if OMPT_SUPPORT && OMPT_OPTIONAL
1896  kmp_info_t *this_thr = __kmp_threads[global_tid];
1897  kmp_team_t *team = this_thr->th.th_team;
1898  int tid = __kmp_tid_from_gtid(global_tid);
1899 
1900  if (ompt_enabled.ompt_callback_work) {
1901  ompt_callbacks.ompt_callback(ompt_callback_work)(
1902  ompt_work_single_executor, ompt_scope_end,
1903  &(team->t.ompt_team_info.parallel_data),
1904  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1905  OMPT_GET_RETURN_ADDRESS(0));
1906  }
1907 #endif
1908 }
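/* A minimal sketch of the usual lowering of "#pragma omp single" (without a
   nowait clause). loc_example as in the earlier sketches.

     void body_with_single(kmp_int32 gtid) {
       if (__kmpc_single(&loc_example, gtid)) {
         // ... work executed by exactly one thread ...
         __kmpc_end_single(&loc_example, gtid);
       }
       __kmpc_barrier(&loc_example, gtid); // implicit barrier; dropped for nowait
     }
*/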
1909 
1917 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1918  KMP_POP_PARTITIONED_TIMER();
1919  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1920 
1921 #if OMPT_SUPPORT && OMPT_OPTIONAL
1922  if (ompt_enabled.ompt_callback_work) {
1923  ompt_work_t ompt_work_type = ompt_work_loop;
1924  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1925  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1926  // Determine workshare type
1927  if (loc != NULL) {
1928  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1929  ompt_work_type = ompt_work_loop;
1930  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1931  ompt_work_type = ompt_work_sections;
1932  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1933  ompt_work_type = ompt_work_distribute;
1934  } else {
1935  // use default set above.
1936  // a warning about this case is provided in __kmpc_for_static_init
1937  }
1938  KMP_DEBUG_ASSERT(ompt_work_type);
1939  }
1940  ompt_callbacks.ompt_callback(ompt_callback_work)(
1941  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1942  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1943  }
1944 #endif
1945  if (__kmp_env_consistency_check)
1946  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1947 }
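/* A minimal sketch: __kmpc_for_static_fini closes a statically scheduled
   worksharing loop opened with one of the __kmpc_for_static_init_* entry
   points (defined in kmp_sched.cpp). Roughly, for
   "#pragma omp for schedule(static)" over 0..n-1, with loc_example as in the
   earlier sketches:

     void static_loop(kmp_int32 gtid, int n) {
       kmp_int32 last = 0, lb = 0, ub = n - 1, stride = 1;
       __kmpc_for_static_init_4(&loc_example, gtid, kmp_sch_static, &last,
                                &lb, &ub, &stride, 1, 1);
       for (kmp_int32 i = lb; i <= ub; ++i) {
         // ... loop body ...
       }
       __kmpc_for_static_fini(&loc_example, gtid);
     }
*/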
1948 
1949 // User routines which take C-style arguments (call by value)
1950 // different from the Fortran equivalent routines
1951 
1952 void ompc_set_num_threads(int arg) {
1953  // !!!!! TODO: check the per-task binding
1954  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1955 }
1956 
1957 void ompc_set_dynamic(int flag) {
1958  kmp_info_t *thread;
1959 
1960  /* For the thread-private implementation of the internal controls */
1961  thread = __kmp_entry_thread();
1962 
1963  __kmp_save_internal_controls(thread);
1964 
1965  set__dynamic(thread, flag ? true : false);
1966 }
1967 
1968 void ompc_set_nested(int flag) {
1969  kmp_info_t *thread;
1970 
1971  /* For the thread-private internal controls implementation */
1972  thread = __kmp_entry_thread();
1973 
1974  __kmp_save_internal_controls(thread);
1975 
1976  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1977 }
1978 
1979 void ompc_set_max_active_levels(int max_active_levels) {
1980  /* TO DO */
1981  /* we want per-task implementation of this internal control */
1982 
1983  /* For the per-thread internal controls implementation */
1984  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1985 }
1986 
1987 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1988  // !!!!! TODO: check the per-task binding
1989  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1990 }
1991 
1992 int ompc_get_ancestor_thread_num(int level) {
1993  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1994 }
1995 
1996 int ompc_get_team_size(int level) {
1997  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1998 }
1999 
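// --- Illustrative usage (editorial addition, not part of the runtime) ---
// The ompc_* entries above back the C-level ICV routines. A minimal user-side
// sketch of the same controls through the standard omp_* API; the helper name
// is hypothetical, omp.h is already included at the top of this file, and the
// sketch assumes a program compiled with OpenMP enabled (unlike the runtime).
static void doc_icv_usage_sketch(void) {
  omp_set_dynamic(0);           // the same ICV that ompc_set_dynamic() adjusts
  omp_set_max_active_levels(2); // cf. ompc_set_max_active_levels() above
#pragma omp parallel num_threads(2)
  {
#pragma omp parallel num_threads(2)
    {
      int anc = omp_get_ancestor_thread_num(1); // this thread's id in the outer team
      int sz = omp_get_team_size(1);            // outer team size, 2 here
      (void)anc;
      (void)sz;
    }
  }
}
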
2000 /* OpenMP 5.0 Affinity Format API */
2001 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2002  if (!__kmp_init_serial) {
2003  __kmp_serial_initialize();
2004  }
2005  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2006  format, KMP_STRLEN(format) + 1);
2007 }
2008 
2009 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2010  size_t format_size;
2011  if (!__kmp_init_serial) {
2012  __kmp_serial_initialize();
2013  }
2014  format_size = KMP_STRLEN(__kmp_affinity_format);
2015  if (buffer && size) {
2016  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2017  format_size + 1);
2018  }
2019  return format_size;
2020 }
2021 
2022 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2023  int gtid;
2024  if (!TCR_4(__kmp_init_middle)) {
2025  __kmp_middle_initialize();
2026  }
2027  __kmp_assign_root_init_mask();
2028  gtid = __kmp_get_gtid();
2029 #if KMP_AFFINITY_SUPPORTED
2030  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2031  __kmp_reset_root_init_mask(gtid);
2032  }
2033 #endif
2034  __kmp_aux_display_affinity(gtid, format);
2035 }
2036 
2037 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2038  char const *format) {
2039  int gtid;
2040  size_t num_required;
2041  kmp_str_buf_t capture_buf;
2042  if (!TCR_4(__kmp_init_middle)) {
2043  __kmp_middle_initialize();
2044  }
2045  __kmp_assign_root_init_mask();
2046  gtid = __kmp_get_gtid();
2047 #if KMP_AFFINITY_SUPPORTED
2048  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2049  __kmp_reset_root_init_mask(gtid);
2050  }
2051 #endif
2052  __kmp_str_buf_init(&capture_buf);
2053  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2054  if (buffer && buf_size) {
2055  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2056  capture_buf.used + 1);
2057  }
2058  __kmp_str_buf_free(&capture_buf);
2059  return num_required;
2060 }
2061 
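// --- Illustrative usage (editorial addition, not part of the runtime) ---
// User-side sketch of the OpenMP 5.0 affinity-format API implemented above.
// Passing NULL as the format to omp_capture_affinity() falls back to the
// affinity-format ICV, and the return value is the number of characters
// required (excluding the trailing NUL), mirroring ompc_capture_affinity().
// The helper name is hypothetical; assumes a program compiled with OpenMP.
static void doc_affinity_format_sketch(void) {
  omp_set_affinity_format("host=%{host} tid=%{thread_num} aff=%{thread_affinity}");
#pragma omp parallel
  {
    char buf[256];
    size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
    if (needed < sizeof(buf)) {
      // buf now holds this thread's affinity string; a real program might log
      // it or attach it to diagnostics.
    }
    (void)needed;
  }
}
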
2062 void kmpc_set_stacksize(int arg) {
2063  // __kmp_aux_set_stacksize initializes the library if needed
2064  __kmp_aux_set_stacksize(arg);
2065 }
2066 
2067 void kmpc_set_stacksize_s(size_t arg) {
2068  // __kmp_aux_set_stacksize initializes the library if needed
2069  __kmp_aux_set_stacksize(arg);
2070 }
2071 
2072 void kmpc_set_blocktime(int arg) {
2073  int gtid, tid;
2074  kmp_info_t *thread;
2075 
2076  gtid = __kmp_entry_gtid();
2077  tid = __kmp_tid_from_gtid(gtid);
2078  thread = __kmp_thread_from_gtid(gtid);
2079 
2080  __kmp_aux_set_blocktime(arg, thread, tid);
2081 }
2082 
2083 void kmpc_set_library(int arg) {
2084  // __kmp_user_set_library initializes the library if needed
2085  __kmp_user_set_library((enum library_type)arg);
2086 }
2087 
2088 void kmpc_set_defaults(char const *str) {
2089  // __kmp_aux_set_defaults initializes the library if needed
2090  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2091 }
2092 
2093 void kmpc_set_disp_num_buffers(int arg) {
2094  // ignore after initialization because some teams have already
2095  // allocated dispatch buffers
2096  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2097  arg <= KMP_MAX_DISP_NUM_BUFF) {
2098  __kmp_dispatch_num_buffers = arg;
2099  }
2100 }
2101 
2102 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2103 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2104  return -1;
2105 #else
2106  if (!TCR_4(__kmp_init_middle)) {
2107  __kmp_middle_initialize();
2108  }
2109  __kmp_assign_root_init_mask();
2110  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2111 #endif
2112 }
2113 
2114 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2115 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2116  return -1;
2117 #else
2118  if (!TCR_4(__kmp_init_middle)) {
2119  __kmp_middle_initialize();
2120  }
2121  __kmp_assign_root_init_mask();
2122  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2123 #endif
2124 }
2125 
2126 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2127 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2128  return -1;
2129 #else
2130  if (!TCR_4(__kmp_init_middle)) {
2131  __kmp_middle_initialize();
2132  }
2133  __kmp_assign_root_init_mask();
2134  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2135 #endif
2136 }
2137 
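// --- Illustrative usage (editorial addition, not part of the runtime) ---
// The kmpc_*_affinity_mask_proc() entries above sit behind the KMP affinity
// mask extension declared in omp.h. A small sketch; the helper name is
// hypothetical, and the return-value conventions noted in the comments are
// assumptions to verify against omp.h (on platforms without affinity support
// these calls return -1, as the entries above show).
static int doc_affinity_mask_sketch(int logical_proc) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(logical_proc, &mask) != 0) {
    kmp_destroy_affinity_mask(&mask);
    return -1; // affinity unsupported or invalid proc id
  }
  int present = kmp_get_affinity_mask_proc(logical_proc, &mask); // expect 1
  kmp_destroy_affinity_mask(&mask);
  return present;
}
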
2138 /* -------------------------------------------------------------------------- */
2183 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2184  void *cpy_data, void (*cpy_func)(void *, void *),
2185  kmp_int32 didit) {
2186  void **data_ptr;
2187  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2188  __kmp_assert_valid_gtid(gtid);
2189 
2190  KMP_MB();
2191 
2192  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2193 
2194  if (__kmp_env_consistency_check) {
2195  if (loc == 0) {
2196  KMP_WARNING(ConstructIdentInvalid);
2197  }
2198  }
2199 
2200  // ToDo: Optimize the following two barriers into some kind of split barrier
2201 
2202  if (didit)
2203  *data_ptr = cpy_data;
2204 
2205 #if OMPT_SUPPORT
2206  ompt_frame_t *ompt_frame;
2207  if (ompt_enabled.enabled) {
2208  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2209  if (ompt_frame->enter_frame.ptr == NULL)
2210  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2211  }
2212  OMPT_STORE_RETURN_ADDRESS(gtid);
2213 #endif
2214 /* This barrier is not a barrier region boundary */
2215 #if USE_ITT_NOTIFY
2216  __kmp_threads[gtid]->th.th_ident = loc;
2217 #endif
2218  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219 
2220  if (!didit)
2221  (*cpy_func)(cpy_data, *data_ptr);
2222 
2223  // Consider next barrier a user-visible barrier for barrier region boundaries
2224  // Nesting checks are already handled by the single construct checks
2225  {
2226 #if OMPT_SUPPORT
2227  OMPT_STORE_RETURN_ADDRESS(gtid);
2228 #endif
2229 #if USE_ITT_NOTIFY
2230  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2231 // tasks can overwrite the location)
2232 #endif
2233  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2234 #if OMPT_SUPPORT && OMPT_OPTIONAL
2235  if (ompt_enabled.enabled) {
2236  ompt_frame->enter_frame = ompt_data_none;
2237  }
2238 #endif
2239  }
2240 }
2241 
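// --- Illustrative sketch (editorial addition, not part of the runtime) ---
// User-level view of what reaches __kmpc_copyprivate(): one thread produces a
// private value inside 'single', and 'copyprivate' broadcasts it to every
// thread's private copy. The compiler supplies cpy_func (a member-wise copy
// routine) and passes the return value of __kmpc_single() as 'didit'. The
// helper name is hypothetical; assumes a program compiled with OpenMP enabled.
static void doc_copyprivate_usage_sketch(void) {
  int seed = 0;
#pragma omp parallel firstprivate(seed)
  {
#pragma omp single copyprivate(seed)
    seed = 42; // executed by one thread, then broadcast to the whole team
    // at this point every thread's private 'seed' is 42
  }
}
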
2242 /* --------------------------------------------------------------------------*/
2259 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2260  void **data_ptr;
2261 
2262  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2263 
2264  KMP_MB();
2265 
2266  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2267 
2268  if (__kmp_env_consistency_check) {
2269  if (loc == 0) {
2270  KMP_WARNING(ConstructIdentInvalid);
2271  }
2272  }
2273 
2274  // ToDo: Optimize the following barrier
2275 
2276  if (cpy_data)
2277  *data_ptr = cpy_data;
2278 
2279 #if OMPT_SUPPORT
2280  ompt_frame_t *ompt_frame;
2281  if (ompt_enabled.enabled) {
2282  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2283  if (ompt_frame->enter_frame.ptr == NULL)
2284  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2285  OMPT_STORE_RETURN_ADDRESS(gtid);
2286  }
2287 #endif
2288 /* This barrier is not a barrier region boundary */
2289 #if USE_ITT_NOTIFY
2290  __kmp_threads[gtid]->th.th_ident = loc;
2291 #endif
2292  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2293 
2294  return *data_ptr;
2295 }
2296 
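// --- Illustrative sketch (editorial addition, not part of the runtime) ---
// A guess at how generated code might use the "light" variant above: the
// single executor passes the address of its data, everyone else passes NULL,
// and after the internal barrier every thread receives the executor's pointer
// and copies from it. Any follow-up synchronization (so the executor does not
// reuse the storage too early) is assumed to be the caller's responsibility;
// the helper name and calling convention here are hypothetical.
static void doc_copyprivate_light_sketch(ident_t *loc, kmp_int32 gtid,
                                         int *my_priv, int did_single) {
  int *src =
      (int *)__kmpc_copyprivate_light(loc, gtid, did_single ? my_priv : NULL);
  if (!did_single)
    *my_priv = *src; // copy the broadcast value into this thread's private copy
}
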
2297 /* -------------------------------------------------------------------------- */
2298 
2299 #define INIT_LOCK __kmp_init_user_lock_with_checks
2300 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2301 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2302 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2303 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2304 #define ACQUIRE_NESTED_LOCK_TIMED \
2305  __kmp_acquire_nested_user_lock_with_checks_timed
2306 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2307 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2308 #define TEST_LOCK __kmp_test_user_lock_with_checks
2309 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2310 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2311 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2312 
2313 // TODO: Make check abort messages use location info & pass it into
2314 // with_checks routines
2315 
2316 #if KMP_USE_DYNAMIC_LOCK
2317 
2318 // internal lock initializer
2319 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2320  kmp_dyna_lockseq_t seq) {
2321  if (KMP_IS_D_LOCK(seq)) {
2322  KMP_INIT_D_LOCK(lock, seq);
2323 #if USE_ITT_BUILD
2324  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2325 #endif
2326  } else {
2327  KMP_INIT_I_LOCK(lock, seq);
2328 #if USE_ITT_BUILD
2329  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2330  __kmp_itt_lock_creating(ilk->lock, loc);
2331 #endif
2332  }
2333 }
2334 
2335 // internal nest lock initializer
2336 static __forceinline void
2337 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2338  kmp_dyna_lockseq_t seq) {
2339 #if KMP_USE_TSX
2340  // Don't have nested lock implementation for speculative locks
2341  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2342  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2343  seq = __kmp_user_lock_seq;
2344 #endif
2345  switch (seq) {
2346  case lockseq_tas:
2347  seq = lockseq_nested_tas;
2348  break;
2349 #if KMP_USE_FUTEX
2350  case lockseq_futex:
2351  seq = lockseq_nested_futex;
2352  break;
2353 #endif
2354  case lockseq_ticket:
2355  seq = lockseq_nested_ticket;
2356  break;
2357  case lockseq_queuing:
2358  seq = lockseq_nested_queuing;
2359  break;
2360  case lockseq_drdpa:
2361  seq = lockseq_nested_drdpa;
2362  break;
2363  default:
2364  seq = lockseq_nested_queuing;
2365  }
2366  KMP_INIT_I_LOCK(lock, seq);
2367 #if USE_ITT_BUILD
2368  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2369  __kmp_itt_lock_creating(ilk->lock, loc);
2370 #endif
2371 }
2372 
2373 /* initialize the lock with a hint */
2374 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2375  uintptr_t hint) {
2376  KMP_DEBUG_ASSERT(__kmp_init_serial);
2377  if (__kmp_env_consistency_check && user_lock == NULL) {
2378  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2379  }
2380 
2381  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2382 
2383 #if OMPT_SUPPORT && OMPT_OPTIONAL
2384  // This is the case, if called from omp_init_lock_with_hint:
2385  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2386  if (!codeptr)
2387  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2388  if (ompt_enabled.ompt_callback_lock_init) {
2389  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2390  ompt_mutex_lock, (omp_lock_hint_t)hint,
2391  __ompt_get_mutex_impl_type(user_lock),
2392  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2393  }
2394 #endif
2395 }
2396 
2397 /* initialize the lock with a hint */
2398 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2399  void **user_lock, uintptr_t hint) {
2400  KMP_DEBUG_ASSERT(__kmp_init_serial);
2401  if (__kmp_env_consistency_check && user_lock == NULL) {
2402  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2403  }
2404 
2405  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2406 
2407 #if OMPT_SUPPORT && OMPT_OPTIONAL
2408  // This is the case, if called from omp_init_lock_with_hint:
2409  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2410  if (!codeptr)
2411  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2412  if (ompt_enabled.ompt_callback_lock_init) {
2413  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2414  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2415  __ompt_get_mutex_impl_type(user_lock),
2416  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2417  }
2418 #endif
2419 }
2420 
2421 #endif // KMP_USE_DYNAMIC_LOCK
2422 
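// --- Illustrative usage (editorial addition, not part of the runtime) ---
// User-side path into the *_with_hint initializers above. The hint is
// advisory: on builds without a matching implementation (e.g. no TSX for the
// speculative hints) the runtime silently falls back, as the nested-lock
// initializer above also shows. The helper and global names are hypothetical.
static omp_lock_t doc_hinted_lock;
static void doc_lock_hint_usage_sketch(void) {
  omp_init_lock_with_hint(&doc_hinted_lock, omp_lock_hint_speculative);
  omp_set_lock(&doc_hinted_lock);
  // ... short critical work ...
  omp_unset_lock(&doc_hinted_lock);
  omp_destroy_lock(&doc_hinted_lock);
}
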
2423 /* initialize the lock */
2424 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2425 #if KMP_USE_DYNAMIC_LOCK
2426 
2427  KMP_DEBUG_ASSERT(__kmp_init_serial);
2428  if (__kmp_env_consistency_check && user_lock == NULL) {
2429  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2430  }
2431  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2432 
2433 #if OMPT_SUPPORT && OMPT_OPTIONAL
2434  // This is the case, if called from omp_init_lock_with_hint:
2435  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2436  if (!codeptr)
2437  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2438  if (ompt_enabled.ompt_callback_lock_init) {
2439  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2440  ompt_mutex_lock, omp_lock_hint_none,
2441  __ompt_get_mutex_impl_type(user_lock),
2442  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2443  }
2444 #endif
2445 
2446 #else // KMP_USE_DYNAMIC_LOCK
2447 
2448  static char const *const func = "omp_init_lock";
2449  kmp_user_lock_p lck;
2450  KMP_DEBUG_ASSERT(__kmp_init_serial);
2451 
2452  if (__kmp_env_consistency_check) {
2453  if (user_lock == NULL) {
2454  KMP_FATAL(LockIsUninitialized, func);
2455  }
2456  }
2457 
2458  KMP_CHECK_USER_LOCK_INIT();
2459 
2460  if ((__kmp_user_lock_kind == lk_tas) &&
2461  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2462  lck = (kmp_user_lock_p)user_lock;
2463  }
2464 #if KMP_USE_FUTEX
2465  else if ((__kmp_user_lock_kind == lk_futex) &&
2466  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2467  lck = (kmp_user_lock_p)user_lock;
2468  }
2469 #endif
2470  else {
2471  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2472  }
2473  INIT_LOCK(lck);
2474  __kmp_set_user_lock_location(lck, loc);
2475 
2476 #if OMPT_SUPPORT && OMPT_OPTIONAL
2477  // This is the case, if called from omp_init_lock_with_hint:
2478  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2479  if (!codeptr)
2480  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2481  if (ompt_enabled.ompt_callback_lock_init) {
2482  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2483  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2484  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2485  }
2486 #endif
2487 
2488 #if USE_ITT_BUILD
2489  __kmp_itt_lock_creating(lck);
2490 #endif /* USE_ITT_BUILD */
2491 
2492 #endif // KMP_USE_DYNAMIC_LOCK
2493 } // __kmpc_init_lock
2494 
2495 /* initialize the lock */
2496 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2497 #if KMP_USE_DYNAMIC_LOCK
2498 
2499  KMP_DEBUG_ASSERT(__kmp_init_serial);
2500  if (__kmp_env_consistency_check && user_lock == NULL) {
2501  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2502  }
2503  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2504 
2505 #if OMPT_SUPPORT && OMPT_OPTIONAL
2506  // This is the case, if called from omp_init_lock_with_hint:
2507  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2508  if (!codeptr)
2509  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2510  if (ompt_enabled.ompt_callback_lock_init) {
2511  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2512  ompt_mutex_nest_lock, omp_lock_hint_none,
2513  __ompt_get_mutex_impl_type(user_lock),
2514  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2515  }
2516 #endif
2517 
2518 #else // KMP_USE_DYNAMIC_LOCK
2519 
2520  static char const *const func = "omp_init_nest_lock";
2521  kmp_user_lock_p lck;
2522  KMP_DEBUG_ASSERT(__kmp_init_serial);
2523 
2524  if (__kmp_env_consistency_check) {
2525  if (user_lock == NULL) {
2526  KMP_FATAL(LockIsUninitialized, func);
2527  }
2528  }
2529 
2530  KMP_CHECK_USER_LOCK_INIT();
2531 
2532  if ((__kmp_user_lock_kind == lk_tas) &&
2533  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2534  OMP_NEST_LOCK_T_SIZE)) {
2535  lck = (kmp_user_lock_p)user_lock;
2536  }
2537 #if KMP_USE_FUTEX
2538  else if ((__kmp_user_lock_kind == lk_futex) &&
2539  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2540  OMP_NEST_LOCK_T_SIZE)) {
2541  lck = (kmp_user_lock_p)user_lock;
2542  }
2543 #endif
2544  else {
2545  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2546  }
2547 
2548  INIT_NESTED_LOCK(lck);
2549  __kmp_set_user_lock_location(lck, loc);
2550 
2551 #if OMPT_SUPPORT && OMPT_OPTIONAL
2552  // This is the case, if called from omp_init_lock_with_hint:
2553  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2554  if (!codeptr)
2555  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2556  if (ompt_enabled.ompt_callback_lock_init) {
2557  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2558  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2559  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2560  }
2561 #endif
2562 
2563 #if USE_ITT_BUILD
2564  __kmp_itt_lock_creating(lck);
2565 #endif /* USE_ITT_BUILD */
2566 
2567 #endif // KMP_USE_DYNAMIC_LOCK
2568 } // __kmpc_init_nest_lock
2569 
2570 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2571 #if KMP_USE_DYNAMIC_LOCK
2572 
2573 #if USE_ITT_BUILD
2574  kmp_user_lock_p lck;
2575  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2576  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2577  } else {
2578  lck = (kmp_user_lock_p)user_lock;
2579  }
2580  __kmp_itt_lock_destroyed(lck);
2581 #endif
2582 #if OMPT_SUPPORT && OMPT_OPTIONAL
2583  // This is the case, if called from omp_init_lock_with_hint:
2584  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2585  if (!codeptr)
2586  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2587  if (ompt_enabled.ompt_callback_lock_destroy) {
2588  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2589  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2590  }
2591 #endif
2592  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2593 #else
2594  kmp_user_lock_p lck;
2595 
2596  if ((__kmp_user_lock_kind == lk_tas) &&
2597  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598  lck = (kmp_user_lock_p)user_lock;
2599  }
2600 #if KMP_USE_FUTEX
2601  else if ((__kmp_user_lock_kind == lk_futex) &&
2602  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2603  lck = (kmp_user_lock_p)user_lock;
2604  }
2605 #endif
2606  else {
2607  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2608  }
2609 
2610 #if OMPT_SUPPORT && OMPT_OPTIONAL
2611  // This is the case, if called from omp_init_lock_with_hint:
2612  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2613  if (!codeptr)
2614  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2615  if (ompt_enabled.ompt_callback_lock_destroy) {
2616  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2617  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2618  }
2619 #endif
2620 
2621 #if USE_ITT_BUILD
2622  __kmp_itt_lock_destroyed(lck);
2623 #endif /* USE_ITT_BUILD */
2624  DESTROY_LOCK(lck);
2625 
2626  if ((__kmp_user_lock_kind == lk_tas) &&
2627  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2628  ;
2629  }
2630 #if KMP_USE_FUTEX
2631  else if ((__kmp_user_lock_kind == lk_futex) &&
2632  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2633  ;
2634  }
2635 #endif
2636  else {
2637  __kmp_user_lock_free(user_lock, gtid, lck);
2638  }
2639 #endif // KMP_USE_DYNAMIC_LOCK
2640 } // __kmpc_destroy_lock
2641 
2642 /* destroy the lock */
2643 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2644 #if KMP_USE_DYNAMIC_LOCK
2645 
2646 #if USE_ITT_BUILD
2647  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2648  __kmp_itt_lock_destroyed(ilk->lock);
2649 #endif
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651  // This is the case, if called from omp_init_lock_with_hint:
2652  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2653  if (!codeptr)
2654  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2655  if (ompt_enabled.ompt_callback_lock_destroy) {
2656  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2657  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2658  }
2659 #endif
2660  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2661 
2662 #else // KMP_USE_DYNAMIC_LOCK
2663 
2664  kmp_user_lock_p lck;
2665 
2666  if ((__kmp_user_lock_kind == lk_tas) &&
2667  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2668  OMP_NEST_LOCK_T_SIZE)) {
2669  lck = (kmp_user_lock_p)user_lock;
2670  }
2671 #if KMP_USE_FUTEX
2672  else if ((__kmp_user_lock_kind == lk_futex) &&
2673  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2674  OMP_NEST_LOCK_T_SIZE)) {
2675  lck = (kmp_user_lock_p)user_lock;
2676  }
2677 #endif
2678  else {
2679  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2680  }
2681 
2682 #if OMPT_SUPPORT && OMPT_OPTIONAL
2683  // This is the case, if called from omp_init_lock_with_hint:
2684  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2685  if (!codeptr)
2686  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687  if (ompt_enabled.ompt_callback_lock_destroy) {
2688  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690  }
2691 #endif
2692 
2693 #if USE_ITT_BUILD
2694  __kmp_itt_lock_destroyed(lck);
2695 #endif /* USE_ITT_BUILD */
2696 
2697  DESTROY_NESTED_LOCK(lck);
2698 
2699  if ((__kmp_user_lock_kind == lk_tas) &&
2700  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2701  OMP_NEST_LOCK_T_SIZE)) {
2702  ;
2703  }
2704 #if KMP_USE_FUTEX
2705  else if ((__kmp_user_lock_kind == lk_futex) &&
2706  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2707  OMP_NEST_LOCK_T_SIZE)) {
2708  ;
2709  }
2710 #endif
2711  else {
2712  __kmp_user_lock_free(user_lock, gtid, lck);
2713  }
2714 #endif // KMP_USE_DYNAMIC_LOCK
2715 } // __kmpc_destroy_nest_lock
2716 
2717 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2718  KMP_COUNT_BLOCK(OMP_set_lock);
2719 #if KMP_USE_DYNAMIC_LOCK
2720  int tag = KMP_EXTRACT_D_TAG(user_lock);
2721 #if USE_ITT_BUILD
2722  __kmp_itt_lock_acquiring(
2723  (kmp_user_lock_p)
2724  user_lock); // itt function will get to the right lock object.
2725 #endif
2726 #if OMPT_SUPPORT && OMPT_OPTIONAL
2727  // This is the case, if called from omp_init_lock_with_hint:
2728  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2729  if (!codeptr)
2730  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2731  if (ompt_enabled.ompt_callback_mutex_acquire) {
2732  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2733  ompt_mutex_lock, omp_lock_hint_none,
2734  __ompt_get_mutex_impl_type(user_lock),
2735  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2736  }
2737 #endif
2738 #if KMP_USE_INLINED_TAS
2739  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2740  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2741  } else
2742 #elif KMP_USE_INLINED_FUTEX
2743  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2744  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2745  } else
2746 #endif
2747  {
2748  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2749  }
2750 #if USE_ITT_BUILD
2751  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2752 #endif
2753 #if OMPT_SUPPORT && OMPT_OPTIONAL
2754  if (ompt_enabled.ompt_callback_mutex_acquired) {
2755  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2756  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2757  }
2758 #endif
2759 
2760 #else // KMP_USE_DYNAMIC_LOCK
2761 
2762  kmp_user_lock_p lck;
2763 
2764  if ((__kmp_user_lock_kind == lk_tas) &&
2765  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2766  lck = (kmp_user_lock_p)user_lock;
2767  }
2768 #if KMP_USE_FUTEX
2769  else if ((__kmp_user_lock_kind == lk_futex) &&
2770  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2771  lck = (kmp_user_lock_p)user_lock;
2772  }
2773 #endif
2774  else {
2775  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2776  }
2777 
2778 #if USE_ITT_BUILD
2779  __kmp_itt_lock_acquiring(lck);
2780 #endif /* USE_ITT_BUILD */
2781 #if OMPT_SUPPORT && OMPT_OPTIONAL
2782  // This is the case, if called from omp_init_lock_with_hint:
2783  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2784  if (!codeptr)
2785  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786  if (ompt_enabled.ompt_callback_mutex_acquire) {
2787  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2788  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2789  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2790  }
2791 #endif
2792 
2793  ACQUIRE_LOCK(lck, gtid);
2794 
2795 #if USE_ITT_BUILD
2796  __kmp_itt_lock_acquired(lck);
2797 #endif /* USE_ITT_BUILD */
2798 
2799 #if OMPT_SUPPORT && OMPT_OPTIONAL
2800  if (ompt_enabled.ompt_callback_mutex_acquired) {
2801  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2802  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2803  }
2804 #endif
2805 
2806 #endif // KMP_USE_DYNAMIC_LOCK
2807 }
2808 
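// --- Illustrative usage (editorial addition, not part of the runtime) ---
// User-level view of the acquire path above: omp_set_lock()/omp_unset_lock()
// correspond to __kmpc_set_lock()/__kmpc_unset_lock(). The helper name is
// hypothetical; assumes a program compiled with OpenMP enabled.
static void doc_lock_usage_sketch(int *counter) {
  omp_lock_t l;
  omp_init_lock(&l);
#pragma omp parallel
  {
    omp_set_lock(&l);   // blocks until the lock is acquired
    ++*counter;         // work guarded by the lock
    omp_unset_lock(&l); // see __kmpc_unset_lock() below
  }
  omp_destroy_lock(&l);
}
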
2809 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2810 #if KMP_USE_DYNAMIC_LOCK
2811 
2812 #if USE_ITT_BUILD
2813  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2814 #endif
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816  // This is the case, if called from omp_init_lock_with_hint:
2817  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818  if (!codeptr)
2819  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820  if (ompt_enabled.enabled) {
2821  if (ompt_enabled.ompt_callback_mutex_acquire) {
2822  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2823  ompt_mutex_nest_lock, omp_lock_hint_none,
2824  __ompt_get_mutex_impl_type(user_lock),
2825  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2826  }
2827  }
2828 #endif
2829  int acquire_status =
2830  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2831  (void)acquire_status;
2832 #if USE_ITT_BUILD
2833  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2834 #endif
2835 
2836 #if OMPT_SUPPORT && OMPT_OPTIONAL
2837  if (ompt_enabled.enabled) {
2838  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2839  if (ompt_enabled.ompt_callback_mutex_acquired) {
2840  // lock_first
2841  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2842  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2843  codeptr);
2844  }
2845  } else {
2846  if (ompt_enabled.ompt_callback_nest_lock) {
2847  // lock_next
2848  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2849  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2850  }
2851  }
2852  }
2853 #endif
2854 
2855 #else // KMP_USE_DYNAMIC_LOCK
2856  int acquire_status;
2857  kmp_user_lock_p lck;
2858 
2859  if ((__kmp_user_lock_kind == lk_tas) &&
2860  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2861  OMP_NEST_LOCK_T_SIZE)) {
2862  lck = (kmp_user_lock_p)user_lock;
2863  }
2864 #if KMP_USE_FUTEX
2865  else if ((__kmp_user_lock_kind == lk_futex) &&
2866  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2867  OMP_NEST_LOCK_T_SIZE)) {
2868  lck = (kmp_user_lock_p)user_lock;
2869  }
2870 #endif
2871  else {
2872  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2873  }
2874 
2875 #if USE_ITT_BUILD
2876  __kmp_itt_lock_acquiring(lck);
2877 #endif /* USE_ITT_BUILD */
2878 #if OMPT_SUPPORT && OMPT_OPTIONAL
2879  // This is the case, if called from omp_init_lock_with_hint:
2880  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2881  if (!codeptr)
2882  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2883  if (ompt_enabled.enabled) {
2884  if (ompt_enabled.ompt_callback_mutex_acquire) {
2885  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2886  ompt_mutex_nest_lock, omp_lock_hint_none,
2887  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2888  codeptr);
2889  }
2890  }
2891 #endif
2892 
2893  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2894 
2895 #if USE_ITT_BUILD
2896  __kmp_itt_lock_acquired(lck);
2897 #endif /* USE_ITT_BUILD */
2898 
2899 #if OMPT_SUPPORT && OMPT_OPTIONAL
2900  if (ompt_enabled.enabled) {
2901  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2902  if (ompt_enabled.ompt_callback_mutex_acquired) {
2903  // lock_first
2904  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2905  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2906  }
2907  } else {
2908  if (ompt_enabled.ompt_callback_nest_lock) {
2909  // lock_next
2910  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2911  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2912  }
2913  }
2914  }
2915 #endif
2916 
2917 #endif // KMP_USE_DYNAMIC_LOCK
2918 }
2919 
2920 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2921 #if KMP_USE_DYNAMIC_LOCK
2922 
2923  int tag = KMP_EXTRACT_D_TAG(user_lock);
2924 #if USE_ITT_BUILD
2925  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2926 #endif
2927 #if KMP_USE_INLINED_TAS
2928  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2929  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2930  } else
2931 #elif KMP_USE_INLINED_FUTEX
2932  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2933  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2934  } else
2935 #endif
2936  {
2937  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2938  }
2939 
2940 #if OMPT_SUPPORT && OMPT_OPTIONAL
2941  // This is the case, if called from omp_init_lock_with_hint:
2942  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2943  if (!codeptr)
2944  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2945  if (ompt_enabled.ompt_callback_mutex_released) {
2946  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2947  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2948  }
2949 #endif
2950 
2951 #else // KMP_USE_DYNAMIC_LOCK
2952 
2953  kmp_user_lock_p lck;
2954 
2955  /* Can't use serial interval since not block structured */
2956  /* release the lock */
2957 
2958  if ((__kmp_user_lock_kind == lk_tas) &&
2959  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2960 #if KMP_OS_LINUX && \
2961  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2962 // "fast" path implemented to fix customer performance issue
2963 #if USE_ITT_BUILD
2964  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2965 #endif /* USE_ITT_BUILD */
2966  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2967  KMP_MB();
2968 
2969 #if OMPT_SUPPORT && OMPT_OPTIONAL
2970  // This is the case, if called from omp_init_lock_with_hint:
2971  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2972  if (!codeptr)
2973  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2974  if (ompt_enabled.ompt_callback_mutex_released) {
2975  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2976  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2977  }
2978 #endif
2979 
2980  return;
2981 #else
2982  lck = (kmp_user_lock_p)user_lock;
2983 #endif
2984  }
2985 #if KMP_USE_FUTEX
2986  else if ((__kmp_user_lock_kind == lk_futex) &&
2987  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2988  lck = (kmp_user_lock_p)user_lock;
2989  }
2990 #endif
2991  else {
2992  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2993  }
2994 
2995 #if USE_ITT_BUILD
2996  __kmp_itt_lock_releasing(lck);
2997 #endif /* USE_ITT_BUILD */
2998 
2999  RELEASE_LOCK(lck, gtid);
3000 
3001 #if OMPT_SUPPORT && OMPT_OPTIONAL
3002  // This is the case, if called from omp_init_lock_with_hint:
3003  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3004  if (!codeptr)
3005  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006  if (ompt_enabled.ompt_callback_mutex_released) {
3007  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3009  }
3010 #endif
3011 
3012 #endif // KMP_USE_DYNAMIC_LOCK
3013 }
3014 
3015 /* release the lock */
3016 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3017 #if KMP_USE_DYNAMIC_LOCK
3018 
3019 #if USE_ITT_BUILD
3020  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3021 #endif
3022  int release_status =
3023  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3024  (void)release_status;
3025 
3026 #if OMPT_SUPPORT && OMPT_OPTIONAL
3027  // This is the case, if called from omp_init_lock_with_hint:
3028  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3029  if (!codeptr)
3030  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031  if (ompt_enabled.enabled) {
3032  if (release_status == KMP_LOCK_RELEASED) {
3033  if (ompt_enabled.ompt_callback_mutex_released) {
3034  // release_lock_last
3035  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3037  codeptr);
3038  }
3039  } else if (ompt_enabled.ompt_callback_nest_lock) {
3040  // release_lock_prev
3041  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3042  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3043  }
3044  }
3045 #endif
3046 
3047 #else // KMP_USE_DYNAMIC_LOCK
3048 
3049  kmp_user_lock_p lck;
3050 
3051  /* Can't use serial interval since not block structured */
3052 
3053  if ((__kmp_user_lock_kind == lk_tas) &&
3054  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3055  OMP_NEST_LOCK_T_SIZE)) {
3056 #if KMP_OS_LINUX && \
3057  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3058  // "fast" path implemented to fix customer performance issue
3059  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3060 #if USE_ITT_BUILD
3061  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3062 #endif /* USE_ITT_BUILD */
3063 
3064 #if OMPT_SUPPORT && OMPT_OPTIONAL
3065  int release_status = KMP_LOCK_STILL_HELD;
3066 #endif
3067 
3068  if (--(tl->lk.depth_locked) == 0) {
3069  TCW_4(tl->lk.poll, 0);
3070 #if OMPT_SUPPORT && OMPT_OPTIONAL
3071  release_status = KMP_LOCK_RELEASED;
3072 #endif
3073  }
3074  KMP_MB();
3075 
3076 #if OMPT_SUPPORT && OMPT_OPTIONAL
3077  // This is the case, if called from omp_init_lock_with_hint:
3078  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3079  if (!codeptr)
3080  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3081  if (ompt_enabled.enabled) {
3082  if (release_status == KMP_LOCK_RELEASED) {
3083  if (ompt_enabled.ompt_callback_mutex_released) {
3084  // release_lock_last
3085  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3086  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3087  }
3088  } else if (ompt_enabled.ompt_callback_nest_lock) {
3089  // release_lock_previous
3090  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3091  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3092  }
3093  }
3094 #endif
3095 
3096  return;
3097 #else
3098  lck = (kmp_user_lock_p)user_lock;
3099 #endif
3100  }
3101 #if KMP_USE_FUTEX
3102  else if ((__kmp_user_lock_kind == lk_futex) &&
3103  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3104  OMP_NEST_LOCK_T_SIZE)) {
3105  lck = (kmp_user_lock_p)user_lock;
3106  }
3107 #endif
3108  else {
3109  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3110  }
3111 
3112 #if USE_ITT_BUILD
3113  __kmp_itt_lock_releasing(lck);
3114 #endif /* USE_ITT_BUILD */
3115 
3116  int release_status;
3117  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3118 #if OMPT_SUPPORT && OMPT_OPTIONAL
3119  // This is the case, if called from omp_init_lock_with_hint:
3120  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3121  if (!codeptr)
3122  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3123  if (ompt_enabled.enabled) {
3124  if (release_status == KMP_LOCK_RELEASED) {
3125  if (ompt_enabled.ompt_callback_mutex_released) {
3126  // release_lock_last
3127  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3128  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3129  }
3130  } else if (ompt_enabled.ompt_callback_nest_lock) {
3131  // release_lock_previous
3132  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3133  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3134  }
3135  }
3136 #endif
3137 
3138 #endif // KMP_USE_DYNAMIC_LOCK
3139 }
3140 
3141 /* try to acquire the lock */
3142 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3143  KMP_COUNT_BLOCK(OMP_test_lock);
3144 
3145 #if KMP_USE_DYNAMIC_LOCK
3146  int rc;
3147  int tag = KMP_EXTRACT_D_TAG(user_lock);
3148 #if USE_ITT_BUILD
3149  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3150 #endif
3151 #if OMPT_SUPPORT && OMPT_OPTIONAL
3152  // This is the case, if called from omp_init_lock_with_hint:
3153  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154  if (!codeptr)
3155  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3156  if (ompt_enabled.ompt_callback_mutex_acquire) {
3157  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3158  ompt_mutex_lock, omp_lock_hint_none,
3159  __ompt_get_mutex_impl_type(user_lock),
3160  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3161  }
3162 #endif
3163 #if KMP_USE_INLINED_TAS
3164  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3165  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3166  } else
3167 #elif KMP_USE_INLINED_FUTEX
3168  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3169  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3170  } else
3171 #endif
3172  {
3173  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3174  }
3175  if (rc) {
3176 #if USE_ITT_BUILD
3177  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3178 #endif
3179 #if OMPT_SUPPORT && OMPT_OPTIONAL
3180  if (ompt_enabled.ompt_callback_mutex_acquired) {
3181  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3182  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3183  }
3184 #endif
3185  return FTN_TRUE;
3186  } else {
3187 #if USE_ITT_BUILD
3188  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3189 #endif
3190  return FTN_FALSE;
3191  }
3192 
3193 #else // KMP_USE_DYNAMIC_LOCK
3194 
3195  kmp_user_lock_p lck;
3196  int rc;
3197 
3198  if ((__kmp_user_lock_kind == lk_tas) &&
3199  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3200  lck = (kmp_user_lock_p)user_lock;
3201  }
3202 #if KMP_USE_FUTEX
3203  else if ((__kmp_user_lock_kind == lk_futex) &&
3204  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3205  lck = (kmp_user_lock_p)user_lock;
3206  }
3207 #endif
3208  else {
3209  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3210  }
3211 
3212 #if USE_ITT_BUILD
3213  __kmp_itt_lock_acquiring(lck);
3214 #endif /* USE_ITT_BUILD */
3215 #if OMPT_SUPPORT && OMPT_OPTIONAL
3216  // This is the case, if called from omp_init_lock_with_hint:
3217  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3218  if (!codeptr)
3219  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3220  if (ompt_enabled.ompt_callback_mutex_acquire) {
3221  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3222  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3223  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3224  }
3225 #endif
3226 
3227  rc = TEST_LOCK(lck, gtid);
3228 #if USE_ITT_BUILD
3229  if (rc) {
3230  __kmp_itt_lock_acquired(lck);
3231  } else {
3232  __kmp_itt_lock_cancelled(lck);
3233  }
3234 #endif /* USE_ITT_BUILD */
3235 #if OMPT_SUPPORT && OMPT_OPTIONAL
3236  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3237  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3238  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3239  }
3240 #endif
3241 
3242  return (rc ? FTN_TRUE : FTN_FALSE);
3243 
3244  /* Can't use serial interval since not block structured */
3245 
3246 #endif // KMP_USE_DYNAMIC_LOCK
3247 }
3248 
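// --- Illustrative usage (editorial addition, not part of the runtime) ---
// Non-blocking pattern matching __kmpc_test_lock() above: omp_test_lock()
// returns nonzero only when this call acquired the lock, so the caller can do
// other work and retry instead of blocking. The helper name is hypothetical.
static int doc_test_lock_sketch(omp_lock_t *l, int *work_done) {
  if (omp_test_lock(l)) {
    ++*work_done; // we own the lock: do the protected work
    omp_unset_lock(l);
    return 1;
  }
  return 0; // lock was busy; the caller may retry later
}
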
3249 /* try to acquire the lock */
3250 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3251 #if KMP_USE_DYNAMIC_LOCK
3252  int rc;
3253 #if USE_ITT_BUILD
3254  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3255 #endif
3256 #if OMPT_SUPPORT && OMPT_OPTIONAL
3257  // This is the case, if called from omp_init_lock_with_hint:
3258  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3259  if (!codeptr)
3260  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3261  if (ompt_enabled.ompt_callback_mutex_acquire) {
3262  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263  ompt_mutex_nest_lock, omp_lock_hint_none,
3264  __ompt_get_mutex_impl_type(user_lock),
3265  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3266  }
3267 #endif
3268  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3269 #if USE_ITT_BUILD
3270  if (rc) {
3271  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3272  } else {
3273  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3274  }
3275 #endif
3276 #if OMPT_SUPPORT && OMPT_OPTIONAL
3277  if (ompt_enabled.enabled && rc) {
3278  if (rc == 1) {
3279  if (ompt_enabled.ompt_callback_mutex_acquired) {
3280  // lock_first
3281  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3282  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3283  codeptr);
3284  }
3285  } else {
3286  if (ompt_enabled.ompt_callback_nest_lock) {
3287  // lock_next
3288  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3290  }
3291  }
3292  }
3293 #endif
3294  return rc;
3295 
3296 #else // KMP_USE_DYNAMIC_LOCK
3297 
3298  kmp_user_lock_p lck;
3299  int rc;
3300 
3301  if ((__kmp_user_lock_kind == lk_tas) &&
3302  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3303  OMP_NEST_LOCK_T_SIZE)) {
3304  lck = (kmp_user_lock_p)user_lock;
3305  }
3306 #if KMP_USE_FUTEX
3307  else if ((__kmp_user_lock_kind == lk_futex) &&
3308  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3309  OMP_NEST_LOCK_T_SIZE)) {
3310  lck = (kmp_user_lock_p)user_lock;
3311  }
3312 #endif
3313  else {
3314  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3315  }
3316 
3317 #if USE_ITT_BUILD
3318  __kmp_itt_lock_acquiring(lck);
3319 #endif /* USE_ITT_BUILD */
3320 
3321 #if OMPT_SUPPORT && OMPT_OPTIONAL
3322  // This is the case, if called from omp_init_lock_with_hint:
3323  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3324  if (!codeptr)
3325  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3326  if (ompt_enabled.enabled &&
3327  ompt_enabled.ompt_callback_mutex_acquire) {
3328  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3329  ompt_mutex_nest_lock, omp_lock_hint_none,
3330  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3331  codeptr);
3332  }
3333 #endif
3334 
3335  rc = TEST_NESTED_LOCK(lck, gtid);
3336 #if USE_ITT_BUILD
3337  if (rc) {
3338  __kmp_itt_lock_acquired(lck);
3339  } else {
3340  __kmp_itt_lock_cancelled(lck);
3341  }
3342 #endif /* USE_ITT_BUILD */
3343 #if OMPT_SUPPORT && OMPT_OPTIONAL
3344  if (ompt_enabled.enabled && rc) {
3345  if (rc == 1) {
3346  if (ompt_enabled.ompt_callback_mutex_acquired) {
3347  // lock_first
3348  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3349  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3350  }
3351  } else {
3352  if (ompt_enabled.ompt_callback_nest_lock) {
3353  // lock_next
3354  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3355  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3356  }
3357  }
3358  }
3359 #endif
3360  return rc;
3361 
3362  /* Can't use serial interval since not block structured */
3363 
3364 #endif // KMP_USE_DYNAMIC_LOCK
3365 }
3366 
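// --- Illustrative usage (editorial addition, not part of the runtime) ---
// User-level view of the nestable-lock entries above: the owner may
// re-acquire the lock (each set nests one level deeper), and ownership is
// released only by the matching outermost unset. The helper and global names
// are hypothetical; assumes a program compiled with OpenMP enabled.
static omp_nest_lock_t doc_nest_lock;
static void doc_nest_lock_helper(int depth) {
  omp_set_nest_lock(&doc_nest_lock); // re-acquisition by the owner just nests
  if (depth > 0)
    doc_nest_lock_helper(depth - 1); // recurse while still holding the lock
  omp_unset_nest_lock(&doc_nest_lock);
}
static void doc_nest_lock_sketch(void) {
  omp_init_nest_lock(&doc_nest_lock);
#pragma omp parallel
  doc_nest_lock_helper(2);
  omp_destroy_nest_lock(&doc_nest_lock);
}
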
3367 // Interface to fast scalable reduce methods routines
3368 
3369 // keep the selected method in a thread local structure for cross-function
3370 // usage: will be used in __kmpc_end_reduce* functions;
3371 // another solution: to re-determine the method one more time in
3372 // __kmpc_end_reduce* functions (new prototype required then)
3373 // AT: which solution is better?
3374 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3375  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3376 
3377 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3378  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3379 
3380 // description of the packed_reduction_method variable: look at the macros in
3381 // kmp.h
3382 
3383 // used in a critical section reduce block
3384 static __forceinline void
3385 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3386  kmp_critical_name *crit) {
3387 
3388  // this lock was visible to a customer and to the threading profile tool as a
3389  // serial overhead span (although it's used for an internal purpose only)
3390  // why was it visible in previous implementation?
3391  // should we keep it visible in new reduce block?
3392  kmp_user_lock_p lck;
3393 
3394 #if KMP_USE_DYNAMIC_LOCK
3395 
3396  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3397  // Check if it is initialized.
3398  if (*lk == 0) {
3399  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3400  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3401  KMP_GET_D_TAG(__kmp_user_lock_seq));
3402  } else {
3403  __kmp_init_indirect_csptr(crit, loc, global_tid,
3404  KMP_GET_I_TAG(__kmp_user_lock_seq));
3405  }
3406  }
3407  // Branch for accessing the actual lock object and set operation. This
3408  // branching is inevitable since this lock initialization does not follow the
3409  // normal dispatch path (lock table is not used).
3410  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3411  lck = (kmp_user_lock_p)lk;
3412  KMP_DEBUG_ASSERT(lck != NULL);
3413  if (__kmp_env_consistency_check) {
3414  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3415  }
3416  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3417  } else {
3418  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3419  lck = ilk->lock;
3420  KMP_DEBUG_ASSERT(lck != NULL);
3421  if (__kmp_env_consistency_check) {
3422  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3423  }
3424  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3425  }
3426 
3427 #else // KMP_USE_DYNAMIC_LOCK
3428 
3429  // We know that the fast reduction code is only emitted by Intel compilers
3430  // with 32 byte critical sections. If there isn't enough space, then we
3431  // have to use a pointer.
3432  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3433  lck = (kmp_user_lock_p)crit;
3434  } else {
3435  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3436  }
3437  KMP_DEBUG_ASSERT(lck != NULL);
3438 
3439  if (__kmp_env_consistency_check)
3440  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3441 
3442  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3443 
3444 #endif // KMP_USE_DYNAMIC_LOCK
3445 }
3446 
3447 // used in a critical section reduce block
3448 static __forceinline void
3449 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3450  kmp_critical_name *crit) {
3451 
3452  kmp_user_lock_p lck;
3453 
3454 #if KMP_USE_DYNAMIC_LOCK
3455 
3456  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3457  lck = (kmp_user_lock_p)crit;
3458  if (__kmp_env_consistency_check)
3459  __kmp_pop_sync(global_tid, ct_critical, loc);
3460  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3461  } else {
3462  kmp_indirect_lock_t *ilk =
3463  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3464  if (__kmp_env_consistency_check)
3465  __kmp_pop_sync(global_tid, ct_critical, loc);
3466  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3467  }
3468 
3469 #else // KMP_USE_DYNAMIC_LOCK
3470 
3471  // We know that the fast reduction code is only emitted by Intel compilers
3472  // with 32 byte critical sections. If there isn't enough space, then we have
3473  // to use a pointer.
3474  if (__kmp_base_user_lock_size > 32) {
3475  lck = *((kmp_user_lock_p *)crit);
3476  KMP_ASSERT(lck != NULL);
3477  } else {
3478  lck = (kmp_user_lock_p)crit;
3479  }
3480 
3481  if (__kmp_env_consistency_check)
3482  __kmp_pop_sync(global_tid, ct_critical, loc);
3483 
3484  __kmp_release_user_lock_with_checks(lck, global_tid);
3485 
3486 #endif // KMP_USE_DYNAMIC_LOCK
3487 } // __kmp_end_critical_section_reduce_block
3488 
3489 static __forceinline int
3490 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3491  int *task_state) {
3492  kmp_team_t *team;
3493 
3494  // Check whether we are inside the teams construct.
3495  if (th->th.th_teams_microtask) {
3496  *team_p = team = th->th.th_team;
3497  if (team->t.t_level == th->th.th_teams_level) {
3498  // This is reduction at teams construct.
3499  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3500  // Let's swap teams temporarily for the reduction.
3501  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3502  th->th.th_team = team->t.t_parent;
3503  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3504  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3505  *task_state = th->th.th_task_state;
3506  th->th.th_task_state = 0;
3507 
3508  return 1;
3509  }
3510  }
3511  return 0;
3512 }
3513 
3514 static __forceinline void
3515 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3516  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3517  th->th.th_info.ds.ds_tid = 0;
3518  th->th.th_team = team;
3519  th->th.th_team_nproc = team->t.t_nproc;
3520  th->th.th_task_team = team->t.t_task_team[task_state];
3521  __kmp_type_convert(task_state, &(th->th.th_task_state));
3522 }
3523 
3524 /* 2.a.i. Reduce Block without a terminating barrier */
3540 kmp_int32
3541 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3542  size_t reduce_size, void *reduce_data,
3543  void (*reduce_func)(void *lhs_data, void *rhs_data),
3544  kmp_critical_name *lck) {
3545 
3546  KMP_COUNT_BLOCK(REDUCE_nowait);
3547  int retval = 0;
3548  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3549  kmp_info_t *th;
3550  kmp_team_t *team;
3551  int teams_swapped = 0, task_state;
3552  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3553  __kmp_assert_valid_gtid(global_tid);
3554 
3555  // why do we need this initialization here at all?
3556  // Reduction clause cannot be used as a stand-alone directive.
3557 
3558  // do not call __kmp_serial_initialize(), it will be called by
3559  // __kmp_parallel_initialize() if needed
3560  // possible detection of false-positive race by the threadchecker ???
3561  if (!TCR_4(__kmp_init_parallel))
3562  __kmp_parallel_initialize();
3563 
3564  __kmp_resume_if_soft_paused();
3565 
3566 // check correctness of reduce block nesting
3567 #if KMP_USE_DYNAMIC_LOCK
3568  if (__kmp_env_consistency_check)
3569  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3570 #else
3571  if (__kmp_env_consistency_check)
3572  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3573 #endif
3574 
3575  th = __kmp_thread_from_gtid(global_tid);
3576  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3577 
3578  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3579  // the value should be kept in a variable
3580  // the variable should be either a construct-specific or thread-specific
3581  // property, not a team specific property
3582  // (a thread can reach the next reduce block on the next construct, reduce
3583  // method may differ on the next construct)
3584  // an ident_t "loc" parameter could be used as a construct-specific property
3585  // (what if loc == 0?)
3586  // (if both construct-specific and team-specific variables were shared,
3587  // then unnecessary extra syncs would be needed)
3588  // a thread-specific variable is better regarding two issues above (next
3589  // construct and extra syncs)
3590  // a thread-specific "th_local.reduction_method" variable is used currently
3591  // each thread executes the 'determine' and 'set' lines (no need to restrict
3592  // this to one thread, which avoids unnecessary extra syncs)
3593 
3594  packed_reduction_method = __kmp_determine_reduction_method(
3595  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3596  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3597 
3598  OMPT_REDUCTION_DECL(th, global_tid);
3599  if (packed_reduction_method == critical_reduce_block) {
3600 
3601  OMPT_REDUCTION_BEGIN;
3602 
3603  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3604  retval = 1;
3605 
3606  } else if (packed_reduction_method == empty_reduce_block) {
3607 
3608  OMPT_REDUCTION_BEGIN;
3609 
3610  // usage: if team size == 1, no synchronization is required ( Intel
3611  // platforms only )
3612  retval = 1;
3613 
3614  } else if (packed_reduction_method == atomic_reduce_block) {
3615 
3616  retval = 2;
3617 
3618  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3619  // won't be called by the code gen)
3620  // (it's not quite good, because the checking block has been closed by
3621  // this 'pop',
3622  // but atomic operation has not been executed yet, will be executed
3623  // slightly later, literally on next instruction)
3624  if (__kmp_env_consistency_check)
3625  __kmp_pop_sync(global_tid, ct_reduce, loc);
3626 
3627  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3628  tree_reduce_block)) {
3629 
3630 // AT: performance issue: a real barrier here
3631 // AT: (if primary thread is slow, other threads are blocked here waiting for
3632 // the primary thread to come and release them)
3633 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3634 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3635 // be confusing to a customer)
3636 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3637 // might go faster and be more in line with the sense of NOWAIT
3638 // AT: TO DO: do epcc test and compare times
3639 
3640 // this barrier should be invisible to a customer and to the threading profile
3641 // tool (it's neither a terminating barrier nor customer's code, it's
3642 // used for an internal purpose)
3643 #if OMPT_SUPPORT
3644  // JP: can this barrier potentially lead to task scheduling?
3645  // JP: as long as there is a barrier in the implementation, OMPT should and
3646  // will provide the barrier events, so we set up the necessary frame/return
3647  // addresses.
3648  ompt_frame_t *ompt_frame;
3649  if (ompt_enabled.enabled) {
3650  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3651  if (ompt_frame->enter_frame.ptr == NULL)
3652  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3653  }
3654  OMPT_STORE_RETURN_ADDRESS(global_tid);
3655 #endif
3656 #if USE_ITT_NOTIFY
3657  __kmp_threads[global_tid]->th.th_ident = loc;
3658 #endif
3659  retval =
3660  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3661  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3662  retval = (retval != 0) ? (0) : (1);
3663 #if OMPT_SUPPORT && OMPT_OPTIONAL
3664  if (ompt_enabled.enabled) {
3665  ompt_frame->enter_frame = ompt_data_none;
3666  }
3667 #endif
3668 
3669  // all other workers except primary thread should do this pop here
3670  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3671  if (__kmp_env_consistency_check) {
3672  if (retval == 0) {
3673  __kmp_pop_sync(global_tid, ct_reduce, loc);
3674  }
3675  }
3676 
3677  } else {
3678 
3679  // should never reach this block
3680  KMP_ASSERT(0); // "unexpected method"
3681  }
3682  if (teams_swapped) {
3683  __kmp_restore_swapped_teams(th, team, task_state);
3684  }
3685  KA_TRACE(
3686  10,
3687  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3688  global_tid, packed_reduction_method, retval));
3689 
3690  return retval;
3691 }
3692 
3701 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3702  kmp_critical_name *lck) {
3703 
3704  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3705 
3706  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3707  __kmp_assert_valid_gtid(global_tid);
3708 
3709  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3710 
3711  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3712 
3713  if (packed_reduction_method == critical_reduce_block) {
3714 
3715  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3716  OMPT_REDUCTION_END;
3717 
3718  } else if (packed_reduction_method == empty_reduce_block) {
3719 
3720  // used when team size == 1: no synchronization is required (Intel
3721  // platforms only)
3722 
3723  OMPT_REDUCTION_END;
3724 
3725  } else if (packed_reduction_method == atomic_reduce_block) {
3726 
3727  // neither the primary thread nor the other workers should get here
3728  // (code gen does not generate this call in case 2: atomic reduce block)
3729  // it would actually be better to remove this else-if entirely;
3730  // after removal this value would be caught by the 'else' and would assert
3731 
3732  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3733  tree_reduce_block)) {
3734 
3735  // only primary thread gets here
3736  // OMPT: tree reduction is annotated in the barrier code
3737 
3738  } else {
3739 
3740  // should never reach this block
3741  KMP_ASSERT(0); // "unexpected method"
3742  }
3743 
3744  if (__kmp_env_consistency_check)
3745  __kmp_pop_sync(global_tid, ct_reduce, loc);
3746 
3747  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3748  global_tid, packed_reduction_method));
3749 
3750  return;
3751 }
3752 
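/* Illustrative sketch (not part of the runtime): roughly how a compiler might
   lower "#pragma omp for reduction(+:sum) nowait" onto the
   __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() pair. The names sum,
   sum_priv, reduce_lck and my_reduce_func are hypothetical.

     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum_priv), &sum_priv,
                                  my_reduce_func, &reduce_lck)) {
     case 1: // combine directly (critical/empty/tree method), then end
       sum += sum_priv;
       __kmpc_end_reduce_nowait(loc, gtid, &reduce_lck);
       break;
     case 2: // atomic method: combine with atomic updates; no end call is made
       break;
     default: // 0: tree-reduce worker whose data was already combined
       break;
     }
*/
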
3753 /* 2.a.ii. Reduce Block with a terminating barrier */
3754 
3770 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3771  size_t reduce_size, void *reduce_data,
3772  void (*reduce_func)(void *lhs_data, void *rhs_data),
3773  kmp_critical_name *lck) {
3774  KMP_COUNT_BLOCK(REDUCE_wait);
3775  int retval = 0;
3776  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3777  kmp_info_t *th;
3778  kmp_team_t *team;
3779  int teams_swapped = 0, task_state;
3780 
3781  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3782  __kmp_assert_valid_gtid(global_tid);
3783 
3784  // why do we need this initialization here at all?
3785  // A reduction clause cannot appear on a stand-alone directive, so the
3786  // enclosing construct should already have initialized the runtime.
3787  // do not call __kmp_serial_initialize(); it will be called by
3788  // __kmp_parallel_initialize() if needed
3789  // possible false-positive race detection by the thread checker ???
3790  if (!TCR_4(__kmp_init_parallel))
3791  __kmp_parallel_initialize();
3792 
3793  __kmp_resume_if_soft_paused();
3794 
3795 // check correctness of reduce block nesting
3796 #if KMP_USE_DYNAMIC_LOCK
3797  if (__kmp_env_consistency_check)
3798  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3799 #else
3800  if (__kmp_env_consistency_check)
3801  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3802 #endif
3803 
3804  th = __kmp_thread_from_gtid(global_tid);
3805  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3806 
3807  packed_reduction_method = __kmp_determine_reduction_method(
3808  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3809  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3810 
3811  OMPT_REDUCTION_DECL(th, global_tid);
3812 
3813  if (packed_reduction_method == critical_reduce_block) {
3814 
3815  OMPT_REDUCTION_BEGIN;
3816  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3817  retval = 1;
3818 
3819  } else if (packed_reduction_method == empty_reduce_block) {
3820 
3821  OMPT_REDUCTION_BEGIN;
3822  // used when team size == 1: no synchronization is required (Intel
3823  // platforms only)
3824  retval = 1;
3825 
3826  } else if (packed_reduction_method == atomic_reduce_block) {
3827 
3828  retval = 2;
3829 
3830  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3831  tree_reduce_block)) {
3832 
3833 // case tree_reduce_block:
3834 // this barrier should be visible to the customer and to the threading profile
3835 // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3836 #if OMPT_SUPPORT
3837  ompt_frame_t *ompt_frame;
3838  if (ompt_enabled.enabled) {
3839  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3840  if (ompt_frame->enter_frame.ptr == NULL)
3841  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3842  }
3843  OMPT_STORE_RETURN_ADDRESS(global_tid);
3844 #endif
3845 #if USE_ITT_NOTIFY
3846  __kmp_threads[global_tid]->th.th_ident =
3847  loc; // needed for correct notification of frames
3848 #endif
3849  retval =
3850  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3851  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3852  retval = (retval != 0) ? (0) : (1);
3853 #if OMPT_SUPPORT && OMPT_OPTIONAL
3854  if (ompt_enabled.enabled) {
3855  ompt_frame->enter_frame = ompt_data_none;
3856  }
3857 #endif
3858 
3859  // all workers other than the primary thread should do this pop here
3860  // (only the primary thread will enter __kmpc_end_reduce())
3861  if (__kmp_env_consistency_check) {
3862  if (retval == 0) { // 0: all other workers; 1: primary thread
3863  __kmp_pop_sync(global_tid, ct_reduce, loc);
3864  }
3865  }
3866 
3867  } else {
3868 
3869  // should never reach this block
3870  KMP_ASSERT(0); // "unexpected method"
3871  }
3872  if (teams_swapped) {
3873  __kmp_restore_swapped_teams(th, team, task_state);
3874  }
3875 
3876  KA_TRACE(10,
3877  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3878  global_tid, packed_reduction_method, retval));
3879  return retval;
3880 }
3881 
3892 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3893  kmp_critical_name *lck) {
3894 
3895  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3896  kmp_info_t *th;
3897  kmp_team_t *team;
3898  int teams_swapped = 0, task_state;
3899 
3900  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3901  __kmp_assert_valid_gtid(global_tid);
3902 
3903  th = __kmp_thread_from_gtid(global_tid);
3904  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3905 
3906  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3907 
3908  // this barrier should be visible to the customer and to the threading profile
3909  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3910  OMPT_REDUCTION_DECL(th, global_tid);
3911 
3912  if (packed_reduction_method == critical_reduce_block) {
3913  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3914 
3915  OMPT_REDUCTION_END;
3916 
3917 // TODO: implicit barrier: should be exposed
3918 #if OMPT_SUPPORT
3919  ompt_frame_t *ompt_frame;
3920  if (ompt_enabled.enabled) {
3921  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3922  if (ompt_frame->enter_frame.ptr == NULL)
3923  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3924  }
3925  OMPT_STORE_RETURN_ADDRESS(global_tid);
3926 #endif
3927 #if USE_ITT_NOTIFY
3928  __kmp_threads[global_tid]->th.th_ident = loc;
3929 #endif
3930  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3931 #if OMPT_SUPPORT && OMPT_OPTIONAL
3932  if (ompt_enabled.enabled) {
3933  ompt_frame->enter_frame = ompt_data_none;
3934  }
3935 #endif
3936 
3937  } else if (packed_reduction_method == empty_reduce_block) {
3938 
3939  OMPT_REDUCTION_END;
3940 
3941 // used when team size == 1: no synchronization is required (Intel platforms only)
3942 
3943 // TODO: implicit barrier: should be exposed
3944 #if OMPT_SUPPORT
3945  ompt_frame_t *ompt_frame;
3946  if (ompt_enabled.enabled) {
3947  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3948  if (ompt_frame->enter_frame.ptr == NULL)
3949  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3950  }
3951  OMPT_STORE_RETURN_ADDRESS(global_tid);
3952 #endif
3953 #if USE_ITT_NOTIFY
3954  __kmp_threads[global_tid]->th.th_ident = loc;
3955 #endif
3956  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3957 #if OMPT_SUPPORT && OMPT_OPTIONAL
3958  if (ompt_enabled.enabled) {
3959  ompt_frame->enter_frame = ompt_data_none;
3960  }
3961 #endif
3962 
3963  } else if (packed_reduction_method == atomic_reduce_block) {
3964 
3965 #if OMPT_SUPPORT
3966  ompt_frame_t *ompt_frame;
3967  if (ompt_enabled.enabled) {
3968  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3969  if (ompt_frame->enter_frame.ptr == NULL)
3970  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3971  }
3972  OMPT_STORE_RETURN_ADDRESS(global_tid);
3973 #endif
3974 // TODO: implicit barrier: should be exposed
3975 #if USE_ITT_NOTIFY
3976  __kmp_threads[global_tid]->th.th_ident = loc;
3977 #endif
3978  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3979 #if OMPT_SUPPORT && OMPT_OPTIONAL
3980  if (ompt_enabled.enabled) {
3981  ompt_frame->enter_frame = ompt_data_none;
3982  }
3983 #endif
3984 
3985  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3986  tree_reduce_block)) {
3987 
3988  // only primary thread executes here (primary releases all other workers)
3989  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3990  global_tid);
3991 
3992  } else {
3993 
3994  // should never reach this block
3995  KMP_ASSERT(0); // "unexpected method"
3996  }
3997  if (teams_swapped) {
3998  __kmp_restore_swapped_teams(th, team, task_state);
3999  }
4000 
4001  if (__kmp_env_consistency_check)
4002  __kmp_pop_sync(global_tid, ct_reduce, loc);
4003 
4004  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4005  global_tid, packed_reduction_method));
4006 
4007  return;
4008 }
4009 
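/* Illustrative sketch (not part of the runtime), contrasting with the nowait
   pair above: with a terminating barrier the atomic case also calls
   __kmpc_end_reduce(), since that call supplies the barrier. Names are
   hypothetical.

     switch (__kmpc_reduce(loc, gtid, 1, sizeof(sum_priv), &sum_priv,
                           my_reduce_func, &reduce_lck)) {
     case 1: // combine directly, then end (performs/releases the barrier)
       sum += sum_priv;
       __kmpc_end_reduce(loc, gtid, &reduce_lck);
       break;
     case 2: // atomic method: combine atomically, then end for the barrier
       __kmpc_end_reduce(loc, gtid, &reduce_lck);
       break;
     default: // 0: tree-reduce worker; released inside __kmpc_reduce()
       break;
     }
*/
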
4010 #undef __KMP_GET_REDUCTION_METHOD
4011 #undef __KMP_SET_REDUCTION_METHOD
4012 
4013 /* end of interface to fast scalable reduce routines */
4014 
4015 kmp_uint64 __kmpc_get_taskid() {
4016 
4017  kmp_int32 gtid;
4018  kmp_info_t *thread;
4019 
4020  gtid = __kmp_get_gtid();
4021  if (gtid < 0) {
4022  return 0;
4023  }
4024  thread = __kmp_thread_from_gtid(gtid);
4025  return thread->th.th_current_task->td_task_id;
4026 
4027 } // __kmpc_get_taskid
4028 
4029 kmp_uint64 __kmpc_get_parent_taskid() {
4030 
4031  kmp_int32 gtid;
4032  kmp_info_t *thread;
4033  kmp_taskdata_t *parent_task;
4034 
4035  gtid = __kmp_get_gtid();
4036  if (gtid < 0) {
4037  return 0;
4038  }
4039  thread = __kmp_thread_from_gtid(gtid);
4040  parent_task = thread->th.th_current_task->td_parent;
4041  return (parent_task == NULL ? 0 : parent_task->td_task_id);
4042 
4043 } // __kmpc_get_parent_taskid
4044 
4056 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4057  const struct kmp_dim *dims) {
4058  __kmp_assert_valid_gtid(gtid);
4059  int j, idx;
4060  kmp_int64 last, trace_count;
4061  kmp_info_t *th = __kmp_threads[gtid];
4062  kmp_team_t *team = th->th.th_team;
4063  kmp_uint32 *flags;
4064  kmp_disp_t *pr_buf = th->th.th_dispatch;
4065  dispatch_shared_info_t *sh_buf;
4066 
4067  KA_TRACE(
4068  20,
4069  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4070  gtid, num_dims, !team->t.t_serialized));
4071  KMP_DEBUG_ASSERT(dims != NULL);
4072  KMP_DEBUG_ASSERT(num_dims > 0);
4073 
4074  if (team->t.t_serialized) {
4075  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4076  return; // no dependencies if team is serialized
4077  }
4078  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4079  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4080  // the next loop
4081  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4082 
4083  // Save bounds info into allocated private buffer
4084  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4085  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4086  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4087  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4088  pr_buf->th_doacross_info[0] =
4089  (kmp_int64)num_dims; // first element is number of dimensions
4090  // Also save the address of num_done so it can be accessed later without
4091  // knowing the buffer index
4092  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4093  pr_buf->th_doacross_info[2] = dims[0].lo;
4094  pr_buf->th_doacross_info[3] = dims[0].up;
4095  pr_buf->th_doacross_info[4] = dims[0].st;
4096  last = 5;
4097  for (j = 1; j < num_dims; ++j) {
4098  kmp_int64
4099  range_length; // holds the range length of each dimension except dims[0]
4100  if (dims[j].st == 1) { // most common case
4101  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4102  range_length = dims[j].up - dims[j].lo + 1;
4103  } else {
4104  if (dims[j].st > 0) {
4105  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4106  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4107  } else { // negative increment
4108  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4109  range_length =
4110  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4111  }
4112  }
4113  pr_buf->th_doacross_info[last++] = range_length;
4114  pr_buf->th_doacross_info[last++] = dims[j].lo;
4115  pr_buf->th_doacross_info[last++] = dims[j].up;
4116  pr_buf->th_doacross_info[last++] = dims[j].st;
4117  }
4118 
4119  // Compute total trip count.
4120  // Start with range of dims[0] which we don't need to keep in the buffer.
4121  if (dims[0].st == 1) { // most common case
4122  trace_count = dims[0].up - dims[0].lo + 1;
4123  } else if (dims[0].st > 0) {
4124  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4125  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4126  } else { // negative increment
4127  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4128  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4129  }
4130  for (j = 1; j < num_dims; ++j) {
4131  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4132  }
4133  KMP_DEBUG_ASSERT(trace_count > 0);
4134 
4135  // Check whether the shared buffer is still occupied by another loop
4136  // (one whose buffer index is idx - __kmp_dispatch_num_buffers)
4137  if (idx != sh_buf->doacross_buf_idx) {
4138  // Shared buffer is occupied, wait for it to be free
4139  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4140  __kmp_eq_4, NULL);
4141  }
4142 #if KMP_32_BIT_ARCH
4143  // Check if we are the first thread. After the CAS the first thread gets 0,
4144  // others get 1 if initialization is in progress, allocated pointer otherwise.
4145  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4146  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4147  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4148 #else
4149  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4150  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4151 #endif
4152  if (flags == NULL) {
4153  // we are the first thread, allocate the array of flags
4154  size_t size =
4155  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4156  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4157  KMP_MB();
4158  sh_buf->doacross_flags = flags;
4159  } else if (flags == (kmp_uint32 *)1) {
4160 #if KMP_32_BIT_ARCH
4161  // initialization is still in progress, need to wait
4162  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4163 #else
4164  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4165 #endif
4166  KMP_YIELD(TRUE);
4167  KMP_MB();
4168  } else {
4169  KMP_MB();
4170  }
4171  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4172  pr_buf->th_doacross_flags =
4173  sh_buf->doacross_flags; // save private copy in order to not
4174  // touch shared buffer on each iteration
4175  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4176 }
4177 
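/* For reference, the per-thread th_doacross_info buffer built above is packed
   as kmp_int64 values:
     [0]               number of dimensions (num_dims)
     [1]               address of sh_buf->doacross_num_done
     [2], [3], [4]     lo, up, st of dims[0]
     [4*j+1 .. 4*j+4]  range length, lo, up, st of dims[j] for j >= 1
   The range length of dims[0] is not stored; it is only needed for the total
   trip count computed above. */
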
4178 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4179  __kmp_assert_valid_gtid(gtid);
4180  kmp_int64 shft;
4181  size_t num_dims, i;
4182  kmp_uint32 flag;
4183  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4184  kmp_info_t *th = __kmp_threads[gtid];
4185  kmp_team_t *team = th->th.th_team;
4186  kmp_disp_t *pr_buf;
4187  kmp_int64 lo, up, st;
4188 
4189  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4190  if (team->t.t_serialized) {
4191  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4192  return; // no dependencies if team is serialized
4193  }
4194 
4195  // calculate sequential iteration number and check out-of-bounds condition
4196  pr_buf = th->th.th_dispatch;
4197  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4198  num_dims = (size_t)pr_buf->th_doacross_info[0];
4199  lo = pr_buf->th_doacross_info[2];
4200  up = pr_buf->th_doacross_info[3];
4201  st = pr_buf->th_doacross_info[4];
4202 #if OMPT_SUPPORT && OMPT_OPTIONAL
4203  ompt_dependence_t deps[num_dims];
4204 #endif
4205  if (st == 1) { // most common case
4206  if (vec[0] < lo || vec[0] > up) {
4207  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4208  "bounds [%lld,%lld]\n",
4209  gtid, vec[0], lo, up));
4210  return;
4211  }
4212  iter_number = vec[0] - lo;
4213  } else if (st > 0) {
4214  if (vec[0] < lo || vec[0] > up) {
4215  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4216  "bounds [%lld,%lld]\n",
4217  gtid, vec[0], lo, up));
4218  return;
4219  }
4220  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4221  } else { // negative increment
4222  if (vec[0] > lo || vec[0] < up) {
4223  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4224  "bounds [%lld,%lld]\n",
4225  gtid, vec[0], lo, up));
4226  return;
4227  }
4228  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4229  }
4230 #if OMPT_SUPPORT && OMPT_OPTIONAL
4231  deps[0].variable.value = iter_number;
4232  deps[0].dependence_type = ompt_dependence_type_sink;
4233 #endif
4234  for (i = 1; i < num_dims; ++i) {
4235  kmp_int64 iter, ln;
4236  size_t j = i * 4;
4237  ln = pr_buf->th_doacross_info[j + 1];
4238  lo = pr_buf->th_doacross_info[j + 2];
4239  up = pr_buf->th_doacross_info[j + 3];
4240  st = pr_buf->th_doacross_info[j + 4];
4241  if (st == 1) {
4242  if (vec[i] < lo || vec[i] > up) {
4243  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4244  "bounds [%lld,%lld]\n",
4245  gtid, vec[i], lo, up));
4246  return;
4247  }
4248  iter = vec[i] - lo;
4249  } else if (st > 0) {
4250  if (vec[i] < lo || vec[i] > up) {
4251  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4252  "bounds [%lld,%lld]\n",
4253  gtid, vec[i], lo, up));
4254  return;
4255  }
4256  iter = (kmp_uint64)(vec[i] - lo) / st;
4257  } else { // st < 0
4258  if (vec[i] > lo || vec[i] < up) {
4259  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4260  "bounds [%lld,%lld]\n",
4261  gtid, vec[i], lo, up));
4262  return;
4263  }
4264  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4265  }
4266  iter_number = iter + ln * iter_number;
4267 #if OMPT_SUPPORT && OMPT_OPTIONAL
4268  deps[i].variable.value = iter;
4269  deps[i].dependence_type = ompt_dependence_type_sink;
4270 #endif
4271  }
4272  shft = iter_number % 32; // use 32-bit granularity
4273  iter_number >>= 5; // divided by 32
4274  flag = 1 << shft;
4275  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4276  KMP_YIELD(TRUE);
4277  }
4278  KMP_MB();
4279 #if OMPT_SUPPORT && OMPT_OPTIONAL
4280  if (ompt_enabled.ompt_callback_dependences) {
4281  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4282  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4283  }
4284 #endif
4285  KA_TRACE(20,
4286  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4287  gtid, (iter_number << 5) + shft));
4288 }
4289 
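/* Worked example (hypothetical bounds): for a 2-dimensional nest with
   dims[0] = {lo=0, up=9, st=1} and dims[1] = {lo=0, up=3, st=1} (range 4),
   the iteration vector (2, 3) linearizes to 2 * 4 + 3 = 11, i.e. this call
   spins on bit 11 of word 0 of th_doacross_flags. */
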
4290 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4291  __kmp_assert_valid_gtid(gtid);
4292  kmp_int64 shft;
4293  size_t num_dims, i;
4294  kmp_uint32 flag;
4295  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4296  kmp_info_t *th = __kmp_threads[gtid];
4297  kmp_team_t *team = th->th.th_team;
4298  kmp_disp_t *pr_buf;
4299  kmp_int64 lo, st;
4300 
4301  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4302  if (team->t.t_serialized) {
4303  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4304  return; // no dependencies if team is serialized
4305  }
4306 
4307  // calculate sequential iteration number (same as in "wait" but no
4308  // out-of-bounds checks)
4309  pr_buf = th->th.th_dispatch;
4310  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4311  num_dims = (size_t)pr_buf->th_doacross_info[0];
4312  lo = pr_buf->th_doacross_info[2];
4313  st = pr_buf->th_doacross_info[4];
4314 #if OMPT_SUPPORT && OMPT_OPTIONAL
4315  ompt_dependence_t deps[num_dims];
4316 #endif
4317  if (st == 1) { // most common case
4318  iter_number = vec[0] - lo;
4319  } else if (st > 0) {
4320  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4321  } else { // negative increment
4322  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4323  }
4324 #if OMPT_SUPPORT && OMPT_OPTIONAL
4325  deps[0].variable.value = iter_number;
4326  deps[0].dependence_type = ompt_dependence_type_source;
4327 #endif
4328  for (i = 1; i < num_dims; ++i) {
4329  kmp_int64 iter, ln;
4330  size_t j = i * 4;
4331  ln = pr_buf->th_doacross_info[j + 1];
4332  lo = pr_buf->th_doacross_info[j + 2];
4333  st = pr_buf->th_doacross_info[j + 4];
4334  if (st == 1) {
4335  iter = vec[i] - lo;
4336  } else if (st > 0) {
4337  iter = (kmp_uint64)(vec[i] - lo) / st;
4338  } else { // st < 0
4339  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4340  }
4341  iter_number = iter + ln * iter_number;
4342 #if OMPT_SUPPORT && OMPT_OPTIONAL
4343  deps[i].variable.value = iter;
4344  deps[i].dependence_type = ompt_dependence_type_source;
4345 #endif
4346  }
4347 #if OMPT_SUPPORT && OMPT_OPTIONAL
4348  if (ompt_enabled.ompt_callback_dependences) {
4349  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4350  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4351  }
4352 #endif
4353  shft = iter_number % 32; // use 32-bit granularity
4354  iter_number >>= 5; // divided by 32
4355  flag = 1 << shft;
4356  KMP_MB();
4357  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4358  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4359  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4360  (iter_number << 5) + shft));
4361 }
4362 
4363 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4364  __kmp_assert_valid_gtid(gtid);
4365  kmp_int32 num_done;
4366  kmp_info_t *th = __kmp_threads[gtid];
4367  kmp_team_t *team = th->th.th_team;
4368  kmp_disp_t *pr_buf = th->th.th_dispatch;
4369 
4370  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4371  if (team->t.t_serialized) {
4372  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4373  return; // nothing to do
4374  }
4375  num_done =
4376  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4377  if (num_done == th->th.th_team_nproc) {
4378  // we are the last thread, need to free shared resources
4379  int idx = pr_buf->th_doacross_buf_idx - 1;
4380  dispatch_shared_info_t *sh_buf =
4381  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4382  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4383  (kmp_int64)&sh_buf->doacross_num_done);
4384  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4385  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4386  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4387  sh_buf->doacross_flags = NULL;
4388  sh_buf->doacross_num_done = 0;
4389  sh_buf->doacross_buf_idx +=
4390  __kmp_dispatch_num_buffers; // free buffer for future re-use
4391  }
4392  // free private resources (need to keep buffer index forever)
4393  pr_buf->th_doacross_flags = NULL;
4394  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4395  pr_buf->th_doacross_info = NULL;
4396  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4397 }
4398 
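/* Illustrative sketch (not part of the runtime): the call sequence a compiler
   might emit for "#pragma omp for ordered(2)" with cross-iteration
   dependences. The loop structure and index names are hypothetical.

     struct kmp_dim dims[2];
     dims[0].lo = 0; dims[0].up = N - 1; dims[0].st = 1;
     dims[1].lo = 0; dims[1].up = M - 1; dims[1].st = 1;
     __kmpc_doacross_init(loc, gtid, 2, dims);
     for (...) {                                  // this thread's iterations
       kmp_int64 sink_vec[2] = {i - 1, j};
       __kmpc_doacross_wait(loc, gtid, sink_vec); // ordered depend(sink:i-1,j)
       ... loop body ...
       kmp_int64 src_vec[2] = {i, j};
       __kmpc_doacross_post(loc, gtid, src_vec);  // ordered depend(source)
     }
     __kmpc_doacross_fini(loc, gtid);
*/
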
4399 /* OpenMP 5.1 Memory Management routines */
4400 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4401  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4402 }
4403 
4404 void *omp_aligned_alloc(size_t align, size_t size,
4405  omp_allocator_handle_t allocator) {
4406  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4407 }
4408 
4409 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4410  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4411 }
4412 
4413 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4414  omp_allocator_handle_t allocator) {
4415  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4416 }
4417 
4418 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4419  omp_allocator_handle_t free_allocator) {
4420  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4421  free_allocator);
4422 }
4423 
4424 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4425  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4426 }
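
/* Usage sketch (user code, not part of the runtime): these entry points back
   the OpenMP 5.x allocator API declared in omp.h, e.g.:

     double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                               omp_high_bw_mem_alloc);
     if (buf) {
       ... use buf ...
       omp_free(buf, omp_high_bw_mem_alloc);
     }

   omp_high_bw_mem_alloc is one of the predefined allocators; behavior when a
   request cannot be satisfied depends on the allocator's fallback trait. */
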
4427 /* end of OpenMP 5.1 Memory Management routines */
4428 
4429 int __kmpc_get_target_offload(void) {
4430  if (!__kmp_init_serial) {
4431  __kmp_serial_initialize();
4432  }
4433  return __kmp_target_offload;
4434 }
4435 
4436 int __kmpc_pause_resource(kmp_pause_status_t level) {
4437  if (!__kmp_init_serial) {
4438  return 1; // Can't pause if runtime is not initialized
4439  }
4440  return __kmp_pause_resource(level);
4441 }
4442 
4443 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4444  if (!__kmp_init_serial)
4445  __kmp_serial_initialize();
4446 
4447  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4448 
4449 #if OMPT_SUPPORT
4450  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4451  ompt_callbacks.ompt_callback(ompt_callback_error)(
4452  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4453  OMPT_GET_RETURN_ADDRESS(0));
4454  }
4455 #endif // OMPT_SUPPORT
4456 
4457  char *src_loc;
4458  if (loc && loc->psource) {
4459  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4460  src_loc =
4461  __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4462  __kmp_str_loc_free(&str_loc);
4463  } else {
4464  src_loc = __kmp_str_format("unknown");
4465  }
4466 
4467  if (severity == severity_warning)
4468  KMP_WARNING(UserDirectedWarning, src_loc, message);
4469  else
4470  KMP_FATAL(UserDirectedError, src_loc, message);
4471 
4472  __kmp_str_free(&src_loc);
4473 }
4474 
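/* Illustrative sketch (not part of the runtime): roughly how a compiler might
   lower the OpenMP error directive onto this entry point, e.g. for
   #pragma omp error severity(warning) message("check the configuration"):

     __kmpc_error(&loc, severity_warning, "check the configuration");

   severity_warning and severity_fatal are the only severities accepted, as
   asserted above. */
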
4475 // Mark begin of scope directive.
4476 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4477 // reserved is for extension of scope directive and not used.
4478 #if OMPT_SUPPORT && OMPT_OPTIONAL
4479  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4480  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4481  int tid = __kmp_tid_from_gtid(gtid);
4482  ompt_callbacks.ompt_callback(ompt_callback_work)(
4483  ompt_work_scope, ompt_scope_begin,
4484  &(team->t.ompt_team_info.parallel_data),
4485  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4486  OMPT_GET_RETURN_ADDRESS(0));
4487  }
4488 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4489 }
4490 
4491 // Mark end of scope directive
4492 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4493 // reserved is for extension of scope directive and not used.
4494 #if OMPT_SUPPORT && OMPT_OPTIONAL
4495  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4496  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4497  int tid = __kmp_tid_from_gtid(gtid);
4498  ompt_callbacks.ompt_callback(ompt_callback_work)(
4499  ompt_work_scope, ompt_scope_end,
4500  &(team->t.ompt_team_info.parallel_data),
4501  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4502  OMPT_GET_RETURN_ADDRESS(0));
4503  }
4504 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4505 }
4506 
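/* Illustrative sketch (not part of the runtime): a compiler may bracket the
   body of "#pragma omp scope" with these calls so that OMPT work events are
   emitted, roughly:

     __kmpc_scope(&loc, gtid, NULL);
     ... structured block ...
     __kmpc_end_scope(&loc, gtid, NULL);

   The reserved argument is unused, as noted above. */
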
4507 #ifdef KMP_USE_VERSION_SYMBOLS
4508 // For GOMP compatibility there are two versions of each omp_* API.
4509 // One is the plain C symbol and one is the Fortran symbol with an appended
4510 // underscore. When we implement a specific ompc_* version of an omp_*
4511 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4512 // instead of the Fortran versions in kmp_ftn_entry.h
4513 extern "C" {
4514 // Have to undef these from omp.h so they aren't translated into
4515 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4516 #ifdef omp_set_affinity_format
4517 #undef omp_set_affinity_format
4518 #endif
4519 #ifdef omp_get_affinity_format
4520 #undef omp_get_affinity_format
4521 #endif
4522 #ifdef omp_display_affinity
4523 #undef omp_display_affinity
4524 #endif
4525 #ifdef omp_capture_affinity
4526 #undef omp_capture_affinity
4527 #endif
4528 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4529  "OMP_5.0");
4530 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4531  "OMP_5.0");
4532 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4533  "OMP_5.0");
4534 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4535  "OMP_5.0");
4536 } // extern "C"
4537 #endif