LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g., to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is a no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
63  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (which can cause
66  // library shutdown).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 }
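/*!
Editor's note: a minimal, hypothetical sketch of how compiler-generated code
could bracket a program with these two entry points. The ident_t initializer
and the names loc_main and main below are illustrative only, not part of the
runtime.
@code
static ident_t loc_main = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;main;1;1;;"};

int main(void) {
  __kmpc_begin(&loc_main, 0); // usually a no-op unless KMP_INITIAL_THREAD_BIND
                              // or KMP_IGNORE_MPPBEG requests otherwise
  /* ... serial and parallel code ... */
  __kmpc_end(&loc_main);      // unregisters this root only if KMP_IGNORE_MPPEND=0
  return 0;
}
@endcode
*/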
74 
93 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
94  kmp_int32 gtid = __kmp_entry_gtid();
95 
96  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
97 
98  return gtid;
99 }
100 
115 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
116  KC_TRACE(10,
117  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
118 
119  return TCR_4(__kmp_all_nth);
120 }
121 
128 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
129  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
130  return __kmp_tid_from_gtid(__kmp_entry_gtid());
131 }
132 
138 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
139  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
140 
141  return __kmp_entry_thread()->th.th_team->t.t_nproc;
142 }
143 
150 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
151 #ifndef KMP_DEBUG
152 
153  return TRUE;
154 
155 #else
156 
157  const char *semi2;
158  const char *semi3;
159  int line_no;
160 
161  if (__kmp_par_range == 0) {
162  return TRUE;
163  }
164  semi2 = loc->psource;
165  if (semi2 == NULL) {
166  return TRUE;
167  }
168  semi2 = strchr(semi2, ';');
169  if (semi2 == NULL) {
170  return TRUE;
171  }
172  semi2 = strchr(semi2 + 1, ';');
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  if (__kmp_par_range_filename[0]) {
177  const char *name = semi2 - 1;
178  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
179  name--;
180  }
181  if ((*name == '/') || (*name == ';')) {
182  name++;
183  }
184  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
185  return __kmp_par_range < 0;
186  }
187  }
188  semi3 = strchr(semi2 + 1, ';');
189  if (__kmp_par_range_routine[0]) {
190  if ((semi3 != NULL) && (semi3 > semi2) &&
191  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
192  return __kmp_par_range < 0;
193  }
194  }
195  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
196  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
197  return __kmp_par_range > 0;
198  }
199  return __kmp_par_range < 0;
200  }
201  return TRUE;
202 
203 #endif /* KMP_DEBUG */
204 }
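/*!
Editor's note: the parsing above relies on loc->psource being a semicolon
separated location string of the form ";filename;routine;line;...". The sample
value below is illustrative and only shows which fields the KMP_PAR_RANGE
filter variables are matched against.
@code
// Example (illustrative): loc->psource == ";mysrc.c;compute_kernel;42;1;;"
//   field 2 ("mysrc.c")        -> compared with __kmp_par_range_filename
//   field 3 ("compute_kernel") -> compared with __kmp_par_range_routine
//   field 4 ("42")             -> tested against [__kmp_par_range_lb, __kmp_par_range_ub]
@endcode
*/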
205 
212 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
213  return __kmp_entry_thread()->th.th_root->r.r_active;
214 }
215 
225 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
226  kmp_int32 num_threads) {
227  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
228  global_tid, num_threads));
229 
230  __kmp_push_num_threads(loc, global_tid, num_threads);
231 }
232 
233 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
234  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
235 
236  /* the num_threads are automatically popped */
237 }
238 
239 #if OMP_40_ENABLED
240 
241 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
242  kmp_int32 proc_bind) {
243  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
244  proc_bind));
245 
246  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
247 }
248 
249 #endif /* OMP_40_ENABLED */
250 
261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
262  int gtid = __kmp_entry_gtid();
263 
264 #if (KMP_STATS_ENABLED)
265  int inParallel = __kmpc_in_parallel(loc);
266  if (inParallel) {
267  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
268  } else {
269  KMP_COUNT_BLOCK(OMP_PARALLEL);
270  }
271 #endif
272 
273  // maybe saving thr_state is enough here
274  {
275  va_list ap;
276  va_start(ap, microtask);
277 
278 #if OMPT_SUPPORT
279  omp_frame_t *ompt_frame;
280  if (ompt_enabled.enabled) {
281  kmp_info_t *master_th = __kmp_threads[gtid];
282  kmp_team_t *parent_team = master_th->th.th_team;
283  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
284  if (lwt)
285  ompt_frame = &(lwt->ompt_task_info.frame);
286  else {
287  int tid = __kmp_tid_from_gtid(gtid);
288  ompt_frame = &(
289  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
290  }
291  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
292  OMPT_STORE_RETURN_ADDRESS(gtid);
293  }
294 #endif
295 
296 #if INCLUDE_SSC_MARKS
297  SSC_MARK_FORKING();
298 #endif
299  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
300  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
301  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
302 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
303 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
304  &ap
305 #else
306  ap
307 #endif
308  );
309 #if INCLUDE_SSC_MARKS
310  SSC_MARK_JOINING();
311 #endif
312  __kmp_join_call(loc, gtid
313 #if OMPT_SUPPORT
314  ,
315  fork_context_intel
316 #endif
317  );
318 
319  va_end(ap);
320  }
321 }
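/*!
Editor's note: a hypothetical sketch of how a compiler could lower
#pragma omp parallel onto __kmpc_fork_call. The outlined routine and ident_t
record names are illustrative; only the kmpc_micro calling convention (global
and bound thread ids first, then the shared arguments) matches the interface
above.
@code
static ident_t loc_par = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;foo;10;1;;"};

// Outlined body of the parallel region, executed by every thread of the team.
static void outlined_body(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                          int *shared_x) {
  /* ... work on *shared_x ... */
}

void foo(int x) {
  // argc == 1 shared argument is passed through the varargs tail.
  __kmpc_fork_call(&loc_par, 1, (kmpc_micro)outlined_body, &x);
}
@endcode
*/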
322 
323 #if OMP_40_ENABLED
324 
335 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
336  kmp_int32 num_teams, kmp_int32 num_threads) {
337  KA_TRACE(20,
338  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
339  global_tid, num_teams, num_threads));
340 
341  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
342 }
343 
354 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
355  ...) {
356  int gtid = __kmp_entry_gtid();
357  kmp_info_t *this_thr = __kmp_threads[gtid];
358  va_list ap;
359  va_start(ap, microtask);
360 
361  KMP_COUNT_BLOCK(OMP_TEAMS);
362 
363  // remember teams entry point and nesting level
364  this_thr->th.th_teams_microtask = microtask;
365  this_thr->th.th_teams_level =
366  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
367 
368 #if OMPT_SUPPORT
369  kmp_team_t *parent_team = this_thr->th.th_team;
370  int tid = __kmp_tid_from_gtid(gtid);
371  if (ompt_enabled.enabled) {
372  parent_team->t.t_implicit_task_taskdata[tid]
373  .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
374  }
375  OMPT_STORE_RETURN_ADDRESS(gtid);
376 #endif
377 
378  // check if __kmpc_push_num_teams was called; set the default number of
379  // teams otherwise
380  if (this_thr->th.th_teams_size.nteams == 0) {
381  __kmp_push_num_teams(loc, gtid, 0, 0);
382  }
383  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
384  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
385  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
386 
387  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
388  VOLATILE_CAST(microtask_t)
389  __kmp_teams_master, // "wrapped" task
390  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
391 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
392  &ap
393 #else
394  ap
395 #endif
396  );
397  __kmp_join_call(loc, gtid
398 #if OMPT_SUPPORT
399  ,
400  fork_context_intel
401 #endif
402  );
403 
404  this_thr->th.th_teams_microtask = NULL;
405  this_thr->th.th_teams_level = 0;
406  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
407  va_end(ap);
408 }
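/*!
Editor's note: a hypothetical sketch of lowering
#pragma omp teams num_teams(4) thread_limit(8); the outlined routine and
ident_t record are illustrative only.
@code
static ident_t loc_teams = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;bar;20;1;;"};

static void teams_body(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                       double *shared_v) {
  /* ... executed by the master thread of each team in the league ... */
}

void bar(double v) {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_teams);
  __kmpc_push_num_teams(&loc_teams, gtid, /*num_teams=*/4, /*num_threads=*/8);
  __kmpc_fork_teams(&loc_teams, 1, (kmpc_micro)teams_body, &v);
}
@endcode
*/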
409 #endif /* OMP_40_ENABLED */
410 
411 // I don't think this function should ever have been exported.
412 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
413 // openmp code ever called it, but it's been exported from the RTL for so
414 // long that I'm afraid to remove the definition.
415 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
416 
429 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
430 // The implementation is now in kmp_runtime.cpp so that it can share static
431 // functions with kmp_fork_call since the tasks to be done are similar in
432 // each case.
433 #if OMPT_SUPPORT
434  OMPT_STORE_RETURN_ADDRESS(global_tid);
435 #endif
436  __kmp_serialized_parallel(loc, global_tid);
437 }
438 
446 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
447  kmp_internal_control_t *top;
448  kmp_info_t *this_thr;
449  kmp_team_t *serial_team;
450 
451  KC_TRACE(10,
452  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
453 
454  /* skip all this code for autopar serialized loops since it results in
455  unacceptable overhead */
456  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
457  return;
458 
459  // Not autopar code
460  if (!TCR_4(__kmp_init_parallel))
461  __kmp_parallel_initialize();
462 
463  this_thr = __kmp_threads[global_tid];
464  serial_team = this_thr->th.th_serial_team;
465 
466 #if OMP_45_ENABLED
467  kmp_task_team_t *task_team = this_thr->th.th_task_team;
468 
469  // we need to wait for the proxy tasks before finishing the thread
470  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
471  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
472 #endif
473 
474  KMP_MB();
475  KMP_DEBUG_ASSERT(serial_team);
476  KMP_ASSERT(serial_team->t.t_serialized);
477  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
478  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
479  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
480  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
481 
482 #if OMPT_SUPPORT
483  if (ompt_enabled.enabled &&
484  this_thr->th.ompt_thread_info.state != omp_state_overhead) {
485  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
486  if (ompt_enabled.ompt_callback_implicit_task) {
487  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
488  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
489  OMPT_CUR_TASK_INFO(this_thr)->thread_num);
490  }
491 
492  // clear the task id only after unlinking the task
493  ompt_data_t *parent_task_data;
494  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
495 
496  if (ompt_enabled.ompt_callback_parallel_end) {
497  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
498  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
499  ompt_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
500  }
501  __ompt_lw_taskteam_unlink(this_thr);
502  this_thr->th.ompt_thread_info.state = omp_state_overhead;
503  }
504 #endif
505 
506  /* If necessary, pop the internal control stack values and replace the team
507  * values */
508  top = serial_team->t.t_control_stack_top;
509  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
510  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
511  serial_team->t.t_control_stack_top = top->next;
512  __kmp_free(top);
513  }
514 
515  // if( serial_team -> t.t_serialized > 1 )
516  serial_team->t.t_level--;
517 
518  /* pop dispatch buffers stack */
519  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
520  {
521  dispatch_private_info_t *disp_buffer =
522  serial_team->t.t_dispatch->th_disp_buffer;
523  serial_team->t.t_dispatch->th_disp_buffer =
524  serial_team->t.t_dispatch->th_disp_buffer->next;
525  __kmp_free(disp_buffer);
526  }
527 
528  --serial_team->t.t_serialized;
529  if (serial_team->t.t_serialized == 0) {
530 
531 /* return to the parallel section */
532 
533 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
534  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
535  __kmp_clear_x87_fpu_status_word();
536  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
537  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
538  }
539 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
540 
541  this_thr->th.th_team = serial_team->t.t_parent;
542  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
543 
544  /* restore values cached in the thread */
545  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
546  this_thr->th.th_team_master =
547  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
548  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
549 
550  /* TODO the below shouldn't need to be adjusted for serialized teams */
551  this_thr->th.th_dispatch =
552  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
553 
554  __kmp_pop_current_task_from_thread(this_thr);
555 
556  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
557  this_thr->th.th_current_task->td_flags.executing = 1;
558 
559  if (__kmp_tasking_mode != tskm_immediate_exec) {
560  // Copy the task team from the new child / old parent team to the thread.
561  this_thr->th.th_task_team =
562  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
563  KA_TRACE(20,
564  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
565  "team %p\n",
566  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
567  }
568  } else {
569  if (__kmp_tasking_mode != tskm_immediate_exec) {
570  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
571  "depth of serial team %p to %d\n",
572  global_tid, serial_team, serial_team->t.t_serialized));
573  }
574  }
575 
576  if (__kmp_env_consistency_check)
577  __kmp_pop_parallel(global_tid, NULL);
578 #if OMPT_SUPPORT
579  if (ompt_enabled.enabled)
580  this_thr->th.ompt_thread_info.state =
581  ((this_thr->th.th_team_serialized) ? omp_state_work_serial
582  : omp_state_work_parallel);
583 #endif
584 }
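/*!
Editor's note: a hypothetical sketch of the call pair a compiler can emit for a
parallel region known to execute serialized (for example
#pragma omp parallel if(0)); the names loc_ser and baz are illustrative.
@code
static ident_t loc_ser = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;baz;30;1;;"};

void baz(void) {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_ser);
  __kmpc_serialized_parallel(&loc_ser, gtid);
  /* ... region body executed by the encountering thread only ... */
  __kmpc_end_serialized_parallel(&loc_ser, gtid);
}
@endcode
*/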
585 
594 void __kmpc_flush(ident_t *loc) {
595  KC_TRACE(10, ("__kmpc_flush: called\n"));
596 
597  /* need an explicit __mf() here since the library uses volatile instead */
598  KMP_MB(); /* Flush all pending memory write invalidates. */
599 
600 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
601 #if KMP_MIC
602 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
603 // We shouldn't need it, though, since the ABI rules require that
604 // * If the compiler generates NGO stores it also generates the fence
605 // * If users hand-code NGO stores they should insert the fence
606 // therefore no incomplete unordered stores should be visible.
607 #else
608  // C74404
609  // This is to address non-temporal store instructions (sfence needed).
610  // The clflush instruction is also addressed here (mfence needed).
611  // Probably the non-temporal load movntdqa instruction should also be
612  // addressed.
613  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
614  if (!__kmp_cpuinfo.initialized) {
615  __kmp_query_cpuid(&__kmp_cpuinfo);
616  }
617  if (!__kmp_cpuinfo.sse2) {
618  // CPU cannot execute SSE2 instructions.
619  } else {
620 #if KMP_COMPILER_ICC
621  _mm_mfence();
622 #elif KMP_COMPILER_MSVC
623  MemoryBarrier();
624 #else
625  __sync_synchronize();
626 #endif // KMP_COMPILER_ICC
627  }
628 #endif // KMP_MIC
629 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
630 // Nothing to see here; move along.
631 #elif KMP_ARCH_PPC64
632 // Nothing needed here (we have a real MB above).
633 #if KMP_OS_CNK
634  // The flushing thread needs to yield here; this prevents a
635  // busy-waiting thread from saturating the pipeline. flush is
636  // often used in loops like this:
637  // while (!flag) {
638  // #pragma omp flush(flag)
639  // }
640  // and adding the yield here is good for at least a 10x speedup
641  // when running >2 threads per core (on the NAS LU benchmark).
642  __kmp_yield(TRUE);
643 #endif
644 #else
645 #error Unknown or unsupported architecture
646 #endif
647 
648 #if OMPT_SUPPORT && OMPT_OPTIONAL
649  if (ompt_enabled.ompt_callback_flush) {
650  ompt_callbacks.ompt_callback(ompt_callback_flush)(
651  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
652  }
653 #endif
654 }
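/*!
Editor's note: #pragma omp flush typically lowers to a single call of this
routine. The spin-wait below mirrors the usage pattern quoted in the KMP_OS_CNK
comment above; the names loc_flush, flag and wait_for_flag are illustrative.
@code
static ident_t loc_flush = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;wait;40;1;;"};
extern volatile int flag;

void wait_for_flag(void) {
  while (!flag) {
    __kmpc_flush(&loc_flush); // #pragma omp flush(flag)
  }
}
@endcode
*/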
655 
656 /* -------------------------------------------------------------------------- */
664 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
665  KMP_COUNT_BLOCK(OMP_BARRIER);
666  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
667 
668  if (!TCR_4(__kmp_init_parallel))
669  __kmp_parallel_initialize();
670 
671  if (__kmp_env_consistency_check) {
672  if (loc == 0) {
673  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
674  }
675 
676  __kmp_check_barrier(global_tid, ct_barrier, loc);
677  }
678 
679 #if OMPT_SUPPORT
680  omp_frame_t *ompt_frame;
681  if (ompt_enabled.enabled) {
682  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
683  if (ompt_frame->enter_frame == NULL)
684  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
685  OMPT_STORE_RETURN_ADDRESS(global_tid);
686  }
687 #endif
688  __kmp_threads[global_tid]->th.th_ident = loc;
689  // TODO: explicit barrier_wait_id:
690  // this function is called when a 'barrier' directive is present or at the
691  // implicit barrier at the end of a worksharing construct.
692  // 1) better to add a per-thread barrier counter to a thread data structure
693  // 2) set to 0 when a new team is created
694  // 3) no sync is required
695 
696  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
697 #if OMPT_SUPPORT && OMPT_OPTIONAL
698  if (ompt_enabled.enabled) {
699  ompt_frame->enter_frame = NULL;
700  }
701 #endif
702 }
703 
704 /* The BARRIER for a MASTER section is always explicit */
711 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
712  int status = 0;
713 
714  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
715 
716  if (!TCR_4(__kmp_init_parallel))
717  __kmp_parallel_initialize();
718 
719  if (KMP_MASTER_GTID(global_tid)) {
720  KMP_COUNT_BLOCK(OMP_MASTER);
721  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
722  status = 1;
723  }
724 
725 #if OMPT_SUPPORT && OMPT_OPTIONAL
726  if (status) {
727  if (ompt_enabled.ompt_callback_master) {
728  kmp_info_t *this_thr = __kmp_threads[global_tid];
729  kmp_team_t *team = this_thr->th.th_team;
730 
731  int tid = __kmp_tid_from_gtid(global_tid);
732  ompt_callbacks.ompt_callback(ompt_callback_master)(
733  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
734  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
735  OMPT_GET_RETURN_ADDRESS(0));
736  }
737  }
738 #endif
739 
740  if (__kmp_env_consistency_check) {
741 #if KMP_USE_DYNAMIC_LOCK
742  if (status)
743  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
744  else
745  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
746 #else
747  if (status)
748  __kmp_push_sync(global_tid, ct_master, loc, NULL);
749  else
750  __kmp_check_sync(global_tid, ct_master, loc, NULL);
751 #endif
752  }
753 
754  return status;
755 }
756 
765 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
766  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
767 
768  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
769  KMP_POP_PARTITIONED_TIMER();
770 
771 #if OMPT_SUPPORT && OMPT_OPTIONAL
772  kmp_info_t *this_thr = __kmp_threads[global_tid];
773  kmp_team_t *team = this_thr->th.th_team;
774  if (ompt_enabled.ompt_callback_master) {
775  int tid = __kmp_tid_from_gtid(global_tid);
776  ompt_callbacks.ompt_callback(ompt_callback_master)(
777  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
778  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
779  OMPT_GET_RETURN_ADDRESS(0));
780  }
781 #endif
782 
783  if (__kmp_env_consistency_check) {
784  if (global_tid < 0)
785  KMP_WARNING(ThreadIdentInvalid);
786 
787  if (KMP_MASTER_GTID(global_tid))
788  __kmp_pop_sync(global_tid, ct_master, loc);
789  }
790 }
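/*!
Editor's note: a hypothetical sketch of lowering #pragma omp master. Only the
thread for which __kmpc_master() returns 1 executes the block and calls
__kmpc_end_master(); there is no implied barrier. Names are illustrative.
@code
static ident_t loc_master = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;qux;50;1;;"};

void region_body(kmp_int32 gtid) {
  if (__kmpc_master(&loc_master, gtid)) {
    /* ... master-only work ... */
    __kmpc_end_master(&loc_master, gtid);
  }
  // no barrier here unless the program adds one explicitly
}
@endcode
*/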
791 
799 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
800  int cid = 0;
801  kmp_info_t *th;
802  KMP_DEBUG_ASSERT(__kmp_init_serial);
803 
804  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
805 
806  if (!TCR_4(__kmp_init_parallel))
807  __kmp_parallel_initialize();
808 
809 #if USE_ITT_BUILD
810  __kmp_itt_ordered_prep(gtid);
811 // TODO: ordered_wait_id
812 #endif /* USE_ITT_BUILD */
813 
814  th = __kmp_threads[gtid];
815 
816 #if OMPT_SUPPORT && OMPT_OPTIONAL
817  kmp_team_t *team;
818  omp_wait_id_t lck;
819  void *codeptr_ra;
820  if (ompt_enabled.enabled) {
821  OMPT_STORE_RETURN_ADDRESS(gtid);
822  team = __kmp_team_from_gtid(gtid);
823  lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
824  /* OMPT state update */
825  th->th.ompt_thread_info.wait_id = lck;
826  th->th.ompt_thread_info.state = omp_state_wait_ordered;
827 
828  /* OMPT event callback */
829  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
830  if (ompt_enabled.ompt_callback_mutex_acquire) {
831  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
832  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
833  (omp_wait_id_t)lck, codeptr_ra);
834  }
835  }
836 #endif
837 
838  if (th->th.th_dispatch->th_deo_fcn != 0)
839  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
840  else
841  __kmp_parallel_deo(&gtid, &cid, loc);
842 
843 #if OMPT_SUPPORT && OMPT_OPTIONAL
844  if (ompt_enabled.enabled) {
845  /* OMPT state update */
846  th->th.ompt_thread_info.state = omp_state_work_parallel;
847  th->th.ompt_thread_info.wait_id = 0;
848 
849  /* OMPT event callback */
850  if (ompt_enabled.ompt_callback_mutex_acquired) {
851  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
852  ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
853  }
854  }
855 #endif
856 
857 #if USE_ITT_BUILD
858  __kmp_itt_ordered_start(gtid);
859 #endif /* USE_ITT_BUILD */
860 }
861 
869 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
870  int cid = 0;
871  kmp_info_t *th;
872 
873  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
874 
875 #if USE_ITT_BUILD
876  __kmp_itt_ordered_end(gtid);
877 // TODO: ordered_wait_id
878 #endif /* USE_ITT_BUILD */
879 
880  th = __kmp_threads[gtid];
881 
882  if (th->th.th_dispatch->th_dxo_fcn != 0)
883  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
884  else
885  __kmp_parallel_dxo(&gtid, &cid, loc);
886 
887 #if OMPT_SUPPORT && OMPT_OPTIONAL
888  OMPT_STORE_RETURN_ADDRESS(gtid);
889  if (ompt_enabled.ompt_callback_mutex_released) {
890  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
891  ompt_mutex_ordered,
892  (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
893  OMPT_LOAD_RETURN_ADDRESS(gtid));
894  }
895 #endif
896 }
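/*!
Editor's note: a hypothetical sketch of the calls emitted inside a chunk of a
loop with an ordered clause; __kmpc_ordered() blocks until it is this
iteration's turn and __kmpc_end_ordered() passes the turn on. Names are
illustrative.
@code
static ident_t loc_ord = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;loop;60;1;;"};

void loop_chunk(kmp_int32 gtid, int i) {
  /* ... unordered part of iteration i ... */
  __kmpc_ordered(&loc_ord, gtid);
  /* ... ordered part, executed in iteration order ... */
  __kmpc_end_ordered(&loc_ord, gtid);
}
@endcode
*/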
897 
898 #if KMP_USE_DYNAMIC_LOCK
899 
900 static __forceinline void
901 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
902  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
903  // Pointer to the allocated indirect lock is written to crit, while indexing
904  // is ignored.
905  void *idx;
906  kmp_indirect_lock_t **lck;
907  lck = (kmp_indirect_lock_t **)crit;
908  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
909  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
910  KMP_SET_I_LOCK_LOCATION(ilk, loc);
911  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
912  KA_TRACE(20,
913  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
914 #if USE_ITT_BUILD
915  __kmp_itt_critical_creating(ilk->lock, loc);
916 #endif
917  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
918  if (status == 0) {
919 #if USE_ITT_BUILD
920  __kmp_itt_critical_destroyed(ilk->lock);
921 #endif
922  // We don't really need to destroy the unclaimed lock here since it will be
923  // cleaned up at program exit.
924  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
925  }
926  KMP_DEBUG_ASSERT(*lck != NULL);
927 }
928 
929 // Fast-path acquire tas lock
930 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
931  { \
932  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
933  if (l->lk.poll != KMP_LOCK_FREE(tas) || \
934  !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \
935  KMP_LOCK_BUSY(gtid + 1, tas))) { \
936  kmp_uint32 spins; \
937  KMP_FSYNC_PREPARE(l); \
938  KMP_INIT_YIELD(spins); \
939  if (TCR_4(__kmp_nth) > \
940  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
941  KMP_YIELD(TRUE); \
942  } else { \
943  KMP_YIELD_SPIN(spins); \
944  } \
945  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
946  while (l->lk.poll != KMP_LOCK_FREE(tas) || \
947  !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \
948  KMP_LOCK_BUSY(gtid + 1, tas))) { \
949  __kmp_spin_backoff(&backoff); \
950  if (TCR_4(__kmp_nth) > \
951  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
952  KMP_YIELD(TRUE); \
953  } else { \
954  KMP_YIELD_SPIN(spins); \
955  } \
956  } \
957  } \
958  KMP_FSYNC_ACQUIRED(l); \
959  }
960 
961 // Fast-path test tas lock
962 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
963  { \
964  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
965  rc = l->lk.poll == KMP_LOCK_FREE(tas) && \
966  KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \
967  KMP_LOCK_BUSY(gtid + 1, tas)); \
968  }
969 
970 // Fast-path release tas lock
971 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
972  { \
973  TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \
974  KMP_MB(); \
975  }
976 
977 #if KMP_USE_FUTEX
978 
979 #include <sys/syscall.h>
980 #include <unistd.h>
981 #ifndef FUTEX_WAIT
982 #define FUTEX_WAIT 0
983 #endif
984 #ifndef FUTEX_WAKE
985 #define FUTEX_WAKE 1
986 #endif
987 
988 // Fast-path acquire futex lock
989 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
990  { \
991  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
992  kmp_int32 gtid_code = (gtid + 1) << 1; \
993  KMP_MB(); \
994  KMP_FSYNC_PREPARE(ftx); \
995  kmp_int32 poll_val; \
996  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
997  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
998  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
999  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1000  if (!cond) { \
1001  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1002  poll_val | \
1003  KMP_LOCK_BUSY(1, futex))) { \
1004  continue; \
1005  } \
1006  poll_val |= KMP_LOCK_BUSY(1, futex); \
1007  } \
1008  kmp_int32 rc; \
1009  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1010  NULL, NULL, 0)) != 0) { \
1011  continue; \
1012  } \
1013  gtid_code |= 1; \
1014  } \
1015  KMP_FSYNC_ACQUIRED(ftx); \
1016  }
1017 
1018 // Fast-path test futex lock
1019 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1020  { \
1021  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1022  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1023  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1024  KMP_FSYNC_ACQUIRED(ftx); \
1025  rc = TRUE; \
1026  } else { \
1027  rc = FALSE; \
1028  } \
1029  }
1030 
1031 // Fast-path release futex lock
1032 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1033  { \
1034  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1035  KMP_MB(); \
1036  KMP_FSYNC_RELEASING(ftx); \
1037  kmp_int32 poll_val = \
1038  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1039  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1040  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1041  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1042  } \
1043  KMP_MB(); \
1044  KMP_YIELD(TCR_4(__kmp_nth) > \
1045  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1046  }
1047 
1048 #endif // KMP_USE_FUTEX
1049 
1050 #else // KMP_USE_DYNAMIC_LOCK
1051 
1052 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1053  ident_t const *loc,
1054  kmp_int32 gtid) {
1055  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1056 
1057  // Because of the double-check, the following load doesn't need to be volatile
1058  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1059 
1060  if (lck == NULL) {
1061  void *idx;
1062 
1063  // Allocate & initialize the lock.
1064  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1065  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1066  __kmp_init_user_lock_with_checks(lck);
1067  __kmp_set_user_lock_location(lck, loc);
1068 #if USE_ITT_BUILD
1069  __kmp_itt_critical_creating(lck);
1070 // __kmp_itt_critical_creating() should be called *before* the first usage
1071 // of the underlying lock. It is the only place where we can guarantee it.
1072 // There is a chance the lock will be destroyed without ever being used, but
1073 // that is not a problem, because this is not a real event seen by the user
1074 // but rather sets a name for the object (lock). See more details in kmp_itt.h.
1075 #endif /* USE_ITT_BUILD */
1076 
1077  // Use a cmpxchg instruction to slam the start of the critical section with
1078  // the lock pointer. If another thread beat us to it, deallocate the lock,
1079  // and use the lock that the other thread allocated.
1080  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1081 
1082  if (status == 0) {
1083 // Deallocate the lock and reload the value.
1084 #if USE_ITT_BUILD
1085  __kmp_itt_critical_destroyed(lck);
1086 // Let ITT know the lock is destroyed and the same memory location may be reused
1087 // for another purpose.
1088 #endif /* USE_ITT_BUILD */
1089  __kmp_destroy_user_lock_with_checks(lck);
1090  __kmp_user_lock_free(&idx, gtid, lck);
1091  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1092  KMP_DEBUG_ASSERT(lck != NULL);
1093  }
1094  }
1095  return lck;
1096 }
1097 
1098 #endif // KMP_USE_DYNAMIC_LOCK
1099 
1110 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1111  kmp_critical_name *crit) {
1112 #if KMP_USE_DYNAMIC_LOCK
1113 #if OMPT_SUPPORT && OMPT_OPTIONAL
1114  OMPT_STORE_RETURN_ADDRESS(global_tid);
1115 #endif // OMPT_SUPPORT
1116  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1117 #else
1118  KMP_COUNT_BLOCK(OMP_CRITICAL);
1119  KMP_TIME_PARTITIONED_BLOCK(
1120  OMP_critical_wait); /* Time spent waiting to enter the critical section */
1121 #if OMPT_SUPPORT && OMPT_OPTIONAL
1122  omp_state_t prev_state = omp_state_undefined;
1123  ompt_thread_info_t ti;
1124 #endif
1125  kmp_user_lock_p lck;
1126 
1127  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1128 
1129  // TODO: add THR_OVHD_STATE
1130 
1131  KMP_CHECK_USER_LOCK_INIT();
1132 
1133  if ((__kmp_user_lock_kind == lk_tas) &&
1134  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1135  lck = (kmp_user_lock_p)crit;
1136  }
1137 #if KMP_USE_FUTEX
1138  else if ((__kmp_user_lock_kind == lk_futex) &&
1139  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1140  lck = (kmp_user_lock_p)crit;
1141  }
1142 #endif
1143  else { // ticket, queuing or drdpa
1144  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1145  }
1146 
1147  if (__kmp_env_consistency_check)
1148  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1149 
1150 // since the critical directive binds to all threads, not just the current
1151 // team, we have to check this even if we are in a serialized team.
1152 // also, even if we are the uber thread, we still have to acquire the lock,
1153 // as we have to contend with sibling threads.
1154 
1155 #if USE_ITT_BUILD
1156  __kmp_itt_critical_acquiring(lck);
1157 #endif /* USE_ITT_BUILD */
1158 #if OMPT_SUPPORT && OMPT_OPTIONAL
1159  OMPT_STORE_RETURN_ADDRESS(global_tid);
1160  void *codeptr_ra = NULL;
1161  if (ompt_enabled.enabled) {
1162  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1163  /* OMPT state update */
1164  prev_state = ti.state;
1165  ti.wait_id = (omp_wait_id_t)lck;
1166  ti.state = omp_state_wait_critical;
1167 
1168  /* OMPT event callback */
1169  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1170  if (ompt_enabled.ompt_callback_mutex_acquire) {
1171  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1172  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1173  (omp_wait_id_t)crit, codeptr_ra);
1174  }
1175  }
1176 #endif
1177  // Value of 'crit' should be good for using as a critical_id of the critical
1178  // section directive.
1179  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1180 
1181 #if USE_ITT_BUILD
1182  __kmp_itt_critical_acquired(lck);
1183 #endif /* USE_ITT_BUILD */
1184 #if OMPT_SUPPORT && OMPT_OPTIONAL
1185  if (ompt_enabled.enabled) {
1186  /* OMPT state update */
1187  ti.state = prev_state;
1188  ti.wait_id = 0;
1189 
1190  /* OMPT event callback */
1191  if (ompt_enabled.ompt_callback_mutex_acquired) {
1192  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1193  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
1194  }
1195  }
1196 #endif
1197 
1198  KMP_START_EXPLICIT_TIMER(OMP_critical);
1199  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1200 #endif // KMP_USE_DYNAMIC_LOCK
1201 }
1202 
1203 #if KMP_USE_DYNAMIC_LOCK
1204 
1205 // Converts the given hint to an internal lock implementation
1206 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1207 #if KMP_USE_TSX
1208 #define KMP_TSX_LOCK(seq) lockseq_##seq
1209 #else
1210 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1211 #endif
1212 
1213 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1214 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1215 #else
1216 #define KMP_CPUINFO_RTM 0
1217 #endif
1218 
1219  // Hints that do not require further logic
1220  if (hint & kmp_lock_hint_hle)
1221  return KMP_TSX_LOCK(hle);
1222  if (hint & kmp_lock_hint_rtm)
1223  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1224  if (hint & kmp_lock_hint_adaptive)
1225  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1226 
1227  // Rule out conflicting hints first by returning the default lock
1228  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1229  return __kmp_user_lock_seq;
1230  if ((hint & omp_lock_hint_speculative) &&
1231  (hint & omp_lock_hint_nonspeculative))
1232  return __kmp_user_lock_seq;
1233 
1234  // Do not even consider speculation when it appears to be contended
1235  if (hint & omp_lock_hint_contended)
1236  return lockseq_queuing;
1237 
1238  // Uncontended lock without speculation
1239  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1240  return lockseq_tas;
1241 
1242  // HLE lock for speculation
1243  if (hint & omp_lock_hint_speculative)
1244  return KMP_TSX_LOCK(hle);
1245 
1246  return __kmp_user_lock_seq;
1247 }
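/*!
Editor's note: an illustrative user-level view of the mapping above, assuming
the standard OpenMP 4.5 hinted lock API declared in omp.h; the internal lock
kind is chosen by __kmp_map_hint_to_lock() when the hinted entry points are
reached.
@code
void init_hinted_lock(omp_lock_t *lk) {
  // omp_lock_hint_contended                        -> lockseq_queuing
  // omp_lock_hint_uncontended (non-speculative)    -> lockseq_tas
  // omp_lock_hint_speculative                      -> HLE when TSX support is
  //                                                   compiled in, else default
  omp_init_lock_with_hint(lk, omp_lock_hint_contended);
  omp_set_lock(lk);
  /* ... critical work ... */
  omp_unset_lock(lk);
  omp_destroy_lock(lk);
}
@endcode
*/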
1248 
1249 #if OMPT_SUPPORT && OMPT_OPTIONAL
1250 static kmp_mutex_impl_t
1251 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1252  if (user_lock) {
1253  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1254  case 0:
1255  break;
1256 #if KMP_USE_FUTEX
1257  case locktag_futex:
1258  return kmp_mutex_impl_queuing;
1259 #endif
1260  case locktag_tas:
1261  return kmp_mutex_impl_spin;
1262 #if KMP_USE_TSX
1263  case locktag_hle:
1264  return kmp_mutex_impl_speculative;
1265 #endif
1266  default:
1267  return ompt_mutex_impl_unknown;
1268  }
1269  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1270  }
1271  KMP_ASSERT(ilock);
1272  switch (ilock->type) {
1273 #if KMP_USE_TSX
1274  case locktag_adaptive:
1275  case locktag_rtm:
1276  return kmp_mutex_impl_speculative;
1277 #endif
1278  case locktag_nested_tas:
1279  return kmp_mutex_impl_spin;
1280 #if KMP_USE_FUTEX
1281  case locktag_nested_futex:
1282 #endif
1283  case locktag_ticket:
1284  case locktag_queuing:
1285  case locktag_drdpa:
1286  case locktag_nested_ticket:
1287  case locktag_nested_queuing:
1288  case locktag_nested_drdpa:
1289  return kmp_mutex_impl_queuing;
1290  default:
1291  return ompt_mutex_impl_unknown;
1292  }
1293 }
1294 
1295 // For locks without dynamic binding
1296 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1297  switch (__kmp_user_lock_kind) {
1298  case lk_tas:
1299  return kmp_mutex_impl_spin;
1300 #if KMP_USE_FUTEX
1301  case lk_futex:
1302 #endif
1303  case lk_ticket:
1304  case lk_queuing:
1305  case lk_drdpa:
1306  return kmp_mutex_impl_queuing;
1307 #if KMP_USE_TSX
1308  case lk_hle:
1309  case lk_rtm:
1310  case lk_adaptive:
1311  return kmp_mutex_impl_speculative;
1312 #endif
1313  default:
1314  return ompt_mutex_impl_unknown;
1315  }
1316 }
1317 #endif
1318 
1332 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1333  kmp_critical_name *crit, uintptr_t hint) {
1334  KMP_COUNT_BLOCK(OMP_CRITICAL);
1335  kmp_user_lock_p lck;
1336 #if OMPT_SUPPORT && OMPT_OPTIONAL
1337  omp_state_t prev_state = omp_state_undefined;
1338  ompt_thread_info_t ti;
1339  // This is the case if called from __kmpc_critical:
1340  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1341  if (!codeptr)
1342  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1343 #endif
1344 
1345  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1346 
1347  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1348  // Check if it is initialized.
1349  if (*lk == 0) {
1350  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1351  if (KMP_IS_D_LOCK(lckseq)) {
1352  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1353  KMP_GET_D_TAG(lckseq));
1354  } else {
1355  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1356  }
1357  }
1358  // Branch for accessing the actual lock object and set operation. This
1359  // branching is inevitable since this lock initialization does not follow the
1360  // normal dispatch path (lock table is not used).
1361  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1362  lck = (kmp_user_lock_p)lk;
1363  if (__kmp_env_consistency_check) {
1364  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1365  __kmp_map_hint_to_lock(hint));
1366  }
1367 #if USE_ITT_BUILD
1368  __kmp_itt_critical_acquiring(lck);
1369 #endif
1370 #if OMPT_SUPPORT && OMPT_OPTIONAL
1371  if (ompt_enabled.enabled) {
1372  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1373  /* OMPT state update */
1374  prev_state = ti.state;
1375  ti.wait_id = (omp_wait_id_t)lck;
1376  ti.state = omp_state_wait_critical;
1377 
1378  /* OMPT event callback */
1379  if (ompt_enabled.ompt_callback_mutex_acquire) {
1380  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1381  ompt_mutex_critical, (unsigned int)hint,
1382  __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
1383  }
1384  }
1385 #endif
1386 #if KMP_USE_INLINED_TAS
1387  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1388  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1389  } else
1390 #elif KMP_USE_INLINED_FUTEX
1391  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1392  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1393  } else
1394 #endif
1395  {
1396  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1397  }
1398  } else {
1399  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1400  lck = ilk->lock;
1401  if (__kmp_env_consistency_check) {
1402  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1403  __kmp_map_hint_to_lock(hint));
1404  }
1405 #if USE_ITT_BUILD
1406  __kmp_itt_critical_acquiring(lck);
1407 #endif
1408 #if OMPT_SUPPORT && OMPT_OPTIONAL
1409  if (ompt_enabled.enabled) {
1410  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1411  /* OMPT state update */
1412  prev_state = ti.state;
1413  ti.wait_id = (omp_wait_id_t)lck;
1414  ti.state = omp_state_wait_critical;
1415 
1416  /* OMPT event callback */
1417  if (ompt_enabled.ompt_callback_mutex_acquire) {
1418  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1419  ompt_mutex_critical, (unsigned int)hint,
1420  __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
1421  }
1422  }
1423 #endif
1424  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1425  }
1426 
1427 #if USE_ITT_BUILD
1428  __kmp_itt_critical_acquired(lck);
1429 #endif /* USE_ITT_BUILD */
1430 #if OMPT_SUPPORT && OMPT_OPTIONAL
1431  if (ompt_enabled.enabled) {
1432  /* OMPT state update */
1433  ti.state = prev_state;
1434  ti.wait_id = 0;
1435 
1436  /* OMPT event callback */
1437  if (ompt_enabled.ompt_callback_mutex_acquired) {
1438  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1439  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
1440  }
1441  }
1442 #endif
1443 
1444  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1445  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1446 } // __kmpc_critical_with_hint
1447 
1448 #endif // KMP_USE_DYNAMIC_LOCK
1449 
1459 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1460  kmp_critical_name *crit) {
1461  kmp_user_lock_p lck;
1462 
1463  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1464 
1465 #if KMP_USE_DYNAMIC_LOCK
1466  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1467  lck = (kmp_user_lock_p)crit;
1468  KMP_ASSERT(lck != NULL);
1469  if (__kmp_env_consistency_check) {
1470  __kmp_pop_sync(global_tid, ct_critical, loc);
1471  }
1472 #if USE_ITT_BUILD
1473  __kmp_itt_critical_releasing(lck);
1474 #endif
1475 #if KMP_USE_INLINED_TAS
1476  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1477  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1478  } else
1479 #elif KMP_USE_INLINED_FUTEX
1480  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1481  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1482  } else
1483 #endif
1484  {
1485  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1486  }
1487  } else {
1488  kmp_indirect_lock_t *ilk =
1489  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1490  KMP_ASSERT(ilk != NULL);
1491  lck = ilk->lock;
1492  if (__kmp_env_consistency_check) {
1493  __kmp_pop_sync(global_tid, ct_critical, loc);
1494  }
1495 #if USE_ITT_BUILD
1496  __kmp_itt_critical_releasing(lck);
1497 #endif
1498  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1499  }
1500 
1501 #else // KMP_USE_DYNAMIC_LOCK
1502 
1503  if ((__kmp_user_lock_kind == lk_tas) &&
1504  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1505  lck = (kmp_user_lock_p)crit;
1506  }
1507 #if KMP_USE_FUTEX
1508  else if ((__kmp_user_lock_kind == lk_futex) &&
1509  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1510  lck = (kmp_user_lock_p)crit;
1511  }
1512 #endif
1513  else { // ticket, queuing or drdpa
1514  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1515  }
1516 
1517  KMP_ASSERT(lck != NULL);
1518 
1519  if (__kmp_env_consistency_check)
1520  __kmp_pop_sync(global_tid, ct_critical, loc);
1521 
1522 #if USE_ITT_BUILD
1523  __kmp_itt_critical_releasing(lck);
1524 #endif /* USE_ITT_BUILD */
1525  // Value of 'crit' should be good for using as a critical_id of the critical
1526  // section directive.
1527  __kmp_release_user_lock_with_checks(lck, global_tid);
1528 
1529 #endif // KMP_USE_DYNAMIC_LOCK
1530 
1531 #if OMPT_SUPPORT && OMPT_OPTIONAL
1532  /* OMPT release event triggers after lock is released; place here to trigger
1533  * for all #if branches */
1534  OMPT_STORE_RETURN_ADDRESS(global_tid);
1535  if (ompt_enabled.ompt_callback_mutex_released) {
1536  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1537  ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1538  }
1539 #endif
1540 
1541  KMP_POP_PARTITIONED_TIMER();
1542  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1543 }
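/*!
Editor's note: a hypothetical sketch of lowering a named critical section. The
compiler emits one zero-initialized kmp_critical_name (an array of kmp_int32)
per critical name, and the runtime lazily installs a lock in it on first use.
The names loc_crit, crit_update and update are illustrative.
@code
static ident_t loc_crit = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;upd;70;1;;"};
static kmp_critical_name crit_update; // zero-initialized, shared by all threads

void update(kmp_int32 gtid, int *counter) {
  __kmpc_critical(&loc_crit, gtid, &crit_update); // #pragma omp critical(update)
  ++*counter;
  __kmpc_end_critical(&loc_crit, gtid, &crit_update);
}
@endcode
*/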
1544 
1554 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1555  int status;
1556 
1557  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1558 
1559  if (!TCR_4(__kmp_init_parallel))
1560  __kmp_parallel_initialize();
1561 
1562  if (__kmp_env_consistency_check)
1563  __kmp_check_barrier(global_tid, ct_barrier, loc);
1564 
1565 #if OMPT_SUPPORT
1566  omp_frame_t *ompt_frame;
1567  if (ompt_enabled.enabled) {
1568  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1569  if (ompt_frame->enter_frame == NULL)
1570  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1571  OMPT_STORE_RETURN_ADDRESS(global_tid);
1572  }
1573 #endif
1574 #if USE_ITT_NOTIFY
1575  __kmp_threads[global_tid]->th.th_ident = loc;
1576 #endif
1577  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1578 #if OMPT_SUPPORT && OMPT_OPTIONAL
1579  if (ompt_enabled.enabled) {
1580  ompt_frame->enter_frame = NULL;
1581  }
1582 #endif
1583 
1584  return (status != 0) ? 0 : 1;
1585 }
1586 
1596 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1597  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1598 
1599  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1600 }
1601 
1612 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1613  kmp_int32 ret;
1614 
1615  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1616 
1617  if (!TCR_4(__kmp_init_parallel))
1618  __kmp_parallel_initialize();
1619 
1620  if (__kmp_env_consistency_check) {
1621  if (loc == 0) {
1622  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1623  }
1624  __kmp_check_barrier(global_tid, ct_barrier, loc);
1625  }
1626 
1627 #if OMPT_SUPPORT
1628  omp_frame_t *ompt_frame;
1629  if (ompt_enabled.enabled) {
1630  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1631  if (ompt_frame->enter_frame == NULL)
1632  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1633  OMPT_STORE_RETURN_ADDRESS(global_tid);
1634  }
1635 #endif
1636 #if USE_ITT_NOTIFY
1637  __kmp_threads[global_tid]->th.th_ident = loc;
1638 #endif
1639  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1640 #if OMPT_SUPPORT && OMPT_OPTIONAL
1641  if (ompt_enabled.enabled) {
1642  ompt_frame->enter_frame = NULL;
1643  }
1644 #endif
1645 
1646  ret = __kmpc_master(loc, global_tid);
1647 
1648  if (__kmp_env_consistency_check) {
1649  /* there is no __kmpc_end_master call, so the (stats) */
1650  /* actions of __kmpc_end_master are done here */
1651 
1652  if (global_tid < 0) {
1653  KMP_WARNING(ThreadIdentInvalid);
1654  }
1655  if (ret) {
1656  /* only one thread should do the pop since only */
1657  /* one did the push (see __kmpc_master()) */
1658 
1659  __kmp_pop_sync(global_tid, ct_master, loc);
1660  }
1661  }
1662 
1663  return (ret);
1664 }
1665 
1666 /* The BARRIER for a SINGLE process section is always explicit */
1678 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1679  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1680 
1681  if (rc) {
1682  // We are going to execute the single statement, so we should count it.
1683  KMP_COUNT_BLOCK(OMP_SINGLE);
1684  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1685  }
1686 
1687 #if OMPT_SUPPORT && OMPT_OPTIONAL
1688  kmp_info_t *this_thr = __kmp_threads[global_tid];
1689  kmp_team_t *team = this_thr->th.th_team;
1690  int tid = __kmp_tid_from_gtid(global_tid);
1691 
1692  if (ompt_enabled.enabled) {
1693  if (rc) {
1694  if (ompt_enabled.ompt_callback_work) {
1695  ompt_callbacks.ompt_callback(ompt_callback_work)(
1696  ompt_work_single_executor, ompt_scope_begin,
1697  &(team->t.ompt_team_info.parallel_data),
1698  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1699  1, OMPT_GET_RETURN_ADDRESS(0));
1700  }
1701  } else {
1702  if (ompt_enabled.ompt_callback_work) {
1703  ompt_callbacks.ompt_callback(ompt_callback_work)(
1704  ompt_work_single_other, ompt_scope_begin,
1705  &(team->t.ompt_team_info.parallel_data),
1706  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1707  1, OMPT_GET_RETURN_ADDRESS(0));
1708  ompt_callbacks.ompt_callback(ompt_callback_work)(
1709  ompt_work_single_other, ompt_scope_end,
1710  &(team->t.ompt_team_info.parallel_data),
1711  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1712  1, OMPT_GET_RETURN_ADDRESS(0));
1713  }
1714  }
1715  }
1716 #endif
1717 
1718  return rc;
1719 }
1720 
1730 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1731  __kmp_exit_single(global_tid);
1732  KMP_POP_PARTITIONED_TIMER();
1733 
1734 #if OMPT_SUPPORT && OMPT_OPTIONAL
1735  kmp_info_t *this_thr = __kmp_threads[global_tid];
1736  kmp_team_t *team = this_thr->th.th_team;
1737  int tid = __kmp_tid_from_gtid(global_tid);
1738 
1739  if (ompt_enabled.ompt_callback_work) {
1740  ompt_callbacks.ompt_callback(ompt_callback_work)(
1741  ompt_work_single_executor, ompt_scope_end,
1742  &(team->t.ompt_team_info.parallel_data),
1743  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1744  OMPT_GET_RETURN_ADDRESS(0));
1745  }
1746 #endif
1747 }
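/*!
Editor's note: a hypothetical sketch of lowering #pragma omp single. Only the
thread for which __kmpc_single() returns 1 runs the block and calls
__kmpc_end_single(); the implicit barrier at the end of the construct is a
separate __kmpc_barrier() call, omitted for nowait. Names are illustrative.
@code
static ident_t loc_single = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;init;80;1;;"};

void region_body(kmp_int32 gtid, int *shared_x) {
  if (__kmpc_single(&loc_single, gtid)) {
    *shared_x = 42; /* executed by exactly one thread */
    __kmpc_end_single(&loc_single, gtid);
  }
  __kmpc_barrier(&loc_single, gtid); // implicit barrier unless nowait
}
@endcode
*/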
1748 
1756 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1757  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1758 
1759 #if OMPT_SUPPORT && OMPT_OPTIONAL
1760  if (ompt_enabled.ompt_callback_work) {
1761  ompt_work_type_t ompt_work_type = ompt_work_loop;
1762  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1763  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1764  // Determine workshare type
1765  if (loc != NULL) {
1766  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1767  ompt_work_type = ompt_work_loop;
1768  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1769  ompt_work_type = ompt_work_sections;
1770  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1771  ompt_work_type = ompt_work_distribute;
1772  } else {
1773  // use default set above.
1774  // a warning about this case is provided in __kmpc_for_static_init
1775  }
1776  KMP_DEBUG_ASSERT(ompt_work_type);
1777  }
1778  ompt_callbacks.ompt_callback(ompt_callback_work)(
1779  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1780  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1781  }
1782 #endif
1783 
1784  if (__kmp_env_consistency_check)
1785  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1786 }
1787 
1788 // User routines which take C-style arguments (call by value)
1789 // different from the Fortran equivalent routines
1790 
1791 void ompc_set_num_threads(int arg) {
1792  // !!!!! TODO: check the per-task binding
1793  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1794 }
1795 
1796 void ompc_set_dynamic(int flag) {
1797  kmp_info_t *thread;
1798 
1799  /* For the thread-private implementation of the internal controls */
1800  thread = __kmp_entry_thread();
1801 
1802  __kmp_save_internal_controls(thread);
1803 
1804  set__dynamic(thread, flag ? TRUE : FALSE);
1805 }
1806 
1807 void ompc_set_nested(int flag) {
1808  kmp_info_t *thread;
1809 
1810  /* For the thread-private internal controls implementation */
1811  thread = __kmp_entry_thread();
1812 
1813  __kmp_save_internal_controls(thread);
1814 
1815  set__nested(thread, flag ? TRUE : FALSE);
1816 }
1817 
1818 void ompc_set_max_active_levels(int max_active_levels) {
1819  /* TO DO */
1820  /* we want per-task implementation of this internal control */
1821 
1822  /* For the per-thread internal controls implementation */
1823  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1824 }
1825 
1826 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1827  // !!!!! TODO: check the per-task binding
1828  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1829 }
1830 
1831 int ompc_get_ancestor_thread_num(int level) {
1832  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1833 }
1834 
1835 int ompc_get_team_size(int level) {
1836  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1837 }
1838 
1839 void kmpc_set_stacksize(int arg) {
1840  // __kmp_aux_set_stacksize initializes the library if needed
1841  __kmp_aux_set_stacksize(arg);
1842 }
1843 
1844 void kmpc_set_stacksize_s(size_t arg) {
1845  // __kmp_aux_set_stacksize initializes the library if needed
1846  __kmp_aux_set_stacksize(arg);
1847 }
1848 
1849 void kmpc_set_blocktime(int arg) {
1850  int gtid, tid;
1851  kmp_info_t *thread;
1852 
1853  gtid = __kmp_entry_gtid();
1854  tid = __kmp_tid_from_gtid(gtid);
1855  thread = __kmp_thread_from_gtid(gtid);
1856 
1857  __kmp_aux_set_blocktime(arg, thread, tid);
1858 }
1859 
1860 void kmpc_set_library(int arg) {
1861  // __kmp_user_set_library initializes the library if needed
1862  __kmp_user_set_library((enum library_type)arg);
1863 }
1864 
1865 void kmpc_set_defaults(char const *str) {
1866  // __kmp_aux_set_defaults initializes the library if needed
1867  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1868 }
1869 
1870 void kmpc_set_disp_num_buffers(int arg) {
1871  // ignore after initialization because some teams have already
1872  // allocated dispatch buffers
1873  if (__kmp_init_serial == 0 && arg > 0)
1874  __kmp_dispatch_num_buffers = arg;
1875 }
1876 
1877 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1878 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1879  return -1;
1880 #else
1881  if (!TCR_4(__kmp_init_middle)) {
1882  __kmp_middle_initialize();
1883  }
1884  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1885 #endif
1886 }
1887 
1888 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1889 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1890  return -1;
1891 #else
1892  if (!TCR_4(__kmp_init_middle)) {
1893  __kmp_middle_initialize();
1894  }
1895  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1896 #endif
1897 }
1898 
1899 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1900 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1901  return -1;
1902 #else
1903  if (!TCR_4(__kmp_init_middle)) {
1904  __kmp_middle_initialize();
1905  }
1906  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1907 #endif
1908 }
1909 
1910 /* -------------------------------------------------------------------------- */
1955 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1956  void *cpy_data, void (*cpy_func)(void *, void *),
1957  kmp_int32 didit) {
1958  void **data_ptr;
1959 
1960  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
1961 
1962  KMP_MB();
1963 
1964  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
1965 
1966  if (__kmp_env_consistency_check) {
1967  if (loc == 0) {
1968  KMP_WARNING(ConstructIdentInvalid);
1969  }
1970  }
1971 
1972  // ToDo: Optimize the following two barriers into some kind of split barrier
1973 
1974  if (didit)
1975  *data_ptr = cpy_data;
1976 
1977 #if OMPT_SUPPORT
1978  omp_frame_t *ompt_frame;
1979  if (ompt_enabled.enabled) {
1980  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1981  if (ompt_frame->enter_frame == NULL)
1982  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1983  OMPT_STORE_RETURN_ADDRESS(gtid);
1984  }
1985 #endif
1986 /* This barrier is not a barrier region boundary */
1987 #if USE_ITT_NOTIFY
1988  __kmp_threads[gtid]->th.th_ident = loc;
1989 #endif
1990  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
1991 
1992  if (!didit)
1993  (*cpy_func)(cpy_data, *data_ptr);
1994 
1995 // Consider next barrier a user-visible barrier for barrier region boundaries
1996 // Nesting checks are already handled by the single construct checks
1997 
1998 #if OMPT_SUPPORT
1999  if (ompt_enabled.enabled) {
2000  OMPT_STORE_RETURN_ADDRESS(gtid);
2001  }
2002 #endif
2003 #if USE_ITT_NOTIFY
2004  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2005 // tasks can overwrite the location)
2006 #endif
2007  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2008 #if OMPT_SUPPORT && OMPT_OPTIONAL
2009  if (ompt_enabled.enabled) {
2010  ompt_frame->enter_frame = NULL;
2011  }
2012 #endif
2013 }
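/*!
Editor's note: a hypothetical sketch of lowering
#pragma omp single copyprivate(x). The executing thread publishes the address
of its private copy, every other thread copies from it through the supplied
callback, and the two barriers inside __kmpc_copyprivate() keep the source data
alive long enough. Names are illustrative.
@code
static ident_t loc_cp = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;bcast;90;1;;"};

static void copy_int(void *dst, void *src) { *(int *)dst = *(int *)src; }

void region_body(kmp_int32 gtid) {
  int x = 0;                                    // private copy in each thread
  kmp_int32 didit = __kmpc_single(&loc_cp, gtid);
  if (didit) {
    x = 42;                                     // produced by the single thread
    __kmpc_end_single(&loc_cp, gtid);
  }
  __kmpc_copyprivate(&loc_cp, gtid, sizeof(int), &x, copy_int, didit);
}
@endcode
*/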
2014 
2015 /* -------------------------------------------------------------------------- */
2016 
2017 #define INIT_LOCK __kmp_init_user_lock_with_checks
2018 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2019 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2020 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2021 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2022 #define ACQUIRE_NESTED_LOCK_TIMED \
2023  __kmp_acquire_nested_user_lock_with_checks_timed
2024 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2025 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2026 #define TEST_LOCK __kmp_test_user_lock_with_checks
2027 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2028 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2029 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2030 
2031 // TODO: Make check abort messages use location info & pass it into
2032 // with_checks routines
2033 
2034 #if KMP_USE_DYNAMIC_LOCK
2035 
2036 // internal lock initializer
2037 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2038  kmp_dyna_lockseq_t seq) {
2039  if (KMP_IS_D_LOCK(seq)) {
2040  KMP_INIT_D_LOCK(lock, seq);
2041 #if USE_ITT_BUILD
2042  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2043 #endif
2044  } else {
2045  KMP_INIT_I_LOCK(lock, seq);
2046 #if USE_ITT_BUILD
2047  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2048  __kmp_itt_lock_creating(ilk->lock, loc);
2049 #endif
2050  }
2051 }
2052 
2053 // internal nest lock initializer
2054 static __forceinline void
2055 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2056  kmp_dyna_lockseq_t seq) {
2057 #if KMP_USE_TSX
2058  // Don't have nested lock implementation for speculative locks
2059  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2060  seq = __kmp_user_lock_seq;
2061 #endif
2062  switch (seq) {
2063  case lockseq_tas:
2064  seq = lockseq_nested_tas;
2065  break;
2066 #if KMP_USE_FUTEX
2067  case lockseq_futex:
2068  seq = lockseq_nested_futex;
2069  break;
2070 #endif
2071  case lockseq_ticket:
2072  seq = lockseq_nested_ticket;
2073  break;
2074  case lockseq_queuing:
2075  seq = lockseq_nested_queuing;
2076  break;
2077  case lockseq_drdpa:
2078  seq = lockseq_nested_drdpa;
2079  break;
2080  default:
2081  seq = lockseq_nested_queuing;
2082  }
2083  KMP_INIT_I_LOCK(lock, seq);
2084 #if USE_ITT_BUILD
2085  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2086  __kmp_itt_lock_creating(ilk->lock, loc);
2087 #endif
2088 }
2089 
2090 /* initialize the lock with a hint */
2091 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2092  uintptr_t hint) {
2093  KMP_DEBUG_ASSERT(__kmp_init_serial);
2094  if (__kmp_env_consistency_check && user_lock == NULL) {
2095  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2096  }
2097 
2098  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2099 
2100 #if OMPT_SUPPORT && OMPT_OPTIONAL
2101  // This is the case, if called from omp_init_lock_with_hint:
2102  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2103  if (!codeptr)
2104  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2105  if (ompt_enabled.ompt_callback_lock_init) {
2106  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2107  ompt_mutex_lock, (omp_lock_hint_t)hint,
2108  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2109  codeptr);
2110  }
2111 #endif
2112 }
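
From user code this entry point is reached through the OpenMP 4.5 lock-hint API; a small sketch using the standard hint constants (the runtime may ignore the hint and fall back to a regular lock).

#include <omp.h>

static omp_lock_t spec_lock;

void init_speculative_lock(void) {
  // Asks for a speculative (e.g. TSX-based) implementation where available.
  omp_init_lock_with_hint(&spec_lock, omp_lock_hint_speculative);
}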
2113 
2114 /* initialize the lock with a hint */
2115 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2116  void **user_lock, uintptr_t hint) {
2117  KMP_DEBUG_ASSERT(__kmp_init_serial);
2118  if (__kmp_env_consistency_check && user_lock == NULL) {
2119  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2120  }
2121 
2122  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2123 
2124 #if OMPT_SUPPORT && OMPT_OPTIONAL
2125  // This is the case, if called from omp_init_lock_with_hint:
2126  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2127  if (!codeptr)
2128  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2129  if (ompt_enabled.ompt_callback_lock_init) {
2130  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2131  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2132  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2133  codeptr);
2134  }
2135 #endif
2136 }
2137 
2138 #endif // KMP_USE_DYNAMIC_LOCK
2139 
2140 /* initialize the lock */
2141 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2142 #if KMP_USE_DYNAMIC_LOCK
2143 
2144  KMP_DEBUG_ASSERT(__kmp_init_serial);
2145  if (__kmp_env_consistency_check && user_lock == NULL) {
2146  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2147  }
2148  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2149 
2150 #if OMPT_SUPPORT && OMPT_OPTIONAL
2151  // This is the case, if called from omp_init_lock_with_hint:
2152  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2153  if (!codeptr)
2154  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2155  if (ompt_enabled.ompt_callback_lock_init) {
2156  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2157  ompt_mutex_lock, omp_lock_hint_none,
2158  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2159  codeptr);
2160  }
2161 #endif
2162 
2163 #else // KMP_USE_DYNAMIC_LOCK
2164 
2165  static char const *const func = "omp_init_lock";
2166  kmp_user_lock_p lck;
2167  KMP_DEBUG_ASSERT(__kmp_init_serial);
2168 
2169  if (__kmp_env_consistency_check) {
2170  if (user_lock == NULL) {
2171  KMP_FATAL(LockIsUninitialized, func);
2172  }
2173  }
2174 
2175  KMP_CHECK_USER_LOCK_INIT();
2176 
2177  if ((__kmp_user_lock_kind == lk_tas) &&
2178  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2179  lck = (kmp_user_lock_p)user_lock;
2180  }
2181 #if KMP_USE_FUTEX
2182  else if ((__kmp_user_lock_kind == lk_futex) &&
2183  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2184  lck = (kmp_user_lock_p)user_lock;
2185  }
2186 #endif
2187  else {
2188  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2189  }
2190  INIT_LOCK(lck);
2191  __kmp_set_user_lock_location(lck, loc);
2192 
2193 #if OMPT_SUPPORT && OMPT_OPTIONAL
2194  // This is the case, if called from omp_init_lock_with_hint:
2195  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2196  if (!codeptr)
2197  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2198  if (ompt_enabled.ompt_callback_lock_init) {
2199  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2200  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2201  (omp_wait_id_t)user_lock, codeptr);
2202  }
2203 #endif
2204 
2205 #if USE_ITT_BUILD
2206  __kmp_itt_lock_creating(lck);
2207 #endif /* USE_ITT_BUILD */
2208 
2209 #endif // KMP_USE_DYNAMIC_LOCK
2210 } // __kmpc_init_lock
2211 
2212 /* initialize the lock */
2213 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2214 #if KMP_USE_DYNAMIC_LOCK
2215 
2216  KMP_DEBUG_ASSERT(__kmp_init_serial);
2217  if (__kmp_env_consistency_check && user_lock == NULL) {
2218  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2219  }
2220  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2221 
2222 #if OMPT_SUPPORT && OMPT_OPTIONAL
2223  // This is the case, if called from omp_init_lock_with_hint:
2224  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2225  if (!codeptr)
2226  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2227  if (ompt_enabled.ompt_callback_lock_init) {
2228  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2229  ompt_mutex_nest_lock, omp_lock_hint_none,
2230  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2231  codeptr);
2232  }
2233 #endif
2234 
2235 #else // KMP_USE_DYNAMIC_LOCK
2236 
2237  static char const *const func = "omp_init_nest_lock";
2238  kmp_user_lock_p lck;
2239  KMP_DEBUG_ASSERT(__kmp_init_serial);
2240 
2241  if (__kmp_env_consistency_check) {
2242  if (user_lock == NULL) {
2243  KMP_FATAL(LockIsUninitialized, func);
2244  }
2245  }
2246 
2247  KMP_CHECK_USER_LOCK_INIT();
2248 
2249  if ((__kmp_user_lock_kind == lk_tas) &&
2250  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2251  OMP_NEST_LOCK_T_SIZE)) {
2252  lck = (kmp_user_lock_p)user_lock;
2253  }
2254 #if KMP_USE_FUTEX
2255  else if ((__kmp_user_lock_kind == lk_futex) &&
2256  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2257  OMP_NEST_LOCK_T_SIZE)) {
2258  lck = (kmp_user_lock_p)user_lock;
2259  }
2260 #endif
2261  else {
2262  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2263  }
2264 
2265  INIT_NESTED_LOCK(lck);
2266  __kmp_set_user_lock_location(lck, loc);
2267 
2268 #if OMPT_SUPPORT && OMPT_OPTIONAL
2269  // This is the case, if called from omp_init_lock_with_hint:
2270  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2271  if (!codeptr)
2272  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2273  if (ompt_enabled.ompt_callback_lock_init) {
2274  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2275  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2276  (omp_wait_id_t)user_lock, codeptr);
2277  }
2278 #endif
2279 
2280 #if USE_ITT_BUILD
2281  __kmp_itt_lock_creating(lck);
2282 #endif /* USE_ITT_BUILD */
2283 
2284 #endif // KMP_USE_DYNAMIC_LOCK
2285 } // __kmpc_init_nest_lock
2286 
2287 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2288 #if KMP_USE_DYNAMIC_LOCK
2289 
2290 #if USE_ITT_BUILD
2291  kmp_user_lock_p lck;
2292  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2293  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2294  } else {
2295  lck = (kmp_user_lock_p)user_lock;
2296  }
2297  __kmp_itt_lock_destroyed(lck);
2298 #endif
2299 #if OMPT_SUPPORT && OMPT_OPTIONAL
2300  // This is the case, if called from omp_init_lock_with_hint:
2301  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2302  if (!codeptr)
2303  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2304  if (ompt_enabled.ompt_callback_lock_destroy) {
2305  kmp_user_lock_p lck;
2306  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2307  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2308  } else {
2309  lck = (kmp_user_lock_p)user_lock;
2310  }
2311  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2312  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2313  }
2314 #endif
2315  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2316 #else
2317  kmp_user_lock_p lck;
2318 
2319  if ((__kmp_user_lock_kind == lk_tas) &&
2320  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2321  lck = (kmp_user_lock_p)user_lock;
2322  }
2323 #if KMP_USE_FUTEX
2324  else if ((__kmp_user_lock_kind == lk_futex) &&
2325  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2326  lck = (kmp_user_lock_p)user_lock;
2327  }
2328 #endif
2329  else {
2330  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2331  }
2332 
2333 #if OMPT_SUPPORT && OMPT_OPTIONAL
2334  // This is the case, if called from omp_init_lock_with_hint:
2335  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2336  if (!codeptr)
2337  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2338  if (ompt_enabled.ompt_callback_lock_destroy) {
2339  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2340  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2341  }
2342 #endif
2343 
2344 #if USE_ITT_BUILD
2345  __kmp_itt_lock_destroyed(lck);
2346 #endif /* USE_ITT_BUILD */
2347  DESTROY_LOCK(lck);
2348 
2349  if ((__kmp_user_lock_kind == lk_tas) &&
2350  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2351  ;
2352  }
2353 #if KMP_USE_FUTEX
2354  else if ((__kmp_user_lock_kind == lk_futex) &&
2355  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2356  ;
2357  }
2358 #endif
2359  else {
2360  __kmp_user_lock_free(user_lock, gtid, lck);
2361  }
2362 #endif // KMP_USE_DYNAMIC_LOCK
2363 } // __kmpc_destroy_lock
2364 
2365 /* destroy the lock */
2366 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2367 #if KMP_USE_DYNAMIC_LOCK
2368 
2369 #if USE_ITT_BUILD
2370  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2371  __kmp_itt_lock_destroyed(ilk->lock);
2372 #endif
2373 #if OMPT_SUPPORT && OMPT_OPTIONAL
2374  // This is the case, if called from omp_init_lock_with_hint:
2375  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2376  if (!codeptr)
2377  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2378  if (ompt_enabled.ompt_callback_lock_destroy) {
2379  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2380  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2381  }
2382 #endif
2383  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2384 
2385 #else // KMP_USE_DYNAMIC_LOCK
2386 
2387  kmp_user_lock_p lck;
2388 
2389  if ((__kmp_user_lock_kind == lk_tas) &&
2390  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2391  OMP_NEST_LOCK_T_SIZE)) {
2392  lck = (kmp_user_lock_p)user_lock;
2393  }
2394 #if KMP_USE_FUTEX
2395  else if ((__kmp_user_lock_kind == lk_futex) &&
2396  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2397  OMP_NEST_LOCK_T_SIZE)) {
2398  lck = (kmp_user_lock_p)user_lock;
2399  }
2400 #endif
2401  else {
2402  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2403  }
2404 
2405 #if OMPT_SUPPORT && OMPT_OPTIONAL
2406  // This is the case, if called from omp_init_lock_with_hint:
2407  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2408  if (!codeptr)
2409  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2410  if (ompt_enabled.ompt_callback_lock_destroy) {
2411  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2412  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2413  }
2414 #endif
2415 
2416 #if USE_ITT_BUILD
2417  __kmp_itt_lock_destroyed(lck);
2418 #endif /* USE_ITT_BUILD */
2419 
2420  DESTROY_NESTED_LOCK(lck);
2421 
2422  if ((__kmp_user_lock_kind == lk_tas) &&
2423  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2424  OMP_NEST_LOCK_T_SIZE)) {
2425  ;
2426  }
2427 #if KMP_USE_FUTEX
2428  else if ((__kmp_user_lock_kind == lk_futex) &&
2429  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2430  OMP_NEST_LOCK_T_SIZE)) {
2431  ;
2432  }
2433 #endif
2434  else {
2435  __kmp_user_lock_free(user_lock, gtid, lck);
2436  }
2437 #endif // KMP_USE_DYNAMIC_LOCK
2438 } // __kmpc_destroy_nest_lock
2439 
2440 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2441  KMP_COUNT_BLOCK(OMP_set_lock);
2442 #if KMP_USE_DYNAMIC_LOCK
2443  int tag = KMP_EXTRACT_D_TAG(user_lock);
2444 #if USE_ITT_BUILD
2445  __kmp_itt_lock_acquiring(
2446  (kmp_user_lock_p)
2447  user_lock); // itt function will get to the right lock object.
2448 #endif
2449 #if OMPT_SUPPORT && OMPT_OPTIONAL
2450  // This is the case, if called from omp_init_lock_with_hint:
2451  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2452  if (!codeptr)
2453  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2454  if (ompt_enabled.ompt_callback_mutex_acquire) {
2455  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2456  ompt_mutex_lock, omp_lock_hint_none,
2457  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2458  codeptr);
2459  }
2460 #endif
2461 #if KMP_USE_INLINED_TAS
2462  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2463  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2464  } else
2465 #elif KMP_USE_INLINED_FUTEX
2466  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2467  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2468  } else
2469 #endif
2470  {
2471  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2472  }
2473 #if USE_ITT_BUILD
2474  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2475 #endif
2476 #if OMPT_SUPPORT && OMPT_OPTIONAL
2477  if (ompt_enabled.ompt_callback_mutex_acquired) {
2478  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2479  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2480  }
2481 #endif
2482 
2483 #else // KMP_USE_DYNAMIC_LOCK
2484 
2485  kmp_user_lock_p lck;
2486 
2487  if ((__kmp_user_lock_kind == lk_tas) &&
2488  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2489  lck = (kmp_user_lock_p)user_lock;
2490  }
2491 #if KMP_USE_FUTEX
2492  else if ((__kmp_user_lock_kind == lk_futex) &&
2493  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2494  lck = (kmp_user_lock_p)user_lock;
2495  }
2496 #endif
2497  else {
2498  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2499  }
2500 
2501 #if USE_ITT_BUILD
2502  __kmp_itt_lock_acquiring(lck);
2503 #endif /* USE_ITT_BUILD */
2504 #if OMPT_SUPPORT && OMPT_OPTIONAL
2505  // This is the case, if called from omp_init_lock_with_hint:
2506  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2507  if (!codeptr)
2508  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2509  if (ompt_enabled.ompt_callback_mutex_acquire) {
2510  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2511  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2512  (omp_wait_id_t)lck, codeptr);
2513  }
2514 #endif
2515 
2516  ACQUIRE_LOCK(lck, gtid);
2517 
2518 #if USE_ITT_BUILD
2519  __kmp_itt_lock_acquired(lck);
2520 #endif /* USE_ITT_BUILD */
2521 
2522 #if OMPT_SUPPORT && OMPT_OPTIONAL
2523  if (ompt_enabled.ompt_callback_mutex_acquired) {
2524  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2525  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2526  }
2527 #endif
2528 
2529 #endif // KMP_USE_DYNAMIC_LOCK
2530 }
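
For reference, the user-level locking idiom that ends up in this acquire path; a minimal sketch, assuming a lock initialized with omp_init_lock elsewhere.

#include <omp.h>

static omp_lock_t lock;
static long counter;

void increment_shared_counter(void) {
  omp_set_lock(&lock);   // blocks until the lock is acquired
  ++counter;             // work that must be serialized
  omp_unset_lock(&lock); // release so other threads can proceed
}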
2531 
2532 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2533 #if KMP_USE_DYNAMIC_LOCK
2534 
2535 #if USE_ITT_BUILD
2536  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2537 #endif
2538 #if OMPT_SUPPORT && OMPT_OPTIONAL
2539  // This is the case, if called from omp_init_lock_with_hint:
2540  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2541  if (!codeptr)
2542  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2543  if (ompt_enabled.enabled) {
2544  if (ompt_enabled.ompt_callback_mutex_acquire) {
2545  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2546  ompt_mutex_nest_lock, omp_lock_hint_none,
2547  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2548  codeptr);
2549  }
2550  }
2551 #endif
2552  int acquire_status =
2553  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2554 #if USE_ITT_BUILD
2555  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2556 #endif
2557 
2558 #if OMPT_SUPPORT && OMPT_OPTIONAL
2559  if (ompt_enabled.enabled) {
2560  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2561  if (ompt_enabled.ompt_callback_mutex_acquired) {
2562  // lock_first
2563  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2564  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2565  }
2566  } else {
2567  if (ompt_enabled.ompt_callback_nest_lock) {
2568  // lock_next
2569  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2570  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
2571  }
2572  }
2573  }
2574 #endif
2575 
2576 #else // KMP_USE_DYNAMIC_LOCK
2577  int acquire_status;
2578  kmp_user_lock_p lck;
2579 
2580  if ((__kmp_user_lock_kind == lk_tas) &&
2581  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2582  OMP_NEST_LOCK_T_SIZE)) {
2583  lck = (kmp_user_lock_p)user_lock;
2584  }
2585 #if KMP_USE_FUTEX
2586  else if ((__kmp_user_lock_kind == lk_futex) &&
2587  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2588  OMP_NEST_LOCK_T_SIZE)) {
2589  lck = (kmp_user_lock_p)user_lock;
2590  }
2591 #endif
2592  else {
2593  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2594  }
2595 
2596 #if USE_ITT_BUILD
2597  __kmp_itt_lock_acquiring(lck);
2598 #endif /* USE_ITT_BUILD */
2599 #if OMPT_SUPPORT && OMPT_OPTIONAL
2600  // This is the case, if called from omp_init_lock_with_hint:
2601  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2602  if (!codeptr)
2603  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2604  if (ompt_enabled.enabled) {
2605  if (ompt_enabled.ompt_callback_mutex_acquire) {
2606  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2607  ompt_mutex_nest_lock, omp_lock_hint_none,
2608  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
2609  }
2610  }
2611 #endif
2612 
2613  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2614 
2615 #if USE_ITT_BUILD
2616  __kmp_itt_lock_acquired(lck);
2617 #endif /* USE_ITT_BUILD */
2618 
2619 #if OMPT_SUPPORT && OMPT_OPTIONAL
2620  if (ompt_enabled.enabled) {
2621  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2622  if (ompt_enabled.ompt_callback_mutex_acquired) {
2623  // lock_first
2624  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2625  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2626  }
2627  } else {
2628  if (ompt_enabled.ompt_callback_nest_lock) {
2629  // lock_next
2630  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2631  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
2632  }
2633  }
2634  }
2635 #endif
2636 
2637 #endif // KMP_USE_DYNAMIC_LOCK
2638 }
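
The nestable variant lets the owning thread re-acquire the same lock without deadlocking; a short user-level sketch (helper names are illustrative).

#include <omp.h>

static omp_nest_lock_t nlock;

void inner(void) {
  omp_set_nest_lock(&nlock);   // same owner: depth becomes 2, no deadlock
  /* ... */
  omp_unset_nest_lock(&nlock); // depth back to 1
}

void outer(void) {
  omp_set_nest_lock(&nlock);   // depth 1
  inner();                     // re-acquires the lock it already owns
  omp_unset_nest_lock(&nlock); // depth 0, lock released
}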
2639 
2640 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2641 #if KMP_USE_DYNAMIC_LOCK
2642 
2643  int tag = KMP_EXTRACT_D_TAG(user_lock);
2644 #if USE_ITT_BUILD
2645  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2646 #endif
2647 #if KMP_USE_INLINED_TAS
2648  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2649  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2650  } else
2651 #elif KMP_USE_INLINED_FUTEX
2652  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2653  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2654  } else
2655 #endif
2656  {
2657  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2658  }
2659 
2660 #if OMPT_SUPPORT && OMPT_OPTIONAL
2661  // This is the case, if called from omp_init_lock_with_hint:
2662  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2663  if (!codeptr)
2664  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2665  if (ompt_enabled.ompt_callback_mutex_released) {
2666  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2667  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2668  }
2669 #endif
2670 
2671 #else // KMP_USE_DYNAMIC_LOCK
2672 
2673  kmp_user_lock_p lck;
2674 
2675  /* Can't use serial interval since not block structured */
2676  /* release the lock */
2677 
2678  if ((__kmp_user_lock_kind == lk_tas) &&
2679  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2680 #if KMP_OS_LINUX && \
2681  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2682 // "fast" path implemented to fix customer performance issue
2683 #if USE_ITT_BUILD
2684  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2685 #endif /* USE_ITT_BUILD */
2686  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2687  KMP_MB();
2688 
2689 #if OMPT_SUPPORT && OMPT_OPTIONAL
2690  // This is the case, if called from omp_init_lock_with_hint:
2691  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2692  if (!codeptr)
2693  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2694  if (ompt_enabled.ompt_callback_mutex_released) {
2695  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2696  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2697  }
2698 #endif
2699 
2700  return;
2701 #else
2702  lck = (kmp_user_lock_p)user_lock;
2703 #endif
2704  }
2705 #if KMP_USE_FUTEX
2706  else if ((__kmp_user_lock_kind == lk_futex) &&
2707  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2708  lck = (kmp_user_lock_p)user_lock;
2709  }
2710 #endif
2711  else {
2712  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2713  }
2714 
2715 #if USE_ITT_BUILD
2716  __kmp_itt_lock_releasing(lck);
2717 #endif /* USE_ITT_BUILD */
2718 
2719  RELEASE_LOCK(lck, gtid);
2720 
2721 #if OMPT_SUPPORT && OMPT_OPTIONAL
2722  // This is the case, if called from omp_init_lock_with_hint:
2723  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2724  if (!codeptr)
2725  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2726  if (ompt_enabled.ompt_callback_mutex_released) {
2727  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2728  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2729  }
2730 #endif
2731 
2732 #endif // KMP_USE_DYNAMIC_LOCK
2733 }
2734 
2735 /* release the lock */
2736 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2737 #if KMP_USE_DYNAMIC_LOCK
2738 
2739 #if USE_ITT_BUILD
2740  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2741 #endif
2742  int release_status =
2743  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2744 
2745 #if OMPT_SUPPORT && OMPT_OPTIONAL
2746  // This is the case, if called from omp_init_lock_with_hint:
2747  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748  if (!codeptr)
2749  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750  if (ompt_enabled.enabled) {
2751  if (release_status == KMP_LOCK_RELEASED) {
2752  if (ompt_enabled.ompt_callback_mutex_released) {
2753  // release_lock_last
2754  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2755  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2756  }
2757  } else if (ompt_enabled.ompt_callback_nest_lock) {
2758  // release_lock_prev
2759  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2760  ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2761  }
2762  }
2763 #endif
2764 
2765 #else // KMP_USE_DYNAMIC_LOCK
2766 
2767  kmp_user_lock_p lck;
2768 
2769  /* Can't use serial interval since not block structured */
2770 
2771  if ((__kmp_user_lock_kind == lk_tas) &&
2772  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2773  OMP_NEST_LOCK_T_SIZE)) {
2774 #if KMP_OS_LINUX && \
2775  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2776  // "fast" path implemented to fix customer performance issue
2777  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2778 #if USE_ITT_BUILD
2779  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2780 #endif /* USE_ITT_BUILD */
2781 
2782 #if OMPT_SUPPORT && OMPT_OPTIONAL
2783  int release_status = KMP_LOCK_STILL_HELD;
2784 #endif
2785 
2786  if (--(tl->lk.depth_locked) == 0) {
2787  TCW_4(tl->lk.poll, 0);
2788 #if OMPT_SUPPORT && OMPT_OPTIONAL
2789  release_status = KMP_LOCK_RELEASED;
2790 #endif
2791  }
2792  KMP_MB();
2793 
2794 #if OMPT_SUPPORT && OMPT_OPTIONAL
2795  // This is the case, if called from omp_init_lock_with_hint:
2796  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2797  if (!codeptr)
2798  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2799  if (ompt_enabled.enabled) {
2800  if (release_status == KMP_LOCK_RELEASED) {
2801  if (ompt_enabled.ompt_callback_mutex_released) {
2802  // release_lock_last
2803  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2804  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2805  }
2806  } else if (ompt_enabled.ompt_callback_nest_lock) {
2807  // release_lock_previous
2808  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2809  ompt_scope_end, (omp_wait_id_t)lck, codeptr);
2810  }
2811  }
2812 #endif
2813 
2814  return;
2815 #else
2816  lck = (kmp_user_lock_p)user_lock;
2817 #endif
2818  }
2819 #if KMP_USE_FUTEX
2820  else if ((__kmp_user_lock_kind == lk_futex) &&
2821  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2822  OMP_NEST_LOCK_T_SIZE)) {
2823  lck = (kmp_user_lock_p)user_lock;
2824  }
2825 #endif
2826  else {
2827  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2828  }
2829 
2830 #if USE_ITT_BUILD
2831  __kmp_itt_lock_releasing(lck);
2832 #endif /* USE_ITT_BUILD */
2833 
2834  int release_status;
2835  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2836 #if OMPT_SUPPORT && OMPT_OPTIONAL
2837  // This is the case, if called from omp_init_lock_with_hint:
2838  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2839  if (!codeptr)
2840  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2841  if (ompt_enabled.enabled) {
2842  if (release_status == KMP_LOCK_RELEASED) {
2843  if (ompt_enabled.ompt_callback_mutex_released) {
2844  // release_lock_last
2845  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2846  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2847  }
2848  } else if (ompt_enabled.ompt_callback_nest_lock) {
2849  // release_lock_previous
2850  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 2851  ompt_scope_end, (omp_wait_id_t)lck, codeptr);
2852  }
2853  }
2854 #endif
2855 
2856 #endif // KMP_USE_DYNAMIC_LOCK
2857 }
2858 
2859 /* try to acquire the lock */
2860 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2861  KMP_COUNT_BLOCK(OMP_test_lock);
2862 
2863 #if KMP_USE_DYNAMIC_LOCK
2864  int rc;
2865  int tag = KMP_EXTRACT_D_TAG(user_lock);
2866 #if USE_ITT_BUILD
2867  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2868 #endif
2869 #if OMPT_SUPPORT && OMPT_OPTIONAL
2870  // This is the case, if called from omp_init_lock_with_hint:
2871  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2872  if (!codeptr)
2873  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2874  if (ompt_enabled.ompt_callback_mutex_acquire) {
2875  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2876  ompt_mutex_lock, omp_lock_hint_none,
2877  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2878  codeptr);
2879  }
2880 #endif
2881 #if KMP_USE_INLINED_TAS
2882  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2883  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2884  } else
2885 #elif KMP_USE_INLINED_FUTEX
2886  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2887  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2888  } else
2889 #endif
2890  {
2891  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2892  }
2893  if (rc) {
2894 #if USE_ITT_BUILD
2895  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2896 #endif
2897 #if OMPT_SUPPORT && OMPT_OPTIONAL
2898  if (ompt_enabled.ompt_callback_mutex_acquired) {
2899  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2900  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2901  }
2902 #endif
2903  return FTN_TRUE;
2904  } else {
2905 #if USE_ITT_BUILD
2906  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2907 #endif
2908  return FTN_FALSE;
2909  }
2910 
2911 #else // KMP_USE_DYNAMIC_LOCK
2912 
2913  kmp_user_lock_p lck;
2914  int rc;
2915 
2916  if ((__kmp_user_lock_kind == lk_tas) &&
2917  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2918  lck = (kmp_user_lock_p)user_lock;
2919  }
2920 #if KMP_USE_FUTEX
2921  else if ((__kmp_user_lock_kind == lk_futex) &&
2922  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2923  lck = (kmp_user_lock_p)user_lock;
2924  }
2925 #endif
2926  else {
2927  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2928  }
2929 
2930 #if USE_ITT_BUILD
2931  __kmp_itt_lock_acquiring(lck);
2932 #endif /* USE_ITT_BUILD */
2933 #if OMPT_SUPPORT && OMPT_OPTIONAL
2934  // This is the case, if called from omp_init_lock_with_hint:
2935  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2936  if (!codeptr)
2937  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2938  if (ompt_enabled.ompt_callback_mutex_acquire) {
2939  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2940  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2941  (omp_wait_id_t)lck, codeptr);
2942  }
2943 #endif
2944 
2945  rc = TEST_LOCK(lck, gtid);
2946 #if USE_ITT_BUILD
2947  if (rc) {
2948  __kmp_itt_lock_acquired(lck);
2949  } else {
2950  __kmp_itt_lock_cancelled(lck);
2951  }
2952 #endif /* USE_ITT_BUILD */
2953 #if OMPT_SUPPORT && OMPT_OPTIONAL
2954  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2955  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2956  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2957  }
2958 #endif
2959 
2960  return (rc ? FTN_TRUE : FTN_FALSE);
2961 
2962 /* Can't use serial interval since not block structured */
2963 
2964 #endif // KMP_USE_DYNAMIC_LOCK
2965 }
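
The corresponding user-level idiom is a non-blocking try-lock: omp_test_lock returns nonzero only when the lock was acquired, mirroring the FTN_TRUE/FTN_FALSE result above. A minimal sketch:

#include <omp.h>

static omp_lock_t lock;

int try_do_work(void) {
  if (!omp_test_lock(&lock))
    return 0;            // lock is held elsewhere; caller can do other work
  /* ... work under the lock ... */
  omp_unset_lock(&lock);
  return 1;
}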
2966 
2967 /* try to acquire the lock */
2968 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2969 #if KMP_USE_DYNAMIC_LOCK
2970  int rc;
2971 #if USE_ITT_BUILD
2972  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2973 #endif
2974 #if OMPT_SUPPORT && OMPT_OPTIONAL
2975  // This is the case, if called from omp_init_lock_with_hint:
2976  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2977  if (!codeptr)
2978  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2979  if (ompt_enabled.ompt_callback_mutex_acquire) {
2980  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2981  ompt_mutex_nest_lock, omp_lock_hint_none,
2982  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2983  codeptr);
2984  }
2985 #endif
2986  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
2987 #if USE_ITT_BUILD
2988  if (rc) {
2989  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2990  } else {
2991  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2992  }
2993 #endif
2994 #if OMPT_SUPPORT && OMPT_OPTIONAL
2995  if (ompt_enabled.enabled && rc) {
2996  if (rc == 1) {
2997  if (ompt_enabled.ompt_callback_mutex_acquired) {
2998  // lock_first
2999  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3000  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
3001  }
3002  } else {
3003  if (ompt_enabled.ompt_callback_nest_lock) {
3004  // lock_next
3005  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3006  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
3007  }
3008  }
3009  }
3010 #endif
3011  return rc;
3012 
3013 #else // KMP_USE_DYNAMIC_LOCK
3014 
3015  kmp_user_lock_p lck;
3016  int rc;
3017 
3018  if ((__kmp_user_lock_kind == lk_tas) &&
3019  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3020  OMP_NEST_LOCK_T_SIZE)) {
3021  lck = (kmp_user_lock_p)user_lock;
3022  }
3023 #if KMP_USE_FUTEX
3024  else if ((__kmp_user_lock_kind == lk_futex) &&
3025  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3026  OMP_NEST_LOCK_T_SIZE)) {
3027  lck = (kmp_user_lock_p)user_lock;
3028  }
3029 #endif
3030  else {
3031  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3032  }
3033 
3034 #if USE_ITT_BUILD
3035  __kmp_itt_lock_acquiring(lck);
3036 #endif /* USE_ITT_BUILD */
3037 
3038 #if OMPT_SUPPORT && OMPT_OPTIONAL
3039  // This is the case, if called from omp_init_lock_with_hint:
3040  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3041  if (!codeptr)
3042  codeptr = OMPT_GET_RETURN_ADDRESS(0);
 3043  if (ompt_enabled.enabled &&
 3044      ompt_enabled.ompt_callback_mutex_acquire) {
3045  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3046  ompt_mutex_nest_lock, omp_lock_hint_none,
3047  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
3048  }
3049 #endif
3050 
3051  rc = TEST_NESTED_LOCK(lck, gtid);
3052 #if USE_ITT_BUILD
3053  if (rc) {
3054  __kmp_itt_lock_acquired(lck);
3055  } else {
3056  __kmp_itt_lock_cancelled(lck);
3057  }
3058 #endif /* USE_ITT_BUILD */
3059 #if OMPT_SUPPORT && OMPT_OPTIONAL
3060  if (ompt_enabled.enabled && rc) {
3061  if (rc == 1) {
3062  if (ompt_enabled.ompt_callback_mutex_acquired) {
3063  // lock_first
3064  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3065  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
3066  }
3067  } else {
3068  if (ompt_enabled.ompt_callback_nest_lock) {
3069  // lock_next
3070  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
 3071  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
3072  }
3073  }
3074  }
3075 #endif
3076  return rc;
3077 
3078 /* Can't use serial interval since not block structured */
3079 
3080 #endif // KMP_USE_DYNAMIC_LOCK
3081 }
3082 
3083 // Interface to fast scalable reduce methods routines
3084 
3085 // keep the selected method in a thread local structure for cross-function
3086 // usage: will be used in __kmpc_end_reduce* functions;
3087 // another solution: to re-determine the method one more time in
3088 // __kmpc_end_reduce* functions (new prototype required then)
3089 // AT: which solution is better?
3090 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3091  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3092 
3093 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3094  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3095 
3096 // description of the packed_reduction_method variable: look at the macros in
3097 // kmp.h
3098 
3099 // used in a critical section reduce block
3100 static __forceinline void
3101 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3102  kmp_critical_name *crit) {
3103 
3104  // this lock was visible to a customer and to the threading profile tool as a
3105  // serial overhead span (although it's used for an internal purpose only)
 3106  // why was it visible in the previous implementation?
 3107  // should we keep it visible in the new reduce block?
3108  kmp_user_lock_p lck;
3109 
3110 #if KMP_USE_DYNAMIC_LOCK
3111 
3112  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3113  // Check if it is initialized.
3114  if (*lk == 0) {
3115  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3116  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3117  KMP_GET_D_TAG(__kmp_user_lock_seq));
3118  } else {
3119  __kmp_init_indirect_csptr(crit, loc, global_tid,
3120  KMP_GET_I_TAG(__kmp_user_lock_seq));
3121  }
3122  }
3123  // Branch for accessing the actual lock object and set operation. This
3124  // branching is inevitable since this lock initialization does not follow the
3125  // normal dispatch path (lock table is not used).
3126  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3127  lck = (kmp_user_lock_p)lk;
3128  KMP_DEBUG_ASSERT(lck != NULL);
3129  if (__kmp_env_consistency_check) {
3130  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3131  }
3132  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3133  } else {
3134  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3135  lck = ilk->lock;
3136  KMP_DEBUG_ASSERT(lck != NULL);
3137  if (__kmp_env_consistency_check) {
3138  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3139  }
3140  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3141  }
3142 
3143 #else // KMP_USE_DYNAMIC_LOCK
3144 
3145  // We know that the fast reduction code is only emitted by Intel compilers
3146  // with 32 byte critical sections. If there isn't enough space, then we
3147  // have to use a pointer.
3148  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3149  lck = (kmp_user_lock_p)crit;
3150  } else {
3151  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3152  }
3153  KMP_DEBUG_ASSERT(lck != NULL);
3154 
3155  if (__kmp_env_consistency_check)
3156  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3157 
3158  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3159 
3160 #endif // KMP_USE_DYNAMIC_LOCK
3161 }
3162 
3163 // used in a critical section reduce block
3164 static __forceinline void
3165 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3166  kmp_critical_name *crit) {
3167 
3168  kmp_user_lock_p lck;
3169 
3170 #if KMP_USE_DYNAMIC_LOCK
3171 
3172  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3173  lck = (kmp_user_lock_p)crit;
3174  if (__kmp_env_consistency_check)
3175  __kmp_pop_sync(global_tid, ct_critical, loc);
3176  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3177  } else {
3178  kmp_indirect_lock_t *ilk =
3179  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3180  if (__kmp_env_consistency_check)
3181  __kmp_pop_sync(global_tid, ct_critical, loc);
3182  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3183  }
3184 
3185 #else // KMP_USE_DYNAMIC_LOCK
3186 
3187  // We know that the fast reduction code is only emitted by Intel compilers
3188  // with 32 byte critical sections. If there isn't enough space, then we have
3189  // to use a pointer.
3190  if (__kmp_base_user_lock_size > 32) {
3191  lck = *((kmp_user_lock_p *)crit);
3192  KMP_ASSERT(lck != NULL);
3193  } else {
3194  lck = (kmp_user_lock_p)crit;
3195  }
3196 
3197  if (__kmp_env_consistency_check)
3198  __kmp_pop_sync(global_tid, ct_critical, loc);
3199 
3200  __kmp_release_user_lock_with_checks(lck, global_tid);
3201 
3202 #endif // KMP_USE_DYNAMIC_LOCK
3203 } // __kmp_end_critical_section_reduce_block
3204 
3205 #if OMP_40_ENABLED
3206 static __forceinline int
3207 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3208  int *task_state) {
3209  kmp_team_t *team;
3210 
3211  // Check if we are inside the teams construct?
3212  if (th->th.th_teams_microtask) {
3213  *team_p = team = th->th.th_team;
3214  if (team->t.t_level == th->th.th_teams_level) {
3215  // This is reduction at teams construct.
3216  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3217  // Let's swap teams temporarily for the reduction.
3218  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3219  th->th.th_team = team->t.t_parent;
3220  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3221  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3222  *task_state = th->th.th_task_state;
3223  th->th.th_task_state = 0;
3224 
3225  return 1;
3226  }
3227  }
3228  return 0;
3229 }
3230 
3231 static __forceinline void
3232 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3233  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3234  th->th.th_info.ds.ds_tid = 0;
3235  th->th.th_team = team;
3236  th->th.th_team_nproc = team->t.t_nproc;
3237  th->th.th_task_team = team->t.t_task_team[task_state];
3238  th->th.th_task_state = task_state;
3239 }
3240 #endif
3241 
3242 /* 2.a.i. Reduce Block without a terminating barrier */
3258 kmp_int32
3259 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3260  size_t reduce_size, void *reduce_data,
3261  void (*reduce_func)(void *lhs_data, void *rhs_data),
3262  kmp_critical_name *lck) {
3263 
3264  KMP_COUNT_BLOCK(REDUCE_nowait);
3265  int retval = 0;
3266  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3267 #if OMP_40_ENABLED
3268  kmp_info_t *th;
3269  kmp_team_t *team;
3270  int teams_swapped = 0, task_state;
3271 #endif
3272  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3273 
3274  // why do we need this initialization here at all?
 3275  // Reduction clause cannot be used as a stand-alone directive.
3276 
3277  // do not call __kmp_serial_initialize(), it will be called by
3278  // __kmp_parallel_initialize() if needed
3279  // possible detection of false-positive race by the threadchecker ???
3280  if (!TCR_4(__kmp_init_parallel))
3281  __kmp_parallel_initialize();
3282 
3283 // check correctness of reduce block nesting
3284 #if KMP_USE_DYNAMIC_LOCK
3285  if (__kmp_env_consistency_check)
3286  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3287 #else
3288  if (__kmp_env_consistency_check)
3289  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3290 #endif
3291 
3292 #if OMP_40_ENABLED
3293  th = __kmp_thread_from_gtid(global_tid);
3294  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3295 #endif // OMP_40_ENABLED
3296 
3297  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3298  // the value should be kept in a variable
3299  // the variable should be either a construct-specific or thread-specific
3300  // property, not a team specific property
3301  // (a thread can reach the next reduce block on the next construct, reduce
3302  // method may differ on the next construct)
3303  // an ident_t "loc" parameter could be used as a construct-specific property
3304  // (what if loc == 0?)
3305  // (if both construct-specific and team-specific variables were shared,
 3306  // then unnecessary extra syncs would be needed)
3307  // a thread-specific variable is better regarding two issues above (next
3308  // construct and extra syncs)
 3309  // a thread-specific "th_local.packed_reduction_method" variable is used currently
 3310  // each thread executes the 'determine' and 'set' lines (no need to execute by one
 3311  // thread, to avoid unnecessary extra syncs)
3312 
3313  packed_reduction_method = __kmp_determine_reduction_method(
3314  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3315  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3316 
3317  if (packed_reduction_method == critical_reduce_block) {
3318 
3319  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3320  retval = 1;
3321 
3322  } else if (packed_reduction_method == empty_reduce_block) {
3323 
3324  // usage: if team size == 1, no synchronization is required ( Intel
3325  // platforms only )
3326  retval = 1;
3327 
3328  } else if (packed_reduction_method == atomic_reduce_block) {
3329 
3330  retval = 2;
3331 
3332  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3333  // won't be called by the code gen)
 3334  // (this is not ideal: the checking block has already been closed by
 3335  // this 'pop',
 3336  // but the atomic operation has not been executed yet; it will execute
 3337  // slightly later, literally on the next instruction)
3338  if (__kmp_env_consistency_check)
3339  __kmp_pop_sync(global_tid, ct_reduce, loc);
3340 
3341  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3342  tree_reduce_block)) {
3343 
3344 // AT: performance issue: a real barrier here
3345 // AT: (if master goes slow, other threads are blocked here waiting for the
3346 // master to come and release them)
 3347 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
 3348 // AT: (specifying NOWAIT won't improve performance; it will only
 3349 // confuse the customer)
 3350 // AT: another implementation of *barrier_gather*nowait() (or some other design)
 3351 // might go faster and be more in line with the sense of NOWAIT
 3352 // AT: TO DO: run the EPCC benchmark and compare times
3353 
3354 // this barrier should be invisible to a customer and to the threading profile
3355 // tool (it's neither a terminating barrier nor customer's code, it's
3356 // used for an internal purpose)
3357 #if OMPT_SUPPORT
 3358  // JP: can this barrier potentially lead to task scheduling?
 3359  // JP: as long as there is a barrier in the implementation, OMPT should and
 3360  // will provide the barrier events,
 3361  // so we set up the necessary frame/return addresses.
3362  omp_frame_t *ompt_frame;
3363  if (ompt_enabled.enabled) {
3364  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3365  if (ompt_frame->enter_frame == NULL)
3366  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3367  OMPT_STORE_RETURN_ADDRESS(global_tid);
3368  }
3369 #endif
3370 #if USE_ITT_NOTIFY
3371  __kmp_threads[global_tid]->th.th_ident = loc;
3372 #endif
3373  retval =
3374  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3375  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3376  retval = (retval != 0) ? (0) : (1);
3377 #if OMPT_SUPPORT && OMPT_OPTIONAL
3378  if (ompt_enabled.enabled) {
3379  ompt_frame->enter_frame = NULL;
3380  }
3381 #endif
3382 
3383  // all other workers except master should do this pop here
3384  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3385  if (__kmp_env_consistency_check) {
3386  if (retval == 0) {
3387  __kmp_pop_sync(global_tid, ct_reduce, loc);
3388  }
3389  }
3390 
3391  } else {
3392 
3393  // should never reach this block
3394  KMP_ASSERT(0); // "unexpected method"
3395  }
3396 #if OMP_40_ENABLED
3397  if (teams_swapped) {
3398  __kmp_restore_swapped_teams(th, team, task_state);
3399  }
3400 #endif
3401  KA_TRACE(
3402  10,
3403  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3404  global_tid, packed_reduction_method, retval));
3405 
3406  return retval;
3407 }
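
A hedged sketch of the calling convention the return value implies: 1 means the caller performs the reduction itself and then calls __kmpc_end_reduce_nowait(); 2 means the caller updates the shared variable atomically and does not call it; 0 means this thread's contribution was already combined (tree barrier). The reduce callback, data layout, and the __sync_fetch_and_add stand-in below are illustrative, not what any particular compiler emits.

typedef struct { long sum; } red_data_t;

// Folds the right-hand partial result into the left-hand one.
static void reduce_sum(void *lhs, void *rhs) {
  ((red_data_t *)lhs)->sum += ((red_data_t *)rhs)->sum;
}

static kmp_critical_name crit; // zero-initialized name for the critical path

void combine_partial_sum(ident_t *loc, kmp_int32 gtid, long *shared_sum,
                         red_data_t *priv) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(*priv), priv, reduce_sum,
                               &crit)) {
  case 1: // critical, empty, or tree (master) path: fold the value in directly
    *shared_sum += priv->sum;
    __kmpc_end_reduce_nowait(loc, gtid, &crit);
    break;
  case 2: // atomic path: update the shared variable atomically
    __sync_fetch_and_add(shared_sum, priv->sum);
    break;
  default: // 0: nothing left to do for this thread
    break;
  }
}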
3408 
3417 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3418  kmp_critical_name *lck) {
3419 
3420  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3421 
3422  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3423 
3424  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3425 
3426  if (packed_reduction_method == critical_reduce_block) {
3427 
3428  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3429 
3430  } else if (packed_reduction_method == empty_reduce_block) {
3431 
3432  // usage: if team size == 1, no synchronization is required ( on Intel
3433  // platforms only )
3434 
3435  } else if (packed_reduction_method == atomic_reduce_block) {
3436 
3437  // neither master nor other workers should get here
3438  // (code gen does not generate this call in case 2: atomic reduce block)
3439  // actually it's better to remove this elseif at all;
3440  // after removal this value will checked by the 'else' and will assert
3441 
3442  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3443  tree_reduce_block)) {
3444 
3445  // only master gets here
3446 
3447  } else {
3448 
3449  // should never reach this block
3450  KMP_ASSERT(0); // "unexpected method"
3451  }
3452 
3453  if (__kmp_env_consistency_check)
3454  __kmp_pop_sync(global_tid, ct_reduce, loc);
3455 
3456  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3457  global_tid, packed_reduction_method));
3458 
3459  return;
3460 }
3461 
3462 /* 2.a.ii. Reduce Block with a terminating barrier */
3463 
3479 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3480  size_t reduce_size, void *reduce_data,
3481  void (*reduce_func)(void *lhs_data, void *rhs_data),
3482  kmp_critical_name *lck) {
3483  KMP_COUNT_BLOCK(REDUCE_wait);
3484  int retval = 0;
3485  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3486 #if OMP_40_ENABLED
3487  kmp_info_t *th;
3488  kmp_team_t *team;
3489  int teams_swapped = 0, task_state;
3490 #endif
3491 
3492  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3493 
3494  // why do we need this initialization here at all?
 3495  // Reduction clause cannot be used as a stand-alone directive.
3496 
3497  // do not call __kmp_serial_initialize(), it will be called by
3498  // __kmp_parallel_initialize() if needed
3499  // possible detection of false-positive race by the threadchecker ???
3500  if (!TCR_4(__kmp_init_parallel))
3501  __kmp_parallel_initialize();
3502 
3503 // check correctness of reduce block nesting
3504 #if KMP_USE_DYNAMIC_LOCK
3505  if (__kmp_env_consistency_check)
3506  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3507 #else
3508  if (__kmp_env_consistency_check)
3509  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3510 #endif
3511 
3512 #if OMP_40_ENABLED
3513  th = __kmp_thread_from_gtid(global_tid);
3514  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3515 #endif // OMP_40_ENABLED
3516 
3517  packed_reduction_method = __kmp_determine_reduction_method(
3518  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3519  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3520 
3521  if (packed_reduction_method == critical_reduce_block) {
3522 
3523  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3524  retval = 1;
3525 
3526  } else if (packed_reduction_method == empty_reduce_block) {
3527 
3528  // usage: if team size == 1, no synchronization is required ( Intel
3529  // platforms only )
3530  retval = 1;
3531 
3532  } else if (packed_reduction_method == atomic_reduce_block) {
3533 
3534  retval = 2;
3535 
3536  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3537  tree_reduce_block)) {
3538 
3539 // case tree_reduce_block:
3540 // this barrier should be visible to a customer and to the threading profile
3541 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3542 #if OMPT_SUPPORT
3543  omp_frame_t *ompt_frame;
3544  if (ompt_enabled.enabled) {
3545  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3546  if (ompt_frame->enter_frame == NULL)
3547  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3548  OMPT_STORE_RETURN_ADDRESS(global_tid);
3549  }
3550 #endif
3551 #if USE_ITT_NOTIFY
3552  __kmp_threads[global_tid]->th.th_ident =
3553  loc; // needed for correct notification of frames
3554 #endif
3555  retval =
3556  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3557  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3558  retval = (retval != 0) ? (0) : (1);
3559 #if OMPT_SUPPORT && OMPT_OPTIONAL
3560  if (ompt_enabled.enabled) {
3561  ompt_frame->enter_frame = NULL;
3562  }
3563 #endif
3564 
3565  // all other workers except master should do this pop here
3566  // ( none of other workers except master will enter __kmpc_end_reduce() )
3567  if (__kmp_env_consistency_check) {
3568  if (retval == 0) { // 0: all other workers; 1: master
3569  __kmp_pop_sync(global_tid, ct_reduce, loc);
3570  }
3571  }
3572 
3573  } else {
3574 
3575  // should never reach this block
3576  KMP_ASSERT(0); // "unexpected method"
3577  }
3578 #if OMP_40_ENABLED
3579  if (teams_swapped) {
3580  __kmp_restore_swapped_teams(th, team, task_state);
3581  }
3582 #endif
3583 
3584  KA_TRACE(10,
3585  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3586  global_tid, packed_reduction_method, retval));
3587 
3588  return retval;
3589 }
3590 
3601 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3602  kmp_critical_name *lck) {
3603 
3604  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3605 #if OMP_40_ENABLED
3606  kmp_info_t *th;
3607  kmp_team_t *team;
3608  int teams_swapped = 0, task_state;
3609 #endif
3610 
3611  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3612 
3613 #if OMP_40_ENABLED
3614  th = __kmp_thread_from_gtid(global_tid);
3615  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3616 #endif // OMP_40_ENABLED
3617 
3618  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3619 
3620  // this barrier should be visible to a customer and to the threading profile
3621  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3622 
3623  if (packed_reduction_method == critical_reduce_block) {
3624 
3625  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3626 
3627 // TODO: implicit barrier: should be exposed
3628 #if OMPT_SUPPORT
3629  omp_frame_t *ompt_frame;
3630  if (ompt_enabled.enabled) {
3631  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3632  if (ompt_frame->enter_frame == NULL)
3633  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3634  OMPT_STORE_RETURN_ADDRESS(global_tid);
3635  }
3636 #endif
3637 #if USE_ITT_NOTIFY
3638  __kmp_threads[global_tid]->th.th_ident = loc;
3639 #endif
3640  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3641 #if OMPT_SUPPORT && OMPT_OPTIONAL
3642  if (ompt_enabled.enabled) {
3643  ompt_frame->enter_frame = NULL;
3644  }
3645 #endif
3646 
3647  } else if (packed_reduction_method == empty_reduce_block) {
3648 
3649 // usage: if team size==1, no synchronization is required (Intel platforms only)
3650 
3651 // TODO: implicit barrier: should be exposed
3652 #if OMPT_SUPPORT
3653  omp_frame_t *ompt_frame;
3654  if (ompt_enabled.enabled) {
3655  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3656  if (ompt_frame->enter_frame == NULL)
3657  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3658  OMPT_STORE_RETURN_ADDRESS(global_tid);
3659  }
3660 #endif
3661 #if USE_ITT_NOTIFY
3662  __kmp_threads[global_tid]->th.th_ident = loc;
3663 #endif
3664  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3665 #if OMPT_SUPPORT && OMPT_OPTIONAL
3666  if (ompt_enabled.enabled) {
3667  ompt_frame->enter_frame = NULL;
3668  }
3669 #endif
3670 
3671  } else if (packed_reduction_method == atomic_reduce_block) {
3672 
3673 #if OMPT_SUPPORT
3674  omp_frame_t *ompt_frame;
3675  if (ompt_enabled.enabled) {
3676  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3677  if (ompt_frame->enter_frame == NULL)
3678  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3679  OMPT_STORE_RETURN_ADDRESS(global_tid);
3680  }
3681 #endif
3682 // TODO: implicit barrier: should be exposed
3683 #if USE_ITT_NOTIFY
3684  __kmp_threads[global_tid]->th.th_ident = loc;
3685 #endif
3686  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3687 #if OMPT_SUPPORT && OMPT_OPTIONAL
3688  if (ompt_enabled.enabled) {
3689  ompt_frame->enter_frame = NULL;
3690  }
3691 #endif
3692 
3693  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3694  tree_reduce_block)) {
3695 
3696  // only master executes here (master releases all other workers)
3697  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3698  global_tid);
3699 
3700  } else {
3701 
3702  // should never reach this block
3703  KMP_ASSERT(0); // "unexpected method"
3704  }
3705 #if OMP_40_ENABLED
3706  if (teams_swapped) {
3707  __kmp_restore_swapped_teams(th, team, task_state);
3708  }
3709 #endif
3710 
3711  if (__kmp_env_consistency_check)
3712  __kmp_pop_sync(global_tid, ct_reduce, loc);
3713 
3714  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3715  global_tid, packed_reduction_method));
3716 
3717  return;
3718 }
3719 
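/* Illustrative example (editor's addition, not part of kmp_csupport.cpp):
 * a source-level OpenMP reduction of the kind that compilers targeting this
 * runtime typically lower to __kmpc_reduce()/__kmpc_reduce_nowait() plus the
 * matching __kmpc_end_reduce*() call, with a compiler-generated reduce_func.
 * That lowering is an assumption about compiler behavior; the runtime then
 * picks the reduction method (critical, atomic or tree) as seen above. Only
 * the OpenMP source below is standard and portable.
 *
 *   #include <omp.h>
 *   #include <stdio.h>
 *
 *   int main(void) {
 *     long sum = 0;
 *   #pragma omp parallel for reduction(+ : sum)
 *     for (int i = 0; i < 1000; ++i)
 *       sum += i;
 *     printf("sum = %ld\n", sum); // prints 499500
 *     return 0;
 *   }
 */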
3720 #undef __KMP_GET_REDUCTION_METHOD
3721 #undef __KMP_SET_REDUCTION_METHOD
3722 
3723 /* end of interface to fast scalable reduce routines */
3724 
3725 kmp_uint64 __kmpc_get_taskid() {
3726 
3727  kmp_int32 gtid;
3728  kmp_info_t *thread;
3729 
3730  gtid = __kmp_get_gtid();
3731  if (gtid < 0) {
3732  return 0;
3733  }
3734  thread = __kmp_thread_from_gtid(gtid);
3735  return thread->th.th_current_task->td_task_id;
3736 
3737 } // __kmpc_get_taskid
3738 
3739 kmp_uint64 __kmpc_get_parent_taskid() {
3740 
3741  kmp_int32 gtid;
3742  kmp_info_t *thread;
3743  kmp_taskdata_t *parent_task;
3744 
3745  gtid = __kmp_get_gtid();
3746  if (gtid < 0) {
3747  return 0;
3748  }
3749  thread = __kmp_thread_from_gtid(gtid);
3750  parent_task = thread->th.th_current_task->td_parent;
3751  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3752 
3753 } // __kmpc_get_parent_taskid
3754 
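/* Illustrative example (editor's addition): __kmpc_get_taskid() and
 * __kmpc_get_parent_taskid() return 0 when called from a thread the runtime
 * does not know about. The sketch below, which declares the entry points by
 * hand and calls them for ad-hoc tracing, is an assumption made for
 * illustration only; these __kmpc_* entry points are an internal compiler
 * interface, not a stable user API.
 *
 *   #include <stdio.h>
 *
 *   extern unsigned long long __kmpc_get_taskid(void);
 *   extern unsigned long long __kmpc_get_parent_taskid(void);
 *
 *   void trace_point(const char *what) {
 *     printf("%s: task=%llu parent=%llu\n", what,
 *            __kmpc_get_taskid(), __kmpc_get_parent_taskid());
 *   }
 */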
3755 #if OMP_45_ENABLED
3756 
3767 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3768  struct kmp_dim *dims) {
3769  int j, idx;
3770  kmp_int64 last, trace_count;
3771  kmp_info_t *th = __kmp_threads[gtid];
3772  kmp_team_t *team = th->th.th_team;
3773  kmp_uint32 *flags;
3774  kmp_disp_t *pr_buf = th->th.th_dispatch;
3775  dispatch_shared_info_t *sh_buf;
3776 
3777  KA_TRACE(
3778  20,
3779  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3780  gtid, num_dims, !team->t.t_serialized));
3781  KMP_DEBUG_ASSERT(dims != NULL);
3782  KMP_DEBUG_ASSERT(num_dims > 0);
3783 
3784  if (team->t.t_serialized) {
3785  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3786  return; // no dependencies if team is serialized
3787  }
3788  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3789  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3790  // the next loop
3791  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3792 
3793  // Save bounds info into allocated private buffer
3794  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3795  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3796  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3797  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3798  pr_buf->th_doacross_info[0] =
3799  (kmp_int64)num_dims; // first element is number of dimensions
3800  // Also save the address of num_done so it can be accessed later without
3801  // knowing the buffer index
3802  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3803  pr_buf->th_doacross_info[2] = dims[0].lo;
3804  pr_buf->th_doacross_info[3] = dims[0].up;
3805  pr_buf->th_doacross_info[4] = dims[0].st;
3806  last = 5;
3807  for (j = 1; j < num_dims; ++j) {
3808  kmp_int64 range_length; // iteration count of dimension j; ranges of all
3809  // dimensions but the first (dims[0]) are kept in the buffer
3810  if (dims[j].st == 1) { // most common case
3811  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3812  range_length = dims[j].up - dims[j].lo + 1;
3813  } else {
3814  if (dims[j].st > 0) {
3815  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3816  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3817  } else { // negative increment
3818  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3819  range_length =
3820  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3821  }
3822  }
3823  pr_buf->th_doacross_info[last++] = range_length;
3824  pr_buf->th_doacross_info[last++] = dims[j].lo;
3825  pr_buf->th_doacross_info[last++] = dims[j].up;
3826  pr_buf->th_doacross_info[last++] = dims[j].st;
3827  }
3828 
3829  // Compute total trip count.
3830  // Start with range of dims[0] which we don't need to keep in the buffer.
3831  if (dims[0].st == 1) { // most common case
3832  trace_count = dims[0].up - dims[0].lo + 1;
3833  } else if (dims[0].st > 0) {
3834  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3835  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3836  } else { // negative increment
3837  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3838  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3839  }
3840  for (j = 1; j < num_dims; ++j) {
3841  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3842  }
3843  KMP_DEBUG_ASSERT(trace_count > 0);
3844 
3845  // Check that the shared buffer is not still occupied by another loop
3846  // (i.e. the loop that used buffer index idx - __kmp_dispatch_num_buffers)
3847  if (idx != sh_buf->doacross_buf_idx) {
3848  // Shared buffer is occupied, wait for it to be free
3849  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3850  __kmp_eq_4, NULL);
3851  }
3852 #if KMP_32_BIT_ARCH
3853  // Check if we are the first thread. After the CAS the first thread gets 0,
3854  // others get 1 if initialization is in progress, allocated pointer otherwise.
3855  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3856  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3857  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3858 #else
3859  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3860  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3861 #endif
3862  if (flags == NULL) {
3863  // we are the first thread, allocate the array of flags
3864  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3865  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3866  KMP_MB();
3867  sh_buf->doacross_flags = flags;
3868  } else if (flags == (kmp_uint32 *)1) {
3869 #if KMP_32_BIT_ARCH
3870  // initialization is still in progress, need to wait
3871  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3872 #else
3873  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3874 #endif
3875  KMP_YIELD(TRUE);
3876  KMP_MB();
3877  } else {
3878  KMP_MB();
3879  }
3880  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3881  pr_buf->th_doacross_flags =
3882  sh_buf->doacross_flags; // save private copy in order to not
3883  // touch shared buffer on each iteration
3884  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3885 }
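/* Worked example (editor's addition) of the th_doacross_info packing built in
 * __kmpc_doacross_init() above, assuming a two-dimensional doacross nest with
 * dims[0] = {lo=0, up=9, st=1} and dims[1] = {lo=1, up=20, st=2}:
 *
 *   th_doacross_info[0] = 2                           // num_dims
 *   th_doacross_info[1] = &sh_buf->doacross_num_done  // saved for the fini call
 *   th_doacross_info[2] = 0    // dims[0].lo
 *   th_doacross_info[3] = 9    // dims[0].up
 *   th_doacross_info[4] = 1    // dims[0].st
 *   th_doacross_info[5] = 10   // range_length of dims[1]: (20 - 1) / 2 + 1
 *   th_doacross_info[6] = 1    // dims[1].lo
 *   th_doacross_info[7] = 20   // dims[1].up
 *   th_doacross_info[8] = 2    // dims[1].st
 *
 * trace_count = 10 * 10 = 100 collapsed iterations, so the shared flag array
 * is allocated with 100 / 8 + 8 = 20 bytes, one bit per iteration plus slack.
 */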
3886 
3887 void __kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) {
3888  kmp_int32 shft, num_dims, i;
3889  kmp_uint32 flag;
3890  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3891  kmp_info_t *th = __kmp_threads[gtid];
3892  kmp_team_t *team = th->th.th_team;
3893  kmp_disp_t *pr_buf;
3894  kmp_int64 lo, up, st;
3895 
3896  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3897  if (team->t.t_serialized) {
3898  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3899  return; // no dependencies if team is serialized
3900  }
3901 
3902  // calculate sequential iteration number and check out-of-bounds condition
3903  pr_buf = th->th.th_dispatch;
3904  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3905  num_dims = pr_buf->th_doacross_info[0];
3906  lo = pr_buf->th_doacross_info[2];
3907  up = pr_buf->th_doacross_info[3];
3908  st = pr_buf->th_doacross_info[4];
3909  if (st == 1) { // most common case
3910  if (vec[0] < lo || vec[0] > up) {
3911  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3912  "bounds [%lld,%lld]\n",
3913  gtid, vec[0], lo, up));
3914  return;
3915  }
3916  iter_number = vec[0] - lo;
3917  } else if (st > 0) {
3918  if (vec[0] < lo || vec[0] > up) {
3919  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3920  "bounds [%lld,%lld]\n",
3921  gtid, vec[0], lo, up));
3922  return;
3923  }
3924  iter_number = (kmp_uint64)(vec[0] - lo) / st;
3925  } else { // negative increment
3926  if (vec[0] > lo || vec[0] < up) {
3927  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3928  "bounds [%lld,%lld]\n",
3929  gtid, vec[0], lo, up));
3930  return;
3931  }
3932  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3933  }
3934  for (i = 1; i < num_dims; ++i) {
3935  kmp_int64 iter, ln;
3936  kmp_int32 j = i * 4;
3937  ln = pr_buf->th_doacross_info[j + 1];
3938  lo = pr_buf->th_doacross_info[j + 2];
3939  up = pr_buf->th_doacross_info[j + 3];
3940  st = pr_buf->th_doacross_info[j + 4];
3941  if (st == 1) {
3942  if (vec[i] < lo || vec[i] > up) {
3943  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3944  "bounds [%lld,%lld]\n",
3945  gtid, vec[i], lo, up));
3946  return;
3947  }
3948  iter = vec[i] - lo;
3949  } else if (st > 0) {
3950  if (vec[i] < lo || vec[i] > up) {
3951  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3952  "bounds [%lld,%lld]\n",
3953  gtid, vec[i], lo, up));
3954  return;
3955  }
3956  iter = (kmp_uint64)(vec[i] - lo) / st;
3957  } else { // st < 0
3958  if (vec[i] > lo || vec[i] < up) {
3959  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3960  "bounds [%lld,%lld]\n",
3961  gtid, vec[i], lo, up));
3962  return;
3963  }
3964  iter = (kmp_uint64)(lo - vec[i]) / (-st);
3965  }
3966  iter_number = iter + ln * iter_number;
3967  }
3968  shft = iter_number % 32; // use 32-bit granularity
3969  iter_number >>= 5; // divided by 32
3970  flag = 1 << shft;
3971  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
3972  KMP_YIELD(TRUE);
3973  }
3974  KMP_MB();
3975  KA_TRACE(20,
3976  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
3977  gtid, (iter_number << 5) + shft));
3978 }
3979 
3980 void __kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) {
3981  kmp_int32 shft, num_dims, i;
3982  kmp_uint32 flag;
3983  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3984  kmp_info_t *th = __kmp_threads[gtid];
3985  kmp_team_t *team = th->th.th_team;
3986  kmp_disp_t *pr_buf;
3987  kmp_int64 lo, st;
3988 
3989  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
3990  if (team->t.t_serialized) {
3991  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
3992  return; // no dependencies if team is serialized
3993  }
3994 
3995  // calculate sequential iteration number (same as in "wait" but no
3996  // out-of-bounds checks)
3997  pr_buf = th->th.th_dispatch;
3998  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3999  num_dims = pr_buf->th_doacross_info[0];
4000  lo = pr_buf->th_doacross_info[2];
4001  st = pr_buf->th_doacross_info[4];
4002  if (st == 1) { // most common case
4003  iter_number = vec[0] - lo;
4004  } else if (st > 0) {
4005  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4006  } else { // negative increment
4007  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4008  }
4009  for (i = 1; i < num_dims; ++i) {
4010  kmp_int64 iter, ln;
4011  kmp_int32 j = i * 4;
4012  ln = pr_buf->th_doacross_info[j + 1];
4013  lo = pr_buf->th_doacross_info[j + 2];
4014  st = pr_buf->th_doacross_info[j + 4];
4015  if (st == 1) {
4016  iter = vec[i] - lo;
4017  } else if (st > 0) {
4018  iter = (kmp_uint64)(vec[i] - lo) / st;
4019  } else { // st < 0
4020  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4021  }
4022  iter_number = iter + ln * iter_number;
4023  }
4024  shft = iter_number % 32; // use 32-bit granularity
4025  iter_number >>= 5; // divided by 32
4026  flag = 1 << shft;
4027  KMP_MB();
4028  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4029  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4030  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4031  (iter_number << 5) + shft));
4032 }
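/* Worked example (editor's addition) of the flag indexing shared by
 * __kmpc_doacross_wait() and __kmpc_doacross_post() above. For a collapsed
 * iteration number of 70:
 *
 *   shft        = 70 % 32 = 6      // bit position inside a 32-bit word
 *   iter_number = 70 >> 5 = 2      // index of the 32-bit word
 *   flag        = 1 << 6  = 0x40
 *
 * so the waiter spins until bit 6 of th_doacross_flags[2] is set, which is
 * exactly the bit that the __kmpc_doacross_post() call for iteration 70 ORs
 * into the same word.
 */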
4033 
4034 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4035  kmp_int32 num_done;
4036  kmp_info_t *th = __kmp_threads[gtid];
4037  kmp_team_t *team = th->th.th_team;
4038  kmp_disp_t *pr_buf = th->th.th_dispatch;
4039 
4040  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4041  if (team->t.t_serialized) {
4042  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4043  return; // nothing to do
4044  }
4045  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4046  if (num_done == th->th.th_team_nproc) {
4047  // we are the last thread, need to free shared resources
4048  int idx = pr_buf->th_doacross_buf_idx - 1;
4049  dispatch_shared_info_t *sh_buf =
4050  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4051  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4052  (kmp_int64)&sh_buf->doacross_num_done);
4053  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4054  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4055  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4056  sh_buf->doacross_flags = NULL;
4057  sh_buf->doacross_num_done = 0;
4058  sh_buf->doacross_buf_idx +=
4059  __kmp_dispatch_num_buffers; // free buffer for future re-use
4060  }
4061  // free private resources (the buffer index must persist for future doacross loops)
4062  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4063  pr_buf->th_doacross_info = NULL;
4064  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4065 }
4066 #endif
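/* Illustrative example (editor's addition): a source-level doacross loop of
 * the kind served by the entry points above. A compiler targeting this
 * runtime typically emits __kmpc_doacross_init() before the loop body,
 * __kmpc_doacross_wait() for each depend(sink) clause, __kmpc_doacross_post()
 * for depend(source), and __kmpc_doacross_fini() after the loop; that mapping
 * is an assumption about compiler behavior, only the OpenMP source below is
 * standard. Note that the sink for i == 1 falls outside the iteration space
 * and is ignored, matching the bounds checks in __kmpc_doacross_wait().
 *
 *   #include <stdio.h>
 *
 *   int main(void) {
 *     enum { N = 16 };
 *     int a[N];
 *     a[0] = 1;
 *   #pragma omp parallel for ordered(1)
 *     for (int i = 1; i < N; ++i) {
 *   #pragma omp ordered depend(sink : i - 1)
 *       a[i] = a[i - 1] + 1;             // waits until iteration i-1 has posted
 *   #pragma omp ordered depend(source)
 *     }
 *     printf("a[N-1] = %d\n", a[N - 1]); // prints 16
 *     return 0;
 *   }
 */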
4067 
4068 #if OMP_50_ENABLED
4069 int __kmpc_get_target_offload(void) {
4070  if (!__kmp_init_serial) {
4071  __kmp_serial_initialize();
4072  }
4073  return __kmp_target_offload;
4074 }
4075 #endif // OMP_50_ENABLED
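/* Illustrative example (editor's addition): __kmpc_get_target_offload()
 * exposes the OMP_TARGET_OFFLOAD policy parsed at startup and is intended for
 * the offload runtime rather than for user code. The sketch below assumes the
 * symbol is visible to the application and prints only the raw value, since
 * the numeric encoding of disabled/default/mandatory is runtime-internal.
 *
 *   #include <stdio.h>
 *
 *   extern int __kmpc_get_target_offload(void);
 *
 *   int main(void) {
 *     // e.g. run with: OMP_TARGET_OFFLOAD=mandatory ./a.out
 *     printf("target offload policy (internal encoding): %d\n",
 *            __kmpc_get_target_offload());
 *     return 0;
 *   }
 */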
4076 
4077 // end of file //