#if defined(_WIN32_WINNT) && defined(_M_IX86)
#define _WIN32_WINNT 0x0502
#endif

#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h> // _control87() is used by the analytical guided schedule below
#endif
#include "ompt-specific.h"

#if KMP_STATIC_STEAL_ENABLED

// replaces dispatch_private_info{32,64} (C) structure
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  // ... (count, lb, ub, st, tc fields elided)
  T static_steal_counter; // for static_steal only
  // KMP_ALIGN(32) keeps parm1-parm4, which the scheduling algorithms below use
  // together, in one cache line
  struct KMP_ALIGN(32) {
    T parm1;
    T parm2;
    T parm3;
    T parm4;
  };
  // ... (ordered_lower, ordered_upper, last_upper elided)
};
#else /* KMP_STATIC_STEAL_ENABLED */

// variant used when static stealing is compiled out
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  // ... (lb, ub, st, tc, parm1-4, count, ordered bounds elided)
};

#endif /* KMP_STATIC_STEAL_ENABLED */
template <typename T> struct KMP_ALIGN_CACHE dispatch_private_info_template {
  // duplicate alignment here, otherwise size of structure is not correct in
  // our compiler
  union KMP_ALIGN_CACHE private_info_tmpl {
    dispatch_private_infoXX_template<T> p;
    dispatch_private_info64_t p64;
  } u;
  enum sched_type schedule; /* scheduling algorithm */
  kmp_uint32 ordered;       /* ordered clause specified */
  kmp_uint32 ordered_bumped;
  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; // to retain the structure size
  dispatch_private_info *next; /* stack of buffers for nest of serial regions */
  kmp_uint32 nomerge;          /* don't merge iters if serialized */
  kmp_uint32 type_size;
  enum cons_type pushed_ws;
};
// replaces dispatch_shared_info{32,64} (C) structure
template <typename UT> struct dispatch_shared_infoXX_template {
  /* chunk index under dynamic, number of idle threads under static-steal;
     iteration index otherwise */
  volatile UT iteration;
  volatile UT num_done;
  volatile UT ordered_iteration;
  // to retain the structure size making ordered_iteration scalar
  UT ordered_dummy[KMP_MAX_ORDERED - 3];
};
// replaces dispatch_shared_info structure and dispatch_shared_info_t type
template <typename UT> struct dispatch_shared_info_template {
  // we need union here to keep the structure size
  union shared_info_tmpl {
    dispatch_shared_infoXX_template<UT> s;
    dispatch_shared_info64_t s64;
  } u;
  volatile kmp_uint32 buffer_index;
  volatile kmp_int32 doacross_buf_idx; // teamwise index
  kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
  kmp_int32 doacross_num_done; // count finished threads
};
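/* ------------------------------------------------------------------------ */
// The helpers below wrap the machine-level atomics (KMP_TEST_THEN_ADD*,
// KMP_TEST_THEN_INC*, KMP_COMPARE_AND_STORE_REL*) in typed templates so the
// dispatch code can be written once for 4- and 8-byte induction variables.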
#undef USE_TEST_LOCKS

// test_then_add template (general template should NOT be used)
template <typename T> static __forceinline T test_then_add(volatile T *p, T d);

template <>
__forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
                                                 kmp_int32 d) {
  kmp_int32 r;
  r = KMP_TEST_THEN_ADD32(p, d);
  return r;
}

template <>
__forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
                                                 kmp_int64 d) {
  kmp_int64 r;
  r = KMP_TEST_THEN_ADD64(p, d);
  return r;
}
// test_then_inc_acq template (general template should NOT be used)
template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC_ACQ32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC_ACQ64(p);
  return r;
}
// test_then_inc template (general template should NOT be used)
template <typename T> static __forceinline T test_then_inc(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC64(p);
  return r;
}
// compare_and_swap template (general template should NOT be used)
template <typename T>
static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
                                                    kmp_int32 c, kmp_int32 s) {
  return KMP_COMPARE_AND_STORE_REL32(p, c, s);
}

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
                                                    kmp_int64 c, kmp_int64 s) {
  return KMP_COMPARE_AND_STORE_REL64(p, c, s);
}
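/* Spin-wait loop that pauses/yields until pred(*spinner, checker) returns
   non-zero; it does not put the thread to sleep. The returned value is the
   last value read from *spinner. For example,
     __kmp_wait_yield<kmp_uint32>(&flag, 1, __kmp_eq<kmp_uint32>
                                  USE_ITT_BUILD_ARG(NULL));
   spins until flag becomes 1 (a usage sketch, not taken verbatim from this
   file). */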
template <typename UT>
static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
                           kmp_uint32 (*pred)(UT, UT)
                               USE_ITT_BUILD_ARG(void *obj)) {
  // note: we may not belong to a team at this point
  volatile UT *spin = spinner;
  UT check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(UT, UT) = pred;
  UT r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = *spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield; pause is in the following code
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
template <typename UT> static kmp_uint32 __kmp_eq(UT value, UT checker) {
  return value == checker;
}
template <typename UT> static kmp_uint32 __kmp_neq(UT value, UT checker) {
  return value != checker;
}
template <typename UT> static kmp_uint32 __kmp_lt(UT value, UT checker) {
  return value < checker;
}
template <typename UT> static kmp_uint32 __kmp_ge(UT value, UT checker) {
  return value >= checker;
}
template <typename UT> static kmp_uint32 __kmp_le(UT value, UT checker) {
  return value <= checker;
}
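/* __kmp_dispatch_deo() / __kmp_dispatch_dxo() implement entry to and exit from
   an "ordered" section inside a dynamically scheduled loop: deo waits until
   the shared ordered_iteration reaches this thread's ordered_lower bound, dxo
   bumps it so the owner of the next iteration may proceed. The *_error
   variants are installed when the loop has no ordered clause and only perform
   consistency-check bookkeeping. */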
static void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}
template <typename UT>
static void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    UT lower;

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }
    lower = pr->u.p.ordered_lower;

#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }

    __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                         __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

    {
      char *buff;
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid));
}
static void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}
template <typename UT>
static void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
      __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }

    KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped != 0) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    pr->ordered_bumped += 1;

    KD_TRACE(1000,
             ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
              gtid, pr->ordered_bumped));

    /* TODO use general release procedure? */
    test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
  }
  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid));
}
template <typename UT>
static __forceinline long double __kmp_pow(long double x, UT y) {
  long double s = 1.0L;

  KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
  // exponentiation by squaring; y is unsigned, so no need to check y >= 0
  while (y) {
    if (y & 1)
      s *= x;
    x *= x;
    y >>= 1;
  }
  return s;
}
/* Computes and returns the number of unassigned iterations after idx chunks
   have been assigned. */
template <typename T>
static __inline typename traits_t<T>::unsigned_t
__kmp_dispatch_guided_remaining(T tc, typename traits_t<T>::floating_t base,
                                typename traits_t<T>::unsigned_t idx) {
  typedef typename traits_t<T>::unsigned_t UT;

  long double x = tc * __kmp_pow<UT>(base, idx);
  UT r = (UT)x;
  if (x == r)
    return r;
  return r + 1;
}
// Parameters of the guided-iterative algorithm:
//   parm2 = n * nproc * (chunk + 1)  // point of switching to dynamic
//   parm3 = 1 / (n * nproc)          // remaining-iterations multiplier
// by default n = 2
static int guided_int_param = 2;
static double guided_flt_param = 0.5; // = 1.0 / guided_int_param;
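/* __kmp_dispatch_init() is called once per thread per dynamically scheduled
   loop. It resolves the runtime/auto schedules, computes the trip count, and
   precomputes the per-schedule parameters parm1-parm4 in the thread's private
   dispatch buffer, which __kmp_dispatch_next() consumes later. */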
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;

  int active;
  T tc;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<UT> volatile *sh;

  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

  kmp_uint64 cur_chunk = chunk;
  int itt_need_metadata_reporting = __itt_metadata_add_ptr &&
                                    __kmp_forkjoin_frames_mode == 3 &&
                                    KMP_MASTER_GTID(gtid) &&
                                    th->th.th_teams_microtask == NULL &&
                                    team->t.t_active_level == 1;

  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  }

#if (KMP_STATIC_STEAL_ENABLED)
  if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    // AC: we now have only one implementation of stealing, so use it
    schedule = kmp_sch_static_steal;
  else
#endif
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

  // ... (extraction of the nomerge/ordered bits from the schedule elided)
  pr->type_size = traits_t<T>::type_size; // remember the size of variables
  if (schedule == kmp_sch_static) {
    schedule = __kmp_static;
  } else {
    if (schedule == kmp_sch_runtime) {
      // Use the schedule specified by OMP_SCHEDULE (or default if not
      // specified)
      schedule = team->t.t_sched.r_sched_type;
      // Detail the schedule if needed
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      } else if (schedule == kmp_sch_static) {
        schedule = __kmp_static;
      }
      // Use the chunk size specified by OMP_SCHEDULE (or default)
      chunk = team->t.t_sched.chunk;
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    } else {
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      }
      if (chunk <= 0) {
        chunk = KMP_DEFAULT_CHUNK;
      }
    }

    if (schedule == kmp_sch_auto) {
      // mapping and differentiation: in the __kmp_do_serial_initialize()
      schedule = __kmp_auto;
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: kmp_sch_auto: T#%%d new: "
                                "schedule:%%d chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    }

    /* guided analytical not safe for too many threads */
    if (schedule == kmp_sch_guided_analytical_chunked &&
        th->th.th_team_nproc > 1 << 20) {
      schedule = kmp_sch_guided_iterative_chunked;
      KMP_WARNING(DispatchManyThreads);
    }
    if (schedule == kmp_sch_runtime_simd) {
      // compiler provides simd_width in the chunk parameter
      schedule = team->t.t_sched.r_sched_type;
      if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
          schedule == __kmp_static) {
        schedule = kmp_sch_static_balanced_chunked;
      } else {
        if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided)
          schedule = kmp_sch_guided_simd;
        chunk = team->t.t_sched.chunk * chunk;
      }
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
                                " chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
    }
    pr->u.p.parm1 = chunk;
  }
  KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
              "unknown scheduling type");

  if (__kmp_env_consistency_check) {
    if (st == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                            (pr->ordered ? ct_pdo_ordered : ct_pdo), loc);
    }
  }
  // compute trip count
  if (st == 1) { // most common case
    tc = (ub >= lb) ? ub - lb + 1 : 0;
  } else if (st < 0) {
    // cast to unsigned so the division is unsigned regardless of result type
    tc = (lb >= ub) ? (UT)(lb - ub) / (-st) + 1 : 0;
  } else { // st > 0
    tc = (ub >= lb) ? (UT)(ub - lb) / st + 1 : 0;
  }
  // statistics: count static vs. dynamically scheduled loops (the macros
  // expand to nothing when statistics are disabled)
  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
  } else {
    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
  }

  pr->u.p.lb = lb;
  pr->u.p.ub = ub;
  pr->u.p.st = st;
  pr->u.p.tc = tc;
#if KMP_OS_WINDOWS
  pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

  if (active) {
    if (pr->ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      pr->ordered_bumped = 0;
      pr->u.p.ordered_lower = 1;
      pr->u.p.ordered_upper = 0;
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
  }

  if (__kmp_env_consistency_check) {
    enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
    if (push_ws) {
      __kmp_push_workshare(gtid, ws, loc);
      pr->pushed_ws = ws;
    } else {
      __kmp_check_workshare(gtid, ws, loc);
      pr->pushed_ws = ct_none;
    }
  }

  switch (schedule) {
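  // Per-schedule initialization: each case precomputes the parameters
  // (parm1-parm4, count, ub) that __kmp_dispatch_next() consumes later.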
#if (KMP_STATIC_STEAL_ENABLED)
  case kmp_sch_static_steal: {
    T nproc = th->th.th_team_nproc;
    T ntc, init;

    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid));

    ntc = (tc % chunk ? 1 : 0) + tc / chunk;
    if (nproc > 1 && ntc >= nproc) {
      T id = __kmp_tid_from_gtid(gtid);
      T small_chunk, extras;

      small_chunk = ntc / nproc;
      extras = ntc % nproc;

      init = id * small_chunk + (id < extras ? id : extras);
      pr->u.p.count = init;
      pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);

      pr->u.p.parm2 = lb;
      // parm3 is not used in static_steal
      pr->u.p.parm4 = (id + 1) % nproc; // remember the neighbour to steal from
      pr->u.p.st = st;
      if (traits_t<T>::type_size > 4) {
        // 8-byte induction variables use a per-thread lock instead of a 64-bit
        // CAS on the (count, ub) pair; the lock is freed in
        // __kmp_dispatch_next() when the loop is finished
        KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
        th->th.th_dispatch->th_steal_lock =
            (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
      }
      break;
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_balanced\n",
                     gtid));
      schedule = kmp_sch_static_balanced;
      /* too few iterations: fall-through to kmp_sch_static_balanced */
    }
    // FALL-THROUGH to static balanced
  } // case
#endif
  case kmp_sch_static_balanced: {
    T nproc = th->th.th_team_nproc;
    T init, limit;

    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                   gtid));

    if (nproc > 1) {
      T id = __kmp_tid_from_gtid(gtid);

      if (tc < nproc) {
        if (id < tc) {
          init = id;
          limit = id;
          pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
        } else {
          pr->u.p.count = 1; /* means no more chunks to execute */
          pr->u.p.parm1 = FALSE;
          break;
        }
      } else {
        T small_chunk = tc / nproc;
        T extras = tc % nproc;
        init = id * small_chunk + (id < extras ? id : extras);
        limit = init + small_chunk - (id < extras ? 0 : 1);
        pr->u.p.parm1 = (id == nproc - 1);
      }
    } else {
      if (tc > 0) {
        init = 0;
        limit = tc - 1;
        pr->u.p.parm1 = TRUE;
      } else { // zero trip count
        pr->u.p.count = 1; /* means no more chunks to execute */
        pr->u.p.parm1 = FALSE;
        break;
      }
    }
    // Calculate chunk for metadata report
    if (itt_need_metadata_reporting)
      cur_chunk = limit - init + 1;

    if (st == 1) {
      pr->u.p.lb = lb + init;
      pr->u.p.ub = lb + limit;
    } else {
      // calculated upper bound, "ub" is user-defined upper bound
      T ub_tmp = lb + limit * st;
      pr->u.p.lb = lb + init * st;
      // adjust upper bound to "ub" if needed, so that MS lastprivate will
      // match it exactly
      if (st > 0)
        pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
      else
        pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
    }
    if (pr->ordered) {
      pr->u.p.ordered_lower = init;
      pr->u.p.ordered_upper = limit;
    }
    break;
  } // case
  case kmp_sch_static_balanced_chunked: {
    // similar to balanced, but the chunk is adjusted to a multiple of the
    // simd width
    T nth = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d runtime(simd:static)"
                   " -> falling-through to static_greedy\n",
                   gtid));
    schedule = kmp_sch_static_greedy;
    if (nth > 1)
      pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
    else
      pr->u.p.parm1 = tc;
    break;
  } // case
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_simd: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked"
                   " case\n",
                   gtid));
    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        // when remaining iterations become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
        *(double *)&pr->u.p.parm3 =
            guided_flt_param / nproc; // may occupy parm3 and parm4
      }
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",
                     gtid));
      pr->u.p.parm1 = tc;
    }
    break;
  } // case
  case kmp_sch_guided_analytical_chunked: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked"
              " case\n",
              gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        DBL x;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* Windows* OS on IA-32 defaults to 53-bit precision; the solver below
           needs 64-bit long double arithmetic, so temporarily raise the FPCW
           precision and restore it afterwards. */
        unsigned int oldFpcw = _control87(0, 0);
        _control87(_PC_64, _MCW_PC);
#endif
        /* value used for comparison in solver for cross-over point */
        long double target = ((long double)chunk * 2 + 1) * nproc / tc;

        /* crossover point--chunk indexes equal to or greater than this point
           switch to dynamic-style scheduling */
        UT cross;

        /* commonly used term: (2 nproc - 1)/(2 nproc) */
        x = (long double)1.0 - (long double)0.5 / nproc;

        { // test natural alignment
          struct _test_a {
            char a;
            union {
              char b;
              DBL d;
            };
          } t;
          ptrdiff_t natural_alignment =
              (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
          KMP_DEBUG_ASSERT(
              (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
        }

        /* save the term in the thread-private dispatch structure */
        *(DBL *)&pr->u.p.parm3 = x;

        /* solve for the crossover point to the nearest integer i for which
           C_i <= chunk, using bisection */
        {
          UT left, right, mid;
          long double p;

          right = 229; // initial estimate of the upper bound
          p = __kmp_pow<UT>(x, right);
          if (p > target) {
            do {
              p *= p;
              right <<= 1;
            } while (p > target && right < (1 << 27));
            left = right >> 1; /* previous (failed) estimate of upper bound */
          } else {
            left = 0;
          }

          /* bisection root-finding method */
          while (left + 1 < right) {
            mid = (left + right) / 2;
            if (__kmp_pow<UT>(x, mid) > target) {
              left = mid;
            } else {
              right = mid;
            }
          }
          cross = right;
        }
        /* assert sanity of computed crossover point */
        KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
                   __kmp_pow<UT>(x, cross) <= target);

        /* save the crossover point in the thread-private dispatch structure */
        pr->u.p.parm2 = cross;

#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
        /* dynamic-style scheduling offset */
        pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
                                 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
                        cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        _control87(oldFpcw, _MCW_PC); // restore FPCW
#endif
      }
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      pr->u.p.parm1 = tc;
    }
    break;
  } // case
  case kmp_sch_static_greedy:
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n", gtid));
    pr->u.p.parm1 = (th->th.th_team_nproc > 1)
                        ? (tc + th->th.th_team_nproc - 1) / th->th.th_team_nproc
                        : tc;
    break;
  case kmp_sch_static_chunked:
  case kmp_sch_dynamic_chunked:
    if (pr->u.p.parm1 <= 0) {
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    }
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d "
                   "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                   gtid));
    break;
  case kmp_sch_trapezoidal: {
    /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
    T parm1, parm2, parm3, parm4;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid));

    parm1 = chunk;
    /* F : size of the first cycle */
    parm2 = (tc / (2 * th->th.th_team_nproc));
    if (parm2 < 1) {
      parm2 = 1;
    }
    /* L : size of the last cycle; not larger than the first cycle */
    if (parm1 < 1) {
      parm1 = 1;
    } else if (parm1 > parm2) {
      parm1 = parm2;
    }
    /* N : number of cycles */
    parm3 = (parm2 + parm1);
    parm3 = (2 * tc + parm3 - 1) / parm3;
    if (parm3 < 2) {
      parm3 = 2;
    }
    /* sigma : decreasing increment of the trapezoid */
    parm4 = (parm3 - 1);
    parm4 = (parm2 - parm1) / parm4;

    pr->u.p.parm1 = parm1;
    pr->u.p.parm2 = parm2;
    pr->u.p.parm3 = parm3;
    pr->u.p.parm4 = parm4;
    break;
  } // case
  default: {
    __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                KMP_HNT(GetNewerLibrary), // Hint
                __kmp_msg_null // Variadic argument list terminator
                );
  } break;
  } // switch
  pr->schedule = schedule;

  if (active) {
    /* wait until this buffer (my_buffer_index) is free to use */
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                                 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: buffer_index and my_buffer_index are always 32-bit integers.
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));

    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
    if (pr->ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata (only by the master of an active team at level 1)
    if (itt_need_metadata_reporting) {
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
      case kmp_sch_guided_simd:
        schedtype = 2;
        break;
      default:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
    }
  }

  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
                  pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->u.p.ordered_lower,
                  pr->u.p.ordered_upper, pr->u.p.parm1, pr->u.p.parm2,
                  pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#if (KMP_STATIC_STEAL_ENABLED)
  // The dedicated static_steal_counter flags that this thread's buffer is
  // initialized, so other threads may steal from it from now on.
  if (schedule == kmp_sch_static_steal) {
    volatile T *p = &pr->u.p.static_steal_counter;
    *p = *p + 1;
  }
#endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    kmp_info_t *thr = __kmp_threads[gtid];
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      UT lower = pr->u.p.ordered_lower;

      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }

      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
#ifdef KMP_GOMP_COMPAT

template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      inc -= pr->ordered_bumped;

      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;

      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }

      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}

#endif /* KMP_GOMP_COMPAT */
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled.ompt_callback_work) {                                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_callbacks.ompt_callback(ompt_callback_work)(                        \
          ompt_work_loop, ompt_scope_end, &(team_info->parallel_data),         \
          &(task_info->task_data), 0, codeptr);                                \
    }                                                                          \
  }
#else
#define OMPT_LOOP_END // no-op
#endif

template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                               T *p_lb, T *p_ub,
                               typename traits_t<T>::signed_t *p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                               ,
                               void *codeptr
#endif
                               ) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;

  // Note: schedule(runtime) ends up here even when the actual runtime schedule
  // is static, so this timer over-approximates dynamic scheduling.
  KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);

  int status;
  dispatch_private_info_template<T> *pr;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_next: T#%%d called p_lb:%%%s "
                            "p_ub:%%%s p_st:%%%s p_last: %%p\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec);
    KD_TRACE(1000, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last));
    __kmp_str_free(&buff);
  }
  if (team->t.t_serialized) {
    /* NOTE: serialize this dispatch because we are not at the active level */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
    KMP_DEBUG_ASSERT(pr);

    if ((status = (pr->u.p.tc != 0)) == 0) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }
    } else if (pr->nomerge) {
      kmp_int32 last;
      T start;
      UT limit, trip, init;
      ST incr;
      T chunk = pr->u.p.parm1;

      KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                     gtid));

      init = chunk * pr->u.p.count++;
      trip = pr->u.p.tc - 1;

      if ((status = (init <= trip)) == 0) {
        *p_lb = 0;
        *p_ub = 0;
        if (p_st != NULL)
          *p_st = 0;
        if (__kmp_env_consistency_check) {
          if (pr->pushed_ws != ct_none) {
            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
          }
        }
      } else {
        start = pr->u.p.lb;
        limit = chunk + init - 1;
        incr = pr->u.p.st;

        if ((last = (limit >= trip)) != 0) {
          limit = trip;
#if KMP_OS_WINDOWS
          pr->u.p.last_upper = pr->u.p.ub;
#endif
        }
        if (p_last != NULL)
          *p_last = last;
        if (p_st != NULL)
          *p_st = incr;
        if (incr == 1) {
          *p_lb = start + init;
          *p_ub = start + limit;
        } else {
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
        }

        if (pr->ordered) {
          pr->u.p.ordered_lower = init;
          pr->u.p.ordered_upper = limit;
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
        }
      }
    } else {
      pr->u.p.tc = 0;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
      pr->u.p.last_upper = *p_ub;
#endif
      if (p_last != NULL)
        *p_last = TRUE;
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    }
    {
      char *buff;
      buff = __kmp_str_format(
          "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
          "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
          traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
      KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status));
      __kmp_str_free(&buff);
    }
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
  } else {
    kmp_int32 last = 0;
    dispatch_shared_info_template<UT> *sh;
    T start;
    ST incr;
    UT limit, trip, init;

    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    KMP_DEBUG_ASSERT(pr);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(sh);

    if (pr->u.p.tc == 0) {
      // zero trip count: nothing to hand out
      status = 0;
    } else {
      switch (pr->schedule) {
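      // static_steal: each thread owns a contiguous range of chunks described
      // by (count, ub) in its private buffer; when its own range is exhausted
      // it tries to steal the tail of a victim's range. 8-byte induction
      // variables use a per-thread lock, 4-byte ones a 64-bit CAS over the
      // packed (count, ub) pair.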
#if (KMP_STATIC_STEAL_ENABLED)
      case kmp_sch_static_steal: {
        T chunk = pr->u.p.parm1;
        int nproc = th->th.th_team_nproc;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n",
                       gtid));

        trip = pr->u.p.tc - 1;

        if (traits_t<T>::type_size > 4) {
          // use a lock for 8-byte induction variables
          kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
          KMP_DEBUG_ASSERT(lck != NULL);
          if (pr->u.p.count < (UT)pr->u.p.ub) {
            __kmp_acquire_lock(lck, gtid);
            // try to get own chunk of iterations
            init = (pr->u.p.count)++;
            status = (init < (UT)pr->u.p.ub);
            __kmp_release_lock(lck, gtid);
          } else {
            status = 0; // no own chunks
          }
          if (!status) { // try to steal
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;
            // start from the previously remembered victim and walk the team
            while ((!status) && (while_limit != ++while_index)) {
              T remaining;
              T victimIdx = pr->u.p.parm4;
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total);
                // no victim is ready yet to participate in stealing
              }
              if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
                continue; // not enough chunks to steal, go to next victim
              }

              lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
              KMP_ASSERT(lck != NULL);
              __kmp_acquire_lock(lck, gtid);
              limit = victim->u.p.ub; // keep initial ub
              if (victim->u.p.count >= limit ||
                  (remaining = limit - victim->u.p.count) < 2) {
                __kmp_release_lock(lck, gtid);
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
                continue; // not enough chunks to steal
              }
              // steal 1/4 of the remaining chunks, or 1 of the 2-3 remaining
              if (remaining > 3) {
                init = (victim->u.p.ub -= (remaining >> 2));
              } else {
                init = (victim->u.p.ub -= 1);
              }
              __kmp_release_lock(lck, gtid);

              KMP_DEBUG_ASSERT(init + 1 <= limit);
              pr->u.p.parm4 = victimIdx; // remember victim to steal from
              status = 1;
              while_index = 0;
              // now update own count and ub with the stolen range, excluding
              // the init chunk
              __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
              pr->u.p.count = init + 1;
              pr->u.p.ub = limit;
              __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
            } // while (search for victim)
          } // if (try to steal)
        } else { // 4-byte induction variable: use a 64-bit CAS on (count, ub)
          typedef union {
            struct {
              UT count;
              T ub;
            } p;
            kmp_int64 b;
          } union_i4;
          // All operations on 'count' or 'ub' must be combined atomically.
          {
            union_i4 vold, vnew;
            vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
            vnew = vold;
            vnew.p.count++;
            while (!KMP_COMPARE_AND_STORE_ACQ64(
                (volatile kmp_int64 *)&pr->u.p.count,
                *VOLATILE_CAST(kmp_int64 *) & vold.b,
                *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
              KMP_CPU_PAUSE();
              vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
              vnew = vold;
              vnew.p.count++;
            }
            vnew = vold;
            init = vnew.p.count;
            status = (init < (UT)vnew.p.ub);
          }

          if (!status) { // try to steal
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;

            while ((!status) && (while_limit != ++while_index)) {
              union_i4 vold, vnew;
              kmp_int32 remaining;
              T victimIdx = pr->u.p.parm4;
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total)
              }

              pr->u.p.parm4 = victimIdx; // new victim found
              while (1) { // CAS loop if victim has enough chunks to steal
                vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
                vnew = vold;

                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (vnew.p.count >= (UT)vnew.p.ub ||
                    (remaining = vnew.p.ub - vnew.p.count) < 2) {
                  pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim
                  break; // not enough chunks to steal, go to next victim
                }
                if (remaining > 3) {
                  vnew.p.ub -= (remaining >> 2); // steal 1/4 of remaining
                } else {
                  vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
                }
                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (KMP_COMPARE_AND_STORE_ACQ64(
                        (volatile kmp_int64 *)&victim->u.p.count,
                        *VOLATILE_CAST(kmp_int64 *) & vold.b,
                        *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
                  // stealing succeeded
                  status = 1;
                  while_index = 0;
                  // now update own count and ub
                  init = vnew.p.ub;
                  vold.p.count = init + 1;
#if KMP_ARCH_X86
                  KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count),
                                   vold.b);
#else
                  *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
#endif
                  break;
                } // if (check CAS result)
                KMP_CPU_PAUSE(); // CAS failed, repeat attempt
              } // while (try to steal from a particular victim)
            } // while (search for victim)
          } // if (try to steal)
        } // if (4-byte induction variable)
        if (!status) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.parm2;
          init *= chunk;
          limit = chunk + init - 1;
          incr = pr->u.p.st;

          KMP_DEBUG_ASSERT(init <= trip);
          if ((last = (limit >= trip)) != 0)
            limit = trip;
          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
        break;
      } // case
#endif // ( KMP_STATIC_STEAL_ENABLED )
      case kmp_sch_static_balanced: {
        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid));
        if ((status = !pr->u.p.count) !=
            0) { /* check if thread has any iteration to do */
          pr->u.p.count = 1;
          *p_lb = pr->u.p.lb;
          *p_ub = pr->u.p.ub;
          last = pr->u.p.parm1;
          if (p_st != NULL)
            *p_st = pr->u.p.st;
        } else { /* no iterations to do */
          pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
        }
        if (pr->ordered) {
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
        } // if
        break;
      } // case
      case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy
                                     was merged here */
      case kmp_sch_static_chunked: {
        T parm1;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d "
                       "kmp_sch_static_[affinity|chunked] case\n",
                       gtid));
        parm1 = pr->u.p.parm1;

        trip = pr->u.p.tc - 1;
        init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

        if ((status = (init <= trip)) != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          limit = parm1 + init - 1;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          pr->u.p.count += th->th.th_team_nproc;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
        break;
      } // case
      case kmp_sch_dynamic_chunked: {
        T chunk = pr->u.p.parm1;

        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid));

        init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
        trip = pr->u.p.tc - 1;

        if ((status = (init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = chunk + init - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
        break;
      } // case
      case kmp_sch_guided_iterative_chunked: {
        T chunkspec = pr->u.p.parm1;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "iterative case\n",
                       gtid));
        trip = pr->u.p.tc;
        // Start atomic part of calculations
        while (1) {
          ST remaining; // signed, because can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // nothing to do, don't try atomic op
            status = 0;
            break;
          }
          if ((T)remaining <
              pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
            // use dynamic-style schedule:
            // atomically increment iterations, get old value
            init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                     (ST)chunkspec);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else { // got some iterations to work on
              status = 1;
              if ((T)remaining > chunkspec) {
                limit = init + chunkspec - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          limit = init + (UT)(remaining *
                              *(double *)&pr->u.p.parm3); // divide by K*nproc
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
        break;
      } // case
      case kmp_sch_guided_simd: {
        // same as iterative, but the current chunk is adjusted to be a
        // multiple of the given chunk
        T chunk = pr->u.p.parm1;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_simd case\n",
                       gtid));
        trip = pr->u.p.tc;
        // Start atomic part of calculations
        while (1) {
          ST remaining; // signed, because can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // nothing to do, don't try atomic op
            status = 0;
            break;
          }
          KMP_DEBUG_ASSERT(init % chunk == 0);
          // compare with K*nproc*(chunk+1), K=2 by default
          if ((T)remaining < pr->u.p.parm2) {
            // use dynamic-style schedule:
            // atomically increment iterations, get old value
            init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                     (ST)chunk);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else { // got some iterations to work on
              status = 1;
              if ((T)remaining > chunk) {
                limit = init + chunk - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          // divide by K*nproc
          UT span = remaining * (*(double *)&pr->u.p.parm3);
          UT rem = span % chunk;
          if (rem) // adjust so that span % chunk == 0
            span += chunk - rem;
          limit = init + span;
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
        break;
      } // case
      case kmp_sch_guided_analytical_chunked: {
        T chunkspec = pr->u.p.parm1;
        UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* for storing the original FPCW value on Windows* OS, IA-32 */
        unsigned int oldFpcw;
        unsigned int fpcwSet = 0;
#endif
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "analytical case\n",
                       gtid));

        trip = pr->u.p.tc;

        KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1);
        KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc <
                         trip);

        while (1) { /* this while loop is a safeguard against unexpected zero
                       chunk sizes */
          chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
          if (chunkIdx >= (UT)pr->u.p.parm2) {
            --trip;
            /* use dynamic-style scheduling */
            init = chunkIdx * chunkspec + pr->u.p.count;
            /* need to verify init > 0 in case of overflow in the above
               calculation */
            if ((status = (init > 0 && init <= trip)) != 0) {
              limit = init + chunkspec - 1;
              if ((last = (limit >= trip)) != 0)
                limit = trip;
            }
            break;
          } else {
/* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
            /* if we haven't already done so, save the original FPCW and set
               precision to 64-bit (Windows* OS on IA-32 defaults to 53-bit) */
            if (!fpcwSet) {
              oldFpcw = _control87(0, 0);
              _control87(_PC_64, _MCW_PC);
              fpcwSet = 0x30000;
            }
#endif
            if (chunkIdx) {
              init = __kmp_dispatch_guided_remaining<T>(
                  trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
              KMP_DEBUG_ASSERT(init);
              init = trip - init;
            } else
              init = 0;
            limit = trip - __kmp_dispatch_guided_remaining<T>(
                               trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
            KMP_ASSERT(init <= limit);
            if (init < limit) {
              KMP_DEBUG_ASSERT(limit <= trip);
              --limit;
              status = 1;
              break;
            } // if
          } // if
        } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* restore FPCW if necessary; check fpcwSet first because oldFpcw can
           be uninitialized here */
        if (fpcwSet && (oldFpcw & fpcwSet))
          _control87(oldFpcw, _MCW_PC);
#endif
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
        break;
      } // case
      case kmp_sch_trapezoidal: {
        UT index;
        T parm2 = pr->u.p.parm2;
        T parm3 = pr->u.p.parm3;
        T parm4 = pr->u.p.parm4;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                       gtid));

        index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);

        init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
        trip = pr->u.p.tc - 1;

        if ((status = ((T)index < parm3 && init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
          } // if
        } // if
        break;
      } // case
      default: {
        status = 0; // to avoid complaints on uninitialized variable use
        __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
                    KMP_HNT(GetNewerLibrary), // Hint
                    __kmp_msg_null // Variadic argument list terminator
                    );
      } break;
      } // switch
    } // if (pr->u.p.tc == 0)
    if (status == 0) {
      UT num_done;

      num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
            traits_t<UT>::spec);
        KD_TRACE(100, (buff, gtid, sh->u.s.num_done));
        __kmp_str_free(&buff);
      }

      if ((ST)num_done == th->th.th_team_nproc - 1) {
#if (KMP_STATIC_STEAL_ENABLED)
        if (pr->schedule == kmp_sch_static_steal &&
            traits_t<T>::type_size > 4) {
          int i;
          kmp_info_t **other_threads = team->t.t_threads;
          // loop complete, safe to destroy locks used for stealing
          for (i = 0; i < th->th.th_team_nproc; ++i) {
            kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
            KMP_ASSERT(lck != NULL);
            __kmp_destroy_lock(lck);
            __kmp_free(lck);
            other_threads[i]->th.th_dispatch->th_steal_lock = NULL;
          }
        }
#endif
        /* NOTE: release this buffer to be reused */
        sh->u.s.num_done = 0;
        sh->u.s.iteration = 0;

        /* TODO replace with general release procedure? */
        if (pr->ordered) {
          sh->u.s.ordered_iteration = 0;
        }

        sh->buffer_index += __kmp_dispatch_num_buffers;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                       gtid, sh->buffer_index));
      } // if

      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }

      th->th.th_dispatch->th_deo_fcn = NULL;
      th->th.th_dispatch->th_dxo_fcn = NULL;
      th->th.th_dispatch->th_dispatch_sh_current = NULL;
      th->th.th_dispatch->th_dispatch_pr_current = NULL;
    } // if (status == 0)
#if KMP_OS_WINDOWS
    else if (last) {
      pr->u.p.last_upper = pr->u.p.ub;
    }
#endif /* KMP_OS_WINDOWS */
    if (p_last != NULL && status != 0)
      *p_last = last;
  } // if (serialized / normal)

  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status));
    __kmp_str_free(&buff);
  }
#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_NEXT();
#endif
  OMPT_LOOP_END;
  return status;
}
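/* __kmp_dist_get_bounds() narrows [*plower, *pupper] to the portion of the
   iteration space owned by the calling thread's team; it is used by the
   dist_dispatch entry points for "distribute parallel for" loops. */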
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal: the compiler cannot catch zero-trip loops with a
      // run-time increment whose sign does not match the bounds.
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get a single iteration, others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper;
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper;
      }
    }
  }
}
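/*
  The __kmpc_dispatch_init_{4,4u,8,8u}, __kmpc_dispatch_next_* and
  __kmpc_dispatch_fini_* functions below are the extern "C" entry points the
  compiler emits for dynamically scheduled worksharing loops; they simply
  forward to the templated implementations above. A schematic sketch of the
  calling pattern (an illustration, not actual compiler output):

    kmp_int32 lb, ub, st, last;
    __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, 0, n - 1, 1,
                           chunk);
    while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
      for (kmp_int32 i = lb; i <= ub; i += st)
        body(i); // loop body
    }
*/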
/*!
@ingroup WORK_SHARING
@param loc Source location
@param gtid Global thread id
@param schedule Schedule type
@param lb Lower bound
@param ub Upper bound
@param st Step (or increment)
@param chunk The chunk size to block with

This function prepares the runtime to start a dynamically scheduled for loop,
saving the loop arguments. These functions are all identical apart from the
types of the arguments.
*/
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*! See @ref __kmpc_dispatch_init_4 */
void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*! See @ref __kmpc_dispatch_init_4 */
void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*! See @ref __kmpc_dispatch_init_4 */
void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
See @ref __kmpc_dispatch_init_4

The dist_dispatch variants are called for the composite "distribute parallel
for" construct: the per-team iteration space is computed first via
__kmp_dist_get_bounds(), then regular dispatching is initialized.
*/
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}
/*!
@param loc Source code location
@param gtid Global thread id
@param p_last Pointer to a flag set to one if this is the last chunk
@param p_lb Pointer to the lower bound for the next chunk of work
@param p_ub Pointer to the upper bound for the next chunk of work
@param p_st Pointer to the stride for the next chunk of work
@return one if there is work to be done, zero otherwise

Get the next dynamically allocated chunk of work for this thread. If there is
no more work, then the lb, ub and stride need not be modified.
*/
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                        );
}

/*! See @ref __kmpc_dispatch_next_4 */
int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                         );
}

/*! See @ref __kmpc_dispatch_next_4 */
int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                        ,
                                        OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                        );
}

/*! See @ref __kmpc_dispatch_next_4 */
int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                         ,
                                         OMPT_LOAD_RETURN_ADDRESS(gtid)
#endif
                                         );
}

/*!
@param loc Source code location
@param gtid Global thread id

Mark the end of a dynamic loop.
*/
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

/*! See @ref __kmpc_dispatch_fini_4 */
void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}

/*! See @ref __kmpc_dispatch_fini_4 */
void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

/*! See @ref __kmpc_dispatch_fini_4 */
void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value == checker;
}

kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value != checker;
}

kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
  return value < checker;
}

kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
  return value >= checker;
}

kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
  return value <= checker;
}
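// Non-templated 32-bit variants of the comparison predicates and spin-wait
// loop above, used by other parts of the runtime.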
kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
                   void *obj // Higher-level synchronization object, or NULL.
                   ) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
void __kmp_wait_yield_4_ptr(
    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
    void *obj // Higher-level synchronization object, or NULL.
    ) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // if we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
#ifdef KMP_GOMP_COMPAT

void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

#endif /* KMP_GOMP_COMPAT */