LLVM OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include <stddef.h>
15 #include <atomic>
16 
17 #include "kmp.h"
18 #include "kmp_i18n.h"
19 #include "kmp_io.h"
20 #include "kmp_itt.h"
21 #include "kmp_lock.h"
22 
23 #include "tsan_annotations.h"
24 
25 #if KMP_USE_FUTEX
26 #include <sys/syscall.h>
27 #include <unistd.h>
28 // We should really include <futex.h>, but that causes compatibility problems on
29 // different Linux* OS distributions that either require that you include (or
30 // break when you try to include) <pci/types.h>. Since all we need is the two
31 // macros below (which are part of the kernel ABI, so can't change) we just
32 // define the constants here and don't include <futex.h>
33 #ifndef FUTEX_WAIT
34 #define FUTEX_WAIT 0
35 #endif
36 #ifndef FUTEX_WAKE
37 #define FUTEX_WAKE 1
38 #endif
39 #endif
40 
41 /* Implement spin locks for internal library use. */
42 /* The algorithm implemented is Lamport's bakery lock [1974]. */
43 
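// __kmp_validate_locks() below sanity-checks two assumptions the lock code in
// this file relies on: unsigned arithmetic must wrap modulo 2^32 (the ticket
// lock, for example, computes next_ticket - now_serving across wraparound),
// and kmp_base_queuing_lock's tail_id must be 8-byte aligned so the queuing
// lock's 64-bit compare-and-store (which updates head and tail together) acts
// on an aligned word.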
44 void __kmp_validate_locks(void) {
45  int i;
46  kmp_uint32 x, y;
47 
48  /* Check to make sure unsigned arithmetic wraps properly */
49  x = ~((kmp_uint32)0) - 2;
50  y = x - 2;
51 
52  for (i = 0; i < 8; ++i, ++x, ++y) {
53  kmp_uint32 z = (x - y);
54  KMP_ASSERT(z == 2);
55  }
56 
57  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
58 }
59 
60 /* ------------------------------------------------------------------------ */
61 /* test and set locks */
62 
63 // For the non-nested locks, we can only assume that the first 4 bytes were
64 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
65 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
66 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
67 //
68 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
69 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
70 
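// Encoding used by the TAS functions below: lk.poll holds KMP_LOCK_FREE(tas)
// when the lock is free and KMP_LOCK_BUSY(gtid + 1, tas) when held, so
// __kmp_get_tas_lock_owner() recovers the owning gtid by stripping the tag and
// subtracting 1 (yielding -1 when nobody owns the lock).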
71 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
72  return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1;
73 }
74 
75 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
76  return lck->lk.depth_locked != -1;
77 }
78 
79 __forceinline static int
80 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
81  KMP_MB();
82 
83 #ifdef USE_LOCK_PROFILE
84  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
85  if ((curr != 0) && (curr != gtid + 1))
86  __kmp_printf("LOCK CONTENTION: %p\n", lck);
87 /* else __kmp_printf( "." );*/
88 #endif /* USE_LOCK_PROFILE */
89 
90  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
91  KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
92  KMP_LOCK_BUSY(gtid + 1, tas))) {
93  KMP_FSYNC_ACQUIRED(lck);
94  return KMP_LOCK_ACQUIRED_FIRST;
95  }
96 
97  kmp_uint32 spins;
98  KMP_FSYNC_PREPARE(lck);
99  KMP_INIT_YIELD(spins);
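  // If the process is oversubscribed (more threads than available processors),
  // always yield to the OS; otherwise use the counted spin/yield heuristic.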
100  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
101  KMP_YIELD(TRUE);
102  } else {
103  KMP_YIELD_SPIN(spins);
104  }
105 
106  kmp_backoff_t backoff = __kmp_spin_backoff_params;
107  while ((lck->lk.poll != KMP_LOCK_FREE(tas)) ||
108  (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
109  KMP_LOCK_BUSY(gtid + 1, tas)))) {
110 
111  __kmp_spin_backoff(&backoff);
112  if (TCR_4(__kmp_nth) >
113  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
114  KMP_YIELD(TRUE);
115  } else {
116  KMP_YIELD_SPIN(spins);
117  }
118  }
119  KMP_FSYNC_ACQUIRED(lck);
120  return KMP_LOCK_ACQUIRED_FIRST;
121 }
122 
123 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
124  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
125  ANNOTATE_TAS_ACQUIRED(lck);
126  return retval;
127 }
128 
129 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
130  kmp_int32 gtid) {
131  char const *const func = "omp_set_lock";
132  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
133  __kmp_is_tas_lock_nestable(lck)) {
134  KMP_FATAL(LockNestableUsedAsSimple, func);
135  }
136  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
137  KMP_FATAL(LockIsAlreadyOwned, func);
138  }
139  return __kmp_acquire_tas_lock(lck, gtid);
140 }
141 
142 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
143  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
144  KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
145  KMP_LOCK_BUSY(gtid + 1, tas))) {
146  KMP_FSYNC_ACQUIRED(lck);
147  return TRUE;
148  }
149  return FALSE;
150 }
151 
152 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
153  kmp_int32 gtid) {
154  char const *const func = "omp_test_lock";
155  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
156  __kmp_is_tas_lock_nestable(lck)) {
157  KMP_FATAL(LockNestableUsedAsSimple, func);
158  }
159  return __kmp_test_tas_lock(lck, gtid);
160 }
161 
162 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
163  KMP_MB(); /* Flush all pending memory write invalidates. */
164 
165  KMP_FSYNC_RELEASING(lck);
166  ANNOTATE_TAS_RELEASED(lck);
167  KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas));
168  KMP_MB(); /* Flush all pending memory write invalidates. */
169 
170  KMP_YIELD(TCR_4(__kmp_nth) >
171  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
172  return KMP_LOCK_RELEASED;
173 }
174 
175 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
176  kmp_int32 gtid) {
177  char const *const func = "omp_unset_lock";
178  KMP_MB(); /* in case another processor initialized lock */
179  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
180  __kmp_is_tas_lock_nestable(lck)) {
181  KMP_FATAL(LockNestableUsedAsSimple, func);
182  }
183  if (__kmp_get_tas_lock_owner(lck) == -1) {
184  KMP_FATAL(LockUnsettingFree, func);
185  }
186  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
187  (__kmp_get_tas_lock_owner(lck) != gtid)) {
188  KMP_FATAL(LockUnsettingSetByAnother, func);
189  }
190  return __kmp_release_tas_lock(lck, gtid);
191 }
192 
193 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
194  TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas));
195 }
196 
197 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
198  __kmp_init_tas_lock(lck);
199 }
200 
201 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
202 
203 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
204  char const *const func = "omp_destroy_lock";
205  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
206  __kmp_is_tas_lock_nestable(lck)) {
207  KMP_FATAL(LockNestableUsedAsSimple, func);
208  }
209  if (__kmp_get_tas_lock_owner(lck) != -1) {
210  KMP_FATAL(LockStillOwned, func);
211  }
212  __kmp_destroy_tas_lock(lck);
213 }
214 
215 // nested test and set locks
216 
217 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
218  KMP_DEBUG_ASSERT(gtid >= 0);
219 
220  if (__kmp_get_tas_lock_owner(lck) == gtid) {
221  lck->lk.depth_locked += 1;
222  return KMP_LOCK_ACQUIRED_NEXT;
223  } else {
224  __kmp_acquire_tas_lock_timed_template(lck, gtid);
225  ANNOTATE_TAS_ACQUIRED(lck);
226  lck->lk.depth_locked = 1;
227  return KMP_LOCK_ACQUIRED_FIRST;
228  }
229 }
230 
231 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
232  kmp_int32 gtid) {
233  char const *const func = "omp_set_nest_lock";
234  if (!__kmp_is_tas_lock_nestable(lck)) {
235  KMP_FATAL(LockSimpleUsedAsNestable, func);
236  }
237  return __kmp_acquire_nested_tas_lock(lck, gtid);
238 }
239 
240 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
241  int retval;
242 
243  KMP_DEBUG_ASSERT(gtid >= 0);
244 
245  if (__kmp_get_tas_lock_owner(lck) == gtid) {
246  retval = ++lck->lk.depth_locked;
247  } else if (!__kmp_test_tas_lock(lck, gtid)) {
248  retval = 0;
249  } else {
250  KMP_MB();
251  retval = lck->lk.depth_locked = 1;
252  }
253  return retval;
254 }
255 
256 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
257  kmp_int32 gtid) {
258  char const *const func = "omp_test_nest_lock";
259  if (!__kmp_is_tas_lock_nestable(lck)) {
260  KMP_FATAL(LockSimpleUsedAsNestable, func);
261  }
262  return __kmp_test_nested_tas_lock(lck, gtid);
263 }
264 
265 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
266  KMP_DEBUG_ASSERT(gtid >= 0);
267 
268  KMP_MB();
269  if (--(lck->lk.depth_locked) == 0) {
270  __kmp_release_tas_lock(lck, gtid);
271  return KMP_LOCK_RELEASED;
272  }
273  return KMP_LOCK_STILL_HELD;
274 }
275 
276 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
277  kmp_int32 gtid) {
278  char const *const func = "omp_unset_nest_lock";
279  KMP_MB(); /* in case another processor initialized lock */
280  if (!__kmp_is_tas_lock_nestable(lck)) {
281  KMP_FATAL(LockSimpleUsedAsNestable, func);
282  }
283  if (__kmp_get_tas_lock_owner(lck) == -1) {
284  KMP_FATAL(LockUnsettingFree, func);
285  }
286  if (__kmp_get_tas_lock_owner(lck) != gtid) {
287  KMP_FATAL(LockUnsettingSetByAnother, func);
288  }
289  return __kmp_release_nested_tas_lock(lck, gtid);
290 }
291 
292 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
293  __kmp_init_tas_lock(lck);
294  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
295 }
296 
297 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
298  __kmp_init_nested_tas_lock(lck);
299 }
300 
301 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
302  __kmp_destroy_tas_lock(lck);
303  lck->lk.depth_locked = 0;
304 }
305 
306 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
307  char const *const func = "omp_destroy_nest_lock";
308  if (!__kmp_is_tas_lock_nestable(lck)) {
309  KMP_FATAL(LockSimpleUsedAsNestable, func);
310  }
311  if (__kmp_get_tas_lock_owner(lck) != -1) {
312  KMP_FATAL(LockStillOwned, func);
313  }
314  __kmp_destroy_nested_tas_lock(lck);
315 }
316 
317 #if KMP_USE_FUTEX
318 
319 /* ------------------------------------------------------------------------ */
320 /* futex locks */
321 
322 // futex locks are really just test and set locks, with a different method
323 // of handling contention. They take the same amount of space as test and
324 // set locks, and are allocated the same way (i.e. use the area allocated by
325 // the compiler for non-nested locks / allocate nested locks on the heap).
326 
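// Encoding used by the futex functions below: lk.poll holds (gtid + 1) << 1
// for the owning thread (see __kmp_get_futex_lock_owner), with bit 0 serving
// as a "waiters present" flag. A contending thread sets bit 0 and blocks in
// futex_wait; the releasing thread issues a futex_wake only if it observes
// that bit set.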
327 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
328  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
329 }
330 
331 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
332  return lck->lk.depth_locked != -1;
333 }
334 
335 __forceinline static int
336 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
337  kmp_int32 gtid_code = (gtid + 1) << 1;
338 
339  KMP_MB();
340 
341 #ifdef USE_LOCK_PROFILE
342  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
343  if ((curr != 0) && (curr != gtid_code))
344  __kmp_printf("LOCK CONTENTION: %p\n", lck);
345 /* else __kmp_printf( "." );*/
346 #endif /* USE_LOCK_PROFILE */
347 
348  KMP_FSYNC_PREPARE(lck);
349  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
350  lck, lck->lk.poll, gtid));
351 
352  kmp_int32 poll_val;
353 
354  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
355  &(lck->lk.poll), KMP_LOCK_FREE(futex),
356  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
357 
358  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
359  KA_TRACE(
360  1000,
361  ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
362  lck, gtid, poll_val, cond));
363 
364  // NOTE: if you try to use the following condition for this branch
365  //
366  // if ( poll_val & 1 == 0 )
367  //
368  // Then the 12.0 compiler has a bug where the following block will
369  // always be skipped, regardless of the value of the LSB of poll_val.
370  if (!cond) {
371  // Try to set the lsb in the poll to indicate to the owner
372  // thread that they need to wake this thread up.
373  if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
374  poll_val | KMP_LOCK_BUSY(1, futex))) {
375  KA_TRACE(
376  1000,
377  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
378  lck, lck->lk.poll, gtid));
379  continue;
380  }
381  poll_val |= KMP_LOCK_BUSY(1, futex);
382 
383  KA_TRACE(1000,
384  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
385  lck->lk.poll, gtid));
386  }
387 
388  KA_TRACE(
389  1000,
390  ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
391  lck, gtid, poll_val));
392 
393  kmp_int32 rc;
394  if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
395  NULL, 0)) != 0) {
396  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
397  "failed (rc=%d errno=%d)\n",
398  lck, gtid, poll_val, rc, errno));
399  continue;
400  }
401 
402  KA_TRACE(1000,
403  ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
404  lck, gtid, poll_val));
405  // This thread has now done a successful futex wait call and was entered on
406  // the OS futex queue. We must now perform a futex wake call when releasing
407  // the lock, as we have no idea how many other threads are in the queue.
408  gtid_code |= 1;
409  }
410 
411  KMP_FSYNC_ACQUIRED(lck);
412  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
413  lck->lk.poll, gtid));
414  return KMP_LOCK_ACQUIRED_FIRST;
415 }
416 
417 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
418  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
419  ANNOTATE_FUTEX_ACQUIRED(lck);
420  return retval;
421 }
422 
423 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
424  kmp_int32 gtid) {
425  char const *const func = "omp_set_lock";
426  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
427  __kmp_is_futex_lock_nestable(lck)) {
428  KMP_FATAL(LockNestableUsedAsSimple, func);
429  }
430  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
431  KMP_FATAL(LockIsAlreadyOwned, func);
432  }
433  return __kmp_acquire_futex_lock(lck, gtid);
434 }
435 
436 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
437  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
438  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
439  KMP_FSYNC_ACQUIRED(lck);
440  return TRUE;
441  }
442  return FALSE;
443 }
444 
445 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
446  kmp_int32 gtid) {
447  char const *const func = "omp_test_lock";
448  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
449  __kmp_is_futex_lock_nestable(lck)) {
450  KMP_FATAL(LockNestableUsedAsSimple, func);
451  }
452  return __kmp_test_futex_lock(lck, gtid);
453 }
454 
455 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
456  KMP_MB(); /* Flush all pending memory write invalidates. */
457 
458  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
459  lck, lck->lk.poll, gtid));
460 
461  KMP_FSYNC_RELEASING(lck);
462  ANNOTATE_FUTEX_RELEASED(lck);
463 
464  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
465 
466  KA_TRACE(1000,
467  ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
468  lck, gtid, poll_val));
469 
470  if (KMP_LOCK_STRIP(poll_val) & 1) {
471  KA_TRACE(1000,
472  ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
473  lck, gtid));
474  syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
475  NULL, NULL, 0);
476  }
477 
478  KMP_MB(); /* Flush all pending memory write invalidates. */
479 
480  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
481  lck->lk.poll, gtid));
482 
483  KMP_YIELD(TCR_4(__kmp_nth) >
484  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
485  return KMP_LOCK_RELEASED;
486 }
487 
488 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
489  kmp_int32 gtid) {
490  char const *const func = "omp_unset_lock";
491  KMP_MB(); /* in case another processor initialized lock */
492  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
493  __kmp_is_futex_lock_nestable(lck)) {
494  KMP_FATAL(LockNestableUsedAsSimple, func);
495  }
496  if (__kmp_get_futex_lock_owner(lck) == -1) {
497  KMP_FATAL(LockUnsettingFree, func);
498  }
499  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
500  (__kmp_get_futex_lock_owner(lck) != gtid)) {
501  KMP_FATAL(LockUnsettingSetByAnother, func);
502  }
503  return __kmp_release_futex_lock(lck, gtid);
504 }
505 
506 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
507  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
508 }
509 
510 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
511  __kmp_init_futex_lock(lck);
512 }
513 
514 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
515 
516 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
517  char const *const func = "omp_destroy_lock";
518  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
519  __kmp_is_futex_lock_nestable(lck)) {
520  KMP_FATAL(LockNestableUsedAsSimple, func);
521  }
522  if (__kmp_get_futex_lock_owner(lck) != -1) {
523  KMP_FATAL(LockStillOwned, func);
524  }
525  __kmp_destroy_futex_lock(lck);
526 }
527 
528 // nested futex locks
529 
530 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
531  KMP_DEBUG_ASSERT(gtid >= 0);
532 
533  if (__kmp_get_futex_lock_owner(lck) == gtid) {
534  lck->lk.depth_locked += 1;
535  return KMP_LOCK_ACQUIRED_NEXT;
536  } else {
537  __kmp_acquire_futex_lock_timed_template(lck, gtid);
538  ANNOTATE_FUTEX_ACQUIRED(lck);
539  lck->lk.depth_locked = 1;
540  return KMP_LOCK_ACQUIRED_FIRST;
541  }
542 }
543 
544 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
545  kmp_int32 gtid) {
546  char const *const func = "omp_set_nest_lock";
547  if (!__kmp_is_futex_lock_nestable(lck)) {
548  KMP_FATAL(LockSimpleUsedAsNestable, func);
549  }
550  return __kmp_acquire_nested_futex_lock(lck, gtid);
551 }
552 
553 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
554  int retval;
555 
556  KMP_DEBUG_ASSERT(gtid >= 0);
557 
558  if (__kmp_get_futex_lock_owner(lck) == gtid) {
559  retval = ++lck->lk.depth_locked;
560  } else if (!__kmp_test_futex_lock(lck, gtid)) {
561  retval = 0;
562  } else {
563  KMP_MB();
564  retval = lck->lk.depth_locked = 1;
565  }
566  return retval;
567 }
568 
569 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
570  kmp_int32 gtid) {
571  char const *const func = "omp_test_nest_lock";
572  if (!__kmp_is_futex_lock_nestable(lck)) {
573  KMP_FATAL(LockSimpleUsedAsNestable, func);
574  }
575  return __kmp_test_nested_futex_lock(lck, gtid);
576 }
577 
578 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
579  KMP_DEBUG_ASSERT(gtid >= 0);
580 
581  KMP_MB();
582  if (--(lck->lk.depth_locked) == 0) {
583  __kmp_release_futex_lock(lck, gtid);
584  return KMP_LOCK_RELEASED;
585  }
586  return KMP_LOCK_STILL_HELD;
587 }
588 
589 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
590  kmp_int32 gtid) {
591  char const *const func = "omp_unset_nest_lock";
592  KMP_MB(); /* in case another processor initialized lock */
593  if (!__kmp_is_futex_lock_nestable(lck)) {
594  KMP_FATAL(LockSimpleUsedAsNestable, func);
595  }
596  if (__kmp_get_futex_lock_owner(lck) == -1) {
597  KMP_FATAL(LockUnsettingFree, func);
598  }
599  if (__kmp_get_futex_lock_owner(lck) != gtid) {
600  KMP_FATAL(LockUnsettingSetByAnother, func);
601  }
602  return __kmp_release_nested_futex_lock(lck, gtid);
603 }
604 
605 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
606  __kmp_init_futex_lock(lck);
607  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
608 }
609 
610 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
611  __kmp_init_nested_futex_lock(lck);
612 }
613 
614 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
615  __kmp_destroy_futex_lock(lck);
616  lck->lk.depth_locked = 0;
617 }
618 
619 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
620  char const *const func = "omp_destroy_nest_lock";
621  if (!__kmp_is_futex_lock_nestable(lck)) {
622  KMP_FATAL(LockSimpleUsedAsNestable, func);
623  }
624  if (__kmp_get_futex_lock_owner(lck) != -1) {
625  KMP_FATAL(LockStillOwned, func);
626  }
627  __kmp_destroy_nested_futex_lock(lck);
628 }
629 
630 #endif // KMP_USE_FUTEX
631 
632 /* ------------------------------------------------------------------------ */
633 /* ticket (bakery) locks */
634 
635 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
636  return std::atomic_load_explicit(&lck->lk.owner_id,
637  std::memory_order_relaxed) -
638  1;
639 }
640 
641 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
642  return std::atomic_load_explicit(&lck->lk.depth_locked,
643  std::memory_order_relaxed) != -1;
644 }
645 
646 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
647  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
648  std::memory_order_acquire) == my_ticket;
649 }
650 
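// The acquire path below is the classic ticket/bakery scheme: each acquirer
// atomically increments lk.next_ticket to draw a ticket, then waits until
// lk.now_serving reaches that ticket; __kmp_release_ticket_lock bumps
// now_serving to admit the next waiter in FIFO order.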
651 __forceinline static int
652 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
653  kmp_int32 gtid) {
654  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
655  &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
656 
657 #ifdef USE_LOCK_PROFILE
658  if (std::atomic_load_explicit(&lck->lk.now_serving,
659  std::memory_order_relaxed) != my_ticket)
660  __kmp_printf("LOCK CONTENTION: %p\n", lck);
661 /* else __kmp_printf( "." );*/
662 #endif /* USE_LOCK_PROFILE */
663 
664  if (std::atomic_load_explicit(&lck->lk.now_serving,
665  std::memory_order_acquire) == my_ticket) {
666  return KMP_LOCK_ACQUIRED_FIRST;
667  }
668  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
669  return KMP_LOCK_ACQUIRED_FIRST;
670 }
671 
672 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
673  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
674  ANNOTATE_TICKET_ACQUIRED(lck);
675  return retval;
676 }
677 
678 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
679  kmp_int32 gtid) {
680  char const *const func = "omp_set_lock";
681 
682  if (!std::atomic_load_explicit(&lck->lk.initialized,
683  std::memory_order_relaxed)) {
684  KMP_FATAL(LockIsUninitialized, func);
685  }
686  if (lck->lk.self != lck) {
687  KMP_FATAL(LockIsUninitialized, func);
688  }
689  if (__kmp_is_ticket_lock_nestable(lck)) {
690  KMP_FATAL(LockNestableUsedAsSimple, func);
691  }
692  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
693  KMP_FATAL(LockIsAlreadyOwned, func);
694  }
695 
696  __kmp_acquire_ticket_lock(lck, gtid);
697 
698  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
699  std::memory_order_relaxed);
700  return KMP_LOCK_ACQUIRED_FIRST;
701 }
702 
703 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
704  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
705  std::memory_order_relaxed);
706 
707  if (std::atomic_load_explicit(&lck->lk.now_serving,
708  std::memory_order_relaxed) == my_ticket) {
709  kmp_uint32 next_ticket = my_ticket + 1;
710  if (std::atomic_compare_exchange_strong_explicit(
711  &lck->lk.next_ticket, &my_ticket, next_ticket,
712  std::memory_order_acquire, std::memory_order_acquire)) {
713  return TRUE;
714  }
715  }
716  return FALSE;
717 }
718 
719 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
720  kmp_int32 gtid) {
721  char const *const func = "omp_test_lock";
722 
723  if (!std::atomic_load_explicit(&lck->lk.initialized,
724  std::memory_order_relaxed)) {
725  KMP_FATAL(LockIsUninitialized, func);
726  }
727  if (lck->lk.self != lck) {
728  KMP_FATAL(LockIsUninitialized, func);
729  }
730  if (__kmp_is_ticket_lock_nestable(lck)) {
731  KMP_FATAL(LockNestableUsedAsSimple, func);
732  }
733 
734  int retval = __kmp_test_ticket_lock(lck, gtid);
735 
736  if (retval) {
737  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
738  std::memory_order_relaxed);
739  }
740  return retval;
741 }
742 
743 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
744  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
745  std::memory_order_relaxed) -
746  std::atomic_load_explicit(&lck->lk.now_serving,
747  std::memory_order_relaxed);
748 
749  ANNOTATE_TICKET_RELEASED(lck);
750  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
751  std::memory_order_release);
752 
753  KMP_YIELD(distance >
754  (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
755  return KMP_LOCK_RELEASED;
756 }
757 
758 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
759  kmp_int32 gtid) {
760  char const *const func = "omp_unset_lock";
761 
762  if (!std::atomic_load_explicit(&lck->lk.initialized,
763  std::memory_order_relaxed)) {
764  KMP_FATAL(LockIsUninitialized, func);
765  }
766  if (lck->lk.self != lck) {
767  KMP_FATAL(LockIsUninitialized, func);
768  }
769  if (__kmp_is_ticket_lock_nestable(lck)) {
770  KMP_FATAL(LockNestableUsedAsSimple, func);
771  }
772  if (__kmp_get_ticket_lock_owner(lck) == -1) {
773  KMP_FATAL(LockUnsettingFree, func);
774  }
775  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
776  (__kmp_get_ticket_lock_owner(lck) != gtid)) {
777  KMP_FATAL(LockUnsettingSetByAnother, func);
778  }
779  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
780  return __kmp_release_ticket_lock(lck, gtid);
781 }
782 
783 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
784  lck->lk.location = NULL;
785  lck->lk.self = lck;
786  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
787  std::memory_order_relaxed);
788  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
789  std::memory_order_relaxed);
790  std::atomic_store_explicit(
791  &lck->lk.owner_id, 0,
792  std::memory_order_relaxed); // no thread owns the lock.
793  std::atomic_store_explicit(
794  &lck->lk.depth_locked, -1,
795  std::memory_order_relaxed); // -1 => not a nested lock.
796  std::atomic_store_explicit(&lck->lk.initialized, true,
797  std::memory_order_release);
798 }
799 
800 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
801  __kmp_init_ticket_lock(lck);
802 }
803 
804 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
805  std::atomic_store_explicit(&lck->lk.initialized, false,
806  std::memory_order_release);
807  lck->lk.self = NULL;
808  lck->lk.location = NULL;
809  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
810  std::memory_order_relaxed);
811  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
812  std::memory_order_relaxed);
813  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
814  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
815  std::memory_order_relaxed);
816 }
817 
818 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
819  char const *const func = "omp_destroy_lock";
820 
821  if (!std::atomic_load_explicit(&lck->lk.initialized,
822  std::memory_order_relaxed)) {
823  KMP_FATAL(LockIsUninitialized, func);
824  }
825  if (lck->lk.self != lck) {
826  KMP_FATAL(LockIsUninitialized, func);
827  }
828  if (__kmp_is_ticket_lock_nestable(lck)) {
829  KMP_FATAL(LockNestableUsedAsSimple, func);
830  }
831  if (__kmp_get_ticket_lock_owner(lck) != -1) {
832  KMP_FATAL(LockStillOwned, func);
833  }
834  __kmp_destroy_ticket_lock(lck);
835 }
836 
837 // nested ticket locks
838 
839 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
840  KMP_DEBUG_ASSERT(gtid >= 0);
841 
842  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
843  std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
844  std::memory_order_relaxed);
845  return KMP_LOCK_ACQUIRED_NEXT;
846  } else {
847  __kmp_acquire_ticket_lock_timed_template(lck, gtid);
848  ANNOTATE_TICKET_ACQUIRED(lck);
849  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
850  std::memory_order_relaxed);
851  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
852  std::memory_order_relaxed);
853  return KMP_LOCK_ACQUIRED_FIRST;
854  }
855 }
856 
857 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
858  kmp_int32 gtid) {
859  char const *const func = "omp_set_nest_lock";
860 
861  if (!std::atomic_load_explicit(&lck->lk.initialized,
862  std::memory_order_relaxed)) {
863  KMP_FATAL(LockIsUninitialized, func);
864  }
865  if (lck->lk.self != lck) {
866  KMP_FATAL(LockIsUninitialized, func);
867  }
868  if (!__kmp_is_ticket_lock_nestable(lck)) {
869  KMP_FATAL(LockSimpleUsedAsNestable, func);
870  }
871  return __kmp_acquire_nested_ticket_lock(lck, gtid);
872 }
873 
874 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
875  int retval;
876 
877  KMP_DEBUG_ASSERT(gtid >= 0);
878 
879  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
880  retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
881  std::memory_order_relaxed) +
882  1;
883  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
884  retval = 0;
885  } else {
886  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
887  std::memory_order_relaxed);
888  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
889  std::memory_order_relaxed);
890  retval = 1;
891  }
892  return retval;
893 }
894 
895 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
896  kmp_int32 gtid) {
897  char const *const func = "omp_test_nest_lock";
898 
899  if (!std::atomic_load_explicit(&lck->lk.initialized,
900  std::memory_order_relaxed)) {
901  KMP_FATAL(LockIsUninitialized, func);
902  }
903  if (lck->lk.self != lck) {
904  KMP_FATAL(LockIsUninitialized, func);
905  }
906  if (!__kmp_is_ticket_lock_nestable(lck)) {
907  KMP_FATAL(LockSimpleUsedAsNestable, func);
908  }
909  return __kmp_test_nested_ticket_lock(lck, gtid);
910 }
911 
912 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
913  KMP_DEBUG_ASSERT(gtid >= 0);
914 
915  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
916  std::memory_order_relaxed) -
917  1) == 0) {
918  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
919  __kmp_release_ticket_lock(lck, gtid);
920  return KMP_LOCK_RELEASED;
921  }
922  return KMP_LOCK_STILL_HELD;
923 }
924 
925 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
926  kmp_int32 gtid) {
927  char const *const func = "omp_unset_nest_lock";
928 
929  if (!std::atomic_load_explicit(&lck->lk.initialized,
930  std::memory_order_relaxed)) {
931  KMP_FATAL(LockIsUninitialized, func);
932  }
933  if (lck->lk.self != lck) {
934  KMP_FATAL(LockIsUninitialized, func);
935  }
936  if (!__kmp_is_ticket_lock_nestable(lck)) {
937  KMP_FATAL(LockSimpleUsedAsNestable, func);
938  }
939  if (__kmp_get_ticket_lock_owner(lck) == -1) {
940  KMP_FATAL(LockUnsettingFree, func);
941  }
942  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
943  KMP_FATAL(LockUnsettingSetByAnother, func);
944  }
945  return __kmp_release_nested_ticket_lock(lck, gtid);
946 }
947 
948 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
949  __kmp_init_ticket_lock(lck);
950  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
951  std::memory_order_relaxed);
952  // >= 0 for nestable locks, -1 for simple locks
953 }
954 
955 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
956  __kmp_init_nested_ticket_lock(lck);
957 }
958 
959 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
960  __kmp_destroy_ticket_lock(lck);
961  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
962  std::memory_order_relaxed);
963 }
964 
965 static void
966 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
967  char const *const func = "omp_destroy_nest_lock";
968 
969  if (!std::atomic_load_explicit(&lck->lk.initialized,
970  std::memory_order_relaxed)) {
971  KMP_FATAL(LockIsUninitialized, func);
972  }
973  if (lck->lk.self != lck) {
974  KMP_FATAL(LockIsUninitialized, func);
975  }
976  if (!__kmp_is_ticket_lock_nestable(lck)) {
977  KMP_FATAL(LockSimpleUsedAsNestable, func);
978  }
979  if (__kmp_get_ticket_lock_owner(lck) != -1) {
980  KMP_FATAL(LockStillOwned, func);
981  }
982  __kmp_destroy_nested_ticket_lock(lck);
983 }
984 
985 // access functions to fields which don't exist for all lock kinds.
986 
987 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
988  return std::atomic_load_explicit(&lck->lk.initialized,
989  std::memory_order_relaxed) &&
990  (lck->lk.self == lck);
991 }
992 
993 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
994  return lck->lk.location;
995 }
996 
997 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
998  const ident_t *loc) {
999  lck->lk.location = loc;
1000 }
1001 
1002 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
1003  return lck->lk.flags;
1004 }
1005 
1006 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
1007  kmp_lock_flags_t flags) {
1008  lck->lk.flags = flags;
1009 }
1010 
1011 /* ------------------------------------------------------------------------ */
1012 /* queuing locks */
1013 
1014 /* First the states
1015  (head,tail) = 0, 0 means lock is unheld, nobody on queue
1016  UINT_MAX or -1, 0 means lock is held, nobody on queue
1017  h, h means lock held or about to transition,
1018  1 element on queue
1019  h, t h <> t, means lock is held or about to
1020  transition, >1 elements on queue
1021 
1022  Now the transitions
1023  Acquire(0,0) = -1 ,0
1024  Release(0,0) = Error
1025  Acquire(-1,0) = h ,h h > 0
1026  Release(-1,0) = 0 ,0
1027  Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1028  Release(h,h) = -1 ,0 h > 0
1029  Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1030  Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1031 
1032  And pictorially
1033 
1034            +-----+
1035            | 0, 0|------- release -------> Error
1036            +-----+
1037              |  ^
1038       acquire|  |release
1039              |  |
1040              |  |
1041              v  |
1042            +-----+
1043            |-1, 0|
1044            +-----+
1045              |  ^
1046       acquire|  |release
1047              |  |
1048              |  |
1049              v  |
1050            +-----+
1051            | h, h|
1052            +-----+
1053              |  ^
1054       acquire|  |release
1055              |  |
1056              |  |
1057              v  |
1058            +-----+
1059            | h, t|----- acquire, release loopback ---+
1060            +-----+                                   |
1061                 ^                                    |
1062                 |                                    |
1063                 +------------------------------------+
1064  */
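
/* Worked example of the transitions above (ids are gtid+1): from (0,0), thread
   A acquires the free lock, giving (-1,0).  Thread B then contends and
   enqueues itself, giving (B,B); a further contender C appends to the tail,
   giving (B,C).  When A releases, B is dequeued and becomes the holder,
   leaving (C,C); when B releases, C is dequeued, leaving (-1,0); C's final
   release restores (0,0). */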
1065 
1066 #ifdef DEBUG_QUEUING_LOCKS
1067 
1068 /* Stuff for circular trace buffer */
1069 #define TRACE_BUF_ELE 1024
1070 static char traces[TRACE_BUF_ELE][128] = {0};
1071 static int tc = 0;
1072 #define TRACE_LOCK(X, Y) \
1073  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1074 #define TRACE_LOCK_T(X, Y, Z) \
1075  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1076 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1077  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1078  Z, Q);
1079 
1080 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1081  kmp_queuing_lock_t *lck, kmp_int32 head_id,
1082  kmp_int32 tail_id) {
1083  kmp_int32 t, i;
1084 
1085  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1086 
1087  i = tc % TRACE_BUF_ELE;
1088  __kmp_printf_no_lock("%s\n", traces[i]);
1089  i = (i + 1) % TRACE_BUF_ELE;
1090  while (i != (tc % TRACE_BUF_ELE)) {
1091  __kmp_printf_no_lock("%s", traces[i]);
1092  i = (i + 1) % TRACE_BUF_ELE;
1093  }
1094  __kmp_printf_no_lock("\n");
1095 
1096  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1097  "next_wait:%d, head_id:%d, tail_id:%d\n",
1098  gtid + 1, this_thr->th.th_spin_here,
1099  this_thr->th.th_next_waiting, head_id, tail_id);
1100 
1101  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1102 
1103  if (lck->lk.head_id >= 1) {
1104  t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1105  while (t > 0) {
1106  __kmp_printf_no_lock("-> %d ", t);
1107  t = __kmp_threads[t - 1]->th.th_next_waiting;
1108  }
1109  }
1110  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1111  __kmp_printf_no_lock("\n\n");
1112 }
1113 
1114 #endif /* DEBUG_QUEUING_LOCKS */
1115 
1116 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1117  return TCR_4(lck->lk.owner_id) - 1;
1118 }
1119 
1120 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1121  return lck->lk.depth_locked != -1;
1122 }
1123 
1124 /* Acquire a lock using the queuing lock implementation */
1125 template <bool takeTime>
1126 /* [TLW] The unused template above is left behind because of what BEB believes
1127  is a potential compiler problem with __forceinline. */
1128 __forceinline static int
1129 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1130  kmp_int32 gtid) {
1131  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1132  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1133  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1134  volatile kmp_uint32 *spin_here_p;
1135  kmp_int32 need_mf = 1;
1136 
1137 #if OMPT_SUPPORT
1138  omp_state_t prev_state = omp_state_undefined;
1139 #endif
1140 
1141  KA_TRACE(1000,
1142  ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1143 
1144  KMP_FSYNC_PREPARE(lck);
1145  KMP_DEBUG_ASSERT(this_thr != NULL);
1146  spin_here_p = &this_thr->th.th_spin_here;
1147 
1148 #ifdef DEBUG_QUEUING_LOCKS
1149  TRACE_LOCK(gtid + 1, "acq ent");
1150  if (*spin_here_p)
1151  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1152  if (this_thr->th.th_next_waiting != 0)
1153  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1154 #endif
1155  KMP_DEBUG_ASSERT(!*spin_here_p);
1156  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1157 
1158  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1159  head_id_p that may follow, not just in execution order, but also in
1160  visibility order. This way, when a releasing thread observes the changes to
1161  the queue by this thread, it can rightly assume that spin_here_p has
1162  already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1163  not premature. If the releasing thread sets spin_here_p to FALSE before
1164  this thread sets it to TRUE, this thread will hang. */
1165  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1166 
1167  while (1) {
1168  kmp_int32 enqueued;
1169  kmp_int32 head;
1170  kmp_int32 tail;
1171 
1172  head = *head_id_p;
1173 
1174  switch (head) {
1175 
1176  case -1: {
1177 #ifdef DEBUG_QUEUING_LOCKS
1178  tail = *tail_id_p;
1179  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1180 #endif
1181  tail = 0; /* to make sure next link asynchronously read is not set
1182  accidentally; this assignment prevents us from entering the
1183  if ( t > 0 ) condition in the enqueued case below, which is not
1184  necessary for this state transition */
1185 
1186  need_mf = 0;
1187  /* try (-1,0)->(tid,tid) */
1188  enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1189  KMP_PACK_64(-1, 0),
1190  KMP_PACK_64(gtid + 1, gtid + 1));
1191 #ifdef DEBUG_QUEUING_LOCKS
1192  if (enqueued)
1193  TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1194 #endif
1195  } break;
1196 
1197  default: {
1198  tail = *tail_id_p;
1199  KMP_DEBUG_ASSERT(tail != gtid + 1);
1200 
1201 #ifdef DEBUG_QUEUING_LOCKS
1202  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1203 #endif
1204 
1205  if (tail == 0) {
1206  enqueued = FALSE;
1207  } else {
1208  need_mf = 0;
1209  /* try (h,t) or (h,h)->(h,tid) */
1210  enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1211 
1212 #ifdef DEBUG_QUEUING_LOCKS
1213  if (enqueued)
1214  TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1215 #endif
1216  }
1217  } break;
1218 
1219  case 0: /* empty queue */
1220  {
1221  kmp_int32 grabbed_lock;
1222 
1223 #ifdef DEBUG_QUEUING_LOCKS
1224  tail = *tail_id_p;
1225  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1226 #endif
1227  /* try (0,0)->(-1,0) */
1228 
1229  /* only legal transition out of head = 0 is head = -1 with no change to
1230  * tail */
1231  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1232 
1233  if (grabbed_lock) {
1234 
1235  *spin_here_p = FALSE;
1236 
1237  KA_TRACE(
1238  1000,
1239  ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1240  lck, gtid));
1241 #ifdef DEBUG_QUEUING_LOCKS
1242  TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1243 #endif
1244 
1245 #if OMPT_SUPPORT
1246  if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
1247  /* change the state before clearing wait_id */
1248  this_thr->th.ompt_thread_info.state = prev_state;
1249  this_thr->th.ompt_thread_info.wait_id = 0;
1250  }
1251 #endif
1252 
1253  KMP_FSYNC_ACQUIRED(lck);
1254  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1255  }
1256  enqueued = FALSE;
1257  } break;
1258  }
1259 
1260 #if OMPT_SUPPORT
1261  if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
1262  /* this thread will spin; set wait_id before entering wait state */
1263  prev_state = this_thr->th.ompt_thread_info.state;
1264  this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1265  this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
1266  }
1267 #endif
1268 
1269  if (enqueued) {
1270  if (tail > 0) {
1271  kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1272  KMP_ASSERT(tail_thr != NULL);
1273  tail_thr->th.th_next_waiting = gtid + 1;
1274  /* corresponding wait for this write in release code */
1275  }
1276  KA_TRACE(1000,
1277  ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1278  lck, gtid));
1279 
1280  /* ToDo: May want to consider using __kmp_wait_sleep or something that
1281  sleeps for throughput only here. */
1282  KMP_MB();
1283  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286  TRACE_LOCK(gtid + 1, "acq spin");
1287 
1288  if (this_thr->th.th_next_waiting != 0)
1289  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1290 #endif
1291  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1292  KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1293  "waiting on queue\n",
1294  lck, gtid));
1295 
1296 #ifdef DEBUG_QUEUING_LOCKS
1297  TRACE_LOCK(gtid + 1, "acq exit 2");
1298 #endif
1299 
1300 #if OMPT_SUPPORT
1301  /* change the state before clearing wait_id */
1302  this_thr->th.ompt_thread_info.state = prev_state;
1303  this_thr->th.ompt_thread_info.wait_id = 0;
1304 #endif
1305 
1306  /* got lock, we were dequeued by the thread that released lock */
1307  return KMP_LOCK_ACQUIRED_FIRST;
1308  }
1309 
1310  /* Yield if number of threads > number of logical processors */
1311  /* ToDo: Not sure why this should only be in oversubscription case,
1312  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1313  KMP_YIELD(TCR_4(__kmp_nth) >
1314  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
1315 #ifdef DEBUG_QUEUING_LOCKS
1316  TRACE_LOCK(gtid + 1, "acq retry");
1317 #endif
1318  }
1319  KMP_ASSERT2(0, "should not get here");
1320  return KMP_LOCK_ACQUIRED_FIRST;
1321 }
1322 
1323 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1324  KMP_DEBUG_ASSERT(gtid >= 0);
1325 
1326  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1327  ANNOTATE_QUEUING_ACQUIRED(lck);
1328  return retval;
1329 }
1330 
1331 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1332  kmp_int32 gtid) {
1333  char const *const func = "omp_set_lock";
1334  if (lck->lk.initialized != lck) {
1335  KMP_FATAL(LockIsUninitialized, func);
1336  }
1337  if (__kmp_is_queuing_lock_nestable(lck)) {
1338  KMP_FATAL(LockNestableUsedAsSimple, func);
1339  }
1340  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1341  KMP_FATAL(LockIsAlreadyOwned, func);
1342  }
1343 
1344  __kmp_acquire_queuing_lock(lck, gtid);
1345 
1346  lck->lk.owner_id = gtid + 1;
1347  return KMP_LOCK_ACQUIRED_FIRST;
1348 }
1349 
1350 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1351  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1352  kmp_int32 head;
1353 #ifdef KMP_DEBUG
1354  kmp_info_t *this_thr;
1355 #endif
1356 
1357  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1358  KMP_DEBUG_ASSERT(gtid >= 0);
1359 #ifdef KMP_DEBUG
1360  this_thr = __kmp_thread_from_gtid(gtid);
1361  KMP_DEBUG_ASSERT(this_thr != NULL);
1362  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1363 #endif
1364 
1365  head = *head_id_p;
1366 
1367  if (head == 0) { /* nobody on queue, nobody holding */
1368  /* try (0,0)->(-1,0) */
1369  if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1370  KA_TRACE(1000,
1371  ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1372  KMP_FSYNC_ACQUIRED(lck);
1373  ANNOTATE_QUEUING_ACQUIRED(lck);
1374  return TRUE;
1375  }
1376  }
1377 
1378  KA_TRACE(1000,
1379  ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1380  return FALSE;
1381 }
1382 
1383 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1384  kmp_int32 gtid) {
1385  char const *const func = "omp_test_lock";
1386  if (lck->lk.initialized != lck) {
1387  KMP_FATAL(LockIsUninitialized, func);
1388  }
1389  if (__kmp_is_queuing_lock_nestable(lck)) {
1390  KMP_FATAL(LockNestableUsedAsSimple, func);
1391  }
1392 
1393  int retval = __kmp_test_queuing_lock(lck, gtid);
1394 
1395  if (retval) {
1396  lck->lk.owner_id = gtid + 1;
1397  }
1398  return retval;
1399 }
1400 
1401 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1402  kmp_info_t *this_thr;
1403  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1404  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1405 
1406  KA_TRACE(1000,
1407  ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1408  KMP_DEBUG_ASSERT(gtid >= 0);
1409  this_thr = __kmp_thread_from_gtid(gtid);
1410  KMP_DEBUG_ASSERT(this_thr != NULL);
1411 #ifdef DEBUG_QUEUING_LOCKS
1412  TRACE_LOCK(gtid + 1, "rel ent");
1413 
1414  if (this_thr->th.th_spin_here)
1415  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1416  if (this_thr->th.th_next_waiting != 0)
1417  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1418 #endif
1419  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1420  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1421 
1422  KMP_FSYNC_RELEASING(lck);
1423  ANNOTATE_QUEUING_RELEASED(lck);
1424 
1425  while (1) {
1426  kmp_int32 dequeued;
1427  kmp_int32 head;
1428  kmp_int32 tail;
1429 
1430  head = *head_id_p;
1431 
1432 #ifdef DEBUG_QUEUING_LOCKS
1433  tail = *tail_id_p;
1434  TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1435  if (head == 0)
1436  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1437 #endif
1438  KMP_DEBUG_ASSERT(head !=
1439  0); /* holding the lock, head must be -1 or queue head */
1440 
1441  if (head == -1) { /* nobody on queue */
1442  /* try (-1,0)->(0,0) */
1443  if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1444  KA_TRACE(
1445  1000,
1446  ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1447  lck, gtid));
1448 #ifdef DEBUG_QUEUING_LOCKS
1449  TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1450 #endif
1451 
1452 #if OMPT_SUPPORT
1453 /* nothing to do - no other thread is trying to shift blame */
1454 #endif
1455  return KMP_LOCK_RELEASED;
1456  }
1457  dequeued = FALSE;
1458  } else {
1459  KMP_MB();
1460  tail = *tail_id_p;
1461  if (head == tail) { /* only one thread on the queue */
1462 #ifdef DEBUG_QUEUING_LOCKS
1463  if (head <= 0)
1464  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1465 #endif
1466  KMP_DEBUG_ASSERT(head > 0);
1467 
1468  /* try (h,h)->(-1,0) */
1469  dequeued = KMP_COMPARE_AND_STORE_REL64(
1470  RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1471  KMP_PACK_64(-1, 0));
1472 #ifdef DEBUG_QUEUING_LOCKS
1473  TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1474 #endif
1475 
1476  } else {
1477  volatile kmp_int32 *waiting_id_p;
1478  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1479  KMP_DEBUG_ASSERT(head_thr != NULL);
1480  waiting_id_p = &head_thr->th.th_next_waiting;
1481 
1482 /* Does this require synchronous reads? */
1483 #ifdef DEBUG_QUEUING_LOCKS
1484  if (head <= 0 || tail <= 0)
1485  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1486 #endif
1487  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1488 
1489  /* try (h,t)->(h',t) or (t,t) */
1490  KMP_MB();
1491  /* make sure enqueuing thread has time to update next waiting thread
1492  * field */
1493  *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
1494  KMP_NEQ, NULL);
1495 #ifdef DEBUG_QUEUING_LOCKS
1496  TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1497 #endif
1498  dequeued = TRUE;
1499  }
1500  }
1501 
1502  if (dequeued) {
1503  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1504  KMP_DEBUG_ASSERT(head_thr != NULL);
1505 
1506 /* Does this require synchronous reads? */
1507 #ifdef DEBUG_QUEUING_LOCKS
1508  if (head <= 0 || tail <= 0)
1509  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1510 #endif
1511  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1512 
1513  /* For clean code only. The waiting thread is not actually released until
1514  th_spin_here is cleared below, which prevents a race with the acquire code. */
1515  head_thr->th.th_next_waiting = 0;
1516 #ifdef DEBUG_QUEUING_LOCKS
1517  TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1518 #endif
1519 
1520  KMP_MB();
1521  /* reset spin value */
1522  head_thr->th.th_spin_here = FALSE;
1523 
1524  KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1525  "dequeuing\n",
1526  lck, gtid));
1527 #ifdef DEBUG_QUEUING_LOCKS
1528  TRACE_LOCK(gtid + 1, "rel exit 2");
1529 #endif
1530  return KMP_LOCK_RELEASED;
1531  }
1532 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1533  threads */
1534 
1535 #ifdef DEBUG_QUEUING_LOCKS
1536  TRACE_LOCK(gtid + 1, "rel retry");
1537 #endif
1538 
1539  } /* while */
1540  KMP_ASSERT2(0, "should not get here");
1541  return KMP_LOCK_RELEASED;
1542 }
1543 
1544 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1545  kmp_int32 gtid) {
1546  char const *const func = "omp_unset_lock";
1547  KMP_MB(); /* in case another processor initialized lock */
1548  if (lck->lk.initialized != lck) {
1549  KMP_FATAL(LockIsUninitialized, func);
1550  }
1551  if (__kmp_is_queuing_lock_nestable(lck)) {
1552  KMP_FATAL(LockNestableUsedAsSimple, func);
1553  }
1554  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1555  KMP_FATAL(LockUnsettingFree, func);
1556  }
1557  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1558  KMP_FATAL(LockUnsettingSetByAnother, func);
1559  }
1560  lck->lk.owner_id = 0;
1561  return __kmp_release_queuing_lock(lck, gtid);
1562 }
1563 
1564 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1565  lck->lk.location = NULL;
1566  lck->lk.head_id = 0;
1567  lck->lk.tail_id = 0;
1568  lck->lk.next_ticket = 0;
1569  lck->lk.now_serving = 0;
1570  lck->lk.owner_id = 0; // no thread owns the lock.
1571  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1572  lck->lk.initialized = lck;
1573 
1574  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1575 }
1576 
1577 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1578  __kmp_init_queuing_lock(lck);
1579 }
1580 
1581 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1582  lck->lk.initialized = NULL;
1583  lck->lk.location = NULL;
1584  lck->lk.head_id = 0;
1585  lck->lk.tail_id = 0;
1586  lck->lk.next_ticket = 0;
1587  lck->lk.now_serving = 0;
1588  lck->lk.owner_id = 0;
1589  lck->lk.depth_locked = -1;
1590 }
1591 
1592 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1593  char const *const func = "omp_destroy_lock";
1594  if (lck->lk.initialized != lck) {
1595  KMP_FATAL(LockIsUninitialized, func);
1596  }
1597  if (__kmp_is_queuing_lock_nestable(lck)) {
1598  KMP_FATAL(LockNestableUsedAsSimple, func);
1599  }
1600  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1601  KMP_FATAL(LockStillOwned, func);
1602  }
1603  __kmp_destroy_queuing_lock(lck);
1604 }
1605 
1606 // nested queuing locks
1607 
1608 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1609  KMP_DEBUG_ASSERT(gtid >= 0);
1610 
1611  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1612  lck->lk.depth_locked += 1;
1613  return KMP_LOCK_ACQUIRED_NEXT;
1614  } else {
1615  __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1616  ANNOTATE_QUEUING_ACQUIRED(lck);
1617  KMP_MB();
1618  lck->lk.depth_locked = 1;
1619  KMP_MB();
1620  lck->lk.owner_id = gtid + 1;
1621  return KMP_LOCK_ACQUIRED_FIRST;
1622  }
1623 }
1624 
1625 static int
1626 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1627  kmp_int32 gtid) {
1628  char const *const func = "omp_set_nest_lock";
1629  if (lck->lk.initialized != lck) {
1630  KMP_FATAL(LockIsUninitialized, func);
1631  }
1632  if (!__kmp_is_queuing_lock_nestable(lck)) {
1633  KMP_FATAL(LockSimpleUsedAsNestable, func);
1634  }
1635  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1636 }
1637 
1638 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1639  int retval;
1640 
1641  KMP_DEBUG_ASSERT(gtid >= 0);
1642 
1643  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1644  retval = ++lck->lk.depth_locked;
1645  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1646  retval = 0;
1647  } else {
1648  KMP_MB();
1649  retval = lck->lk.depth_locked = 1;
1650  KMP_MB();
1651  lck->lk.owner_id = gtid + 1;
1652  }
1653  return retval;
1654 }
1655 
1656 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1657  kmp_int32 gtid) {
1658  char const *const func = "omp_test_nest_lock";
1659  if (lck->lk.initialized != lck) {
1660  KMP_FATAL(LockIsUninitialized, func);
1661  }
1662  if (!__kmp_is_queuing_lock_nestable(lck)) {
1663  KMP_FATAL(LockSimpleUsedAsNestable, func);
1664  }
1665  return __kmp_test_nested_queuing_lock(lck, gtid);
1666 }
1667 
1668 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1669  KMP_DEBUG_ASSERT(gtid >= 0);
1670 
1671  KMP_MB();
1672  if (--(lck->lk.depth_locked) == 0) {
1673  KMP_MB();
1674  lck->lk.owner_id = 0;
1675  __kmp_release_queuing_lock(lck, gtid);
1676  return KMP_LOCK_RELEASED;
1677  }
1678  return KMP_LOCK_STILL_HELD;
1679 }
1680 
1681 static int
1682 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1683  kmp_int32 gtid) {
1684  char const *const func = "omp_unset_nest_lock";
1685  KMP_MB(); /* in case another processor initialized lock */
1686  if (lck->lk.initialized != lck) {
1687  KMP_FATAL(LockIsUninitialized, func);
1688  }
1689  if (!__kmp_is_queuing_lock_nestable(lck)) {
1690  KMP_FATAL(LockSimpleUsedAsNestable, func);
1691  }
1692  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1693  KMP_FATAL(LockUnsettingFree, func);
1694  }
1695  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1696  KMP_FATAL(LockUnsettingSetByAnother, func);
1697  }
1698  return __kmp_release_nested_queuing_lock(lck, gtid);
1699 }
1700 
1701 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1702  __kmp_init_queuing_lock(lck);
1703  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1704 }
1705 
1706 static void
1707 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1708  __kmp_init_nested_queuing_lock(lck);
1709 }
1710 
1711 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1712  __kmp_destroy_queuing_lock(lck);
1713  lck->lk.depth_locked = 0;
1714 }
1715 
1716 static void
1717 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1718  char const *const func = "omp_destroy_nest_lock";
1719  if (lck->lk.initialized != lck) {
1720  KMP_FATAL(LockIsUninitialized, func);
1721  }
1722  if (!__kmp_is_queuing_lock_nestable(lck)) {
1723  KMP_FATAL(LockSimpleUsedAsNestable, func);
1724  }
1725  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1726  KMP_FATAL(LockStillOwned, func);
1727  }
1728  __kmp_destroy_nested_queuing_lock(lck);
1729 }
1730 
1731 // access functions to fields which don't exist for all lock kinds.
1732 
1733 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
1734  return lck == lck->lk.initialized;
1735 }
1736 
1737 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1738  return lck->lk.location;
1739 }
1740 
1741 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1742  const ident_t *loc) {
1743  lck->lk.location = loc;
1744 }
1745 
1746 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1747  return lck->lk.flags;
1748 }
1749 
1750 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1751  kmp_lock_flags_t flags) {
1752  lck->lk.flags = flags;
1753 }
1754 
1755 #if KMP_USE_ADAPTIVE_LOCKS
1756 
1757 /* RTM Adaptive locks */
1758 
1759 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1760 
1761 #include <immintrin.h>
1762 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1763 
1764 #else
1765 
1766 // Values from the status register after failed speculation.
1767 #define _XBEGIN_STARTED (~0u)
1768 #define _XABORT_EXPLICIT (1 << 0)
1769 #define _XABORT_RETRY (1 << 1)
1770 #define _XABORT_CONFLICT (1 << 2)
1771 #define _XABORT_CAPACITY (1 << 3)
1772 #define _XABORT_DEBUG (1 << 4)
1773 #define _XABORT_NESTED (1 << 5)
1774 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1775 
1776 // Aborts for which it's worth trying again immediately
1777 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1778 
1779 #define STRINGIZE_INTERNAL(arg) #arg
1780 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1781 
1782 // Access to RTM instructions
1783 /* A version of XBegin which returns -1 when speculation starts successfully,
1784  and the value of EAX on an abort. This is the same definition as the compiler
1785  intrinsic that will be supported at some point. */
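// The raw byte sequence 0xC7 0xF8 <rel32> below encodes XBEGIN directly, so this
// compiles even with assemblers that do not know the RTM mnemonics.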
1786 static __inline int _xbegin() {
1787  int res = -1;
1788 
1789 #if KMP_OS_WINDOWS
1790 #if KMP_ARCH_X86_64
1791  _asm {
1792  _emit 0xC7
1793  _emit 0xF8
1794  _emit 2
1795  _emit 0
1796  _emit 0
1797  _emit 0
1798  jmp L2
1799  mov res, eax
1800  L2:
1801  }
1802 #else /* IA32 */
1803  _asm {
1804  _emit 0xC7
1805  _emit 0xF8
1806  _emit 2
1807  _emit 0
1808  _emit 0
1809  _emit 0
1810  jmp L2
1811  mov res, eax
1812  L2:
1813  }
1814 #endif // KMP_ARCH_X86_64
1815 #else
1816  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1817  returned in %eax(%rax) on abort. Other register values are restored, so
1818  don't need to be killed.
1819 
1820  We must also mark 'res' as an input and an output, since otherwise
1821  'res=-1' may be dropped as being dead, whereas we do need the assignment on
1822  the successful (i.e., non-abort) path. */
1823  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1824  " .long 1f-1b-6\n"
1825  " jmp 2f\n"
1826  "1: movl %%eax,%0\n"
1827  "2:"
1828  : "+r"(res)::"memory", "%eax");
1829 #endif // KMP_OS_WINDOWS
1830  return res;
1831 }
1832 
1833 /* Transaction end */
1834 static __inline void _xend() {
1835 #if KMP_OS_WINDOWS
1836  __asm {
1837  _emit 0x0f
1838  _emit 0x01
1839  _emit 0xd5
1840  }
1841 #else
1842  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1843 #endif
1844 }
1845 
1846 /* This is a macro; the argument must be a single-byte constant which can be
1847  evaluated by the inline assembler, since it is emitted as a byte into the
1848  assembly code. */
1849 // clang-format off
1850 #if KMP_OS_WINDOWS
1851 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1852 #else
1853 #define _xabort(ARG) \
1854  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1855 #endif
1856 // clang-format on
1857 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1858 
1859 // Statistics are collected for testing purposes
1860 #if KMP_DEBUG_ADAPTIVE_LOCKS
1861 
1862 // We accumulate speculative lock statistics when the lock is destroyed. We
1863 // keep locks that haven't been destroyed in the liveLocks list so that we can
1864 // grab their statistics too.
1865 static kmp_adaptive_lock_statistics_t destroyedStats;
1866 
1867 // To hold the list of live locks.
1868 static kmp_adaptive_lock_info_t liveLocks;
1869 
1870 // A lock so we can safely update the list of locks.
1871 static kmp_bootstrap_lock_t chain_lock;
1872 
1873 // Initialize the list of stats.
1874 void __kmp_init_speculative_stats() {
1875  kmp_adaptive_lock_info_t *lck = &liveLocks;
1876 
1877  memset((void *)&(lck->stats), 0, sizeof(lck->stats));
1878  lck->stats.next = lck;
1879  lck->stats.prev = lck;
1880 
1881  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1882  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1883 
1884  __kmp_init_bootstrap_lock(&chain_lock);
1885 }
1886 
1887 // Insert the lock into the circular list
1888 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1889  __kmp_acquire_bootstrap_lock(&chain_lock);
1890 
1891  lck->stats.next = liveLocks.stats.next;
1892  lck->stats.prev = &liveLocks;
1893 
1894  liveLocks.stats.next = lck;
1895  lck->stats.next->stats.prev = lck;
1896 
1897  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1898  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1899 
1900  __kmp_release_bootstrap_lock(&chain_lock);
1901 }
1902 
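// Unlink the lock from the circular list of live locks; callers serialize the
// list update by holding chain_lock.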
1903 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1904  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1905  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1906 
1907  kmp_adaptive_lock_info_t *n = lck->stats.next;
1908  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1909 
1910  n->stats.prev = p;
1911  p->stats.next = n;
1912 }
1913 
1914 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1915  memset((void *)&lck->stats, 0, sizeof(lck->stats));
1916  __kmp_remember_lock(lck);
1917 }
1918 
1919 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1920  kmp_adaptive_lock_info_t *lck) {
1921  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1922 
1923  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1924  t->successfulSpeculations += s->successfulSpeculations;
1925  t->hardFailedSpeculations += s->hardFailedSpeculations;
1926  t->softFailedSpeculations += s->softFailedSpeculations;
1927  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1928  t->lemmingYields += s->lemmingYields;
1929 }
1930 
1931 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1932  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
1933 
1934  __kmp_acquire_bootstrap_lock(&chain_lock);
1935 
1936  __kmp_add_stats(t, lck);
1937  __kmp_forget_lock(lck);
1938 
1939  __kmp_release_bootstrap_lock(&chain_lock);
1940 }
1941 
1942 static float percent(kmp_uint32 count, kmp_uint32 total) {
1943  return (total == 0) ? 0.0 : (100.0 * count) / total;
1944 }
1945 
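// The configured file name is used as a printf-style format, so a %d in it
// expands to the current process id; "-" selects stdout.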
1946 static FILE *__kmp_open_stats_file() {
1947  if (strcmp(__kmp_speculative_statsfile, "-") == 0)
1948  return stdout;
1949 
1950  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1951  char buffer[buffLen];
1952  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1953  (kmp_int32)getpid());
1954  FILE *result = fopen(&buffer[0], "w");
1955 
1956  // Maybe we should issue a warning here...
1957  return result ? result : stdout;
1958 }
1959 
1960 void __kmp_print_speculative_stats() {
1961  if (__kmp_user_lock_kind != lk_adaptive)
1962  return;
1963 
1964  FILE *statsFile = __kmp_open_stats_file();
1965 
1966  kmp_adaptive_lock_statistics_t total = destroyedStats;
1967  kmp_adaptive_lock_info_t *lck;
1968 
1969  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1970  __kmp_add_stats(&total, lck);
1971  }
1972  kmp_adaptive_lock_statistics_t *t = &total;
1973  kmp_uint32 totalSections =
1974  t->nonSpeculativeAcquires + t->successfulSpeculations;
1975  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1976  t->hardFailedSpeculations +
1977  t->softFailedSpeculations;
1978 
1979  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1980  fprintf(statsFile, " Lock parameters: \n"
1981  " max_soft_retries : %10d\n"
1982  " max_badness : %10d\n",
1983  __kmp_adaptive_backoff_params.max_soft_retries,
1984  __kmp_adaptive_backoff_params.max_badness);
1985  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1986  t->nonSpeculativeAcquireAttempts);
1987  fprintf(statsFile, " Total critical sections : %10d\n",
1988  totalSections);
1989  fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1990  t->successfulSpeculations,
1991  percent(t->successfulSpeculations, totalSections));
1992  fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1993  t->nonSpeculativeAcquires,
1994  percent(t->nonSpeculativeAcquires, totalSections));
1995  fprintf(statsFile, " Lemming yields : %10d\n\n",
1996  t->lemmingYields);
1997 
1998  fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1999  totalSpeculations);
2000  fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
2001  t->successfulSpeculations,
2002  percent(t->successfulSpeculations, totalSpeculations));
2003  fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
2004  t->softFailedSpeculations,
2005  percent(t->softFailedSpeculations, totalSpeculations));
2006  fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
2007  t->hardFailedSpeculations,
2008  percent(t->hardFailedSpeculations, totalSpeculations));
2009 
2010  if (statsFile != stdout)
2011  fclose(statsFile);
2012 }
2013 
2014 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
2015 #else
2016 #define KMP_INC_STAT(lck, stat)
2017 
2018 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2019 
2020 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
2021  // It is enough to check that the head_id is zero.
2022  // We don't need to check the tail as well.
2023  bool res = lck->lk.head_id == 0;
2024 
2025 // We need a fence here, since we must ensure that no memory operations
2026 // from later in this thread float above that read.
2027 #if KMP_COMPILER_ICC
2028  _mm_mfence();
2029 #else
2030  __sync_synchronize();
2031 #endif
2032 
2033  return res;
2034 }
2035 
2036 // Functions for manipulating the badness
2037 static __inline void
2038 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
2039  // Reset the badness to zero so we eagerly try to speculate again
2040  lck->lk.adaptive.badness = 0;
2041  KMP_INC_STAT(lck, successfulSpeculations);
2042 }
2043 
2044 // Create a bit mask with one more set bit.
2045 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
2046  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
2047  if (newBadness > lck->lk.adaptive.max_badness) {
2048  return;
2049  } else {
2050  lck->lk.adaptive.badness = newBadness;
2051  }
2052 }
2053 
2054 // Check whether speculation should be attempted.
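// Badness is a low-bit mask, so we speculate only when the acquire-attempt count
// is a multiple of (badness + 1); each extra badness bit halves how often we try.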
2055 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
2056  kmp_int32 gtid) {
2057  kmp_uint32 badness = lck->lk.adaptive.badness;
2058  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2059  int res = (attempts & badness) == 0;
2060  return res;
2061 }
2062 
2063 // Attempt to acquire only the speculative lock.
2064 // Does not back off to the non-speculative lock.
2065 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
2066  kmp_int32 gtid) {
2067  int retries = lck->lk.adaptive.max_soft_retries;
2068 
2069  // We don't explicitly count the start of speculation; rather, we record the
2070  // results (success, hard fail, soft fail). The sum of all of those is the
2071  // total number of times we started speculation since all speculations must
2072  // end one of those ways.
2073  do {
2074  kmp_uint32 status = _xbegin();
2075  // Switch this in to disable actual speculation but exercise at least some
2076  // of the rest of the code. Useful for debugging...
2077  // kmp_uint32 status = _XABORT_NESTED;
2078 
2079  if (status == _XBEGIN_STARTED) {
2080  /* We have successfully started speculation. Check that no-one acquired
2081  the lock for real between when we last looked and now. This also gets
2082  the lock cache line into our read-set, which we need so that we'll
2083  abort if anyone later claims it for real. */
2084  if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2085  // Lock is now visibly acquired, so someone beat us to it. Abort the
2086  // transaction so we'll restart from _xbegin with the failure status.
2087  _xabort(0x01);
2088  KMP_ASSERT2(0, "should not get here");
2089  }
2090  return 1; // Lock has been acquired (speculatively)
2091  } else {
2092  // We have aborted, update the statistics
2093  if (status & SOFT_ABORT_MASK) {
2094  KMP_INC_STAT(lck, softFailedSpeculations);
2095  // and loop round to retry.
2096  } else {
2097  KMP_INC_STAT(lck, hardFailedSpeculations);
2098  // Give up if we had a hard failure.
2099  break;
2100  }
2101  }
2102  } while (retries--); // Loop while we have retries, and didn't fail hard.
2103 
2104  // Either we had a hard failure or we didn't succeed softly after
2105  // the full set of attempts, so back off the badness.
2106  __kmp_step_badness(lck);
2107  return 0;
2108 }
2109 
2110 // Attempt to acquire the speculative lock, or back off to the non-speculative
2111 // one if the speculative lock cannot be acquired.
2112 // We can succeed speculatively, non-speculatively, or fail.
2113 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2114  // First try to acquire the lock speculatively
2115  if (__kmp_should_speculate(lck, gtid) &&
2116  __kmp_test_adaptive_lock_only(lck, gtid))
2117  return 1;
2118 
2119  // Speculative acquisition failed, so try to acquire it non-speculatively.
2120  // Count the non-speculative acquire attempt
2121  lck->lk.adaptive.acquire_attempts++;
2122 
2123  // Use base, non-speculative lock.
2124  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2125  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2126  return 1; // Lock is acquired (non-speculatively)
2127  } else {
2128  return 0; // Failed to acquire the lock, it's already visibly locked.
2129  }
2130 }
2131 
2132 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2133  kmp_int32 gtid) {
2134  char const *const func = "omp_test_lock";
2135  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2136  KMP_FATAL(LockIsUninitialized, func);
2137  }
2138 
2139  int retval = __kmp_test_adaptive_lock(lck, gtid);
2140 
2141  if (retval) {
2142  lck->lk.qlk.owner_id = gtid + 1;
2143  }
2144  return retval;
2145 }
2146 
2147 // Block until we can acquire a speculative, adaptive lock. We check whether we
2148 // should be trying to speculate. If we should be, we check the real lock to see
2149 // if it is free, and, if not, pause without attempting to acquire it until it
2150 // is. Then we try the speculative acquire. This means that although we suffer
2151 // from lemmings a little (because we can't acquire the lock speculatively
2152 // until the queue of threads waiting has cleared), we don't get into a state
2153 // where we can never acquire the lock speculatively (because we force the queue
2154 // to clear by preventing new arrivals from entering the queue). This does mean
2155 // that when we're trying to break lemmings, the lock is no longer fair. However,
2156 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2157 // problem.
2158 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2159  kmp_int32 gtid) {
2160  if (__kmp_should_speculate(lck, gtid)) {
2161  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2162  if (__kmp_test_adaptive_lock_only(lck, gtid))
2163  return;
2164  // We tried speculation and failed, so give up.
2165  } else {
2166  // We can't try speculation until the lock is free, so we pause here
2167  // (without suspending on the queuing lock) to allow it to drain, and then
2168  // try again. All other threads will also see the same result for
2169  // shouldSpeculate, so will be doing the same if they try to claim the
2170  // lock from now on.
2171  while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2172  KMP_INC_STAT(lck, lemmingYields);
2173  __kmp_yield(TRUE);
2174  }
2175 
2176  if (__kmp_test_adaptive_lock_only(lck, gtid))
2177  return;
2178  }
2179  }
2180 
2181  // Speculative acquisition failed, so acquire it non-speculatively.
2182  // Count the non-speculative acquire attempt
2183  lck->lk.adaptive.acquire_attempts++;
2184 
2185  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2186  // We have acquired the base lock, so count that.
2187  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2188  ANNOTATE_QUEUING_ACQUIRED(lck);
2189 }
2190 
2191 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2192  kmp_int32 gtid) {
2193  char const *const func = "omp_set_lock";
2194  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2195  KMP_FATAL(LockIsUninitialized, func);
2196  }
2197  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2198  KMP_FATAL(LockIsAlreadyOwned, func);
2199  }
2200 
2201  __kmp_acquire_adaptive_lock(lck, gtid);
2202 
2203  lck->lk.qlk.owner_id = gtid + 1;
2204 }
2205 
2206 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2207  kmp_int32 gtid) {
2208  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2209  lck))) { // If the lock doesn't look claimed we must be speculating.
2210  // (Or the user's code is buggy and they're releasing without locking;
2211  // if we had XTEST we'd be able to check that case...)
2212  _xend(); // Exit speculation
2213  __kmp_update_badness_after_success(lck);
2214  } else { // Since the lock *is* visibly locked we're not speculating,
2215  // so should use the underlying lock's release scheme.
2216  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2217  }
2218  return KMP_LOCK_RELEASED;
2219 }
2220 
2221 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2222  kmp_int32 gtid) {
2223  char const *const func = "omp_unset_lock";
2224  KMP_MB(); /* in case another processor initialized lock */
2225  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2226  KMP_FATAL(LockIsUninitialized, func);
2227  }
2228  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2229  KMP_FATAL(LockUnsettingFree, func);
2230  }
2231  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2232  KMP_FATAL(LockUnsettingSetByAnother, func);
2233  }
2234  lck->lk.qlk.owner_id = 0;
2235  __kmp_release_adaptive_lock(lck, gtid);
2236  return KMP_LOCK_RELEASED;
2237 }
2238 
2239 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2240  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2241  lck->lk.adaptive.badness = 0;
2242  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2243  lck->lk.adaptive.max_soft_retries =
2244  __kmp_adaptive_backoff_params.max_soft_retries;
2245  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2246 #if KMP_DEBUG_ADAPTIVE_LOCKS
2247  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2248 #endif
2249  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2250 }
2251 
2252 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2253  __kmp_init_adaptive_lock(lck);
2254 }
2255 
2256 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2257 #if KMP_DEBUG_ADAPTIVE_LOCKS
2258  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2259 #endif
2260  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2261  // Nothing needed for the speculative part.
2262 }
2263 
2264 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2265  char const *const func = "omp_destroy_lock";
2266  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2267  KMP_FATAL(LockIsUninitialized, func);
2268  }
2269  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2270  KMP_FATAL(LockStillOwned, func);
2271  }
2272  __kmp_destroy_adaptive_lock(lck);
2273 }
2274 
2275 #endif // KMP_USE_ADAPTIVE_LOCKS
2276 
2277 /* ------------------------------------------------------------------------ */
2278 /* DRDPA ticket locks */
2279 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2280 
2281 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2282  return TCR_4(lck->lk.owner_id) - 1;
2283 }
2284 
2285 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2286  return lck->lk.depth_locked != -1;
2287 }
2288 
2289 __forceinline static int
2290 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2291  kmp_uint64 ticket =
2292  KMP_TEST_THEN_INC64(RCAST(volatile kmp_int64 *, &lck->lk.next_ticket));
2293  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2294  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls;
2295 
2296 #ifdef USE_LOCK_PROFILE
2297  if (TCR_8(polls[ticket & mask].poll) != ticket)
2298  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2299 /* else __kmp_printf( "." );*/
2300 #endif /* USE_LOCK_PROFILE */
2301 
2302  // Now spin-wait, but reload the polls pointer and mask, in case the
2303  // polling area has been reconfigured. Unless it is reconfigured, the
2304  // reloads stay in L1 cache and are cheap.
2305  //
2306  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
2307  //
2308  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2309  // and poll to be re-read every spin iteration.
2310  kmp_uint32 spins;
2311 
2312  KMP_FSYNC_PREPARE(lck);
2313  KMP_INIT_YIELD(spins);
2314  while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2315  // If we are oversubscribed,
2316  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2317  // CPU Pause is in the macros for yield.
2318  //
2319  KMP_YIELD(TCR_4(__kmp_nth) >
2320  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2321  KMP_YIELD_SPIN(spins);
2322 
2323  // Re-read the mask and the poll pointer from the lock structure.
2324  //
2325  // Make certain that "mask" is read before "polls" !!!
2326  //
2327  // If another thread reconfigures the polling area and updates these
2328  // values, and we get the new value of mask and the old polls pointer, we
2329  // could access memory beyond the end of the old polling area.
2330  mask = TCR_8(lck->lk.mask); // volatile load
2331  polls = lck->lk.polls; // volatile load
2332  }
2333 
2334  // Critical section starts here
2335  KMP_FSYNC_ACQUIRED(lck);
2336  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2337  ticket, lck));
2338  lck->lk.now_serving = ticket; // non-volatile store
2339 
2340  // Deallocate a garbage polling area if we know that we are the last
2341  // thread that could possibly access it.
2342  //
2343  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2344  // ticket.
2345  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2346  __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls));
2347  lck->lk.old_polls = NULL;
2348  lck->lk.cleanup_ticket = 0;
2349  }
2350 
2351  // Check to see if we should reconfigure the polling area.
2352  // If there is still a garbage polling area to be deallocated from a
2353  // previous reconfiguration, let a later thread reconfigure it.
2354  if (lck->lk.old_polls == NULL) {
2355  bool reconfigure = false;
2356  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2357  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2358 
2359  if (TCR_4(__kmp_nth) >
2360  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2361  // We are in oversubscription mode. Contract the polling area
2362  // down to a single location, if that hasn't been done already.
2363  if (num_polls > 1) {
2364  reconfigure = true;
2365  num_polls = TCR_4(lck->lk.num_polls);
2366  mask = 0;
2367  num_polls = 1;
2368  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2369  __kmp_allocate(num_polls * sizeof(*polls));
2370  polls[0].poll = ticket;
2371  }
2372  } else {
2373  // We are in under/fully subscribed mode. Check the number of
2374  // threads waiting on the lock. The size of the polling area
2375  // should be at least the number of threads waiting.
2376  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2377  if (num_waiting > num_polls) {
2378  kmp_uint32 old_num_polls = num_polls;
2379  reconfigure = true;
2380  do {
2381  mask = (mask << 1) | 1;
2382  num_polls *= 2;
2383  } while (num_polls <= num_waiting);
2384 
2385  // Allocate the new polling area, and copy the relevant portion
2386  // of the old polling area to the new area. __kmp_allocate()
2387  // zeroes the memory it allocates, and most of the old area is
2388  // just zero padding, so we only copy the release counters.
2389  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2390  __kmp_allocate(num_polls * sizeof(*polls));
2391  kmp_uint32 i;
2392  for (i = 0; i < old_num_polls; i++) {
2393  polls[i].poll = old_polls[i].poll;
2394  }
2395  }
2396  }
2397 
2398  if (reconfigure) {
2399  // Now write the updated fields back to the lock structure.
2400  //
2401  // Make certain that "polls" is written before "mask" !!!
2402  //
2403  // If another thread picks up the new value of mask and the old polls
2404  // pointer, it could access memory beyond the end of the old polling
2405  // area.
2406  //
2407  // On x86, we need memory fences.
2408  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2409  "lock %p to %d polls\n",
2410  ticket, lck, num_polls));
2411 
2412  lck->lk.old_polls = old_polls; // non-volatile store
2413  lck->lk.polls = polls; // volatile store
2414 
2415  KMP_MB();
2416 
2417  lck->lk.num_polls = num_polls; // non-volatile store
2418  lck->lk.mask = mask; // volatile store
2419 
2420  KMP_MB();
2421 
2422  // Only after the new polling area and mask have been flushed
2423  // to main memory can we update the cleanup ticket field.
2424  //
2425  // volatile load / non-volatile store
2426  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2427  }
2428  }
2429  return KMP_LOCK_ACQUIRED_FIRST;
2430 }
2431 
2432 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2433  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2434  ANNOTATE_DRDPA_ACQUIRED(lck);
2435  return retval;
2436 }
2437 
2438 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2439  kmp_int32 gtid) {
2440  char const *const func = "omp_set_lock";
2441  if (lck->lk.initialized != lck) {
2442  KMP_FATAL(LockIsUninitialized, func);
2443  }
2444  if (__kmp_is_drdpa_lock_nestable(lck)) {
2445  KMP_FATAL(LockNestableUsedAsSimple, func);
2446  }
2447  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2448  KMP_FATAL(LockIsAlreadyOwned, func);
2449  }
2450 
2451  __kmp_acquire_drdpa_lock(lck, gtid);
2452 
2453  lck->lk.owner_id = gtid + 1;
2454  return KMP_LOCK_ACQUIRED_FIRST;
2455 }
2456 
2457 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2458  // First get a ticket, then read the polls pointer and the mask.
2459  // The polls pointer must be read before the mask!!! (See above)
2460  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2461  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls;
2462  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2463  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2464  kmp_uint64 next_ticket = ticket + 1;
2465  if (KMP_COMPARE_AND_STORE_ACQ64(&lck->lk.next_ticket, ticket,
2466  next_ticket)) {
2467  KMP_FSYNC_ACQUIRED(lck);
2468  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2469  ticket, lck));
2470  lck->lk.now_serving = ticket; // non-volatile store
2471 
2472  // Since no threads are waiting, there is no possibility that we would
2473  // want to reconfigure the polling area. We might have the cleanup ticket
2474  // value (which says that it is now safe to deallocate old_polls), but
2475  // we'll let a later thread which calls __kmp_acquire_lock do that - this
2476  // routine isn't supposed to block, and we would risk blocks if we called
2477  // __kmp_free() to do the deallocation.
2478  return TRUE;
2479  }
2480  }
2481  return FALSE;
2482 }
2483 
2484 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2485  kmp_int32 gtid) {
2486  char const *const func = "omp_test_lock";
2487  if (lck->lk.initialized != lck) {
2488  KMP_FATAL(LockIsUninitialized, func);
2489  }
2490  if (__kmp_is_drdpa_lock_nestable(lck)) {
2491  KMP_FATAL(LockNestableUsedAsSimple, func);
2492  }
2493 
2494  int retval = __kmp_test_drdpa_lock(lck, gtid);
2495 
2496  if (retval) {
2497  lck->lk.owner_id = gtid + 1;
2498  }
2499  return retval;
2500 }
2501 
2502 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2503  // Read the ticket value from the lock data struct, then the polls pointer and
2504  // the mask. The polls pointer must be read before the mask!!! (See above)
2505  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2506  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls;
2507  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2508  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2509  ticket - 1, lck));
2510  KMP_FSYNC_RELEASING(lck);
2511  ANNOTATE_DRDPA_RELEASED(lck);
2512  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2513  return KMP_LOCK_RELEASED;
2514 }
2515 
2516 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2517  kmp_int32 gtid) {
2518  char const *const func = "omp_unset_lock";
2519  KMP_MB(); /* in case another processor initialized lock */
2520  if (lck->lk.initialized != lck) {
2521  KMP_FATAL(LockIsUninitialized, func);
2522  }
2523  if (__kmp_is_drdpa_lock_nestable(lck)) {
2524  KMP_FATAL(LockNestableUsedAsSimple, func);
2525  }
2526  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2527  KMP_FATAL(LockUnsettingFree, func);
2528  }
2529  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2530  (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2531  KMP_FATAL(LockUnsettingSetByAnother, func);
2532  }
2533  lck->lk.owner_id = 0;
2534  return __kmp_release_drdpa_lock(lck, gtid);
2535 }
2536 
2537 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2538  lck->lk.location = NULL;
2539  lck->lk.mask = 0;
2540  lck->lk.num_polls = 1;
2541  lck->lk.polls =
2542  (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate(
2543  lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2544  lck->lk.cleanup_ticket = 0;
2545  lck->lk.old_polls = NULL;
2546  lck->lk.next_ticket = 0;
2547  lck->lk.now_serving = 0;
2548  lck->lk.owner_id = 0; // no thread owns the lock.
2549  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2550  lck->lk.initialized = lck;
2551 
2552  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2553 }
2554 
2555 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2556  __kmp_init_drdpa_lock(lck);
2557 }
2558 
2559 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2560  lck->lk.initialized = NULL;
2561  lck->lk.location = NULL;
2562  if (lck->lk.polls != NULL) {
2563  __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.polls));
2564  lck->lk.polls = NULL;
2565  }
2566  if (lck->lk.old_polls != NULL) {
2567  __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls));
2568  lck->lk.old_polls = NULL;
2569  }
2570  lck->lk.mask = 0;
2571  lck->lk.num_polls = 0;
2572  lck->lk.cleanup_ticket = 0;
2573  lck->lk.next_ticket = 0;
2574  lck->lk.now_serving = 0;
2575  lck->lk.owner_id = 0;
2576  lck->lk.depth_locked = -1;
2577 }
2578 
2579 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2580  char const *const func = "omp_destroy_lock";
2581  if (lck->lk.initialized != lck) {
2582  KMP_FATAL(LockIsUninitialized, func);
2583  }
2584  if (__kmp_is_drdpa_lock_nestable(lck)) {
2585  KMP_FATAL(LockNestableUsedAsSimple, func);
2586  }
2587  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2588  KMP_FATAL(LockStillOwned, func);
2589  }
2590  __kmp_destroy_drdpa_lock(lck);
2591 }
2592 
2593 // nested drdpa ticket locks
2594 
2595 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2596  KMP_DEBUG_ASSERT(gtid >= 0);
2597 
2598  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2599  lck->lk.depth_locked += 1;
2600  return KMP_LOCK_ACQUIRED_NEXT;
2601  } else {
2602  __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2603  ANNOTATE_DRDPA_ACQUIRED(lck);
2604  KMP_MB();
2605  lck->lk.depth_locked = 1;
2606  KMP_MB();
2607  lck->lk.owner_id = gtid + 1;
2608  return KMP_LOCK_ACQUIRED_FIRST;
2609  }
2610 }
2611 
2612 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2613  kmp_int32 gtid) {
2614  char const *const func = "omp_set_nest_lock";
2615  if (lck->lk.initialized != lck) {
2616  KMP_FATAL(LockIsUninitialized, func);
2617  }
2618  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2619  KMP_FATAL(LockSimpleUsedAsNestable, func);
2620  }
2621  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2622 }
2623 
2624 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2625  int retval;
2626 
2627  KMP_DEBUG_ASSERT(gtid >= 0);
2628 
2629  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2630  retval = ++lck->lk.depth_locked;
2631  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2632  retval = 0;
2633  } else {
2634  KMP_MB();
2635  retval = lck->lk.depth_locked = 1;
2636  KMP_MB();
2637  lck->lk.owner_id = gtid + 1;
2638  }
2639  return retval;
2640 }
2641 
2642 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2643  kmp_int32 gtid) {
2644  char const *const func = "omp_test_nest_lock";
2645  if (lck->lk.initialized != lck) {
2646  KMP_FATAL(LockIsUninitialized, func);
2647  }
2648  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2649  KMP_FATAL(LockSimpleUsedAsNestable, func);
2650  }
2651  return __kmp_test_nested_drdpa_lock(lck, gtid);
2652 }
2653 
2654 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2655  KMP_DEBUG_ASSERT(gtid >= 0);
2656 
2657  KMP_MB();
2658  if (--(lck->lk.depth_locked) == 0) {
2659  KMP_MB();
2660  lck->lk.owner_id = 0;
2661  __kmp_release_drdpa_lock(lck, gtid);
2662  return KMP_LOCK_RELEASED;
2663  }
2664  return KMP_LOCK_STILL_HELD;
2665 }
2666 
2667 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2668  kmp_int32 gtid) {
2669  char const *const func = "omp_unset_nest_lock";
2670  KMP_MB(); /* in case another processor initialized lock */
2671  if (lck->lk.initialized != lck) {
2672  KMP_FATAL(LockIsUninitialized, func);
2673  }
2674  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2675  KMP_FATAL(LockSimpleUsedAsNestable, func);
2676  }
2677  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2678  KMP_FATAL(LockUnsettingFree, func);
2679  }
2680  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2681  KMP_FATAL(LockUnsettingSetByAnother, func);
2682  }
2683  return __kmp_release_nested_drdpa_lock(lck, gtid);
2684 }
2685 
2686 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2687  __kmp_init_drdpa_lock(lck);
2688  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2689 }
2690 
2691 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2692  __kmp_init_nested_drdpa_lock(lck);
2693 }
2694 
2695 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2696  __kmp_destroy_drdpa_lock(lck);
2697  lck->lk.depth_locked = 0;
2698 }
2699 
2700 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2701  char const *const func = "omp_destroy_nest_lock";
2702  if (lck->lk.initialized != lck) {
2703  KMP_FATAL(LockIsUninitialized, func);
2704  }
2705  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2706  KMP_FATAL(LockSimpleUsedAsNestable, func);
2707  }
2708  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2709  KMP_FATAL(LockStillOwned, func);
2710  }
2711  __kmp_destroy_nested_drdpa_lock(lck);
2712 }
2713 
2714 // access functions to fields which don't exist for all lock kinds.
2715 
2716 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
2717  return lck == lck->lk.initialized;
2718 }
2719 
2720 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2721  return lck->lk.location;
2722 }
2723 
2724 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2725  const ident_t *loc) {
2726  lck->lk.location = loc;
2727 }
2728 
2729 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2730  return lck->lk.flags;
2731 }
2732 
2733 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2734  kmp_lock_flags_t flags) {
2735  lck->lk.flags = flags;
2736 }
2737 
2738 // Time stamp counter
2739 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2740 #define __kmp_tsc() __kmp_hardware_timestamp()
2741 // Runtime's default backoff parameters
2742 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2743 #else
2744 // Use nanoseconds for other platforms
2745 extern kmp_uint64 __kmp_now_nsec();
2746 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2747 #define __kmp_tsc() __kmp_now_nsec()
2748 #endif
2749 
2750 // A useful predicate for dealing with timestamps that may wrap.
2751 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2752 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2753 // Times where going clockwise is less distance than going anti-clockwise
2754 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2755 // then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2756 // signed(b) = 0 captures the actual difference
2757 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2758  return ((kmp_int64)b - (kmp_int64)a) > 0;
2759 }
2760 
2761 // Truncated binary exponential backoff function
2762 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2763  // We could flatten this loop, but making it a nested loop gives better results
2764  kmp_uint32 i;
2765  for (i = boff->step; i > 0; i--) {
2766  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2767  do {
2768  KMP_CPU_PAUSE();
2769  } while (before(__kmp_tsc(), goal));
2770  }
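  // Grow the backoff step roughly geometrically, truncating with a mask; this
  // assumes max_backoff is a power of two, as it is for the default parameters above.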
2771  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2772 }
2773 
2774 #if KMP_USE_DYNAMIC_LOCK
2775 
2776 // Direct lock initializer. It simply writes a tag to the low 8 bits of the
2777 // lock word.
2778 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2779  kmp_dyna_lockseq_t seq) {
2780  TCW_4(*lck, KMP_GET_D_TAG(seq));
2781  KA_TRACE(
2782  20,
2783  ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2784 }
2785 
2786 #if KMP_USE_TSX
2787 
2788 // HLE lock functions - imported from the testbed runtime.
2789 #define HLE_ACQUIRE ".byte 0xf2;"
2790 #define HLE_RELEASE ".byte 0xf3;"
2791 
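// 0xf2/0xf3 are the XACQUIRE/XRELEASE prefixes: swap4 below elides the lock on
// the exchange, and the prefixed store in __kmp_release_hle_lock ends the
// elided region.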
2792 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2793  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2794  return v;
2795 }
2796 
2797 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2798 
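// Test-and-test-and-set acquire: spin reading the lock word with a short,
// bounded exponential pause (at most 7 PAUSEs) before re-attempting the swap.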
2799 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2800  // Use gtid for KMP_LOCK_BUSY if necessary
2801  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2802  int delay = 1;
2803  do {
2804  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2805  for (int i = delay; i != 0; --i)
2806  KMP_CPU_PAUSE();
2807  delay = ((delay << 1) | 1) & 7;
2808  }
2809  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2810  }
2811 }
2812 
2813 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2814  kmp_int32 gtid) {
2815  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2816 }
2817 
2818 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2819  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2820  : "=m"(*lck)
2821  : "r"(KMP_LOCK_FREE(hle))
2822  : "memory");
2823  return KMP_LOCK_RELEASED;
2824 }
2825 
2826 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2827  kmp_int32 gtid) {
2828  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2829 }
2830 
2831 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2832  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2833 }
2834 
2835 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2836  kmp_int32 gtid) {
2837  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2838 }
2839 
2840 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
2841  __kmp_init_queuing_lock(lck);
2842 }
2843 
2844 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
2845  __kmp_destroy_queuing_lock(lck);
2846 }
2847 
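// Try a few RTM transactions; an explicit abort with code 0xff means the
// underlying queuing lock was visibly held, so wait for it to drain before
// retrying. Any other non-retryable abort falls through to a real acquisition.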
2848 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2849  unsigned retries = 3, status;
2850  do {
2851  status = _xbegin();
2852  if (status == _XBEGIN_STARTED) {
2853  if (__kmp_is_unlocked_queuing_lock(lck))
2854  return;
2855  _xabort(0xff);
2856  }
2857  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2858  // Wait until lock becomes free
2859  while (!__kmp_is_unlocked_queuing_lock(lck))
2860  __kmp_yield(TRUE);
2861  } else if (!(status & _XABORT_RETRY))
2862  break;
2863  } while (retries--);
2864 
2865  // Fall back to the non-speculative queuing lock
2866  __kmp_acquire_queuing_lock(lck, gtid);
2867 }
2868 
2869 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2870  kmp_int32 gtid) {
2871  __kmp_acquire_rtm_lock(lck, gtid);
2872 }
2873 
2874 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2875  if (__kmp_is_unlocked_queuing_lock(lck)) {
2876  // Releasing from speculation
2877  _xend();
2878  } else {
2879  // Releasing from a real lock
2880  __kmp_release_queuing_lock(lck, gtid);
2881  }
2882  return KMP_LOCK_RELEASED;
2883 }
2884 
2885 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2886  kmp_int32 gtid) {
2887  return __kmp_release_rtm_lock(lck, gtid);
2888 }
2889 
2890 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2891  unsigned retries = 3, status;
2892  do {
2893  status = _xbegin();
2894  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2895  return 1;
2896  }
2897  if (!(status & _XABORT_RETRY))
2898  break;
2899  } while (retries--);
2900 
2901  return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
2902 }
2903 
2904 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2905  kmp_int32 gtid) {
2906  return __kmp_test_rtm_lock(lck, gtid);
2907 }
2908 
2909 #endif // KMP_USE_TSX
2910 
2911 // Entry functions for indirect locks (first element of direct lock jump tables)
2912 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2913  kmp_dyna_lockseq_t tag);
2914 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2915 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2916 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2917 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2918 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2919  kmp_int32);
2920 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2921  kmp_int32);
2922 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2923  kmp_int32);
2924 
2925 // Jump tables for the indirect lock functions
2926 // Only fill in the odd entries; that avoids the need to shift out the low bit
2927 
2928 // init functions
2929 #define expand(l, op) 0, __kmp_init_direct_lock,
2930 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2931  __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2932 #undef expand
2933 
2934 // destroy functions
2935 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2936 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = {
2937  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2938 #undef expand
2939 
2940 // set/acquire functions
2941 #define expand(l, op) \
2942  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2943 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2944  __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2945 #undef expand
2946 #define expand(l, op) \
2947  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2948 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2949  __kmp_set_indirect_lock_with_checks, 0,
2950  KMP_FOREACH_D_LOCK(expand, acquire)};
2951 #undef expand
2952 
2953 // unset/release and test functions
2954 #define expand(l, op) \
2955  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2956 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
2957  __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
2958 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
2959  __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
2960 #undef expand
2961 #define expand(l, op) \
2962  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2963 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2964  __kmp_unset_indirect_lock_with_checks, 0,
2965  KMP_FOREACH_D_LOCK(expand, release)};
2966 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2967  __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
2968 #undef expand
2969 
2970 // Exposes only one set of jump tables (*lock or *lock_with_checks).
2971 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
2972 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
2973 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
2974 
2975 // Jump tables for the indirect lock functions
2976 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2977 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
2978  KMP_FOREACH_I_LOCK(expand, init)};
2979 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = {
2980  KMP_FOREACH_I_LOCK(expand, destroy)};
2981 #undef expand
2982 
2983 // set/acquire functions
2984 #define expand(l, op) \
2985  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2986 static int (*indirect_set[])(kmp_user_lock_p,
2987  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
2988 #undef expand
2989 #define expand(l, op) \
2990  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2991 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
2992  KMP_FOREACH_I_LOCK(expand, acquire)};
2993 #undef expand
2994 
2995 // unset/release and test functions
2996 #define expand(l, op) \
2997  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2998 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
2999  KMP_FOREACH_I_LOCK(expand, release)};
3000 static int (*indirect_test[])(kmp_user_lock_p,
3001  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3002 #undef expand
3003 #define expand(l, op) \
3004  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3005 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3006  KMP_FOREACH_I_LOCK(expand, release)};
3007 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3008  KMP_FOREACH_I_LOCK(expand, test)};
3009 #undef expand
3010 
3011 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3012 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3013 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3014 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3015 
3016 // Lock index table.
3017 kmp_indirect_lock_table_t __kmp_i_lock_table;
3018 
3019 // Size of indirect locks.
3020 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3021 
3022 // Jump tables for lock accessor/modifier.
3023 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3024  const ident_t *) = {0};
3025 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3026  kmp_lock_flags_t) = {0};
3027 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3028  kmp_user_lock_p) = {0};
3029 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3030  kmp_user_lock_p) = {0};
3031 
3032 // Use different lock pools for different lock types.
3033 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3034 
3035 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3036 // the indirect lock table holds the address and type of the allocated indirect
3037 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3038 // full. A destroyed indirect lock object is returned to the reusable pool of
3039 // locks, unique to each lock type.
3040 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3041  kmp_int32 gtid,
3042  kmp_indirect_locktag_t tag) {
3043  kmp_indirect_lock_t *lck;
3044  kmp_lock_index_t idx;
3045 
3046  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3047 
3048  if (__kmp_indirect_lock_pool[tag] != NULL) {
3049  // Reuse the allocated and destroyed lock object
3050  lck = __kmp_indirect_lock_pool[tag];
3051  if (OMP_LOCK_T_SIZE < sizeof(void *))
3052  idx = lck->lock->pool.index;
3053  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3054  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3055  lck));
3056  } else {
3057  idx = __kmp_i_lock_table.next;
3058  // Check capacity and double the size if it is full
3059  if (idx == __kmp_i_lock_table.size) {
3060  // Double up the space for block pointers
3061  int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3062  kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3063  2 * row * sizeof(kmp_indirect_lock_t *));
3064  KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3065  row * sizeof(kmp_indirect_lock_t *));
3066  kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3067  __kmp_i_lock_table.table = new_table;
3068  __kmp_free(old_table);
3069  // Allocate new objects in the new blocks
3070  for (int i = row; i < 2 * row; ++i)
3071  *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3072  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3073  __kmp_i_lock_table.size = 2 * idx;
3074  }
3075  __kmp_i_lock_table.next++;
3076  lck = KMP_GET_I_LOCK(idx);
3077  // Allocate a new base lock object
3078  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3079  KA_TRACE(20,
3080  ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3081  }
3082 
3083  __kmp_release_lock(&__kmp_global_lock, gtid);
3084 
3085  lck->type = tag;
3086 
3087  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
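  // Direct locks keep an odd tag (low bit set) in the lock word, so an even
  // value here marks the word as a shifted index into the indirect lock table.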
3088  *((kmp_lock_index_t *)user_lock) = idx
3089  << 1; // indirect lock word must be even
3090  } else {
3091  *((kmp_indirect_lock_t **)user_lock) = lck;
3092  }
3093 
3094  return lck;
3095 }
3096 
3097 // User lock lookup for dynamically dispatched locks.
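// With consistency checking enabled the lookup validates the lock word and
// reports a fatal error for an uninitialized lock; otherwise it is a plain
// table index or pointer dereference.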
3098 static __forceinline kmp_indirect_lock_t *
3099 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3100  if (__kmp_env_consistency_check) {
3101  kmp_indirect_lock_t *lck = NULL;
3102  if (user_lock == NULL) {
3103  KMP_FATAL(LockIsUninitialized, func);
3104  }
3105  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3106  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3107  if (idx >= __kmp_i_lock_table.size) {
3108  KMP_FATAL(LockIsUninitialized, func);
3109  }
3110  lck = KMP_GET_I_LOCK(idx);
3111  } else {
3112  lck = *((kmp_indirect_lock_t **)user_lock);
3113  }
3114  if (lck == NULL) {
3115  KMP_FATAL(LockIsUninitialized, func);
3116  }
3117  return lck;
3118  } else {
3119  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3120  return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3121  } else {
3122  return *((kmp_indirect_lock_t **)user_lock);
3123  }
3124  }
3125 }
3126 
3127 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3128  kmp_dyna_lockseq_t seq) {
3129 #if KMP_USE_ADAPTIVE_LOCKS
3130  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3131  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3132  seq = lockseq_queuing;
3133  }
3134 #endif
3135 #if KMP_USE_TSX
3136  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3137  seq = lockseq_queuing;
3138  }
3139 #endif
3140  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3141  kmp_indirect_lock_t *l =
3142  __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3143  KMP_I_LOCK_FUNC(l, init)(l->lock);
3144  KA_TRACE(
3145  20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3146  seq));
3147 }
3148 
3149 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3150  kmp_uint32 gtid = __kmp_entry_gtid();
3151  kmp_indirect_lock_t *l =
3152  __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3153  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3154  kmp_indirect_locktag_t tag = l->type;
3155 
3156  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3157 
3158  // Use the base lock's space to keep the pool chain.
3159  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3160  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3161  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3162  }
3163  __kmp_indirect_lock_pool[tag] = l;
3164 
3165  __kmp_release_lock(&__kmp_global_lock, gtid);
3166 }
3167 
3168 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3169  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3170  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3171 }
3172 
3173 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3174  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3175  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3176 }
3177 
3178 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3179  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3180  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3181 }
3182 
3183 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3184  kmp_int32 gtid) {
3185  kmp_indirect_lock_t *l =
3186  __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3187  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3188 }
3189 
3190 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3191  kmp_int32 gtid) {
3192  kmp_indirect_lock_t *l =
3193  __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3194  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3195 }
3196 
3197 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3198  kmp_int32 gtid) {
3199  kmp_indirect_lock_t *l =
3200  __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3201  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3202 }
3203 
3204 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3205 
3206 // This is used only in kmp_error.cpp when consistency checking is on.
3207 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3208  switch (seq) {
3209  case lockseq_tas:
3210  case lockseq_nested_tas:
3211  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3212 #if KMP_USE_FUTEX
3213  case lockseq_futex:
3214  case lockseq_nested_futex:
3215  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3216 #endif
3217  case lockseq_ticket:
3218  case lockseq_nested_ticket:
3219  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3220  case lockseq_queuing:
3221  case lockseq_nested_queuing:
3222 #if KMP_USE_ADAPTIVE_LOCKS
3223  case lockseq_adaptive:
3224 #endif
3225  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3226  case lockseq_drdpa:
3227  case lockseq_nested_drdpa:
3228  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3229  default:
3230  return 0;
3231  }
3232 }
3233 
3234 // Initializes data for dynamic user locks.
3235 void __kmp_init_dynamic_user_locks() {
3236  // Initialize jump table for the lock functions
3237  if (__kmp_env_consistency_check) {
3238  __kmp_direct_set = direct_set_check;
3239  __kmp_direct_unset = direct_unset_check;
3240  __kmp_direct_test = direct_test_check;
3241  __kmp_indirect_set = indirect_set_check;
3242  __kmp_indirect_unset = indirect_unset_check;
3243  __kmp_indirect_test = indirect_test_check;
3244  } else {
3245  __kmp_direct_set = direct_set;
3246  __kmp_direct_unset = direct_unset;
3247  __kmp_direct_test = direct_test;
3248  __kmp_indirect_set = indirect_set;
3249  __kmp_indirect_unset = indirect_unset;
3250  __kmp_indirect_test = indirect_test;
3251  }
3252  // If the user locks have already been initialized, then return. Allow the
3253  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3254  // new lock tables if they have already been allocated.
3255  if (__kmp_init_user_locks)
3256  return;
3257 
3258  // Initialize lock index table
3259  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3260  __kmp_i_lock_table.table =
3261  (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3262  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3263  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3264  __kmp_i_lock_table.next = 0;
3265 
3266  // Indirect lock size
3267  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3268  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3269 #if KMP_USE_ADAPTIVE_LOCKS
3270  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3271 #endif
3272  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3273 #if KMP_USE_TSX
3274  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3275 #endif
3276  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3277 #if KMP_USE_FUTEX
3278  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3279 #endif
3280  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3281  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3282  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3283 
3284 // Initialize lock accessor/modifier
3285 #define fill_jumps(table, expand, sep) \
3286  { \
3287  table[locktag##sep##ticket] = expand(ticket); \
3288  table[locktag##sep##queuing] = expand(queuing); \
3289  table[locktag##sep##drdpa] = expand(drdpa); \
3290  }
3291 
3292 #if KMP_USE_ADAPTIVE_LOCKS
3293 #define fill_table(table, expand) \
3294  { \
3295  fill_jumps(table, expand, _); \
3296  table[locktag_adaptive] = expand(queuing); \
3297  fill_jumps(table, expand, _nested_); \
3298  }
3299 #else
3300 #define fill_table(table, expand) \
3301  { \
3302  fill_jumps(table, expand, _); \
3303  fill_jumps(table, expand, _nested_); \
3304  }
3305 #endif // KMP_USE_ADAPTIVE_LOCKS
3306 
3307 #define expand(l) \
3308  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3309  fill_table(__kmp_indirect_set_location, expand);
3310 #undef expand
3311 #define expand(l) \
3312  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3313  fill_table(__kmp_indirect_set_flags, expand);
3314 #undef expand
3315 #define expand(l) \
3316  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3317  fill_table(__kmp_indirect_get_location, expand);
3318 #undef expand
3319 #define expand(l) \
3320  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3321  fill_table(__kmp_indirect_get_flags, expand);
3322 #undef expand
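// For reference, each fill_table() call above expands (via fill_jumps) into
// plain assignments; e.g. the first one stores, for the ticket tag:
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//
// and likewise for queuing, drdpa, the adaptive alias of queuing, and the
// corresponding nested tags.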
3323 
3324  __kmp_init_user_locks = TRUE;
3325 }
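// Dispatch sketch: once the jump tables above are filled in, every user-lock
// operation is one table-indexed indirect call. For an indirect lock this is
// what __kmp_set_indirect_lock() earlier in this file does, roughly:
//
//   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
//   KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); // selects __kmp_indirect_set[l->type]
//
// (The KMP_* lookup macros are defined in kmp_lock.h; their exact expansion is
// not shown here.)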
3326 
3327 // Clean up the lock table.
3328 void __kmp_cleanup_indirect_user_locks() {
3329  kmp_lock_index_t i;
3330  int k;
3331 
3332  // Clean up locks in the pools first (they were already destroyed before going
3333  // into the pools).
3334  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3335  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3336  while (l != NULL) {
3337  kmp_indirect_lock_t *ll = l;
3338  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3339  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3340  ll));
3341  __kmp_free(ll->lock);
3342  ll->lock = NULL;
3343  }
3344  __kmp_indirect_lock_pool[k] = NULL;
3345  }
3346  // Clean up the remaining undestroyed locks.
3347  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3348  kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3349  if (l->lock != NULL) {
3350  // Locks not destroyed explicitly need to be destroyed here.
3351  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3352  KA_TRACE(
3353  20,
3354  ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3355  l));
3356  __kmp_free(l->lock);
3357  }
3358  }
3359  // Free the table
3360  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3361  __kmp_free(__kmp_i_lock_table.table[i]);
3362  __kmp_free(__kmp_i_lock_table.table);
3363 
3364  __kmp_init_user_locks = FALSE;
3365 }
3366 
3367 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3368 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3369 
3370 #else // KMP_USE_DYNAMIC_LOCK
3371 
3372 /* user locks
3373  * They are implemented as a table of function pointers which are set to the
3374  * lock functions of the appropriate kind, once that has been determined. */
3375 
3376 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3377 
3378 size_t __kmp_base_user_lock_size = 0;
3379 size_t __kmp_user_lock_size = 0;
3380 
3381 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3382 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3383  kmp_int32 gtid) = NULL;
3384 
3385 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3386  kmp_int32 gtid) = NULL;
3387 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3388  kmp_int32 gtid) = NULL;
3389 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3390 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3391 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3392 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3393  kmp_int32 gtid) = NULL;
3394 
3395 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3396  kmp_int32 gtid) = NULL;
3397 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3398  kmp_int32 gtid) = NULL;
3399 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3400 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3401 
3402 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3403 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3404 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3405  const ident_t *loc) = NULL;
3406 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3407 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3408  kmp_lock_flags_t flags) = NULL;
3409 
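// Usage sketch for the pointers above (the inline wrappers that call through
// them live in kmp_lock.h, so the exact call form is an assumption): the
// runtime selects one lock kind at startup, e.g.
//
//   __kmp_user_lock_kind = lk_ticket;
//   __kmp_set_user_lock_vptrs(lk_ticket);
//
// after which every user-lock operation is an indirect call such as
// (*__kmp_acquire_user_lock_with_checks_)(lck, gtid).
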
3410 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3411  switch (user_lock_kind) {
3412  case lk_default:
3413  default:
3414  KMP_ASSERT(0);
3415 
3416  case lk_tas: {
3417  __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3418  __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3419 
3420  __kmp_get_user_lock_owner_ =
3421  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3422 
3423  if (__kmp_env_consistency_check) {
3424  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3425  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3426  } else {
3427  KMP_BIND_USER_LOCK(tas);
3428  KMP_BIND_NESTED_USER_LOCK(tas);
3429  }
3430 
3431  __kmp_destroy_user_lock_ =
3432  (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3433 
3434  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3435 
3436  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3437 
3438  __kmp_set_user_lock_location_ =
3439  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3440 
3441  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3442 
3443  __kmp_set_user_lock_flags_ =
3444  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3445  } break;
3446 
3447 #if KMP_USE_FUTEX
3448 
3449  case lk_futex: {
3450  __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3451  __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3452 
3453  __kmp_get_user_lock_owner_ =
3454  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3455 
3456  if (__kmp_env_consistency_check) {
3457  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3458  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3459  } else {
3460  KMP_BIND_USER_LOCK(futex);
3461  KMP_BIND_NESTED_USER_LOCK(futex);
3462  }
3463 
3464  __kmp_destroy_user_lock_ =
3465  (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3466 
3467  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3468 
3469  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3470 
3471  __kmp_set_user_lock_location_ =
3472  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3473 
3474  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3475 
3476  __kmp_set_user_lock_flags_ =
3477  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3478  } break;
3479 
3480 #endif // KMP_USE_FUTEX
3481 
3482  case lk_ticket: {
3483  __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3484  __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3485 
3486  __kmp_get_user_lock_owner_ =
3487  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3488 
3489  if (__kmp_env_consistency_check) {
3490  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3491  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3492  } else {
3493  KMP_BIND_USER_LOCK(ticket);
3494  KMP_BIND_NESTED_USER_LOCK(ticket);
3495  }
3496 
3497  __kmp_destroy_user_lock_ =
3498  (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3499 
3500  __kmp_is_user_lock_initialized_ =
3501  (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3502 
3503  __kmp_get_user_lock_location_ =
3504  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3505 
3506  __kmp_set_user_lock_location_ = (void (*)(
3507  kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3508 
3509  __kmp_get_user_lock_flags_ =
3510  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3511 
3512  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3513  &__kmp_set_ticket_lock_flags);
3514  } break;
3515 
3516  case lk_queuing: {
3517  __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3518  __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3519 
3520  __kmp_get_user_lock_owner_ =
3521  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3522 
3523  if (__kmp_env_consistency_check) {
3524  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3525  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3526  } else {
3527  KMP_BIND_USER_LOCK(queuing);
3528  KMP_BIND_NESTED_USER_LOCK(queuing);
3529  }
3530 
3531  __kmp_destroy_user_lock_ =
3532  (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3533 
3534  __kmp_is_user_lock_initialized_ =
3535  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3536 
3537  __kmp_get_user_lock_location_ =
3538  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3539 
3540  __kmp_set_user_lock_location_ = (void (*)(
3541  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3542 
3543  __kmp_get_user_lock_flags_ =
3544  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3545 
3546  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3547  &__kmp_set_queuing_lock_flags);
3548  } break;
3549 
3550 #if KMP_USE_ADAPTIVE_LOCKS
3551  case lk_adaptive: {
3552  __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3553  __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3554 
3555  __kmp_get_user_lock_owner_ =
3556  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3557 
3558  if (__kmp_env_consistency_check) {
3559  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3560  } else {
3561  KMP_BIND_USER_LOCK(adaptive);
3562  }
3563 
3564  __kmp_destroy_user_lock_ =
3565  (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3566 
3567  __kmp_is_user_lock_initialized_ =
3568  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3569 
3570  __kmp_get_user_lock_location_ =
3571  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3572 
3573  __kmp_set_user_lock_location_ = (void (*)(
3574  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3575 
3576  __kmp_get_user_lock_flags_ =
3577  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3578 
3579  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3580  &__kmp_set_queuing_lock_flags);
3581 
3582  } break;
3583 #endif // KMP_USE_ADAPTIVE_LOCKS
3584 
3585  case lk_drdpa: {
3586  __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3587  __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3588 
3589  __kmp_get_user_lock_owner_ =
3590  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3591 
3592  if (__kmp_env_consistency_check) {
3593  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3594  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3595  } else {
3596  KMP_BIND_USER_LOCK(drdpa);
3597  KMP_BIND_NESTED_USER_LOCK(drdpa);
3598  }
3599 
3600  __kmp_destroy_user_lock_ =
3601  (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3602 
3603  __kmp_is_user_lock_initialized_ =
3604  (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3605 
3606  __kmp_get_user_lock_location_ =
3607  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3608 
3609  __kmp_set_user_lock_location_ = (void (*)(
3610  kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3611 
3612  __kmp_get_user_lock_flags_ =
3613  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3614 
3615  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3616  &__kmp_set_drdpa_lock_flags);
3617  } break;
3618  }
3619 }
3620 
3621 // ----------------------------------------------------------------------------
3622 // User lock table & lock allocation
3623 
3624 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3625 kmp_user_lock_p __kmp_lock_pool = NULL;
3626 
3627 // Lock block-allocation support.
3628 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3629 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3630 
3631 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3632  // Assume that kmp_global_lock is held upon entry/exit.
3633  kmp_lock_index_t index;
3634  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3635  kmp_lock_index_t size;
3636  kmp_user_lock_p *table;
3637  // Reallocate lock table.
3638  if (__kmp_user_lock_table.allocated == 0) {
3639  size = 1024;
3640  } else {
3641  size = __kmp_user_lock_table.allocated * 2;
3642  }
3643  table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3644  KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3645  sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3646  table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3647  // We cannot free the previous table now, since it may be in use by other
 3648  // threads. So save the pointer to the previous table in the first
 3649  // element of the new table. All the tables will be organized into a list,
 3650  // and can be freed when the library is shutting down.
3651  __kmp_user_lock_table.table = table;
3652  __kmp_user_lock_table.allocated = size;
3653  }
3654  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3655  __kmp_user_lock_table.allocated);
3656  index = __kmp_user_lock_table.used;
3657  __kmp_user_lock_table.table[index] = lck;
3658  ++__kmp_user_lock_table.used;
3659  return index;
3660 }
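// Growth sketch: after at least one reallocation the tables form a singly
// linked chain through element [0], roughly
//
//   __kmp_user_lock_table.table -> [ prev_table | lck1 | lck2 | ... ]
//   prev_table                  -> [ NULL       | lck1 | ...        ]
//
// __kmp_cleanup_user_locks() below walks exactly this chain (table_ptr[0])
// when it frees the tables at shutdown.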
3661 
3662 static kmp_user_lock_p __kmp_lock_block_allocate() {
3663  // Assume that kmp_global_lock is held upon entry/exit.
3664  static int last_index = 0;
3665  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3666  // Restart the index.
3667  last_index = 0;
3668  // Need to allocate a new block.
3669  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3670  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3671  char *buffer =
3672  (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3673  // Set up the new block.
3674  kmp_block_of_locks *new_block =
3675  (kmp_block_of_locks *)(&buffer[space_for_locks]);
3676  new_block->next_block = __kmp_lock_blocks;
3677  new_block->locks = (void *)buffer;
3678  // Publish the new block.
3679  KMP_MB();
3680  __kmp_lock_blocks = new_block;
3681  }
3682  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3683  ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3684  last_index++;
3685  return ret;
3686 }
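// Block layout sketch: one __kmp_allocate() call provides both the lock
// storage and the block descriptor, laid out as
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//             ^ new_block->locks                 ^ new_block
//
// which is why __kmp_cleanup_user_locks() frees block_ptr->locks but never
// block_ptr itself.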
3687 
 3688 // Get memory for a lock. It may be freshly allocated memory or memory reused
 3689 // from the lock pool.
3690 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3691  kmp_lock_flags_t flags) {
3692  kmp_user_lock_p lck;
3693  kmp_lock_index_t index;
3694  KMP_DEBUG_ASSERT(user_lock);
3695 
3696  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3697 
3698  if (__kmp_lock_pool == NULL) {
3699  // Lock pool is empty. Allocate new memory.
3700 
 3701  // ANNOTATION: Found no good way to express the synchronisation
 3702  // between allocation and usage, so ignore the allocation.
3703  ANNOTATE_IGNORE_WRITES_BEGIN();
3704  if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3705  lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3706  } else {
3707  lck = __kmp_lock_block_allocate();
3708  }
3709  ANNOTATE_IGNORE_WRITES_END();
3710 
 3711  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
 3712  // and so the debugger has info on all allocated locks.
3713  index = __kmp_lock_table_insert(lck);
3714  } else {
3715  // Pick up lock from pool.
3716  lck = __kmp_lock_pool;
3717  index = __kmp_lock_pool->pool.index;
3718  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3719  }
3720 
3721  // We could potentially differentiate between nested and regular locks
3722  // here, and do the lock table lookup for regular locks only.
3723  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3724  *((kmp_lock_index_t *)user_lock) = index;
3725  } else {
3726  *((kmp_user_lock_p *)user_lock) = lck;
3727  }
3728 
 3729  // Mark the lock if it is a critical section lock.
3730  __kmp_set_user_lock_flags(lck, flags);
3731 
 3732  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
3733 
3734  return lck;
3735 }
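// Encoding sketch: what the user's lock variable ends up holding depends on
// whether a lock index fits in it:
//
//   OMP_LOCK_T_SIZE <  sizeof(void *): *user_lock is an index into
//                                      __kmp_user_lock_table.table[]
//   OMP_LOCK_T_SIZE >= sizeof(void *): *user_lock is the kmp_user_lock_p itself
//
// __kmp_lookup_user_lock() below decodes the same two cases.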
3736 
 3737 // Return the lock's memory to the pool for reuse.
3738 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3739  kmp_user_lock_p lck) {
3740  KMP_DEBUG_ASSERT(user_lock != NULL);
3741  KMP_DEBUG_ASSERT(lck != NULL);
3742 
3743  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3744 
3745  lck->pool.next = __kmp_lock_pool;
3746  __kmp_lock_pool = lck;
3747  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3748  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3749  KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3750  lck->pool.index = index;
3751  }
3752 
3753  __kmp_release_lock(&__kmp_global_lock, gtid);
3754 }
3755 
3756 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3757  kmp_user_lock_p lck = NULL;
3758 
3759  if (__kmp_env_consistency_check) {
3760  if (user_lock == NULL) {
3761  KMP_FATAL(LockIsUninitialized, func);
3762  }
3763  }
3764 
3765  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3766  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3767  if (__kmp_env_consistency_check) {
3768  if (!(0 < index && index < __kmp_user_lock_table.used)) {
3769  KMP_FATAL(LockIsUninitialized, func);
3770  }
3771  }
3772  KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3773  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3774  lck = __kmp_user_lock_table.table[index];
3775  } else {
3776  lck = *((kmp_user_lock_p *)user_lock);
3777  }
3778 
3779  if (__kmp_env_consistency_check) {
3780  if (lck == NULL) {
3781  KMP_FATAL(LockIsUninitialized, func);
3782  }
3783  }
3784 
3785  return lck;
3786 }
3787 
3788 void __kmp_cleanup_user_locks(void) {
 3789  // Reset the lock pool. Don't worry about locks in the pool--we will free
 3790  // them when iterating through the lock table (it has all locks, dead or alive).
3791  __kmp_lock_pool = NULL;
3792 
3793 #define IS_CRITICAL(lck) \
3794  ((__kmp_get_user_lock_flags_ != NULL) && \
3795  ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3796 
3797  // Loop through lock table, free all locks.
3798  // Do not free item [0], it is reserved for lock tables list.
3799  //
3800  // FIXME - we are iterating through a list of (pointers to) objects of type
3801  // union kmp_user_lock, but we have no way of knowing whether the base type is
3802  // currently "pool" or whatever the global user lock type is.
3803  //
3804  // We are relying on the fact that for all of the user lock types
3805  // (except "tas"), the first field in the lock struct is the "initialized"
3806  // field, which is set to the address of the lock object itself when
3807  // the lock is initialized. When the union is of type "pool", the
3808  // first field is a pointer to the next object in the free list, which
3809  // will not be the same address as the object itself.
3810  //
3811  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3812  // for "pool" objects on the free list. This must happen as the "location"
3813  // field of real user locks overlaps the "index" field of "pool" objects.
3814  //
3815  // It would be better to run through the free list, and remove all "pool"
3816  // objects from the lock table before executing this loop. However,
3817  // "pool" objects do not always have their index field set (only on
3818  // lin_32e), and I don't want to search the lock table for the address
3819  // of every "pool" object on the free list.
3820  while (__kmp_user_lock_table.used > 1) {
3821  const ident *loc;
3822 
 3823  // Reduce __kmp_user_lock_table.used before freeing the lock,
 3824  // so that the state of the locks stays consistent.
3825  kmp_user_lock_p lck =
3826  __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3827 
3828  if ((__kmp_is_user_lock_initialized_ != NULL) &&
3829  (*__kmp_is_user_lock_initialized_)(lck)) {
3830  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3831  // it is NOT a critical section (user is not responsible for destroying
 3832  // criticals) AND we know the source location to report.
3833  if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3834  ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3835  (loc->psource != NULL)) {
3836  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
3837  KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3838  __kmp_str_loc_free(&str_loc);
3839  }
3840 
3841 #ifdef KMP_DEBUG
3842  if (IS_CRITICAL(lck)) {
3843  KA_TRACE(
3844  20,
3845  ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3846  lck, *(void **)lck));
3847  } else {
3848  KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3849  *(void **)lck));
3850  }
3851 #endif // KMP_DEBUG
3852 
 3853  // Clean up the lock's internal dynamic resources (particularly for drdpa locks).
3854  __kmp_destroy_user_lock(lck);
3855  }
3856 
3857  // Free the lock if block allocation of locks is not used.
3858  if (__kmp_lock_blocks == NULL) {
3859  __kmp_free(lck);
3860  }
3861  }
3862 
3863 #undef IS_CRITICAL
3864 
 3865  // Delete the lock table(s).
3866  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3867  __kmp_user_lock_table.table = NULL;
3868  __kmp_user_lock_table.allocated = 0;
3869 
3870  while (table_ptr != NULL) {
3871  // In the first element we saved the pointer to the previous
3872  // (smaller) lock table.
3873  kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
3874  __kmp_free(table_ptr);
3875  table_ptr = next;
3876  }
3877 
3878  // Free buffers allocated for blocks of locks.
3879  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3880  __kmp_lock_blocks = NULL;
3881 
3882  while (block_ptr != NULL) {
3883  kmp_block_of_locks_t *next = block_ptr->next_block;
3884  __kmp_free(block_ptr->locks);
3885  // *block_ptr itself was allocated at the end of the locks vector.
3886  block_ptr = next;
3887  }
3888 
3889  TCW_4(__kmp_init_user_locks, FALSE);
3890 }
3891 
3892 #endif // KMP_USE_DYNAMIC_LOCK