LLVM OpenMP* Runtime Library
kmp_stats.h
1 #ifndef KMP_STATS_H
2 #define KMP_STATS_H
3 
8 //===----------------------------------------------------------------------===//
9 //
10 // The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "kmp_config.h"
18 
19 #if KMP_STATS_ENABLED
20 /* Statistics accumulator.
21  Accumulates number of samples and computes min, max, mean, standard deviation
22  on the fly.
23 
24  Online variance calculation algorithm from
25  http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
26  */
27 
28 #include "kmp_stats_timing.h"
29 #include <limits>
30 #include <math.h>
31 #include <new> // placement new
32 #include <stdint.h>
33 #include <string>
34 #include <vector>
35 
36 /* Enable developer statistics here if you want them. They are more detailed
37  than is useful for application characterisation and are intended for the
38  runtime library developer. */
39 // #define KMP_DEVELOPER_STATS 1
40 
47  noTotal = 1 << 0,
48  onlyInMaster = 1 << 1,
49  noUnits = 1 << 2,
50  notInMaster = 1 << 3,
51  logEvent = 1 << 4
52 };
54 
61  IDLE,
62  SERIAL_REGION,
63  FORK_JOIN_BARRIER,
64  PLAIN_BARRIER,
65  TASKWAIT,
66  TASKYIELD,
67  TASKGROUP,
68  IMPLICIT_TASK,
69  EXPLICIT_TASK
70 };
71 
90 // clang-format off
/* X-macro enumerating every statistics counter.
   Expands to macro(name, flags, arg) once per counter, in the order listed.
   flags is either 0 or an OR of stats_flags_e values.
   Add new counters by appending an entry to this list. */
#define KMP_FOREACH_COUNTER(macro, arg) \
  macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
  macro(OMP_NESTED_PARALLEL,  0, arg) \
  macro(OMP_FOR_static,       0, arg) \
  macro(OMP_FOR_static_steal, 0, arg) \
  macro(OMP_FOR_dynamic,      0, arg) \
  macro(OMP_DISTRIBUTE,       0, arg) \
  macro(OMP_BARRIER,          0, arg) \
  macro(OMP_CRITICAL,         0, arg) \
  macro(OMP_SINGLE,           0, arg) \
  macro(OMP_MASTER,           0, arg) \
  macro(OMP_TEAMS,            0, arg) \
  macro(OMP_set_lock,         0, arg) \
  macro(OMP_test_lock,        0, arg) \
  macro(REDUCE_wait,          0, arg) \
  macro(REDUCE_nowait,        0, arg) \
  macro(OMP_TASKYIELD,        0, arg) \
  macro(OMP_TASKLOOP,         0, arg) \
  macro(TASK_executed,        0, arg) \
  macro(TASK_cancelled,       0, arg) \
  macro(TASK_stolen,          0, arg)
112 // clang-format on
113 
132 // clang-format off
/* X-macro enumerating every timer statistic.
   Expands to macro(name, flags, arg) once per timer, in the order listed, and
   then appends whatever KMP_FOREACH_DEVELOPER_TIMER expands to.
   flags is either 0 or an OR of stats_flags_e values. */
#define KMP_FOREACH_TIMER(macro, arg) \
  macro(OMP_worker_thread_life,  stats_flags_e::logEvent, arg) \
  macro(FOR_static_scheduling,   0, arg) \
  macro(FOR_dynamic_scheduling,  0, arg) \
  macro(OMP_critical,            0, arg) \
  macro(OMP_critical_wait,       0, arg) \
  macro(OMP_single,              0, arg) \
  macro(OMP_master,              0, arg) \
  macro(OMP_idle,                stats_flags_e::logEvent, arg) \
  macro(OMP_plain_barrier,       stats_flags_e::logEvent, arg) \
  macro(OMP_fork_barrier,        stats_flags_e::logEvent, arg) \
  macro(OMP_join_barrier,        stats_flags_e::logEvent, arg) \
  macro(OMP_parallel,            stats_flags_e::logEvent, arg) \
  macro(OMP_task_immediate,      0, arg) \
  macro(OMP_task_taskwait,       0, arg) \
  macro(OMP_task_taskyield,      0, arg) \
  macro(OMP_task_taskgroup,      0, arg) \
  macro(OMP_task_join_bar,       0, arg) \
  macro(OMP_task_plain_bar,      0, arg) \
  macro(OMP_serial,              stats_flags_e::logEvent, arg) \
  macro(OMP_taskloop_scheduling, 0, arg) \
  macro(OMP_set_numthreads,      stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_PARALLEL_args,       stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_static_iterations,   stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_dynamic_iterations,  stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_static_steal_stolen, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_static_steal_chunks, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
167 // clang-format on
168 
169 // OMP_start_end -- Time from when OpenMP is initialized until the
170 // stats are printed at exit
171 // OMP_serial -- Thread zero time executing serial code
172 // OMP_work -- Elapsed time in code dispatched by a fork (measured
173 // in the thread)
174 // OMP_barrier -- Time at "real" barriers (includes task time)
175 // FOR_static_scheduling -- Time spent doing scheduling for a static "for"
176 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
177 // OMP_idle -- Worker threads time spent waiting for inclusion in
178 // a parallel region
179 // OMP_plain_barrier -- Time spent in a barrier construct
180 // OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a
181 // parallel region
182 // OMP_parallel -- Time spent inside a parallel construct
183 // OMP_task_immediate -- Time spent executing non-deferred tasks
184 // OMP_task_taskwait -- Time spent executing tasks inside a taskwait
185 // construct
186 // OMP_task_taskyield -- Time spent executing tasks inside a taskyield
187 // construct
188 // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup
189 // construct
190 // OMP_task_join_bar -- Time spent executing tasks inside a join barrier
191 // OMP_task_plain_bar -- Time spent executing tasks inside a barrier
192 // construct
193 // OMP_single -- Time spent executing a "single" region
194 // OMP_master -- Time spent executing a "master" region
195 // OMP_set_numthreads -- Values passed to omp_set_num_threads
196 // OMP_PARALLEL_args -- Number of arguments passed to a parallel region
197 // FOR_static_iterations -- Number of available parallel chunks of work in a
198 // static for
199 // FOR_dynamic_iterations -- Number of available parallel chunks of work in a
200 // dynamic for
201 // Both adjust for any chunking, so if there were an
202 // iteration count of 20 but a chunk size of 10, we'd
203 // record 2.
204 
#if (KMP_DEVELOPER_STATS)
// Developer timers: of interest to people working on the runtime library
// itself rather than to end users. They are compiled in only when
// KMP_DEVELOPER_STATS is set, on top of the ordinary statistics.
//
// KMP_fork_barrier       -- time in __kmp_fork_barrier
// KMP_join_barrier       -- time in __kmp_join_barrier
// KMP_barrier            -- time in __kmp_barrier
// KMP_end_split_barrier  -- time in __kmp_end_split_barrier
// KMP_setup_icv_copy     -- time in __kmp_setup_icv_copy
// KMP_icv_copy           -- start/stop timer for any ICV copying
// KMP_linear_gather      -- time in __kmp_linear_barrier_gather
// KMP_linear_release     -- time in __kmp_linear_barrier_release
// KMP_tree_gather        -- time in __kmp_tree_barrier_gather
// KMP_tree_release       -- time in __kmp_tree_barrier_release
// KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release      -- time in __kmp_hyper_barrier_release
//
// NOTE(review): the list above is out of step with the macro below.
// KMP_fork_barrier, KMP_join_barrier, KMP_barrier and KMP_icv_copy are
// described but not defined, while KMP_fork_call, KMP_join_call,
// KMP_hier_gather, KMP_hier_release, USER_resume, USER_suspend,
// KMP_allocate_team and USER_icv_copy are defined but not described.
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
  macro(KMP_fork_call,         0, arg) \
  macro(KMP_join_call,         0, arg) \
  macro(KMP_end_split_barrier, 0, arg) \
  macro(KMP_hier_gather,       0, arg) \
  macro(KMP_hier_release,      0, arg) \
  macro(KMP_hyper_gather,      0, arg) \
  macro(KMP_hyper_release,     0, arg) \
  macro(KMP_linear_gather,     0, arg) \
  macro(KMP_linear_release,    0, arg) \
  macro(KMP_tree_gather,       0, arg) \
  macro(KMP_tree_release,      0, arg) \
  macro(USER_resume,           0, arg) \
  macro(USER_suspend,          0, arg) \
  macro(KMP_allocate_team,     0, arg) \
  macro(KMP_setup_icv_copy,    0, arg) \
  macro(USER_icv_copy,         0, arg)
#else
// Developer statistics disabled: the list contributes no timers.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
242 // clang-format on
243 
263 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
264 
265 #define ENUMERATE(name, ignore, prefix) prefix##name,
266 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
267 
268 enum explicit_timer_e {
269  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
270 };
271 
272 enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
273 #undef ENUMERATE
274 
275 class timerPair {
276  explicit_timer_e timer_index;
277  timer_e timer;
278 
279 public:
280  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
281  inline explicit_timer_e get_index() const { return timer_index; }
282  inline timer_e get_timer() const { return timer; }
283  bool operator==(const timerPair &rhs) {
284  return this->get_index() == rhs.get_index();
285  }
286  bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
287 };
288 
// Running statistics accumulator: keeps the sample count, minimum, maximum and
// mean, plus the quantity m2 from which getSD() derives the standard
// deviation. addSample(), scale(), operator+= and format() are defined
// elsewhere.
class statistic {
  double minVal;
  double maxVal;
  double meanVal;
  double m2; // presumably the sum of squared deviations from the mean
             // (online variance algorithm) -- confirm against addSample()
  uint64_t sampleCount;

public:
  // Starts out empty (see reset()).
  statistic() { reset(); }
  statistic(statistic const &o)
      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
        sampleCount(o.sampleCount) {}

  double getMin() const { return minVal; }
  double getMean() const { return meanVal; }
  double getMax() const { return maxVal; }
  uint64_t getCount() const { return sampleCount; }
  // Standard deviation, sqrt(m2 / count). With no samples the quotient would
  // be 0.0 / 0, i.e. NaN, so an empty accumulator reports 0 instead.
  double getSD() const {
    if (sampleCount == 0)
      return 0.0;
    return sqrt(m2 / sampleCount);
  }
  // Sum of all samples, reconstructed as count * mean.
  double getTotal() const { return sampleCount * meanVal; }

  // Returns the accumulator to its empty state. min and max are set to the
  // extreme finite doubles so that the first sample replaces both.
  void reset() {
    minVal = std::numeric_limits<double>::max();
    maxVal = -std::numeric_limits<double>::max();
    meanVal = 0.0;
    m2 = 0.0;
    sampleCount = 0;
  }
  void addSample(double sample);
  void scale(double factor);
  // Scales by the reciprocal of f; f is not checked for zero.
  void scaleDown(double f) { scale(1. / f); }
  statistic &operator+=(statistic const &other);

  std::string format(char unit, bool total = false) const;
};
323 
// Static description of one statistic (timer or counter).
struct statInfo {
  const char *name; // name returned by timeStat::name() / counter::name()
  uint32_t flags;   // tested against stats_flags_e bits by the accessors
};
328 
329 class timeStat : public statistic {
330  static statInfo timerInfo[];
331 
332 public:
333  timeStat() : statistic() {}
334  static const char *name(timer_e e) { return timerInfo[e].name; }
335  static bool noTotal(timer_e e) {
336  return timerInfo[e].flags & stats_flags_e::noTotal;
337  }
338  static bool masterOnly(timer_e e) {
339  return timerInfo[e].flags & stats_flags_e::onlyInMaster;
340  }
341  static bool workerOnly(timer_e e) {
342  return timerInfo[e].flags & stats_flags_e::notInMaster;
343  }
344  static bool noUnits(timer_e e) {
345  return timerInfo[e].flags & stats_flags_e::noUnits;
346  }
347  static bool logEvent(timer_e e) {
348  return timerInfo[e].flags & stats_flags_e::logEvent;
349  }
350  static void clearEventFlags() {
351  for (int i = 0; i < TIMER_LAST; i++) {
352  timerInfo[i].flags &= (~(stats_flags_e::logEvent));
353  }
354  }
355 };
356 
357 // Where we need explicitly to start and end the timer, this version can be used
358 // Since these timers normally aren't nicely scoped, so don't have a good place
359 // to live on the stack of the thread, they're more work to use.
360 class explicitTimer {
361  timeStat *stat;
362  tsc_tick_count startTime;
363  tsc_tick_count pauseStartTime;
364  tsc_tick_count::tsc_interval_t totalPauseTime;
365 
366 public:
367  explicitTimer()
368  : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
369  explicitTimer(timeStat *s)
370  : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
371 
372  void setStat(timeStat *s) { stat = s; }
373  void start(timer_e timerEnumValue);
374  void pause() { pauseStartTime = tsc_tick_count::now(); }
375  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
376  void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
377  void reset() {
378  startTime = 0;
379  pauseStartTime = 0;
380  totalPauseTime = 0;
381  }
382 };
383 
384 // Where all you need is to time a block, this is enough.
385 // (It avoids the need to have an explicit end, leaving the scope suffices.)
386 class blockTimer : public explicitTimer {
387  timer_e timerEnumValue;
388 
389 public:
390  blockTimer(timeStat *s, timer_e newTimerEnumValue)
391  : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
392  start(timerEnumValue);
393  }
394  ~blockTimer() { stop(timerEnumValue); }
395 };
396 
397 // Where you need to partition a threads clock ticks into separate states
398 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
399 // DOING_NOTHING would render these conditions:
400 // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
401 // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
402 // versa
403 class partitionedTimers {
404 private:
405  explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
406  std::vector<timerPair> timer_stack;
407 
408 public:
409  partitionedTimers();
410  void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
411  void init(timerPair timer_index);
412  void push(timerPair timer_index);
413  void pop();
414  void windup();
415 };
416 
417 // Special wrapper around the partioned timers to aid timing code blocks
418 // It avoids the need to have an explicit end, leaving the scope suffices.
419 class blockPartitionedTimer {
420  partitionedTimers *part_timers;
421  timerPair timer_pair;
422 
423 public:
424  blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
425  : part_timers(pt), timer_pair(tp) {
426  part_timers->push(timer_pair);
427  }
428  ~blockPartitionedTimer() { part_timers->pop(); }
429 };
430 
431 // Special wrapper around the thread state to aid in keeping state in code
432 // blocks It avoids the need to have an explicit end, leaving the scope
433 // suffices.
434 class blockThreadState {
435  stats_state_e *state_pointer;
436  stats_state_e old_state;
437 
438 public:
439  blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
440  : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
441  *state_pointer = new_state;
442  }
443  ~blockThreadState() { *state_pointer = old_state; }
444 };
445 
446 // If all you want is a count, then you can use this...
447 // The individual per-thread counts will be aggregated into a statistic at
448 // program exit.
449 class counter {
450  uint64_t value;
451  static const statInfo counterInfo[];
452 
453 public:
454  counter() : value(0) {}
455  void increment() { value++; }
456  uint64_t getValue() const { return value; }
457  void reset() { value = 0; }
458  static const char *name(counter_e e) { return counterInfo[e].name; }
459  static bool masterOnly(counter_e e) {
460  return counterInfo[e].flags & stats_flags_e::onlyInMaster;
461  }
462 };
463 
464 /* ****************************************************************
465  Class to implement an event
466 
467  There are four components to an event: start time, stop time,
468  nest_level, and timer_name.
469  The start and stop time should be obvious (recorded in clock ticks).
470  The nest_level relates to the bar width in the timeline graph.
471  The timer_name is used to determine which timer event triggered this event.
472 
473  the interface to this class is through four read-only operations:
474  1) getStart() -- returns the start time as 64 bit integer
475  2) getStop() -- returns the stop time as 64 bit integer
476  3) getNestLevel() -- returns the nest level of the event
477  4) getTimerName() -- returns the timer name that triggered event
478 
479  *MORE ON NEST_LEVEL*
480  The nest level is used in the bar graph that represents the timeline.
481  Its main purpose is for showing how events are nested inside each other.
482  For example, say events, A, B, and C are recorded. If the timeline
483  looks like this:
484 
485 Begin -------------------------------------------------------------> Time
486  | | | | | |
487  A B C C B A
488  start start start end end end
489 
490  Then A, B, C will have a nest level of 1, 2, 3 respectively.
491  These values are then used to calculate the barwidth so you can
492  see that inside A, B has occurred, and inside B, C has occurred.
493  Currently, this is shown with A's bar width being larger than B's
494  bar width, and B's bar width being larger than C's bar width.
495 
496 **************************************************************** */
497 class kmp_stats_event {
498  uint64_t start;
499  uint64_t stop;
500  int nest_level;
501  timer_e timer_name;
502 
503 public:
504  kmp_stats_event()
505  : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
506  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
507  : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
508  inline uint64_t getStart() const { return start; }
509  inline uint64_t getStop() const { return stop; }
510  inline int getNestLevel() const { return nest_level; }
511  inline timer_e getTimerName() const { return timer_name; }
512 };
513 
514 /* ****************************************************************
515  Class to implement a dynamically expandable array of events
516 
517  ---------------------------------------------------------
518  | event 1 | event 2 | event 3 | event 4 | ... | event N |
519  ---------------------------------------------------------
520 
521  An event is pushed onto the back of this array at every
522  explicitTimer->stop() call. The event records the thread #,
523  start time, stop time, and nest level related to the bar width.
524 
525  The event vector starts at size INIT_SIZE and grows (doubles in size)
526  if needed. An implication of this behavior is that log(N)
527  reallocations are needed (where N is number of events). If you want
528  to avoid reallocations, then set INIT_SIZE to a large value.
529 
530  the interface to this class is through six operations:
531  1) reset() -- sets the internal_size back to 0 but does not deallocate any
532  memory
533  2) size() -- returns the number of valid elements in the vector
534  3) push_back(start, stop, nest, timer_name) -- pushes an event onto
535  the back of the array
536  4) deallocate() -- frees all memory associated with the vector
537  5) sort() -- sorts the vector by start time
538  6) operator[index] or at(index) -- returns event reference at that index
539 **************************************************************** */
540 class kmp_stats_event_vector {
541  kmp_stats_event *events;
542  int internal_size;
543  int allocated_size;
544  static const int INIT_SIZE = 1024;
545 
546 public:
547  kmp_stats_event_vector() {
548  events =
549  (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
550  internal_size = 0;
551  allocated_size = INIT_SIZE;
552  }
553  ~kmp_stats_event_vector() {}
554  inline void reset() { internal_size = 0; }
555  inline int size() const { return internal_size; }
556  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
557  timer_e name) {
558  int i;
559  if (internal_size == allocated_size) {
560  kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
561  sizeof(kmp_stats_event) * allocated_size * 2);
562  for (i = 0; i < internal_size; i++)
563  tmp[i] = events[i];
564  __kmp_free(events);
565  events = tmp;
566  allocated_size *= 2;
567  }
568  events[internal_size] =
569  kmp_stats_event(start_time, stop_time, nest_level, name);
570  internal_size++;
571  return;
572  }
573  void deallocate();
574  void sort();
575  const kmp_stats_event &operator[](int index) const { return events[index]; }
576  kmp_stats_event &operator[](int index) { return events[index]; }
577  const kmp_stats_event &at(int index) const { return events[index]; }
578  kmp_stats_event &at(int index) { return events[index]; }
579 };
580 
581 /* ****************************************************************
582  Class to implement a doubly-linked, circular, statistics list
583 
584  |---| ---> |---| ---> |---| ---> |---| ---> ... next
585  | | | | | | | |
586  |---| <--- |---| <--- |---| <--- |---| <--- ... prev
587  Sentinel first second third
588  Node node node node
589 
590  The Sentinel Node is the user handle on the list.
591  The first node corresponds to thread 0's statistics.
592  The second node corresponds to thread 1's statistics and so on...
593 
594  Each node has a _timers, _counters, and _explicitTimers array to hold that
595  thread's statistics. The _explicitTimers point to the correct _timer and
596  update its statistics at every stop() call. The explicitTimers' pointers are
597  set up in the constructor. Each node also has an event vector to hold that
598  thread's timing events. The event vector expands as necessary and records
599  the start-stop times for each timer.
600 
601  The nestLevel variable is for plotting events and is related
602  to the bar width in the timeline graph.
603 
604  Every thread will have a thread local pointer to its node in
605  the list. The sentinel node is used by the master thread to
606  store "dummy" statistics before __kmp_create_worker() is called.
607 **************************************************************** */
608 class kmp_stats_list {
609  int gtid;
610  timeStat _timers[TIMER_LAST + 1];
611  counter _counters[COUNTER_LAST + 1];
612  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
613  partitionedTimers _partitionedTimers;
614  int _nestLevel; // one per thread
615  kmp_stats_event_vector _event_vector;
616  kmp_stats_list *next;
617  kmp_stats_list *prev;
618  stats_state_e state;
619  int thread_is_idle_flag;
620 
621 public:
622  kmp_stats_list()
623  : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
624  thread_is_idle_flag(0) {
625 #define doInit(name, ignore1, ignore2) \
626  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
627  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \
628  getExplicitTimer(EXPLICIT_TIMER_##name));
629  KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
630 #undef doInit
631  }
632  ~kmp_stats_list() {}
633  inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
634  inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
635  inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
636  return &_explicitTimers[idx];
637  }
638  inline partitionedTimers *getPartitionedTimers() {
639  return &_partitionedTimers;
640  }
641  inline timeStat *getTimers() { return _timers; }
642  inline counter *getCounters() { return _counters; }
643  inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
644  inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
645  inline void resetEventVector() { _event_vector.reset(); }
646  inline void incrementNestValue() { _nestLevel++; }
647  inline int getNestValue() { return _nestLevel; }
648  inline void decrementNestValue() { _nestLevel--; }
649  inline int getGtid() const { return gtid; }
650  inline void setGtid(int newgtid) { gtid = newgtid; }
651  inline void setState(stats_state_e newstate) { state = newstate; }
652  inline stats_state_e getState() const { return state; }
653  inline stats_state_e *getStatePointer() { return &state; }
654  inline bool isIdle() { return thread_is_idle_flag == 1; }
655  inline void setIdleFlag() { thread_is_idle_flag = 1; }
656  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
657  kmp_stats_list *push_back(int gtid); // returns newly created list node
658  inline void push_event(uint64_t start_time, uint64_t stop_time,
659  int nest_level, timer_e name) {
660  _event_vector.push_back(start_time, stop_time, nest_level, name);
661  }
662  void deallocate();
663  class iterator;
664  kmp_stats_list::iterator begin();
665  kmp_stats_list::iterator end();
666  int size();
667  class iterator {
668  kmp_stats_list *ptr;
669  friend kmp_stats_list::iterator kmp_stats_list::begin();
670  friend kmp_stats_list::iterator kmp_stats_list::end();
671 
672  public:
673  iterator();
674  ~iterator();
675  iterator operator++();
676  iterator operator++(int dummy);
677  iterator operator--();
678  iterator operator--(int dummy);
679  bool operator!=(const iterator &rhs);
680  bool operator==(const iterator &rhs);
681  kmp_stats_list *operator*() const; // dereference operator
682  };
683 };
684 
685 /* ****************************************************************
686  Class to encapsulate all output functions and the environment variables
687 
688  This module holds filenames for various outputs (normal stats, events, plot
689  file), as well as coloring information for the plot file.
690 
691  The filenames and flags variables are read from environment variables.
692  These are read once by the constructor of the global variable
693  __kmp_stats_output which calls init().
694 
695  During this init() call, event flags for the timeStat::timerInfo[] global
696  array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
697 
698  The only interface function that is public is outputStats(heading). This
699  function should print out everything it needs to, either to files or stderr,
700  depending on the environment variables described below
701 
702  ENVIRONMENT VARIABLES:
703  KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
704  file, otherwise, print to stderr
705  KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
706  either KMP_STATS_FILE or stderr
707  KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
708  otherwise, the plot file is sent to "events.plt"
709  KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
710  events
711  KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
712  otherwise, output is sent to "events.dat"
713 **************************************************************** */
714 class kmp_stats_output_module {
715 
716 public:
717  struct rgb_color {
718  float r;
719  float g;
720  float b;
721  };
722 
723 private:
724  std::string outputFileName;
725  static const char *eventsFileName;
726  static const char *plotFileName;
727  static int printPerThreadFlag;
728  static int printPerThreadEventsFlag;
729  static const rgb_color globalColorArray[];
730  static rgb_color timerColorInfo[];
731 
732  void init();
733  static void setupEventColors();
734  static void printPloticusFile();
735  static void printHeaderInfo(FILE *statsOut);
736  static void printTimerStats(FILE *statsOut, statistic const *theStats,
737  statistic const *totalStats);
738  static void printCounterStats(FILE *statsOut, statistic const *theStats);
739  static void printCounters(FILE *statsOut, counter const *theCounters);
740  static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
741  int gtid);
742  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
743  static void windupExplicitTimers();
744  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
745 
746 public:
747  kmp_stats_output_module() { init(); }
748  void outputStats(const char *heading);
749 };
750 
751 #ifdef __cplusplus
752 extern "C" {
753 #endif
754 void __kmp_stats_init();
755 void __kmp_stats_fini();
756 void __kmp_reset_stats();
757 void __kmp_output_stats(const char *);
758 void __kmp_accumulate_stats_at_exit(void);
759 // thread local pointer to stats node within list
760 extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
761 // head to stats list.
762 extern kmp_stats_list *__kmp_stats_list;
763 // lock for __kmp_stats_list
764 extern kmp_tas_lock_t __kmp_stats_lock;
765 // reference start time
766 extern tsc_tick_count __kmp_stats_start_time;
767 // interface to output
768 extern kmp_stats_output_module __kmp_stats_output;
769 
770 #ifdef __cplusplus
771 }
772 #endif
773 
774 // Simple, standard interfaces that drop out completely if stats aren't enabled
775 
// Declares a blockTimer local named __BLOCKTIME__ for the calling thread's
// TIMER_<name> statistic. The blockTimer starts on construction and stops
// when the enclosing scope is left.
#define KMP_TIME_BLOCK(name) \
  blockTimer __BLOCKTIME__( \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)

// Adds value as a sample to the calling thread's TIMER_<name> statistic.
#define KMP_COUNT_VALUE(name, value) \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)

// Increments the calling thread's COUNTER_<name> counter.
#define KMP_COUNT_BLOCK(name) \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()

// Calls start(TIMER_<name>) on the calling thread's explicit timer <name>.
#define KMP_START_EXPLICIT_TIMER(name) \
  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
      ->start(TIMER_##name)

// Calls stop(TIMER_<name>) on the calling thread's explicit timer <name>.
#define KMP_STOP_EXPLICIT_TIMER(name) \
  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
      ->stop(TIMER_##name)

// Forwards heading_string to __kmp_output_stats().
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)

// Calls init() on the calling thread's partitioned timers with timer <name>.
#define KMP_INIT_PARTITIONED_TIMERS(name) \
  __kmp_stats_thread_ptr->getPartitionedTimers()->init( \
      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Declares a blockPartitionedTimer local named __PBLOCKTIME__, which pushes
// timer <name> now and pops it when the enclosing scope is left.
#define KMP_TIME_PARTITIONED_BLOCK(name) \
  blockPartitionedTimer __PBLOCKTIME__( \
      __kmp_stats_thread_ptr->getPartitionedTimers(), \
      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pushes timer <name> onto the calling thread's partitioned timers.
#define KMP_PUSH_PARTITIONED_TIMER(name) \
  __kmp_stats_thread_ptr->getPartitionedTimers()->push( \
      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pops the calling thread's partitioned timers.
#define KMP_POP_PARTITIONED_TIMER() \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()

// Sets the calling thread's statistics state.
#define KMP_SET_THREAD_STATE(state_name) \
  __kmp_stats_thread_ptr->setState(state_name)

// Yields the calling thread's statistics state.
#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()

// Declares a blockThreadState local named __BTHREADSTATE__, which installs
// state_name now and restores the previous state when the scope is left.
#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
  blockThreadState __BTHREADSTATE__( \
      __kmp_stats_thread_ptr->getStatePointer(), state_name)

// Forwards to __kmp_reset_stats().
#define KMP_RESET_STATS() __kmp_reset_stats()
916 
#if (KMP_DEVELOPER_STATS)
// Developer statistics enabled: each developer macro forwards to the ordinary
// macro of the same kind.
#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#else
// Developer statistics disabled: each developer macro is a no-op expression.
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#endif
933 
934 #else // KMP_STATS_ENABLED
935 
// Null definitions: with statistics disabled every macro expands to the
// no-op expression ((void)0), so call sites compile unchanged.

// Timers and counters.
#define KMP_TIME_BLOCK(n) ((void)0)
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)
#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)

// Output and reset.
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

// Developer statistics.
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)

// Partitioned timers.
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)

// Thread state.
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
959 #endif // KMP_STATS_ENABLED
960 
961 #endif // KMP_STATS_H
statistic is valid only for master
Definition: kmp_stats.h:48
statistic is valid only for non-master threads
Definition: kmp_stats.h:50
do not show a TOTAL_aggregation for this statistic
Definition: kmp_stats.h:47
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
Definition: kmp_stats.h:263
statistic doesn't need units printed next to it
Definition: kmp_stats.h:49
stats_flags_e
flags to describe the statistic (timer or counter)
Definition: kmp_stats.h:46
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
Definition: kmp_stats.h:91
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:60