17 #include "kmp_config.h" 28 #include "kmp_stats_timing.h" 91 #define KMP_FOREACH_COUNTER(macro, arg) \ 92 macro(OMP_PARALLEL,stats_flags_e::onlyInMaster|stats_flags_e::noTotal,arg) \ 93 macro(OMP_NESTED_PARALLEL, 0, arg) \ 94 macro(OMP_FOR_static, 0, arg) \ 95 macro(OMP_FOR_static_steal, 0, arg) \ 96 macro(OMP_FOR_dynamic, 0, arg) \ 97 macro(OMP_DISTRIBUTE, 0, arg) \ 98 macro(OMP_BARRIER, 0, arg) \ 99 macro(OMP_CRITICAL, 0, arg) \ 100 macro(OMP_SINGLE, 0, arg) \ 101 macro(OMP_MASTER, 0, arg) \ 102 macro(OMP_TEAMS, 0, arg) \ 103 macro(OMP_set_lock, 0, arg) \ 104 macro(OMP_test_lock, 0, arg) \ 105 macro(REDUCE_wait, 0, arg) \ 106 macro(REDUCE_nowait, 0, arg) \ 107 macro(OMP_TASKYIELD, 0, arg) \ 108 macro(OMP_TASKLOOP, 0, arg) \ 109 macro(TASK_executed, 0, arg) \ 110 macro(TASK_cancelled, 0, arg) \ 111 macro(TASK_stolen, 0, arg) 133 #define KMP_FOREACH_TIMER(macro, arg) \ 134 macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ 135 macro (FOR_static_scheduling, 0, arg) \ 136 macro (FOR_dynamic_scheduling, 0, arg) \ 137 macro (OMP_critical, 0, arg) \ 138 macro (OMP_critical_wait, 0, arg) \ 139 macro (OMP_single, 0, arg) \ 140 macro (OMP_master, 0, arg) \ 141 macro (OMP_idle, stats_flags_e::logEvent, arg) \ 142 macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ 143 macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ 144 macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ 145 macro (OMP_parallel, stats_flags_e::logEvent, arg) \ 146 macro (OMP_task_immediate, 0, arg) \ 147 macro (OMP_task_taskwait, 0, arg) \ 148 macro (OMP_task_taskyield, 0, arg) \ 149 macro (OMP_task_taskgroup, 0, arg) \ 150 macro (OMP_task_join_bar, 0, arg) \ 151 macro (OMP_task_plain_bar, 0, arg) \ 152 macro (OMP_serial, stats_flags_e::logEvent, arg) \ 153 macro (OMP_taskloop_scheduling, 0, arg) \ 154 macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\ 156 macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \ 158 macro 
(FOR_static_iterations, \ 159 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 160 macro (FOR_dynamic_iterations, \ 161 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 162 macro (FOR_static_steal_stolen, \ 163 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 164 macro (FOR_static_steal_chunks, \ 165 stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 166 KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 205 #if (KMP_DEVELOPER_STATS) 222 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 223 macro(KMP_fork_call, 0, arg) \ 224 macro(KMP_join_call, 0, arg) \ 225 macro(KMP_end_split_barrier, 0, arg) \ 226 macro(KMP_hier_gather, 0, arg) \ 227 macro(KMP_hier_release, 0, arg) \ 228 macro(KMP_hyper_gather, 0, arg) \ 229 macro(KMP_hyper_release, 0, arg) \ 230 macro(KMP_linear_gather, 0, arg) \ 231 macro(KMP_linear_release, 0, arg) \ 232 macro(KMP_tree_gather, 0, arg) \ 233 macro(KMP_tree_release, 0, arg) \ 234 macro(USER_resume, 0, arg) \ 235 macro(USER_suspend, 0, arg) \ 236 macro(KMP_allocate_team, 0, arg) \ 237 macro(KMP_setup_icv_copy, 0, arg) \ 238 macro(USER_icv_copy, 0, arg) 240 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 263 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) 265 #define ENUMERATE(name, ignore, prefix) prefix##name, 266 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
268 enum explicit_timer_e {
276 explicit_timer_e timer_index;
// Builds a pair from an explicit-timer index `ti` and a timer id `t`,
// storing them in timer_index and timer respectively.
280 timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
// Returns the stored explicit_timer_e index.
281 inline explicit_timer_e get_index()
const {
return timer_index; }
// Returns the stored timer_e value.
282 inline timer_e get_timer()
const {
return timer; }
283 bool operator==(
const timerPair &rhs) {
284 return this->get_index() == rhs.get_index();
// Inequality, defined as the negation of operator== on the same operands.
// NOTE(review): this operator is not const-qualified, so it cannot be invoked
// on a const timerPair; confirm whether that is intentional.
286 bool operator!=(
const timerPair &rhs) {
return !(*
this == rhs); }
// Sample counter; used as the divisor in getSD() and the multiplier in
// getTotal().
294 uint64_t sampleCount;
// Default construction delegates all initialization to reset().
297 statistic() { reset(); }
// Copy constructor: copies minVal, maxVal, meanVal, m2 and sampleCount
// member by member.
298 statistic(statistic
const &o)
299 : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
300 sampleCount(o.sampleCount) {}
// Read-only accessors returning the stored members unchanged.
302 double getMin()
const {
return minVal; }
303 double getMean()
const {
return meanVal; }
304 double getMax()
const {
return maxVal; }
305 uint64_t getCount()
const {
return sampleCount; }
// Returns sqrt(m2 / sampleCount).
// NOTE(review): there is no guard for sampleCount == 0 here, so the result
// would not be a finite number in that case (assuming m2 is floating point);
// confirm callers never request the SD of an empty statistic.
306 double getSD()
const {
return sqrt(m2 / sampleCount); }
// Returns sampleCount * meanVal, i.e. the total is reconstructed from the
// count and the mean rather than read from a separate member.
307 double getTotal()
const {
return sampleCount * meanVal; }
310 minVal = std::numeric_limits<double>::max();
311 maxVal = -std::numeric_limits<double>::max();
// addSample() and scale() are only declared here; their definitions are not
// part of this view.
316 void addSample(
double sample);
317 void scale(
double factor);
// Forwards the reciprocal of f to scale().
// NOTE(review): f == 0 is not checked before computing 1. / f.
318 void scaleDown(
double f) { scale(1. / f); }
// Declared only; takes another statistic by const reference and returns a
// reference to a statistic.
319 statistic &operator+=(statistic
const &other);
// Declared only; returns a std::string. `total` defaults to false.
321 std::string format(
char unit,
bool total =
false)
const;
329 class timeStat :
public statistic {
330 static statInfo timerInfo[];
333 timeStat() : statistic() {}
334 static const char *name(timer_e e) {
return timerInfo[e].name; }
335 static bool noTotal(timer_e e) {
338 static bool masterOnly(timer_e e) {
341 static bool workerOnly(timer_e e) {
344 static bool noUnits(timer_e e) {
350 static void clearEventFlags() {
351 for (
int i = 0; i < TIMER_LAST; i++) {
360 class explicitTimer {
362 tsc_tick_count startTime;
363 tsc_tick_count pauseStartTime;
364 tsc_tick_count::tsc_interval_t totalPauseTime;
368 : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
// Constructs a timer bound to the timeStat `s`. pauseStartTime is initialized
// from 0; startTime and totalPauseTime are initialized with empty
// initializers.
369 explicitTimer(timeStat *s)
370 : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
// Replaces the timeStat pointer held by this timer.
372 void setStat(timeStat *s) { stat = s; }
// Declared only; the definition is not part of this view.
373 void start(timer_e timerEnumValue);
// Records the current tick count as the beginning of a pause.
374 void pause() { pauseStartTime = tsc_tick_count::now(); }
// Adds the ticks elapsed since pauseStartTime to totalPauseTime.
// NOTE(review): nothing here checks that pause() was called first.
375 void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
// Declared only; stats_ptr defaults to nullptr.
376 void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr =
nullptr);
386 class blockTimer :
public explicitTimer {
387 timer_e timerEnumValue;
390 blockTimer(timeStat *s, timer_e newTimerEnumValue)
391 : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
392 start(timerEnumValue);
394 ~blockTimer() { stop(timerEnumValue); }
403 class partitionedTimers {
405 explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
406 std::vector<timerPair> timer_stack;
410 void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
411 void init(timerPair timer_index);
412 void push(timerPair timer_index);
419 class blockPartitionedTimer {
420 partitionedTimers *part_timers;
421 timerPair timer_pair;
424 blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
425 : part_timers(pt), timer_pair(tp) {
426 part_timers->push(timer_pair);
428 ~blockPartitionedTimer() { part_timers->pop(); }
434 class blockThreadState {
440 : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
441 *state_pointer = new_state;
443 ~blockThreadState() { *state_pointer = old_state; }
451 static const statInfo counterInfo[];
// A new counter starts with value 0.
454 counter() : value(0) {}
// Adds one to the stored value.
455 void increment() { value++; }
// Returns the current value as a uint64_t.
456 uint64_t getValue()
const {
return value; }
// Sets the stored value back to 0.
457 void reset() { value = 0; }
// Returns the name field of the counterInfo entry indexed by `e`.
// No range check on `e` is performed here.
458 static const char *name(counter_e e) {
return counterInfo[e].name; }
459 static bool masterOnly(counter_e e) {
497 class kmp_stats_event {
505 : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
// Constructs an event from a start value, a stop value, a nesting level and
// the timer_e identifying the timer; each argument is stored in the member of
// the corresponding name.
506 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme)
507 : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
// Read-only accessors returning the stored members unchanged.
508 inline uint64_t getStart()
const {
return start; }
509 inline uint64_t getStop()
const {
return stop; }
510 inline int getNestLevel()
const {
return nest_level; }
511 inline timer_e getTimerName()
const {
return timer_name; }
540 class kmp_stats_event_vector {
541 kmp_stats_event *events;
544 static const int INIT_SIZE = 1024;
547 kmp_stats_event_vector() {
549 (kmp_stats_event *)__kmp_allocate(
sizeof(kmp_stats_event) * INIT_SIZE);
551 allocated_size = INIT_SIZE;
553 ~kmp_stats_event_vector() {}
554 inline void reset() { internal_size = 0; }
555 inline int size()
const {
return internal_size; }
556 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level,
559 if (internal_size == allocated_size) {
560 kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
561 sizeof(kmp_stats_event) * allocated_size * 2);
562 for (i = 0; i < internal_size; i++)
568 events[internal_size] =
569 kmp_stats_event(start_time, stop_time, nest_level, name);
// Element access by position into the `events` array (const and non-const
// overloads). No bounds check against the current size is performed.
575 const kmp_stats_event &operator[](
int index)
const {
return events[index]; }
576 kmp_stats_event &operator[](
int index) {
return events[index]; }
// at() performs exactly the same unchecked indexing as operator[].
// NOTE(review): unlike std::vector::at, an out-of-range index is not
// detected here.
577 const kmp_stats_event &at(
int index)
const {
return events[index]; }
578 kmp_stats_event &at(
int index) {
return events[index]; }
608 class kmp_stats_list {
610 timeStat _timers[TIMER_LAST + 1];
611 counter _counters[COUNTER_LAST + 1];
612 explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
613 partitionedTimers _partitionedTimers;
615 kmp_stats_event_vector _event_vector;
616 kmp_stats_list *next;
617 kmp_stats_list *prev;
619 int thread_is_idle_flag;
623 : _nestLevel(0), _event_vector(), next(
this), prev(
this), state(IDLE),
624 thread_is_idle_flag(0) {
625 #define doInit(name, ignore1, ignore2) \ 626 getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ 627 _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \ 628 getExplicitTimer(EXPLICIT_TIMER_##name)); 633 inline timeStat *getTimer(timer_e idx) {
return &_timers[idx]; }
634 inline counter *getCounter(counter_e idx) {
return &_counters[idx]; }
635 inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
636 return &_explicitTimers[idx];
638 inline partitionedTimers *getPartitionedTimers() {
639 return &_partitionedTimers;
// Whole-array accessors: each returns a pointer to the first element of the
// corresponding member array.
641 inline timeStat *getTimers() {
return _timers; }
642 inline counter *getCounters() {
return _counters; }
643 inline explicitTimer *getExplicitTimers() {
return _explicitTimers; }
// Returns a mutable reference to the event vector member.
644 inline kmp_stats_event_vector &getEventVector() {
return _event_vector; }
// Calls reset() on the event vector member.
645 inline void resetEventVector() { _event_vector.reset(); }
// _nestLevel is adjusted by one in either direction and read back as int.
646 inline void incrementNestValue() { _nestLevel++; }
647 inline int getNestValue() {
return _nestLevel; }
648 inline void decrementNestValue() { _nestLevel--; }
// Getter/setter for the gtid member.
649 inline int getGtid()
const {
return gtid; }
650 inline void setGtid(
int newgtid) { gtid = newgtid; }
// Overwrites the state member with `newstate`.
651 inline void setState(
stats_state_e newstate) { state = newstate; }
// The idle flag is an int: setIdleFlag() stores 1, resetIdleFlag() stores 0,
// and isIdle() is true only when the flag equals exactly 1.
654 inline bool isIdle() {
return thread_is_idle_flag == 1; }
655 inline void setIdleFlag() { thread_is_idle_flag = 1; }
656 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
657 kmp_stats_list *push_back(
int gtid);
658 inline void push_event(uint64_t start_time, uint64_t stop_time,
659 int nest_level, timer_e name) {
660 _event_vector.push_back(start_time, stop_time, nest_level, name);
664 kmp_stats_list::iterator begin();
665 kmp_stats_list::iterator end();
669 friend kmp_stats_list::iterator kmp_stats_list::begin();
670 friend kmp_stats_list::iterator kmp_stats_list::end();
675 iterator operator++();
676 iterator operator++(
int dummy);
677 iterator operator--();
678 iterator operator--(
int dummy);
679 bool operator!=(
const iterator &rhs);
680 bool operator==(
const iterator &rhs);
681 kmp_stats_list *operator*()
const;
714 class kmp_stats_output_module {
724 std::string outputFileName;
725 static const char *eventsFileName;
726 static const char *plotFileName;
727 static int printPerThreadFlag;
728 static int printPerThreadEventsFlag;
729 static const rgb_color globalColorArray[];
730 static rgb_color timerColorInfo[];
733 static void setupEventColors();
734 static void printPloticusFile();
735 static void printHeaderInfo(FILE *statsOut);
736 static void printTimerStats(FILE *statsOut, statistic
const *theStats,
737 statistic
const *totalStats);
738 static void printCounterStats(FILE *statsOut, statistic
const *theStats);
739 static void printCounters(FILE *statsOut, counter
const *theCounters);
740 static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
742 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
743 static void windupExplicitTimers();
744 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
747 kmp_stats_output_module() { init(); }
748 void outputStats(
const char *heading);
// Free functions of the statistics subsystem. Only the declarations appear
// here; the definitions are not part of this view.
754 void __kmp_stats_init();
755 void __kmp_stats_fini();
// Invoked by the KMP_RESET_STATS() macro.
756 void __kmp_reset_stats();
// Invoked by the KMP_OUTPUT_STATS(heading_string) macro with the heading
// string as its argument.
757 void __kmp_output_stats(
const char *);
758 void __kmp_accumulate_stats_at_exit(
void);
// Globals defined elsewhere (extern declarations only).
// __kmp_stats_thread_ptr is declared KMP_THREAD_LOCAL and is the object the
// KMP_* statistics macros dereference.
760 extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
762 extern kmp_stats_list *__kmp_stats_list;
764 extern kmp_tas_lock_t __kmp_stats_lock;
766 extern tsc_tick_count __kmp_stats_start_time;
768 extern kmp_stats_output_module __kmp_stats_output;
788 #define KMP_TIME_BLOCK(name) \ 789 blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \ 803 #define KMP_COUNT_VALUE(name, value) \ 804 __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 816 #define KMP_COUNT_BLOCK(name) \ 817 __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 834 #define KMP_START_EXPLICIT_TIMER(name) \ 835 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ 836 ->start(TIMER_##name) 853 #define KMP_STOP_EXPLICIT_TIMER(name) \ 854 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ 874 #define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string) 883 #define KMP_INIT_PARTITIONED_TIMERS(name) \ 884 __kmp_stats_thread_ptr->getPartitionedTimers()->init( \ 885 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 887 #define KMP_TIME_PARTITIONED_BLOCK(name) \ 888 blockPartitionedTimer __PBLOCKTIME__( \ 889 __kmp_stats_thread_ptr->getPartitionedTimers(), \ 890 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 892 #define KMP_PUSH_PARTITIONED_TIMER(name) \ 893 __kmp_stats_thread_ptr->getPartitionedTimers()->push( \ 894 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 896 #define KMP_POP_PARTITIONED_TIMER() \ 897 __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 899 #define KMP_SET_THREAD_STATE(state_name) \ 900 __kmp_stats_thread_ptr->setState(state_name) 902 #define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState() 904 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 905 blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \ 915 #define KMP_RESET_STATS() __kmp_reset_stats() 917 #if (KMP_DEVELOPER_STATS) 918 #define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) 919 #define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) 920 #define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 921 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) 922 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) 
KMP_STOP_EXPLICIT_TIMER(n) 923 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) 926 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 927 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 928 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 929 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 930 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 931 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 934 #else // KMP_STATS_ENABLED 937 #define KMP_TIME_BLOCK(n) ((void)0) 938 #define KMP_COUNT_VALUE(n, v) ((void)0) 939 #define KMP_COUNT_BLOCK(n) ((void)0) 940 #define KMP_START_EXPLICIT_TIMER(n) ((void)0) 941 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) 943 #define KMP_OUTPUT_STATS(heading_string) ((void)0) 944 #define KMP_RESET_STATS() ((void)0) 946 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 947 #define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 948 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 949 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 950 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 951 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 952 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 953 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 954 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 955 #define KMP_POP_PARTITIONED_TIMER() ((void)0) 956 #define KMP_SET_THREAD_STATE(state_name) ((void)0) 957 #define KMP_GET_THREAD_STATE() ((void)0) 958 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 959 #endif // KMP_STATS_ENABLED 961 #endif // KMP_STATS_H statistic is valid only for master
statistic is valid only for non-master threads
do not show a TOTAL_aggregation for this statistic
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
statistic doesn't need units printed next to it
stats_flags_e
flags to describe the statistic (timer or counter)
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
stats_state_e
the states which a thread can be in