vocabtree  0.0.1
cycletimer.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #if defined(__APPLE__)
4  #if defined(__x86_64__)
5  #include <sys/sysctl.h>
6  #else
7  #include <mach/mach.h>
8  #include <mach/mach_time.h>
9  #endif // __x86_64__ or not
10 
11  #include <stdio.h> // fprintf
12  #include <stdlib.h> // exit
13 
14 #elif _WIN32
15 # include <windows.h>
16 # include <time.h>
17 #else
18 # include <stdio.h>
19 # include <stdlib.h>
20 # include <string.h>
21 # include <sys/time.h>
22 #endif
23 
24 
/// Fine-grained timing built on the processor's cycle counter (or the
/// closest substitute the platform offers).
///
/// Different processors in the system will have different values for
/// the counter. If your process moves across processors, then the delta
/// time you measure will likely be incorrect. This is mostly for fine
/// grained measurements where the process is likely to be on the same
/// processor. For more global things you should use the Time interface.
///
/// Also note that if your processors' speeds change (i.e. processor
/// frequency scaling) or if you are in a heterogeneous environment, you
/// will likely get spurious results.
namespace CycleTimer {
  typedef unsigned long long SysClock;

  /// Return the current CPU time, in terms of clock ticks.
  /// Time zero is at some arbitrary point in the past.
  ///
  /// The tick source is chosen at compile time: mach_absolute_time() on
  /// non-x86_64 Apple targets, QueryPerformanceCounter() on Windows, the
  /// rdtsc instruction on other x86_64 targets, and otherwise the
  /// per-thread CPU-time clock expressed in nanoseconds.
  inline SysClock currentTicks() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return mach_absolute_time();
#elif defined(_WIN32)
    LARGE_INTEGER qwTime;
    QueryPerformanceCounter(&qwTime);
    return static_cast<SysClock>(qwTime.QuadPart);
#elif defined(__x86_64__)
    unsigned int a, d;
    // rdtsc returns the low 32 bits in eax and the high 32 bits in edx.
    asm volatile("rdtsc" : "=a" (a), "=d" (d));
    return static_cast<SysClock>(a) |
           (static_cast<SysClock>(d) << 32);
#elif defined(__ARM_NEON__) && 0 // mrc requires superuser.
    unsigned int val;
    asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
    return val;
#else
    timespec spec;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec);
    // Combine in integer arithmetic: routing the fields through float
    // would round them to 24 significant bits and lose nanoseconds.
    return static_cast<SysClock>(spec.tv_sec) * 1000000000ULL +
           static_cast<SysClock>(spec.tv_nsec);
#endif
  }

  /// Return a short label for the unit that currentTicks() counts in.
  inline const char* tickUnits() {
#if defined(__APPLE__) && !defined(__x86_64__)
    return "ns";
#elif defined(_WIN32) || defined(__WIN32__) || defined(__x86_64__)
    // _WIN32 is the macro every other Windows check in this file uses;
    // __WIN32__ is still accepted for backward compatibility.
    return "cycles";
#else
    return "ns"; // clock_gettime
#endif
  }

  namespace detail {
#if !defined(__APPLE__) && !defined(_WIN32) && defined(__x86_64__)
    /// Try to derive the seconds-per-tick scale from one /proc/cpuinfo line.
    ///
    /// Two kinds of line are recognised: a "model name" line that carries
    /// "@ <number>GHz" or "@ <number>MHz", and a "cpu MHz : <number>" line.
    ///
    /// @param line  NUL-terminated line of text; it is not modified.
    /// @param out   Receives the scale on success; untouched otherwise.
    /// @return true if a positive frequency was parsed from the line.
    inline bool secondsPerTickFromCpuinfoLine(const char* line, double* out) {
      double freq = 0.0;
      if (strstr(line, "model name")) {
        // NOTE(boulos): Because reading cpuinfo depends on dynamic
        // frequency scaling it's better to read the @ sign first
        const char* at_sign = strchr(line, '@');
        if (!at_sign) return false;
        const char* after_at = at_sign + 1;
        const bool is_ghz = strstr(after_at, "GHz") != NULL;
        const bool is_mhz = !is_ghz && strstr(after_at, "MHz") != NULL;
        if (!is_ghz && !is_mhz) return false;
        // sscanf stops at the unit suffix, so the line need not be cut.
        if (1 != sscanf(after_at, "%lf", &freq) || !(freq > 0.0)) return false;
        *out = (is_ghz ? 1e-9 : 1e-6) / freq;
        return true;
      }
      if (1 == sscanf(line, "cpu MHz : %lf", &freq) && freq > 0.0) {
        *out = 1e-6 / freq;
        return true;
      }
      return false;
    }
#endif

    /// Work out the seconds-per-tick scale that matches currentTicks()
    /// on this platform. Terminates the process with exit(-1) if the
    /// platform query it relies on (sysctl, or opening /proc/cpuinfo)
    /// fails.
    inline double computeSecondsPerTick() {
#if defined(__APPLE__)
#ifdef __x86_64__
      int args[] = {CTL_HW, HW_CPU_FREQ};
      unsigned int Hz;
      size_t len = sizeof(Hz);
      if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
        fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
        exit(-1);
      }
      return 1.0 / static_cast<double>(Hz);
#else
      mach_timebase_info_data_t time_info;
      mach_timebase_info(&time_info);

      // numer/denom converts ticks to nanoseconds; 1e-9 then gives seconds.
      return (1e-9 * static_cast<double>(time_info.numer)) /
             static_cast<double>(time_info.denom);
#endif // x86_64 or not
#elif defined(_WIN32)
      LARGE_INTEGER qwTicksPerSec;
      QueryPerformanceFrequency(&qwTicksPerSec);
      return 1.0 / static_cast<double>(qwTicksPerSec.QuadPart);
#elif defined(__x86_64__)
      FILE* fp = fopen("/proc/cpuinfo", "r");
      if (!fp) {
        fprintf(stderr, "CycleTimer::secondsPerTick failed: couldn't open /proc/cpuinfo.\n");
        exit(-1);
      }
      // In case we don't find a frequency line.
      double scale = 1e-9;
      char input[1024];
      while (fgets(input, sizeof(input), fp)) {
        if (secondsPerTickFromCpuinfoLine(input, &scale)) break;
      }
      fclose(fp);
      return scale;
#else
      // currentTicks() reports nanoseconds from clock_gettime here, so
      // the scale is fixed regardless of the CPU frequency.
      return 1e-9;
#endif
    }
  } // namespace detail

  /// Return the conversion from ticks to seconds.
  /// The value is computed on the first call and reused afterwards.
  inline double secondsPerTick() {
    static const double secondsPerTick_val = detail::computeSecondsPerTick();
    return secondsPerTick_val;
  }

  /// Return the conversion from ticks to milliseconds.
  inline double msPerTick() {
    return secondsPerTick() * 1000.0;
  }

  /// Return the current CPU time, in terms of seconds.
  /// This is slower than currentTicks(). Time zero is at
  /// some arbitrary point in the past.
  inline double currentSeconds() {
    return static_cast<double>(currentTicks()) * secondsPerTick();
  }

  /// Return the conversion from seconds to ticks.
  inline double ticksPerSecond() {
    return 1.0 / secondsPerTick();
  }
} // namespace CycleTimer
166