Loading...
Searching...
No Matches
timer.hpp
Go to the documentation of this file.
1#pragma once
2
#include <Kokkos_Core.hpp>
#include <iostream>
#include <map>
#include <memory>
#include <mpi.h>
#include <mutex>
#include <ranges>
#include <sstream>
#include <string>
#include <vector>
11
12namespace terra::util {
13
/// @brief Node representing a timed region in the hierarchy.
///
/// A node stores the process-local accumulated time and call count of one named region, owns its nested child
/// regions, and holds the statistics that `TimerTree` fills in when it reduces timings across MPI ranks.
///
/// @note See class `Timer` for actually running a timer.
class TimerNode
{
    std::string name;                                               ///< Name of the timer region
    double      total_time{ 0.0 };                                  ///< Accumulated time (per rank)
    int         count{ 0 };                                         ///< Number of times this node was timed
    std::map< std::string, std::shared_ptr< TimerNode > > children; ///< Nested child timers, ordered by name
    TimerNode*  parent{ nullptr };                                  ///< Parent node pointer (non-owning)

    // Aggregated statistics across MPI ranks (written by TimerTree)
    double root_time{ 0.0 }; ///< Time measured on the root rank
    double sum_time{ 0.0 };  ///< Sum over all ranks
    double min_time{ 0.0 };  ///< Minimum over all ranks
    double max_time{ 0.0 };  ///< Maximum over all ranks
    double avg_time{ 0.0 };  ///< Average over all ranks

    /// @brief Escape a string so that it can be embedded between double quotes in a JSON document.
    ///
    /// Quotes, backslashes and control characters (< 0x20) are escaped; all other bytes are copied unchanged.
    static std::string escape_json( const std::string& text )
    {
        static constexpr char hex_digits[] = "0123456789abcdef";

        std::string escaped;
        escaped.reserve( text.size() );
        for ( const char ch : text )
        {
            switch ( ch )
            {
            case '"':
                escaped += "\\\"";
                break;
            case '\\':
                escaped += "\\\\";
                break;
            case '\b':
                escaped += "\\b";
                break;
            case '\f':
                escaped += "\\f";
                break;
            case '\n':
                escaped += "\\n";
                break;
            case '\r':
                escaped += "\\r";
                break;
            case '\t':
                escaped += "\\t";
                break;
            default:
            {
                const auto byte = static_cast< unsigned char >( ch );
                if ( byte < 0x20 )
                {
                    // Remaining control characters have no short form and need a \u00XX escape.
                    escaped += "\\u00";
                    escaped += hex_digits[( byte >> 4 ) & 0xF];
                    escaped += hex_digits[byte & 0xF];
                }
                else
                {
                    escaped += ch;
                }
            }
            }
        }
        return escaped;
    }

    /// @brief Build the left padding for a given indentation; negative values are treated as zero.
    static std::string make_pad( int indent )
    {
        return std::string( static_cast< std::string::size_type >( indent < 0 ? 0 : indent ), ' ' );
    }

    /// @brief Write the `"children"` array followed by the closing brace of this node's JSON object.
    ///
    /// @param oss       Stream that already holds the opening brace and the scalar fields of this node.
    /// @param pad       Left padding of this node.
    /// @param indent    Indentation of this node; children are serialised with `indent + 4`.
    /// @param serialise Member function used for the children (`to_json` or `to_agg_json`).
    void write_children_and_close(
        std::ostringstream& oss,
        const std::string&  pad,
        int                 indent,
        std::string ( TimerNode::*serialise )( int ) const ) const
    {
        oss << pad << " \"children\": [\n";
        auto remaining = children.size();
        for ( const auto& [child_name, child] : children )
        {
            oss << ( ( *child ).*serialise )( indent + 4 );
            --remaining;
            if ( remaining > 0 )
            {
                oss << ","; // separator between siblings, none after the last one
            }
            oss << "\n";
        }
        oss << pad << " ]\n" << pad << "}";
    }

  public:
    friend class TimerTree;

    /// @brief Constructor
    /// @param n Name of the timed region
    /// @param p Parent node, or `nullptr` for a root node
    TimerNode( const std::string& n, TimerNode* p = nullptr )
    : name( n )
    , parent( p )
    {}

    /// @brief Reset all timings and statistics of this node and drop all of its children.
    ///
    /// The name and the parent pointer are left untouched.
    void clear_this_and_children()
    {
        total_time = 0.0;
        count      = 0;
        root_time  = 0.0;
        sum_time   = 0.0;
        min_time   = 0.0;
        max_time   = 0.0;
        avg_time   = 0.0;
        children.clear();
    }

    /// @brief Convert this node (and children) to JSON (per-rank)
    /// @param indent Number of spaces prepended to every line of this node (negative values count as 0)
    /// @return JSON object with the fields `name`, `total_time`, `count` and `children`
    std::string to_json( int indent = 0 ) const
    {
        const std::string  pad = make_pad( indent );
        std::ostringstream oss;
        oss << pad << "{\n";
        oss << pad << " \"name\": \"" << escape_json( name ) << "\",\n";
        oss << pad << " \"total_time\": " << total_time << ",\n";
        oss << pad << " \"count\": " << count << ",\n";
        write_children_and_close( oss, pad, indent, &TimerNode::to_json );
        return oss.str();
    }

    /// @brief Convert this node (and children) to JSON with MPI-aggregated statistics
    /// @param indent Number of spaces prepended to every line of this node (negative values count as 0)
    /// @return JSON object with the fields `name`, `root_time`, `sum_time`, `min_time`, `avg_time`, `max_time`,
    ///         `count` and `children`
    std::string to_agg_json( int indent = 0 ) const
    {
        const std::string  pad = make_pad( indent );
        std::ostringstream oss;
        oss << pad << "{\n";
        oss << pad << " \"name\": \"" << escape_json( name ) << "\",\n";
        oss << pad << " \"root_time\": " << root_time << ",\n";
        oss << pad << " \"sum_time\": " << sum_time << ",\n";
        oss << pad << " \"min_time\": " << min_time << ",\n";
        oss << pad << " \"avg_time\": " << avg_time << ",\n";
        oss << pad << " \"max_time\": " << max_time << ",\n";
        oss << pad << " \"count\": " << count << ",\n";
        write_children_and_close( oss, pad, indent, &TimerNode::to_agg_json );
        return oss.str();
    }
};
103
104/// @brief Singleton tree managing all timer nodes per MPI rank
105///
106/// @note Use `Timer` class for the actually starting and stopping timers. Internally `Timer` objects will access a
107/// `TimerTree` singleton. So you can easily add timer calls without changing the API of your code.
108///
109/// Can be exported via json.
110///
111/// Example:
112/// @code
113/// auto tt = TimerTree::instance();
114///
115/// tt.aggregate_mpi();
116/// std::cout << tt.json() << std::endl;
117/// std::cout << tt.json_aggregate() << std::endl;
118/// tt.clear();
119/// @endcode
120///
121/// Example output for `json()`.
122/// Note that the root node will always be there carrying no timings.
123/// @code
124/// {
125/// "name": "root",
126/// "total_time": 0,
127/// "count": 0,
128/// "children": [
129/// {
130/// "name": "laplace_apply",
131/// "total_time": 0.356301,
132/// "count": 28,
133/// "children": [
134/// {
135/// "name": "laplace_comm",
136/// "total_time": 0.02748,
137/// "count": 28,
138/// "children": [
139/// ]
140/// },
141/// {
142/// "name": "laplace_kernel",
143/// "total_time": 0.327421,
144/// "count": 28,
145/// "children": [
146/// ]
147/// }
148/// ]
149/// }
150/// ]
151/// }
152/// @endcode
154{
155 TimerNode root{ "root" }; ///< Root node
156 TimerNode* current{ &root }; ///< Pointer to current active node
157 std::mutex mtx; ///< Mutex for thread safety
158
159 public:
160 /// @brief Access the singleton instance
162 {
163 static TimerTree tree;
164 return tree;
165 }
166
167 void clear()
168 {
169 std::lock_guard< std::mutex > lock( mtx );
171 current = &root;
172 }
173
174 /// @brief Enter a new timing scope
175 void enter_scope( const std::string& name )
176 {
177 std::lock_guard< std::mutex > lock( mtx );
178 if ( !current->children.contains( name ) )
179 {
180 current->children[name] = std::make_shared< TimerNode >( name, current );
181 }
182 current = current->children[name].get();
183 }
184
185 /// @brief Exit the current timing scope and record elapsed time
186 void exit_scope( double elapsed )
187 {
188 std::lock_guard< std::mutex > lock( mtx );
189 current->total_time += elapsed;
190 current->count += 1;
191 if ( current->parent )
192 {
193 current = current->parent;
194 }
195 }
196
197 /// @brief Per-rank json tree.
198 ///
199 /// Returns a definitely non-reduced timer tree in json format.
200 /// This means that this returns the process-local timings depending on the process that calls this method.
201 std::string json() { return root.to_json(); }
202
203 /// @brief MPI-reduced / aggregate json.
204 ///
205 /// Returns the timings after reduction over all processes.
206 /// You need to call aggregate_mpi() before this for reasonable results.
207 ///
208 /// This method does not need to be called collectively.
209 std::string json_aggregate() { return root.to_agg_json(); }
210
211 /// @brief Aggregate timings across all MPI ranks
212 ///
213 /// Must be called collectively.
214 void aggregate_mpi() { aggregate_node( &root, MPI_COMM_WORLD ); }
215
216 private:
217 /// @brief Recursively aggregate a node's timings across MPI ranks
218 void aggregate_node( TimerNode* node, MPI_Comm comm )
219 {
220 double local_time = node->total_time;
221 double root_time, min_time, max_time, sum_time;
222
223 root_time = local_time;
224 MPI_Bcast( &root_time, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD );
225 MPI_Allreduce( &local_time, &min_time, 1, MPI_DOUBLE, MPI_MIN, comm );
226 MPI_Allreduce( &local_time, &max_time, 1, MPI_DOUBLE, MPI_MAX, comm );
227 MPI_Allreduce( &local_time, &sum_time, 1, MPI_DOUBLE, MPI_SUM, comm );
228
229 int size;
230 MPI_Comm_size( comm, &size );
231 node->root_time = root_time;
232 node->sum_time = sum_time;
233 node->min_time = min_time;
234 node->max_time = max_time;
235 node->avg_time = sum_time / size;
236
237 for ( auto& child : node->children | std::ranges::views::values )
238 {
239 aggregate_node( child.get(), comm );
240 }
241 }
242};
243
244/// @brief Timer supporting RAII scope or manual stop.
245///
246/// Starts timer on construction.
247///
248/// Automatically adds timing to `TimerTree`'s singleton instance.
249/// See `TimerTree` for details on how to export the timings.
250///
251/// Example usage: scoped
252/// @code
253/// {
254/// Timer t("compute"); // scoped timer - starts here
255/// // do computation
256/// } // timer ends here - writes result to TimerTree::instance()
257/// @endcode
258///
259/// Example usage: stop explicitly
260/// @code
261/// {
262/// Timer t("compute"); // scoped timer - starts here
263/// // do computation
264/// t.stop() // timer ends here - writes result to TimerTree::instance()
265/// // do something that is not included in timing
266/// }
267/// @endcode
268///
269class Timer
270{
271 std::string name; ///< Timer name
272 Kokkos::Timer timer; ///< Underlying Kokkos timer
273 bool running{ false }; ///< Is timer currently running
274
275 public:
276 /// @brief Constructor - starts the timer
277 /// @param n Timer name
278 explicit Timer( const std::string& n )
279 : name( n )
280 {
282 timer.reset();
283 running = true;
284 }
285
286 /// @brief Stop the timer and record elapsed time.
287 ///
288 /// Can be safely called twice - does not do anything on second call.
289 void stop()
290 {
291 if ( running )
292 {
293 double elapsed = timer.seconds();
294 TimerTree::instance().exit_scope( elapsed );
295 running = false;
296 }
297 }
298
299 /// @brief Destructor stops timer if still running.
300 ///
301 /// Can be used instead of stopping manually.
303 {
304 if ( running )
305 {
306 stop();
307 }
308 }
309};
310
311} // namespace terra::util
Node representing a timed region in the hierarchy.
Definition timer.hpp:18
TimerNode(const std::string &n, TimerNode *p=nullptr)
Constructor.
Definition timer.hpp:32
void clear_this_and_children()
Definition timer.hpp:37
std::string to_agg_json(int indent=0) const
Convert this node (and children) to JSON with MPI-aggregated statistics.
Definition timer.hpp:75
std::string to_json(int indent=0) const
Convert this node (and children) to JSON (per-rank)
Definition timer.hpp:50
Singleton tree managing all timer nodes per MPI rank.
Definition timer.hpp:154
void clear()
Definition timer.hpp:167
void exit_scope(double elapsed)
Exit the current timing scope and record elapsed time.
Definition timer.hpp:186
std::string json_aggregate()
MPI-reduced / aggregate json.
Definition timer.hpp:209
std::string json()
Per-rank json tree.
Definition timer.hpp:201
void aggregate_mpi()
Aggregate timings across all MPI ranks.
Definition timer.hpp:214
static TimerTree & instance()
Access the singleton instance.
Definition timer.hpp:161
void enter_scope(const std::string &name)
Enter a new timing scope.
Definition timer.hpp:175
Timer supporting RAII scope or manual stop.
Definition timer.hpp:270
~Timer()
Destructor stops timer if still running.
Definition timer.hpp:302
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:289
Timer(const std::string &n)
Constructor - starts the timer.
Definition timer.hpp:278
Definition solver.hpp:9