Loading...
Searching...
No Matches
xdmf.hpp
Go to the documentation of this file.
1
2#pragma once
3
4#include <fstream>
5
6#include "mpi/mpi.hpp"
8#include "util/filesystem.hpp"
9#include "util/logging.hpp"
10#include "util/result.hpp"
11#include "util/xml.hpp"
12
13namespace terra::io {
14
15/// @brief XDMF output that simultaneously serves for visualization with software like Paraview and as a simulation
16/// checkpoint.
17///
18/// # Overview
19///
20/// Writes simulation data (time-series for a constant mesh) and metadata into a directory.
21/// The written data simultaneously serves for visualization with tools that can read XDMF files (e.g., Paraview) and
22/// can be used to restore the grid data, e.g., to continue a previous simulation (aka checkpoint).
23/// This does not involve data duplication. The data required for XDMF is directly used as a checkpoint.
24///
25/// Currently restricted to the spherical shell mesh data structure.
26/// Interprets data as block-structured wedge-element meshes.
27///
28/// # File overview (what is written)
29///
30/// The mesh data has to be added upon construction.
31/// None, one, or many scalar or vector-valued grids can be added afterward.
32///
33/// Each write() call then writes out
34/// - binary data files for each added grid data item containing the raw values,
35/// - an .xmf file to be used for visualization of all the binary files (read by an XDMF reader).
36///
37/// The first write() call also writes (once)
38/// - the mesh data (grid coordinates/topology) to binary files
39/// - a checkpoint metadata file required to read in the checkpoints
40///
41/// For time-dependent runs, call write() in, e.g., every timestep. The mesh written in the first call will be
42/// referenced from each .xmf file.
43///
44/// # Output file precision
45///
46/// The actually written data type can be selected regardless of the underlying data type of the allocated data for the
47/// mesh points, topology, and each data grid. Defaults are selected via default parameters.
48/// Concretely, you can write your double precision fields in single precision without converting them manually.
49/// Note that this means that your checkpoints obviously have the same precision that you specify here.
50///
51/// # Other notes
52///
53/// All added data grids must have different (Kokkos::View-)labels.
54///
55/// Uses MPI IO for fast parallel output.
56///
57/// # Checkpoints
58///
59/// To recover a checkpoint, use the function \ref read_xdmf_checkpoint_grid() (or
60/// \ref read_xdmf_checkpoint_metadata() to just inspect the structure of the checkpoint).
61///
62/// Note that the checkpoint can only be read using the same domain partitioning (i.e., the 'topology' of the
63/// \ref DistributedDomain used when the checkpoint was written must be identical) - BUT the number of MPI processes
64/// does not need to match (nor does the distribution of subdomains to ranks need to match). So you can technically
65/// (if the amount of main memory permits) read a checkpoint from a large parallel simulation with only one or a few
66/// processes (possibly useful for post-processing).
67///
68/// ## Picking the last step during checkpoint recovery
69///
70/// The .xmf file for each write() call is written last (after the binary data).
71/// Thus, if the corresponding .xmf step has been written, the parallel binary data output should be completed.
72///
73/// # Checkpoint data binary format
74///
75/// All data is written to a single binary file per grid data item and per time step.
76/// Each subdomain is written contiguously. Concretely, per subdomain (also applies to the grid point coordinates)
77///
78/// @code
79/// for ( each subdomain )
80/// {
81/// for ( int r = 0; r < subdomain_size_r; r++ )
82/// {
83/// for ( int y = 0; y < subdomain_size_y; y++ )
84/// {
85/// for ( int x = 0; x < subdomain_size_x; x++ )
86/// {
87/// for ( int d = 0; d < point_dim; d++ )
88/// {
89/// stream << data( subdomain_id, x, y, r, d )
90/// }
91/// }
92/// }
93/// }
94/// }
95/// @endcode
96///
97/// The subdomain order depends on various factors and can be essentially arbitrary.
98/// The concrete ordering (as well as the data type) is reflected in the checkpoint metadata (see below).
99///
100/// # Checkpoint metadata format (required only once per time series)
101///
102/// See \ref terra::grid::shell::SubdomainInfo for how the global subdomain ID is encoded.
103///
104/// @code
105/// version: i32
106/// num_subdomains_per_diamond_lateral_direction: i32
107/// num_subdomains_per_diamond_radial_direction: i32
108/// subdomain_size_x: i32
109/// subdomain_size_y: i32
110/// subdomain_size_r: i32
111/// radii: array: f64, entries: num_subdomains_per_diamond_radial_direction * (subdomain_size_r - 1) + 1
112/// grid_scalar_bytes: i32 // new in checkpoint version 1, number of float bytes for writing the grid (4 or 8 byte float)
113/// num_grid_data_files: i32
114/// list (size = num_grid_data_files)
115/// [
116/// grid_name_string_size_bytes: i32
117/// grid_name_string: grid_name_string_size_bytes * 8
118/// scalar_data_type: i32 // 0: signed integer, 1: unsigned integer, 2: float
119/// scalar_bytes: i32
120/// vec_dim: i32
121/// ]
122/// checkpoint_subdomain_ordering: i32
123/// if (checkpoint_subdomain_ordering == 0) {
124/// // The following list contains the encoded global_subdomain_ids (as i64) in the order in which the
125/// // subdomains are written to the data files. To find out where a certain subdomain is located in the
126/// // file, the starting offset must be set equal to the index of the global_subdomain_id in the list below,
127/// // multiplied by the size of a single chunk of data per subdomain.
128/// // That means that in the worst case, the entire list must be read to find the correct subdomain.
129/// // However, this way it is easy to _write_ the metadata since we do not need to globally sort the subdomain
130/// // positions in the parallel setting, and we get away with O(1) local memory usage during parsing.
131/// // Lookup is O(n), though.
132/// //
133/// // An alternative would be to sort the list before writing the checkpoint and store the _position of the
134/// // subdomain data_ instead of the global_subdomain_id. Since the global_subdomain_id is sortable, an
135/// // implicit mapping from global_subdomain_id to this list's index can be accomplished.
136/// // Then look up the position of the data in O(1).
137/// // However, that would require reducing the entire list on one process which is in theory not scalable
138/// // (plus the sorting approach is a tiny bit more complicated).
139/// // Use a different value for checkpoint_subdomain_ordering in that case and extend the file format.
140/// list (size = "num_global_subdomains" = 10 * num_subdomains_per_diamond_lateral_direction * num_subdomains_per_diamond_lateral_direction * num_subdomains_per_diamond_radial_direction)
141/// [
142/// global_subdomain_id: i64
143/// ]
144/// }
145/// @endcode
146///
147template < typename InputGridScalarType >
149{
150 public:
/// @brief Used to specify the output type when writing floating point data.
///
/// The enumerator values equal the number of bytes of the written scalar.
enum class OutputTypeFloat : int
{
    Float32 = 4,
    Float64 = 8,
};
159
/// @brief Used to specify the output type when writing (signed) integer data.
///
/// The enumerator values equal the number of bytes of the written scalar.
enum class OutputTypeInt : int
{
    Int32 = 4,
    Int64 = 8,
};
168
169 /// @brief Constructs an XDMFOutput object.
170 ///
171 /// All data will be written to the specified directory (it is a good idea to pass an empty directory).
172 ///
173 /// Construction does not write any data, yet.
174 ///
175 /// @param directory_path Path to a directory that the data shall be written to. If the directory does not exist,
176 /// it will be created during the first write() call. If it does already exist, data will be
177 /// overwritten.
178 /// @param distributed_domain \ref DistributedDomain instance.
179 /// @param coords_shell_device Lateral spherical shell grid coordinates (see \ref subdomain_unit_sphere_single_shell_coords()).
180 /// @param coords_radii_device Spherical shell radii (see \ref subdomain_shell_radii()).
181 /// @param output_type_points Floating point data type to use for mesh coordinate output.
182 /// @param output_type_connectivity Integer data type to use for mesh connectivity output.
/// NOTE(review): the constructor declarator line (original line 183) is missing from this extraction -
/// presumably the class-name line of the constructor. Confirm against the original header.
184 const std::string& directory_path,
185 const grid::shell::DistributedDomain& distributed_domain,
186 const grid::Grid3DDataVec< InputGridScalarType, 3 >& coords_shell_device,
187 const grid::Grid2DDataScalar< InputGridScalarType >& coords_radii_device,
188 const OutputTypeFloat output_type_points = OutputTypeFloat::Float32,
189 const OutputTypeInt output_type_connectivity = OutputTypeInt::Int64 )
190 : directory_path_( directory_path )
191 , distributed_domain_( distributed_domain )
192 , coords_shell_device_( coords_shell_device )
193 , coords_radii_device_( coords_radii_device )
194 , output_type_points_( output_type_points )
195 , output_type_connectivity_( output_type_connectivity )
196 {
// Fail fast if the lateral shell coords and the radii disagree on the number of local subdomains
// (extent 0 is the local subdomain count for both views).
197 if ( coords_shell_device.extent( 0 ) != coords_radii_device.extent( 0 ) )
198 {
199 Kokkos::abort( "XDMF: Number of subdomains of shell and radii coords does not match." );
200 }
201 }
202
203 /// @brief Set the write counter manually.
204 ///
205 /// This will only affect the step number attached to the file names. The geometry is still written once during the
206 /// first write() call.
207 void set_write_counter( int write_counter ) { write_counter_ = write_counter; }
208
209 /// @brief Adds a new scalar data grid to be written out.
210 ///
211 /// Does not write any data to file yet - call write() for writing the next time step.
212 template < typename InputScalarDataType >
213 void
215 const OutputTypeFloat output_type = OutputTypeFloat::Float32 )
216 {
217 if ( first_write_happened_ )
218 {
219 Kokkos::abort( "XDMF::add(): Cannot add data after write() has been called." );
220 }
221
222 check_extents( data );
223
224 if ( is_label_taken( data.label() ) )
225 {
226 Kokkos::abort( ( "Cannot add data with label '" + data.label() +
227 "' - data with identical label has been added previously." )
228 .c_str() );
229 }
230
231 if constexpr ( std::is_same_v< InputScalarDataType, double > )
232 {
233 device_data_views_scalar_double_.push_back( { data, output_type } );
234 }
235 else if constexpr ( std::is_same_v< InputScalarDataType, float > )
236 {
237 device_data_views_scalar_float_.push_back( { data, output_type } );
238 }
239 else
240 {
241 Kokkos::abort( "XDMF::add(): Grid data type not supported (yet)." );
242 }
243 }
244
245 /// @brief Adds a new vector-valued data grid to be written out.
246 ///
247 /// Does not write any data to file yet - call write() for writing the next time step.
248 template < typename InputScalarDataType, int VecDim >
249 void
251 const OutputTypeFloat output_type = OutputTypeFloat::Float32 )
252 {
253 if ( first_write_happened_ )
254 {
255 Kokkos::abort( "XDMF::add(): Cannot add data after write() has been called." );
256 }
257
258 check_extents( data );
259
260 if ( is_label_taken( data.label() ) )
261 {
262 Kokkos::abort( ( "Cannot add data with label '" + data.label() +
263 "' - data with identical label has been added previously." )
264 .c_str() );
265 }
266
267 if constexpr ( std::is_same_v< InputScalarDataType, double > )
268 {
269 device_data_views_vec_double_.push_back( { data, output_type } );
270 }
271 else if constexpr ( std::is_same_v< InputScalarDataType, float > )
272 {
273 device_data_views_vec_float_.push_back( { data, output_type } );
274 }
275 else
276 {
277 Kokkos::abort( "XDMF::add(): Grid data type not supported (yet)." );
278 }
279 }
280
281 /// @brief Writes a "time step".
282 ///
283 /// Will write one .xmf file with the current counter as part of the name such that the files can be opened as a
284 /// time series.
285 ///
286 /// The first write() call after construction will also write the mesh data (binary files) that will be referenced
287 /// from later .xmf files, as well as checkpoint metadata.
288 ///
289 /// For each added data grid, one binary file is written. The data is copied to the host if required.
290 /// The write() calls will allocate temporary storage on the host if host and device memory are not shared.
291 /// Currently, for data grids, some host-side temporary buffers are kept after this method returns (the sizes depend
292 /// on the type of data added) to avoid frequent reallocation.
293 void write()
294 {
295 using util::XML;
296
297 const auto geometry_file_base = "geometry.bin";
298 const auto topology_file_base = "topology.bin";
299
300 const auto readme_file_path = directory_path_ + "/README.md";
301
302 const auto geometry_file_path = directory_path_ + "/" + geometry_file_base;
303 const auto topology_file_path = directory_path_ + "/" + topology_file_base;
304
305 const auto step_file_path = directory_path_ + "/step_" + std::to_string( write_counter_ ) + ".xmf";
306
307 const int num_subdomains = coords_shell_device_.extent( 0 );
308 const int nodes_x = coords_shell_device_.extent( 1 );
309 const int nodes_y = coords_shell_device_.extent( 2 );
310 const int nodes_r = coords_radii_device_.extent( 1 );
311
312 const auto number_of_nodes_local = num_subdomains * nodes_x * nodes_y * nodes_r;
313 const auto number_of_elements_local = num_subdomains * ( nodes_x - 1 ) * ( nodes_y - 1 ) * ( nodes_r - 1 ) * 2;
314
315 if ( !first_write_happened_ )
316 {
317 // Number of global nodes and elements.
318
319 int num_nodes_elements_subdomains_global[3] = {
320 number_of_nodes_local, number_of_elements_local, num_subdomains };
321 MPI_Allreduce( MPI_IN_PLACE, &num_nodes_elements_subdomains_global, 3, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
322 number_of_nodes_global_ = num_nodes_elements_subdomains_global[0];
323 number_of_elements_global_ = num_nodes_elements_subdomains_global[1];
324 number_of_subdomains_global_ = num_nodes_elements_subdomains_global[2];
325
326 // Check MPI write offset
327
328 // To be populated:
329 // First entry: number of nodes of processes before this
330 // Second entry: number of elements of processes before this
331 // Third entry: number of subdomains of processes before this
332 int offsets[3];
333
334 int local_values[3] = { number_of_nodes_local, number_of_elements_local, num_subdomains };
335
336 // Compute the prefix sum (inclusive)
337 MPI_Scan( &local_values, &offsets, 3, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
338
339 // Subtract the local value to get the sum of all values from processes with ranks < current rank
340 number_of_nodes_offset_ = offsets[0] - local_values[0];
341 number_of_elements_offset_ = offsets[1] - local_values[1];
342 number_of_subdomains_offset_ = offsets[2] - local_values[2];
343
344 // Create a directory on root
345
346 util::prepare_empty_directory( directory_path_ );
347
348 // Add a README to the directory (what to keep, what the data contains, some notes on how to use paraview).
349
350 if ( mpi::rank() == 0 )
351 {
352 std::ofstream readme_stream( readme_file_path );
353 readme_stream
354 << "# This directory contains the output of the XDMF writer (for visualization and checkpointing).\n";
355 readme_stream << "\n";
356 readme_stream << "Overview:\n";
357 readme_stream << "- `geometry.bin`: cartesian node coordinates\n";
358 readme_stream << "- `topology.bin`: connectivity/topology/elements (whatever you want to call it)\n";
359 readme_stream << "- `checkpoint_metadata.bin`: metadata for checkpointing\n";
360 readme_stream
361 << "- `step_*.xmf`: xmf file (open this with Paraview for visualization) for each write() step\n";
362 readme_stream << "- `<some_name>_*.bin`: binary grid data (per write() step)\n";
363 readme_stream.close();
364 }
365
366 // Write mesh binary data if first write
367
368 // Node points.
369 {
370 std::stringstream geometry_stream;
371 switch ( output_type_points_ )
372 {
374 write_geometry_binary_data< float >( geometry_stream );
375 break;
377 write_geometry_binary_data< double >( geometry_stream );
378 break;
379 default:
380 Kokkos::abort( "XDMF: Unknown output type for geometry." );
381 }
382
383 MPI_File fh;
384 MPI_File_open(
385 MPI_COMM_WORLD, geometry_file_path.c_str(), MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
386
387 // Define the file view: each process writes its local data sequentially
388 MPI_Offset disp = number_of_nodes_offset_ * 3 * static_cast< int >( output_type_points_ );
389 MPI_File_set_view( fh, disp, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL );
390
391 std::string geom_str = geometry_stream.str();
392
393 // Write data collectively
394 MPI_File_write_all(
395 fh, geom_str.data(), static_cast< int >( geom_str.size() ), MPI_CHAR, MPI_STATUS_IGNORE );
396
397 // Close the file
398 MPI_File_close( &fh );
399 }
400
401 // Connectivity/topology/elements (whatever you want to call it).
402 {
403 std::stringstream topology_stream;
404 switch ( output_type_connectivity_ )
405 {
407 write_topology_binary_data< int32_t >( topology_stream, number_of_nodes_offset_ );
408 break;
410 write_topology_binary_data< int64_t >( topology_stream, number_of_nodes_offset_ );
411 break;
412 default:
413 Kokkos::abort( "XDMF: Unknown output type for topology." );
414 }
415
416 MPI_File fh;
417 MPI_File_open(
418 MPI_COMM_WORLD, topology_file_path.c_str(), MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
419
420 // Define the file view: each process writes its local data sequentially
421 MPI_Offset disp = 6 * number_of_elements_offset_ * static_cast< int >( output_type_connectivity_ );
422 MPI_File_set_view( fh, disp, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL );
423
424 std::string topo_str = topology_stream.str();
425
426 // Write data collectively
427 MPI_File_write_all(
428 fh, topo_str.data(), static_cast< int >( topo_str.size() ), MPI_CHAR, MPI_STATUS_IGNORE );
429
430 // Close the file
431 MPI_File_close( &fh );
432 }
433
434 // Checkpoint metadata
435 {
436 write_checkpoint_metadata();
437 }
438 }
439
440 // Build XML skeleton.
441
442 auto xml = XML( "Xdmf", { { "Version", "2.0" } } );
443 auto domain = XML( "Domain" );
444 auto grid = XML( "Grid", { { "Name", "Grid" }, { "GridType", "Uniform" } } );
445
446 auto geometry =
447 XML( "Geometry", { { "Type", "XYZ" } } )
448 .add_child(
449 XML( "DataItem",
450 { { "Format", "Binary" },
451 { "DataType", "Float" },
452 { "Precision", std::to_string( static_cast< int >( output_type_points_ ) ) },
453 { "Endian", "Little" },
454 { "Dimensions", std::to_string( number_of_nodes_global_ ) + " " + std::to_string( 3 ) } },
455 geometry_file_base ) );
456
457 grid.add_child( geometry );
458
459 auto topology =
460 XML( "Topology",
461 { { "Type", "Wedge" }, { "NumberOfElements", std::to_string( number_of_elements_global_ ) } } )
462 .add_child(
463 XML( "DataItem",
464 { { "Format", "Binary" },
465 { "DataType", "Int" },
466 { "Precision", std::to_string( static_cast< int >( output_type_connectivity_ ) ) },
467 { "Endian", "Little" },
468 { "Dimensions", std::to_string( number_of_elements_global_ * 6 ) } },
469 topology_file_base ) );
470
471 grid.add_child( topology );
472
473 // Write attribute data for each attached grid.
474
475 for ( const auto& [data, output_type] : device_data_views_scalar_float_ )
476 {
477 const auto attribute = write_scalar_attribute_file( data, output_type );
478 grid.add_child( attribute );
479 }
480
481 for ( const auto& [data, output_type] : device_data_views_scalar_double_ )
482 {
483 const auto attribute = write_scalar_attribute_file( data, output_type );
484 grid.add_child( attribute );
485 }
486
487 for ( const auto& [data, output_type] : device_data_views_vec_float_ )
488 {
489 const auto attribute = write_vec_attribute_file( data, output_type );
490 grid.add_child( attribute );
491 }
492
493 for ( const auto& [data, output_type] : device_data_views_vec_double_ )
494 {
495 const auto attribute = write_vec_attribute_file( data, output_type );
496 grid.add_child( attribute );
497 }
498
499 domain.add_child( grid );
500 xml.add_child( domain );
501
502 if ( mpi::rank() == 0 )
503 {
504 std::ofstream step_stream( step_file_path );
505 step_stream << "<?xml version=\"1.0\" ?>\n";
506 step_stream << "<!DOCTYPE Xdmf SYSTEM \"Xdmf.dtd\" []>\n";
507 step_stream << xml.to_string();
508 step_stream.close();
509 }
510
511 write_counter_++;
512 first_write_happened_ = true;
513 }
514
515 private:
516 template < typename GridDataType >
517 void check_extents( const GridDataType& data )
518 {
519 if ( data.extent( 0 ) != coords_radii_device_.extent( 0 ) )
520 {
521 Kokkos::abort( "XDMF: Number of subdomains of added data item does not match mesh." );
522 }
523
524 if ( data.extent( 1 ) != coords_shell_device_.extent( 1 ) )
525 {
526 Kokkos::abort( "XDMF: Dim x of added data item does not match mesh." );
527 }
528
529 if ( data.extent( 2 ) != coords_shell_device_.extent( 2 ) )
530 {
531 Kokkos::abort( "XDMF: Dim y of added data item does not match mesh." );
532 }
533
534 if ( data.extent( 3 ) != coords_radii_device_.extent( 1 ) )
535 {
536 Kokkos::abort( "XDMF: Dim r of added data item does not match mesh." );
537 }
538 }
539
540 bool is_label_taken( const std::string& label )
541 {
542 for ( auto [grid, _] : device_data_views_scalar_double_ )
543 {
544 if ( grid.label() == label )
545 {
546 return true;
547 }
548 }
549
550 for ( auto [grid, _] : device_data_views_scalar_float_ )
551 {
552 if ( grid.label() == label )
553 {
554 return true;
555 }
556 }
557
558 for ( auto [grid, _] : device_data_views_vec_double_ )
559 {
560 if ( grid.label() == label )
561 {
562 return true;
563 }
564 }
565
566 for ( auto [grid, _] : device_data_views_vec_float_ )
567 {
568 if ( grid.label() == label )
569 {
570 return true;
571 }
572 }
573
574 return false;
575 }
576
577 template < std::floating_point FloatingPointOutputType >
578 void write_geometry_binary_data( std::stringstream& out )
579 {
580 // Copy mesh to host.
581 // We assume the mesh is only written once so we throw away the host copies after this method returns.
582 const typename grid::Grid3DDataVec< InputGridScalarType, 3 >::HostMirror coords_shell_host =
583 Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace{}, coords_shell_device_ );
584 const typename grid::Grid2DDataScalar< InputGridScalarType >::HostMirror coords_radii_host =
585 Kokkos::create_mirror_view_and_copy( Kokkos::HostSpace{}, coords_radii_device_ );
586
587 for ( int local_subdomain_id = 0; local_subdomain_id < coords_shell_host.extent( 0 ); local_subdomain_id++ )
588 {
589 for ( int r = 0; r < coords_radii_host.extent( 1 ); r++ )
590 {
591 for ( int y = 0; y < coords_shell_host.extent( 2 ); y++ )
592 {
593 for ( int x = 0; x < coords_shell_host.extent( 1 ); x++ )
594 {
595 const auto c =
596 grid::shell::coords( local_subdomain_id, x, y, r, coords_shell_host, coords_radii_host );
597
598 for ( int d = 0; d < 3; d++ )
599 {
600 const auto cd = static_cast< FloatingPointOutputType >( c( d ) );
601 out.write( reinterpret_cast< const char* >( &cd ), sizeof( FloatingPointOutputType ) );
602 }
603 }
604 }
605 }
606 }
607 }
608
609 template < std::integral IntegerOutputType >
610 void write_topology_binary_data( std::stringstream& out, IntegerOutputType number_of_nodes_offset )
611 {
612 const int num_subdomains = coords_shell_device_.extent( 0 );
613 const int nodes_x = coords_shell_device_.extent( 1 );
614 const int nodes_y = coords_shell_device_.extent( 2 );
615 const int nodes_r = coords_radii_device_.extent( 1 );
616
617 const int stride_0 = nodes_x * nodes_y * nodes_r;
618 const int stride_1 = nodes_x * nodes_y;
619 const int stride_2 = nodes_x;
620
621 for ( int local_subdomain_id = 0; local_subdomain_id < num_subdomains; local_subdomain_id++ )
622 {
623 for ( int r = 0; r < nodes_r - 1; r++ )
624 {
625 for ( int y = 0; y < nodes_y - 1; y++ )
626 {
627 for ( int x = 0; x < nodes_x - 1; x++ )
628 {
629 // Hex nodes
630 IntegerOutputType v[8];
631
632 v[0] = number_of_nodes_offset + local_subdomain_id * stride_0 + r * stride_1 + y * stride_2 + x;
633 v[1] = v[0] + 1;
634 v[2] = v[0] + nodes_x;
635 v[3] = v[0] + nodes_x + 1;
636
637 v[4] = number_of_nodes_offset + local_subdomain_id * stride_0 + ( r + 1 ) * stride_1 +
638 y * stride_2 + x;
639 v[5] = v[4] + 1;
640 v[6] = v[4] + nodes_x;
641 v[7] = v[4] + nodes_x + 1;
642
643 IntegerOutputType wedge_0[6] = { v[0], v[1], v[2], v[4], v[5], v[6] };
644 IntegerOutputType wedge_1[6] = { v[3], v[2], v[1], v[7], v[6], v[5] };
645
646 out.write( reinterpret_cast< const char* >( wedge_0 ), sizeof( IntegerOutputType ) * 6 );
647 out.write( reinterpret_cast< const char* >( wedge_1 ), sizeof( IntegerOutputType ) * 6 );
648 }
649 }
650 }
651 }
652 }
653
654 template < typename ScalarTypeIn, typename ScalarTypeOut >
655 void write_scalar_attribute_binary_data(
656 const grid::Grid4DDataScalar< ScalarTypeIn >& device_data,
657 std::stringstream& out )
658 {
659 // Copy data to host.
660 if constexpr ( std::is_same_v< ScalarTypeIn, double > )
661 {
662 if ( !host_data_mirror_scalar_double_.has_value() )
663 {
664 host_data_mirror_scalar_double_ = Kokkos::create_mirror( Kokkos::HostSpace{}, device_data );
665 }
666
667 Kokkos::deep_copy( host_data_mirror_scalar_double_.value(), device_data );
668
669 const auto& host_data = host_data_mirror_scalar_double_.value();
670
671 for ( int local_subdomain_id = 0; local_subdomain_id < host_data.extent( 0 ); local_subdomain_id++ )
672 {
673 for ( int r = 0; r < host_data.extent( 3 ); r++ )
674 {
675 for ( int y = 0; y < host_data.extent( 2 ); y++ )
676 {
677 for ( int x = 0; x < host_data.extent( 1 ); x++ )
678 {
679 const auto value = static_cast< ScalarTypeOut >( host_data( local_subdomain_id, x, y, r ) );
680 out.write( reinterpret_cast< const char* >( &value ), sizeof( ScalarTypeOut ) );
681 }
682 }
683 }
684 }
685 }
686 else if constexpr ( std::is_same_v< ScalarTypeIn, float > )
687 {
688 if ( !host_data_mirror_scalar_float_.has_value() )
689 {
690 host_data_mirror_scalar_float_ = Kokkos::create_mirror( Kokkos::HostSpace{}, device_data );
691 }
692
693 Kokkos::deep_copy( host_data_mirror_scalar_float_.value(), device_data );
694
695 const auto& host_data = host_data_mirror_scalar_float_.value();
696
697 for ( int local_subdomain_id = 0; local_subdomain_id < host_data.extent( 0 ); local_subdomain_id++ )
698 {
699 for ( int r = 0; r < host_data.extent( 3 ); r++ )
700 {
701 for ( int y = 0; y < host_data.extent( 2 ); y++ )
702 {
703 for ( int x = 0; x < host_data.extent( 1 ); x++ )
704 {
705 const auto value = static_cast< ScalarTypeOut >( host_data( local_subdomain_id, x, y, r ) );
706 out.write( reinterpret_cast< const char* >( &value ), sizeof( ScalarTypeOut ) );
707 }
708 }
709 }
710 }
711 }
712 else
713 {
714 Kokkos::abort( "XDMF: Only double precision grids supported for scalar attributes." );
715 }
716 }
717
718 template < typename ScalarTypeIn >
719 util::XML write_scalar_attribute_file(
720 const grid::Grid4DDataScalar< ScalarTypeIn >& data,
721 const OutputTypeFloat& output_type )
722 {
723 const auto attribute_file_base = data.label() + "_" + std::to_string( write_counter_ ) + ".bin";
724 const auto attribute_file_path = directory_path_ + "/" + attribute_file_base;
725
726 {
727 std::stringstream attribute_stream;
728 switch ( output_type )
729 {
731 write_scalar_attribute_binary_data< ScalarTypeIn, float >( data, attribute_stream );
732 break;
734 write_scalar_attribute_binary_data< ScalarTypeIn, double >( data, attribute_stream );
735 break;
736 }
737
738 MPI_File fh;
739 MPI_File_open(
740 MPI_COMM_WORLD, attribute_file_path.c_str(), MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
741
742 // Define the file view: each process writes its local data sequentially
743 MPI_Offset disp = number_of_nodes_offset_ * static_cast< int >( output_type_points_ );
744 MPI_File_set_view( fh, disp, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL );
745
746 std::string attr_str = attribute_stream.str();
747
748 // Write data collectively
749 MPI_File_write_all(
750 fh, attr_str.data(), static_cast< int >( attr_str.size() ), MPI_CHAR, MPI_STATUS_IGNORE );
751
752 // Close the file
753 MPI_File_close( &fh );
754 }
755
756 auto attribute =
757 util::XML( "Attribute", { { "Name", data.label() }, { "AttributeType", "Scalar" }, { "Center", "Node" } } )
758 .add_child(
759 util::XML(
760 "DataItem",
761 { { "Format", "Binary" },
762 { "DataType", "Float" },
763 { "Precision", std::to_string( static_cast< int >( output_type ) ) },
764 { "Endian", "Little" },
765 { "Dimensions", std::to_string( number_of_nodes_global_ ) } },
766 attribute_file_base ) );
767
768 return attribute;
769 }
770
771 template < typename ScalarTypeIn, typename ScalarTypeOut, int VecDim >
772 void write_vec_attribute_binary_data(
773 const grid::Grid4DDataVec< ScalarTypeIn, VecDim >& device_data,
774 std::stringstream& out )
775 {
776 // Copy data to host.
777 if constexpr ( std::is_same_v< ScalarTypeIn, double > )
778 {
779 if ( !host_data_mirror_vec_double_.has_value() )
780 {
781 host_data_mirror_vec_double_ = Kokkos::create_mirror( Kokkos::HostSpace{}, device_data );
782 }
783
784 Kokkos::deep_copy( host_data_mirror_vec_double_.value(), device_data );
785
786 const auto& host_data = host_data_mirror_vec_double_.value();
787
788 for ( int local_subdomain_id = 0; local_subdomain_id < host_data.extent( 0 ); local_subdomain_id++ )
789 {
790 for ( int r = 0; r < host_data.extent( 3 ); r++ )
791 {
792 for ( int y = 0; y < host_data.extent( 2 ); y++ )
793 {
794 for ( int x = 0; x < host_data.extent( 1 ); x++ )
795 {
796 for ( int d = 0; d < VecDim; d++ )
797 {
798 const auto value =
799 static_cast< ScalarTypeOut >( host_data( local_subdomain_id, x, y, r, d ) );
800 out.write( reinterpret_cast< const char* >( &value ), sizeof( ScalarTypeOut ) );
801 }
802 }
803 }
804 }
805 }
806 }
807 else if constexpr ( std::is_same_v< ScalarTypeIn, float > )
808 {
809 if ( !host_data_mirror_vec_float_.has_value() )
810 {
811 host_data_mirror_vec_float_ = Kokkos::create_mirror( Kokkos::HostSpace{}, device_data );
812 }
813
814 Kokkos::deep_copy( host_data_mirror_vec_float_.value(), device_data );
815
816 const auto& host_data = host_data_mirror_vec_float_.value();
817
818 for ( int local_subdomain_id = 0; local_subdomain_id < host_data.extent( 0 ); local_subdomain_id++ )
819 {
820 for ( int r = 0; r < host_data.extent( 3 ); r++ )
821 {
822 for ( int y = 0; y < host_data.extent( 2 ); y++ )
823 {
824 for ( int x = 0; x < host_data.extent( 1 ); x++ )
825 {
826 for ( int d = 0; d < VecDim; d++ )
827 {
828 const auto value =
829 static_cast< ScalarTypeOut >( host_data( local_subdomain_id, x, y, r, d ) );
830 out.write( reinterpret_cast< const char* >( &value ), sizeof( ScalarTypeOut ) );
831 }
832 }
833 }
834 }
835 }
836 }
837 else
838 {
839 Kokkos::abort( "XDMF: Only double precision grids supported for vector-valued attributes." );
840 }
841 }
842
843 template < typename ScalarTypeIn, int VecDim >
844 util::XML write_vec_attribute_file(
845 const grid::Grid4DDataVec< ScalarTypeIn, VecDim >& data,
846 const OutputTypeFloat& output_type )
847 {
848 const auto attribute_file_base = data.label() + "_" + std::to_string( write_counter_ ) + ".bin";
849 const auto attribute_file_path = directory_path_ + "/" + attribute_file_base;
850
851 {
852 std::stringstream attribute_stream;
853 switch ( output_type )
854 {
856 write_vec_attribute_binary_data< ScalarTypeIn, float >( data, attribute_stream );
857 break;
859 write_vec_attribute_binary_data< ScalarTypeIn, double >( data, attribute_stream );
860 break;
861 }
862
863 MPI_File fh;
864 MPI_File_open(
865 MPI_COMM_WORLD, attribute_file_path.c_str(), MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
866
867 // Define the file view: each process writes its local data sequentially
868 MPI_Offset disp = VecDim * number_of_nodes_offset_ * static_cast< int >( output_type_points_ );
869 MPI_File_set_view( fh, disp, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL );
870
871 std::string attr_str = attribute_stream.str();
872
873 // Write data collectively
874 MPI_File_write_all(
875 fh, attr_str.data(), static_cast< int >( attr_str.size() ), MPI_CHAR, MPI_STATUS_IGNORE );
876
877 // Close the file
878 MPI_File_close( &fh );
879 }
880
881 auto attribute =
882 util::XML( "Attribute", { { "Name", data.label() }, { "AttributeType", "Vector" }, { "Center", "Node" } } )
883 .add_child(
884 util::XML(
885 "DataItem",
886 { { "Format", "Binary" },
887 { "DataType", "Float" },
888 { "Precision", std::to_string( static_cast< int >( output_type ) ) },
889 { "Endian", "Little" },
890 { "Dimensions",
891 std::to_string( number_of_nodes_global_ ) + " " + std::to_string( VecDim ) } },
892 attribute_file_base ) );
893
894 return attribute;
895 }
896
897 void write_checkpoint_metadata()
898 {
899 const auto checkpoint_metadata_file_path = directory_path_ + "/" + "checkpoint_metadata.bin";
900
901 std::stringstream checkpoint_metadata_stream;
902
903 auto write_i32 = [&]( const int32_t value ) {
904 checkpoint_metadata_stream.write( reinterpret_cast< const char* >( &value ), sizeof( int32_t ) );
905 };
906
907 auto write_i64 = [&]( const int64_t value ) {
908 checkpoint_metadata_stream.write( reinterpret_cast< const char* >( &value ), sizeof( int64_t ) );
909 };
910
911 auto write_f64 = [&]( const double value ) {
912 checkpoint_metadata_stream.write( reinterpret_cast< const char* >( &value ), sizeof( double ) );
913 };
914
915 write_i32( 1 ); // version
916 write_i32( distributed_domain_.domain_info().num_subdomains_per_diamond_side() );
917 write_i32( distributed_domain_.domain_info().num_subdomains_in_radial_direction() );
918
919 write_i32( coords_shell_device_.extent( 1 ) ); // subdomain_size_x
920 write_i32( coords_shell_device_.extent( 2 ) ); // subdomain_size_y
921 write_i32( coords_radii_device_.extent( 1 ) ); // subdomain_size_r
922
923 for ( const auto r : distributed_domain_.domain_info().radii() )
924 {
925 write_f64( static_cast< double >( r ) );
926 }
927
928 write_i32( static_cast< int32_t >( output_type_points_ ) );
929
930 write_i32(
931 device_data_views_scalar_float_.size() + device_data_views_scalar_double_.size() +
932 device_data_views_vec_float_.size() + device_data_views_vec_double_.size() );
933
934 for ( const auto& [data, output_type] : device_data_views_scalar_float_ )
935 {
936 write_i32( data.label().size() );
937 checkpoint_metadata_stream.write( data.label().data(), data.label().size() );
938 write_i32( 2 ); // scalar_data_type
939 if ( output_type == OutputTypeFloat::Float32 )
940 {
941 write_i32( sizeof( float ) );
942 }
943 else if ( output_type == OutputTypeFloat::Float64 )
944 {
945 write_i32( sizeof( double ) );
946 }
947 else
948 {
949 Kokkos::abort( "Invalid output type." );
950 }
951
952 write_i32( 1 ); // vec_dim
953 }
954
955 for ( const auto& [data, output_type] : device_data_views_scalar_double_ )
956 {
957 write_i32( data.label().size() );
958 checkpoint_metadata_stream.write( data.label().data(), data.label().size() );
959 write_i32( 2 ); // scalar_data_type
960 if ( output_type == OutputTypeFloat::Float32 )
961 {
962 write_i32( sizeof( float ) );
963 }
964 else if ( output_type == OutputTypeFloat::Float64 )
965 {
966 write_i32( sizeof( double ) );
967 }
968 else
969 {
970 Kokkos::abort( "Invalid output type." );
971 }
972 write_i32( 1 ); // vec_dim
973 }
974
975 for ( const auto& [data, output_type] : device_data_views_vec_float_ )
976 {
977 write_i32( data.label().size() );
978 checkpoint_metadata_stream.write( data.label().data(), data.label().size() );
979 write_i32( 2 ); // scalar_data_type
980 if ( output_type == OutputTypeFloat::Float32 )
981 {
982 write_i32( sizeof( float ) );
983 }
984 else if ( output_type == OutputTypeFloat::Float64 )
985 {
986 write_i32( sizeof( double ) );
987 }
988 else
989 {
990 Kokkos::abort( "Invalid output type." );
991 }
992 write_i32( data.extent( 4 ) ); // vec_dim
993 }
994
995 for ( const auto& [data, output_type] : device_data_views_vec_double_ )
996 {
997 write_i32( data.label().size() );
998 checkpoint_metadata_stream.write( data.label().data(), data.label().size() );
999 write_i32( 2 ); // scalar_data_type
1000 if ( output_type == OutputTypeFloat::Float32 )
1001 {
1002 write_i32( sizeof( float ) );
1003 }
1004 else if ( output_type == OutputTypeFloat::Float64 )
1005 {
1006 write_i32( sizeof( double ) );
1007 }
1008 else
1009 {
1010 Kokkos::abort( "Invalid output type." );
1011 }
1012 write_i32( data.extent( 4 ) ); // vec_dim
1013 }
1014
1015 write_i32( 0 ); // checkpoint_subdomain_ordering
1016
1017 // for ( int local_subdomain_id = 0; local_subdomain_id < coords_shell_device_.extent( 0 ); local_subdomain_id++ )
1018 // {
1019 // write_i64( distributed_domain_.subdomain_info_from_local_idx( local_subdomain_id ).global_id() );
1020 // }
1021
1022 MPI_File fh;
1023 MPI_File_open(
1024 MPI_COMM_WORLD,
1025 checkpoint_metadata_file_path.c_str(),
1026 MPI_MODE_CREATE | MPI_MODE_RDWR,
1027 MPI_INFO_NULL,
1028 &fh );
1029
1030 // Define the file view: each process writes its local data sequentially
1031 MPI_Offset disp = 0;
1032 const auto offset_metadata_size_bytes = static_cast< int >( checkpoint_metadata_stream.str().size() );
1033 if ( mpi::rank() != 0 )
1034 {
1035 // We'll write the global metadata just via rank 0.
1036 // Next up: each process writes the global subdomain IDs of their local subdomains in parallel.
1037 // We have previously also filled the stream with the global metadata on non-root processes to facilitate
1038 // computing its length (we need to know where to start writing).
1039 // After computing its length and before writing the subdomain ids we clear the stream here.
1040 disp = offset_metadata_size_bytes + number_of_subdomains_offset_ * sizeof( int64_t );
1041 checkpoint_metadata_stream.clear();
1042 checkpoint_metadata_stream.str( "" );
1043 }
1044
1045 // Writing the global subdomain ids of the local subdomains now in contiguous chunks per process.
1046 // Must be the order of the local_subdomain_id here to adhere to the ordering of the data output (which also
1047 // writes in that order).
1048 for ( int local_subdomain_id = 0; local_subdomain_id < coords_shell_device_.extent( 0 ); local_subdomain_id++ )
1049 {
1050 write_i64( distributed_domain_.subdomain_info_from_local_idx( local_subdomain_id ).global_id() );
1051 }
1052
1053 MPI_File_set_view( fh, disp, MPI_CHAR, MPI_CHAR, "native", MPI_INFO_NULL );
1054
1055 std::string checkpoint_metadata_str = checkpoint_metadata_stream.str();
1056
1057 // Write data collectively
1058 MPI_File_write_all(
1059 fh,
1060 checkpoint_metadata_str.data(),
1061 static_cast< int >( checkpoint_metadata_str.size() ),
1062 MPI_CHAR,
1063 MPI_STATUS_IGNORE );
1064
1065 // Close the file
1066 MPI_File_close( &fh );
1067 }
1068
// Target directory for all output: binary grid/attribute files, .xmf files, and checkpoint metadata.
1069 std::string directory_path_;
1070
1071 grid::shell::DistributedDomain distributed_domain_;
1072
// Mesh geometry on device: lateral shell coordinates and radii per subdomain.
1073 grid::Grid3DDataVec< InputGridScalarType, 3 > coords_shell_device_;
1074 grid::Grid2DDataScalar< InputGridScalarType > coords_radii_device_;
1075
// Output precision selected for the mesh points and the connectivity (topology) data.
1076 OutputTypeFloat output_type_points_;
1077 OutputTypeInt output_type_connectivity_;
1078
1079 // Collecting all views that are written on every write call.
1080 std::vector< std::pair< grid::Grid4DDataScalar< double >, OutputTypeFloat > > device_data_views_scalar_double_;
1081 std::vector< std::pair< grid::Grid4DDataScalar< float >, OutputTypeFloat > > device_data_views_scalar_float_;
1082
1083 std::vector< std::pair< grid::Grid4DDataVec< double, 3 >, OutputTypeFloat > > device_data_views_vec_double_;
1084 std::vector< std::pair< grid::Grid4DDataVec< float, 3 >, OutputTypeFloat > > device_data_views_vec_float_;
1085
1086 // Just a single mirror for buffering during write.
// Lazily allocated host mirrors (one per element type), reused across write() calls.
1087 std::optional< grid::Grid4DDataScalar< double >::HostMirror > host_data_mirror_scalar_double_;
1088 std::optional< grid::Grid4DDataScalar< float >::HostMirror > host_data_mirror_scalar_float_;
1089
1090 std::optional< grid::Grid4DDataVec< double, 3 >::HostMirror > host_data_mirror_vec_double_;
1091 std::optional< grid::Grid4DDataVec< float, 3 >::HostMirror > host_data_mirror_vec_float_;
1092
// Number of write() calls so far; used as the step suffix in the output file names.
1093 int write_counter_ = 0;
1094 bool first_write_happened_ = false;
1095
// Per-rank offsets used to position this rank's chunk in the shared output files (e.g., the nodes offset
// scales the MPI file-view displacement). -1 while uninitialized; presumably set by the first write — TODO confirm.
1096 int number_of_nodes_offset_ = -1;
1097 int number_of_elements_offset_ = -1;
1098 int number_of_subdomains_offset_ = -1;
1099
// Global totals across all ranks (e.g., used in the XDMF "Dimensions" attribute). -1 while uninitialized.
1100 int number_of_nodes_global_ = -1;
1101 int number_of_elements_global_ = -1;
1102 int number_of_subdomains_global_ = -1;
1103};
1104
/// Captures the format of the checkpoint metadata.
/// See \ref XDMFOutput for details on the format.
struct CheckpointMetadata
{
    int32_t version{};

    int32_t num_subdomains_per_diamond_lateral_direction{};
    int32_t num_subdomains_per_diamond_radial_direction{};

    // Per-subdomain extents (number of nodes per direction).
    int32_t size_x{};
    int32_t size_y{};
    int32_t size_r{};

    std::vector< double > radii;

    /// Bytes per scalar of the stored mesh point coordinates (present for version >= 1).
    int32_t grid_scalar_bytes{};

    /// Metadata for a single grid data (attribute) file stored in the checkpoint.
    struct GridDataFile
    {
        std::string grid_name_string;
        int32_t scalar_data_type{}; // 2 == floating point
        int32_t scalar_bytes{};     // bytes per scalar (4 or 8)
        int32_t vec_dim{};          // 1 for scalar grids, e.g., 3 for vector-valued grids
    };

    std::vector< GridDataFile > grid_data_files;

    /// How subdomain chunks are ordered in the data files (only ordering 0 is currently defined).
    int32_t checkpoint_subdomain_ordering{};

    /// For ordering 0: global subdomain ID of each chunk, in file order.
    std::vector< int64_t > checkpoint_ordering_0_global_subdomain_ids;
};
1134
1135/// @brief Reads metadata from an XDMF/checkpoint directory. See \ref XDMFOutput for details.
1136///
1137/// @param checkpoint_directory path to the directory containing the XDMF data
1138/// @return Populated \ref CheckpointMetadata struct.
1139[[nodiscard]] inline util::Result< CheckpointMetadata >
1140 read_xdmf_checkpoint_metadata( const std::string& checkpoint_directory )
1141{
1142 const auto metadata_file = checkpoint_directory + "/" + "checkpoint_metadata.bin";
1143
1144 if ( !std::filesystem::exists( metadata_file ) )
1145 {
1146 return { "Could not find file: " + metadata_file };
1147 }
1148
1149 MPI_File fh;
1150 MPI_File_open( MPI_COMM_WORLD, metadata_file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh );
1151 MPI_Offset filesize;
1152 MPI_File_get_size( fh, &filesize );
1153
1154 std::vector< char > buffer( filesize );
1155 MPI_File_read_all( fh, buffer.data(), filesize, MPI_BYTE, MPI_STATUS_IGNORE );
1156 MPI_File_close( &fh );
1157
1158 std::string data( buffer.data(), buffer.size() );
1159
1160 std::istringstream in( data, std::ios::binary );
1161
1162 auto read_i32 = [&]( int32_t& value ) {
1163 in.read( reinterpret_cast< char* >( &value ), sizeof( value ) );
1164 if ( !in )
1165 {
1166 return 1;
1167 }
1168 return 0;
1169 };
1170
1171 auto read_i64 = [&]( int64_t& value ) {
1172 in.read( reinterpret_cast< char* >( &value ), sizeof( value ) );
1173 if ( !in )
1174 {
1175 return 1;
1176 }
1177 return 0;
1178 };
1179
1180 auto read_f64 = [&]( double& value ) {
1181 in.read( reinterpret_cast< char* >( &value ), sizeof( value ) );
1182 if ( !in )
1183 {
1184 return 1;
1185 }
1186 return 0;
1187 };
1188
1189 const std::string read_error = "Failed to read from input stream.";
1190
1191 CheckpointMetadata metadata;
1192
1193 if ( read_i32( metadata.version ) )
1194 return read_error;
1195 if ( read_i32( metadata.num_subdomains_per_diamond_lateral_direction ) )
1196 return read_error;
1197 if ( read_i32( metadata.num_subdomains_per_diamond_radial_direction ) )
1198 return read_error;
1199 if ( read_i32( metadata.size_x ) )
1200 return read_error;
1201 if ( read_i32( metadata.size_y ) )
1202 return read_error;
1203 if ( read_i32( metadata.size_r ) )
1204 return read_error;
1205
1206 for ( int i = 0; i < metadata.num_subdomains_per_diamond_radial_direction * ( metadata.size_r - 1 ) + 1; i++ )
1207 {
1208 double r;
1209 if ( read_f64( r ) )
1210 return read_error;
1211
1212 if ( i > 0 && r <= metadata.radii.back() )
1213 {
1214 return { "Radii are not sorted correctly in checkpoint." };
1215 }
1216
1217 metadata.radii.push_back( r );
1218 }
1219
1220 if ( metadata.version > 0 )
1221 {
1222 // new in version 1: number of bytes for grid points data
1223 if ( read_i32( metadata.grid_scalar_bytes ) )
1224 return read_error;
1225 }
1226
1227 int32_t num_grid_data_files;
1228 if ( read_i32( num_grid_data_files ) )
1229 return read_error;
1230
1231 metadata.grid_data_files.resize( num_grid_data_files );
1232 for ( auto& [grid_name_string, scalar_data_type, scalar_bytes, vec_dim] : metadata.grid_data_files )
1233 {
1234 int32_t grid_name_string_size_bytes;
1235 if ( read_i32( grid_name_string_size_bytes ) )
1236 return read_error;
1237
1238 grid_name_string = std::string( grid_name_string_size_bytes, '\0' );
1239 in.read( &grid_name_string[0], grid_name_string_size_bytes );
1240 if ( !in )
1241 return read_error;
1242
1243 if ( read_i32( scalar_data_type ) )
1244 return read_error;
1245 if ( read_i32( scalar_bytes ) )
1246 return read_error;
1247 if ( read_i32( vec_dim ) )
1248 return read_error;
1249 }
1250
1251 if ( read_i32( metadata.checkpoint_subdomain_ordering ) )
1252 return read_error;
1253
1254 if ( metadata.checkpoint_subdomain_ordering == 0 )
1255 {
1256 const auto num_global_subdomains = 10 * metadata.num_subdomains_per_diamond_lateral_direction *
1259
1260 metadata.checkpoint_ordering_0_global_subdomain_ids.resize( num_global_subdomains );
1261 for ( auto& global_subdomain_id : metadata.checkpoint_ordering_0_global_subdomain_ids )
1262 {
1263 if ( read_i64( global_subdomain_id ) )
1264 return read_error + " (global_subdomain_id reading error)";
1265 }
1266 }
1267
1268 return metadata;
1269}
1270
1271/// @brief Reads a single grid at a single write step from an XDMF checkpoint.
1272///
1273/// See \ref XDMFOutput for details.
1274///
1275/// @param checkpoint_directory path to the directory containing the XDMF data
1276/// @param data_label the Kokkos::View label of the grid data that shall be read in
1277/// @param step the "timestep" to read
1278/// @param distributed_domain \ref DistributedDomain instance that has the same topology as the one used when writing
1279/// the checkpoint
1280/// @param grid_data_device [out] properly sized Kokkos::View (can live on a device) to write the checkpoint to
1281template < typename GridDataType >
1283 const std::string& checkpoint_directory,
1284 const std::string& data_label,
1285 const int step,
1286 const grid::shell::DistributedDomain& distributed_domain,
1287 GridDataType& grid_data_device )
1288{
1289 auto checkpoint_metadata_result = read_xdmf_checkpoint_metadata( checkpoint_directory );
1290
1291 if ( checkpoint_metadata_result.is_err() )
1292 {
1293 return "Could not read checkpoint metadata: " + checkpoint_metadata_result.error();
1294 }
1295
1296 const auto& checkpoint_metadata = checkpoint_metadata_result.unwrap();
1297
1298 if ( !( checkpoint_metadata.version == 0 || checkpoint_metadata.version == 1 ) )
1299 {
1300 return {
1301 "Supported checkpoint verions: 0, 1. This checkpoint has version " +
1302 std::to_string( checkpoint_metadata.version ) + "." };
1303 }
1304
1305 // Check whether we have checkpoint metadata for the requested data label.
1306
1307 std::optional< CheckpointMetadata::GridDataFile > requested_grid_data_file;
1308 for ( const auto& grid_data_file : checkpoint_metadata.grid_data_files )
1309 {
1310 if ( grid_data_file.grid_name_string == data_label )
1311 {
1312 requested_grid_data_file = grid_data_file;
1313 break;
1314 }
1315 }
1316
1317 if ( !requested_grid_data_file.has_value() )
1318 {
1319 return { "Could not find requested data (" + data_label + ") in checkpoint" };
1320 }
1321
1322 // Check if we can also find a binary data file in the checkpoint directory.
1323
1324 const auto data_file_path = checkpoint_directory + "/" + data_label + "_" + std::to_string( step ) + ".bin";
1325
1326 if ( !std::filesystem::exists( data_file_path ) )
1327 {
1328 return { "Could not find checkpoint data file for requested label and step." };
1329 }
1330
1331 // Now we compare the grid extents with the metadata.
1332
1333 if ( grid_data_device.extent( 1 ) != checkpoint_metadata.size_x ||
1334 grid_data_device.extent( 2 ) != checkpoint_metadata.size_y ||
1335 grid_data_device.extent( 3 ) != checkpoint_metadata.size_r )
1336 {
1337 return { "Grid data extents do not match metadata." };
1338 }
1339
1340 // Let's try to read now.
1341 // We will attempt to read all chunks for the local subdomains (that are possibly distributed in the file)
1342 // into one array. In a second step we'll copy that into our grid data. That is not 100% memory efficient as we
1343 // have another copy (the buffer). We could write directly with the std library, but I am not sure if that is
1344 // scalable.
1345
1346 if ( checkpoint_metadata.checkpoint_subdomain_ordering != 0 )
1347 {
1348 return { "Checkpoint ordering type is not 0. Not supported." };
1349 }
1350
1351 // Figure out where the subdomain data is positioned in the file.
1352
1353 const auto num_local_subdomains = grid_data_device.extent( 0 );
1354
1355 const auto local_subdomain_num_scalars_v = checkpoint_metadata.size_x * checkpoint_metadata.size_y *
1356 checkpoint_metadata.size_r * requested_grid_data_file.value().vec_dim;
1357
1358 const auto local_subdomain_data_size_in_bytes =
1359 local_subdomain_num_scalars_v * requested_grid_data_file.value().scalar_bytes;
1360
1361 std::vector< MPI_Aint > local_subdomain_offsets_in_file_bytes( num_local_subdomains );
1362 std::vector< int > local_subdomain_num_bytes( num_local_subdomains, local_subdomain_data_size_in_bytes );
1363
1364 for ( int local_subdomain_id = 0; local_subdomain_id < num_local_subdomains; local_subdomain_id++ )
1365 {
1366 const auto global_subdomain_id =
1367 distributed_domain.subdomain_info_from_local_idx( local_subdomain_id ).global_id();
1368
1369 const auto index_in_file =
1370 std::ranges::find( checkpoint_metadata.checkpoint_ordering_0_global_subdomain_ids, global_subdomain_id ) -
1371 checkpoint_metadata.checkpoint_ordering_0_global_subdomain_ids.begin();
1372
1373 local_subdomain_offsets_in_file_bytes[local_subdomain_id] = index_in_file * local_subdomain_data_size_in_bytes;
1374 }
1375
1376 // Read with MPI
1377
1378 std::vector< char > buffer( num_local_subdomains * local_subdomain_data_size_in_bytes );
1379
1380 MPI_File fh;
1381 MPI_File_open( MPI_COMM_WORLD, data_file_path.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh );
1382
1383 MPI_Datatype filetype;
1384 MPI_Type_create_hindexed(
1385 num_local_subdomains,
1386 local_subdomain_num_bytes.data(),
1387 local_subdomain_offsets_in_file_bytes.data(),
1388 MPI_CHAR,
1389 &filetype );
1390 MPI_Type_commit( &filetype );
1391
1392 // Set each rank’s view of the file to its own scattered layout
1393 MPI_File_set_view( fh, 0, MPI_CHAR, filetype, "native", MPI_INFO_NULL );
1394
1395 MPI_File_read_all( fh, buffer.data(), buffer.size(), MPI_CHAR, MPI_STATUS_IGNORE );
1396
1397 MPI_Type_free( &filetype );
1398 MPI_File_close( &fh );
1399
1400 // Now write from buffer to grid.
1401
1402 typename GridDataType::HostMirror grid_data_host = Kokkos::create_mirror( Kokkos::HostSpace{}, grid_data_device );
1403
1404 const auto checkpoint_is_float =
1405 requested_grid_data_file.value().scalar_data_type == 2 && requested_grid_data_file.value().scalar_bytes == 4;
1406 const auto checkpoint_is_double =
1407 requested_grid_data_file.value().scalar_data_type == 2 && requested_grid_data_file.value().scalar_bytes == 8;
1408
1409 if ( !( checkpoint_is_float || checkpoint_is_double ) )
1410 {
1411 return { "Unsupported data type in checkpoint." };
1412 }
1413
1414 // Wrap vector in a stream
1415 std::string_view view( reinterpret_cast< const char* >( buffer.data() ), buffer.size() );
1416 std::istringstream stream{ std::string( view ) }; // make a copy to own the buffer
1417
1418 auto read_f32 = [&]() -> float {
1419 float value;
1420 stream.read( reinterpret_cast< char* >( &value ), sizeof( float ) );
1421 if ( stream.fail() )
1422 {
1423 Kokkos::abort( "Failed to read from stream." );
1424 }
1425 return value;
1426 };
1427
1428 auto read_f64 = [&]() -> double {
1429 double value;
1430 stream.read( reinterpret_cast< char* >( &value ), sizeof( double ) );
1431 if ( stream.fail() )
1432 {
1433 Kokkos::abort( "Failed to read from stream." );
1434 }
1435 return value;
1436 };
1437
1438 for ( int local_subdomain_id = 0; local_subdomain_id < grid_data_host.extent( 0 ); local_subdomain_id++ )
1439 {
1440 for ( int r = 0; r < grid_data_host.extent( 3 ); r++ )
1441 {
1442 for ( int y = 0; y < grid_data_host.extent( 2 ); y++ )
1443 {
1444 for ( int x = 0; x < grid_data_host.extent( 1 ); x++ )
1445 {
1446 if constexpr (
1447 std::is_same_v< GridDataType, grid::Grid4DDataScalar< float > > ||
1448 std::is_same_v< GridDataType, grid::Grid4DDataScalar< double > > )
1449 {
1450 if ( requested_grid_data_file.value().vec_dim != 1 )
1451 {
1452 return { "Checkpoint is scalar, passed grid data view dims do not match." };
1453 }
1454
1455 // scalar data
1456 if ( checkpoint_is_float )
1457 {
1458 grid_data_host( local_subdomain_id, x, y, r ) =
1459 static_cast< GridDataType::value_type >( read_f32() );
1460 }
1461 else if ( checkpoint_is_double )
1462 {
1463 grid_data_host( local_subdomain_id, x, y, r ) =
1464 static_cast< GridDataType::value_type >( read_f64() );
1465 }
1466 }
1467 else if constexpr (
1468 std::is_same_v< GridDataType, grid::Grid4DDataVec< float, 3 > > ||
1469 std::is_same_v< GridDataType, grid::Grid4DDataVec< double, 3 > > )
1470 {
1471 if ( requested_grid_data_file.value().vec_dim != 3 )
1472 {
1473 return { "Checkpoint is vector-valued, passed grid data view dims do not match." };
1474 }
1475
1476 // vector-valued data
1477 for ( int d = 0; d < grid_data_host.extent( 4 ); d++ )
1478 {
1479 if ( checkpoint_is_float )
1480 {
1481 grid_data_host( local_subdomain_id, x, y, r, d ) =
1482 static_cast< GridDataType::value_type >( read_f32() );
1483 }
1484 else if ( checkpoint_is_double )
1485 {
1486 grid_data_host( local_subdomain_id, x, y, r, d ) =
1487 static_cast< GridDataType::value_type >( read_f64() );
1488 }
1489 }
1490 }
1491 }
1492 }
1493 }
1494 }
1495
1496 Kokkos::deep_copy( grid_data_device, grid_data_host );
1497 Kokkos::fence();
1498
1499 return { util::Ok{} };
1500}
1501
1502} // namespace terra::io
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2498
const SubdomainInfo & subdomain_info_from_local_idx(const LocalSubdomainIdx subdomain_idx) const
Definition spherical_shell.hpp:2585
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2577
int num_subdomains_per_diamond_side() const
Definition spherical_shell.hpp:847
int num_subdomains_in_radial_direction() const
Definition spherical_shell.hpp:849
int64_t global_id() const
Scrambles the four indices (diamond ID, x, y, r) into a single integer.
Definition spherical_shell.hpp:663
XDMF output that simultaneously serves for visualization with software like Paraview and as a simulat...
Definition xdmf.hpp:149
void set_write_counter(int write_counter)
Set the write counter manually.
Definition xdmf.hpp:207
OutputTypeFloat
Used to specify the output type when writing floating point data.
Definition xdmf.hpp:155
OutputTypeInt
Used to specify the output type when writing (signed) integer data.
Definition xdmf.hpp:164
XDMFOutput(const std::string &directory_path, const grid::shell::DistributedDomain &distributed_domain, const grid::Grid3DDataVec< InputGridScalarType, 3 > &coords_shell_device, const grid::Grid2DDataScalar< InputGridScalarType > &coords_radii_device, const OutputTypeFloat output_type_points=OutputTypeFloat::Float32, const OutputTypeInt output_type_connectivity=OutputTypeInt::Int64)
Constructs an XDMFOutput object.
Definition xdmf.hpp:183
void add(const grid::Grid4DDataScalar< InputScalarDataType > &data, const OutputTypeFloat output_type=OutputTypeFloat::Float32)
Adds a new scalar data grid to be written out.
Definition xdmf.hpp:214
void add(const grid::Grid4DDataVec< InputScalarDataType, VecDim > &data, const OutputTypeFloat output_type=OutputTypeFloat::Float32)
Adds a new vector-valued data grid to be written out.
Definition xdmf.hpp:250
void write()
Writes a "time step".
Definition xdmf.hpp:293
Definition result.hpp:13
Definition xml.hpp:14
int r
Definition EpsilonDivDiv_kernel_gen.py:345
value
Definition EpsilonDivDiv_kernel_gen.py:285
local_subdomain_id
Definition EpsilonDivDiv_kernel_gen.py:23
dense::Vec< typename CoordsShellType::value_type, 3 > coords(const int subdomain, const int x, const int y, const int r, const CoordsShellType &coords_shell, const CoordsRadiiType &coords_radii)
Definition spherical_shell.hpp:2789
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:40
Kokkos::View< ScalarType ****[VecDim], Layout > Grid4DDataVec
Definition grid_types.hpp:43
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:25
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:19
Definition vtk.hpp:13
util::Result read_xdmf_checkpoint_grid(const std::string &checkpoint_directory, const std::string &data_label, const int step, const grid::shell::DistributedDomain &distributed_domain, GridDataType &grid_data_device)
Reads a single grid at a single write step from an XDMF checkpoint.
Definition xdmf.hpp:1282
util::Result< CheckpointMetadata > read_xdmf_checkpoint_metadata(const std::string &checkpoint_directory)
Reads metadata from an XDMF/checkpoint directory. See XDMFOutput for details.
Definition xdmf.hpp:1140
MPIRank rank()
Definition mpi.hpp:10
bool prepare_empty_directory(const std::string &path_str, bool root_only=true)
Prepares an empty directory with the passed path.
Definition filesystem.hpp:16
int32_t scalar_bytes
Definition xdmf.hpp:1124
int32_t vec_dim
Definition xdmf.hpp:1125
int32_t scalar_data_type
Definition xdmf.hpp:1123
std::string grid_name_string
Definition xdmf.hpp:1122
Definition xdmf.hpp:1108
int32_t size_y
Definition xdmf.hpp:1113
int32_t num_subdomains_per_diamond_radial_direction
Definition xdmf.hpp:1111
int32_t version
Definition xdmf.hpp:1109
int32_t num_subdomains_per_diamond_lateral_direction
Definition xdmf.hpp:1110
int32_t size_x
Definition xdmf.hpp:1112
int32_t size_r
Definition xdmf.hpp:1114
int32_t checkpoint_subdomain_ordering
Definition xdmf.hpp:1130
int32_t grid_scalar_bytes
Definition xdmf.hpp:1118
std::vector< double > radii
Definition xdmf.hpp:1116
std::vector< int64_t > checkpoint_ordering_0_global_subdomain_ids
Definition xdmf.hpp:1132
std::vector< GridDataFile > grid_data_files
Definition xdmf.hpp:1128
Definition result.hpp:9