Loading...
Searching...
No Matches
communication.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <ranges>
4#include <variant>
5#include <vector>
6
7#include "dense/vec.hpp"
8#include "grid/grid_types.hpp"
11
13
15
/// MPI tag used for all boundary-data messages posted by the send/recv routines in this header.
/// `inline` so the header-defined constant has a single definition across translation units
/// (a plain namespace-scope `constexpr` variable has internal linkage, one copy per TU).
inline constexpr int MPI_TAG_BOUNDARY_DATA = 100;
17
18namespace detail {
19
20// Build an unmanaged view with the *same* data_type/layout/device as a Grid*DDataVec,
21// pointing into a raw pointer slice.
22template < class GridViewT >
23auto make_unmanaged_like( typename GridViewT::value_type* ptr, int n0 = 0, int n1 = 0, int n2 = 0 )
24{
25 using data_type = typename GridViewT::data_type;
26 using array_layout = typename GridViewT::array_layout;
27 using device_type = typename GridViewT::device_type;
28
29 using unmanaged_view =
30 Kokkos::View< data_type, array_layout, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > >;
31
32 if constexpr ( GridViewT::rank == 1 )
33 return unmanaged_view( ptr, n0 );
34 else if constexpr ( GridViewT::rank == 2 )
35 return unmanaged_view( ptr, n0, n1 );
36 else if constexpr ( GridViewT::rank == 3 )
37 return unmanaged_view( ptr, n0, n1, n2 );
38 else
39 static_assert( GridViewT::rank >= 1 && GridViewT::rank <= 3, "Unsupported rank for unmanaged-like helper." );
40}
41
42} // namespace detail
43
44/// @brief Send and receive buffers for all process-local subdomain boundaries.
45///
46/// Allocates views for all boundaries of local subdomains. Those are the nodes that overlap with values from
47/// neighboring subdomains.
48///
49/// One buffer per local boundary + neighbor is allocated. So, for instance, for an edge shared with several
50/// neighbors, just as many buffers as neighbors are allocated. This facilitates the receiving step since all
51/// neighbors that a subdomain receives data from can send their data simultaneously.
52///
53/// Can be reused after communication (send + recv) has been completed to avoid unnecessary reallocation.
// NOTE(review): This listing was extracted from generated documentation and is incomplete.
// The class head (orig. line 55, class SubdomainNeighborhoodSendRecvBuffer per the trailing
// member docs), the constructor and accessor signatures (orig. lines 59/73/92/110/120/131/141),
// and parts of the buffer allocations in setup_buffers are missing from the extraction.
// Code below is reproduced verbatim; the gaps are extraction artifacts, not intentional.
54template < typename ScalarType, int VecDim = 1 >
56{
57 public:
58 /// @brief Constructs a SubdomainNeighborhoodSendRecvBuffer for the passed distributed domain object.
// NOTE(review): ctor signature (orig. line 59) missing; per trailing docs it is
// SubdomainNeighborhoodSendRecvBuffer( const grid::shell::DistributedDomain& domain ).
60 {
61 setup_buffers( domain );
62 }
63
64 /// @brief Const reference to the view that is a buffer for a vertex of a subdomain.
65 ///
66 /// @param local_subdomain the SubdomainInfo identifying the local subdomain
67 /// @param local_boundary_vertex the boundary vertex of the local subdomain
68 /// @param neighbor_subdomain the SubdomainInfo identifying the neighboring subdomain
69 /// @param neighbor_boundary_vertex the boundary vertex of the neighboring subdomain
70 ///
71 /// @return A const ref to a Kokkos::View with shape (VecDim), where VecDim is the number of scalars per node (class
72 /// template parameter).
74 const grid::shell::SubdomainInfo& local_subdomain,
75 const grid::BoundaryVertex local_boundary_vertex,
76 const grid::shell::SubdomainInfo& neighbor_subdomain,
77 const grid::BoundaryVertex neighbor_boundary_vertex ) const
78 {
// .at() throws std::out_of_range if the (local, boundary, neighbor, boundary) key was not
// allocated in setup_buffers - i.e. if the queried pair is not an actual neighborhood relation.
79 return buffers_vertex_.at(
80 { local_subdomain, local_boundary_vertex, neighbor_subdomain, neighbor_boundary_vertex } );
81 }
82
83 /// @brief Const reference to the view that is a buffer for an edge of a subdomain.
84 ///
85 /// @param local_subdomain the SubdomainInfo identifying the local subdomain
86 /// @param local_boundary_edge the boundary edge of the local subdomain
87 /// @param neighbor_subdomain the SubdomainInfo identifying the neighboring subdomain
88 /// @param neighbor_boundary_edge the boundary edge of the neighboring subdomain
89 ///
90 /// @return A const ref to a Kokkos::View with shape (N, VecDim), where N is the number of grid nodes on the edge
91 /// and VecDim is the number of scalars per node (class template parameter).
93 const grid::shell::SubdomainInfo& local_subdomain,
94 const grid::BoundaryEdge local_boundary_edge,
95 const grid::shell::SubdomainInfo& neighbor_subdomain,
96 const grid::BoundaryEdge neighbor_boundary_edge ) const
97 {
98 return buffers_edge_.at( { local_subdomain, local_boundary_edge, neighbor_subdomain, neighbor_boundary_edge } );
99 }
100
101 /// @brief Const reference to the view that is a buffer for a face of a subdomain.
102 ///
103 /// @param local_subdomain the SubdomainInfo identifying the local subdomain
104 /// @param local_boundary_face the boundary face of the local subdomain
105 /// @param neighbor_subdomain the SubdomainInfo identifying the neighboring subdomain
106 /// @param neighbor_boundary_face the boundary face of the neighboring subdomain
107 ///
108 /// @return A const ref to a Kokkos::View with shape (N, M, VecDim), where N, M are the number of grid nodes on
109 /// each side of the face and VecDim is the number of scalars per node (class template parameter).
111 const grid::shell::SubdomainInfo& local_subdomain,
112 const grid::BoundaryFace local_boundary_face,
113 const grid::shell::SubdomainInfo& neighbor_subdomain,
114 const grid::BoundaryFace neighbor_boundary_face ) const
115 {
116 return buffers_face_.at( { local_subdomain, local_boundary_face, neighbor_subdomain, neighbor_boundary_face } );
117 }
118
119 /// @brief Mutable reference to the view that is a buffer for a vertex of a subdomain.
121 const grid::shell::SubdomainInfo& local_subdomain,
122 const grid::BoundaryVertex local_boundary_vertex,
123 const grid::shell::SubdomainInfo& neighbor_subdomain,
124 const grid::BoundaryVertex neighbor_boundary_vertex )
125 {
126 return buffers_vertex_.at(
127 { local_subdomain, local_boundary_vertex, neighbor_subdomain, neighbor_boundary_vertex } );
128 }
129
130 /// @brief Mutable reference to the view that is a buffer for an edge of a subdomain.
132 const grid::shell::SubdomainInfo& local_subdomain,
133 const grid::BoundaryEdge local_boundary_edge,
134 const grid::shell::SubdomainInfo& neighbor_subdomain,
135 const grid::BoundaryEdge neighbor_boundary_edge )
136 {
137 return buffers_edge_.at( { local_subdomain, local_boundary_edge, neighbor_subdomain, neighbor_boundary_edge } );
138 }
139
140 /// @brief Mutable reference to the view that is a buffer for a face of a subdomain.
142 const grid::shell::SubdomainInfo& local_subdomain,
143 const grid::BoundaryFace local_boundary_face,
144 const grid::shell::SubdomainInfo& neighbor_subdomain,
145 const grid::BoundaryFace neighbor_boundary_face )
146 {
147 return buffers_face_.at( { local_subdomain, local_boundary_face, neighbor_subdomain, neighbor_boundary_face } );
148 }
149
150 private:
151 /// @brief Helper called in the ctor that allocates the buffers.
152 void setup_buffers( const grid::shell::DistributedDomain& domain )
153 {
154 for ( const auto& [subdomain_info, data] : domain.subdomains() )
155 {
156 const auto& [local_subdomain_idx, neighborhood] = data;
157
// One 0-D (VecDim-only) buffer per (local vertex, neighboring subdomain) pair.
158 for ( const auto& [local_boundary_vertex, neighbor] : neighborhood.neighborhood_vertex() )
159 {
160 for ( const auto& [neighbor_subdomain, neighbor_boundary_vertex, mpi_rank] : neighbor )
161 {
162 buffers_vertex_[{
163 subdomain_info, local_boundary_vertex, neighbor_subdomain, neighbor_boundary_vertex }] =
// NOTE(review): orig. line 164 (the vertex buffer allocation) is missing from this listing;
// by analogy with the edge/face branches below it presumably constructs a
// grid::Grid0DDataVec< ScalarType, VecDim > labelled "recv_buffer" - confirm against the repo.
165 }
166 }
167
// One 1-D buffer per (local edge, neighboring subdomain) pair, sized by the node count
// along the edge, which differs for radial vs. lateral edges.
168 for ( const auto& [local_boundary_edge, neighbor] : neighborhood.neighborhood_edge() )
169 {
170 for ( const auto& [neighbor_subdomain, neighbor_boundary_edge, _, mpi_rank] : neighbor )
171 {
172 const int buffer_size = grid::is_edge_boundary_radial( local_boundary_edge ) ?
// NOTE(review): orig. lines 173-174 (the two branches of this ternary, presumably the radial
// vs. lateral node counts from domain.domain_info()) are missing from this listing.
175
176 buffers_edge_[{ subdomain_info, local_boundary_edge, neighbor_subdomain, neighbor_boundary_edge }] =
177 grid::Grid1DDataVec< ScalarType, VecDim >( "recv_buffer", buffer_size );
178 }
179 }
180
// One 2-D buffer per local face; faces have exactly one neighboring subdomain.
181 for ( const auto& [local_boundary_face, neighbor] : neighborhood.neighborhood_face() )
182 {
183 const auto& [neighbor_subdomain, neighbor_boundary_face, _, mpi_rank] = neighbor;
184
185 const int buffer_size_i = domain.domain_info().subdomain_num_nodes_per_side_laterally();
186 const int buffer_size_j = grid::is_face_boundary_normal_to_radial_direction( local_boundary_face ) ?
// NOTE(review): orig. lines 187-188 (the two branches of this ternary) are missing from this
// listing - presumably the lateral vs. radial node counts from domain.domain_info().
189
190 buffers_face_[{ subdomain_info, local_boundary_face, neighbor_subdomain, neighbor_boundary_face }] =
191 grid::Grid2DDataVec< ScalarType, VecDim >( "recv_buffer", buffer_size_i, buffer_size_j );
192 }
193 }
194 }
195
196 /// Key ordering: local subdomain, local boundary, neighbor subdomain, neighbor-local boundary
197 std::map<
198 std::
199 tuple< grid::shell::SubdomainInfo, grid::BoundaryVertex, grid::shell::SubdomainInfo, grid::BoundaryVertex >,
200 grid::Grid0DDataVec< ScalarType, VecDim > >
201 buffers_vertex_;
202 std::map<
203 std::tuple< grid::shell::SubdomainInfo, grid::BoundaryEdge, grid::shell::SubdomainInfo, grid::BoundaryEdge >,
204 grid::Grid1DDataVec< ScalarType, VecDim > >
205 buffers_edge_;
206 std::map<
207 std::tuple< grid::shell::SubdomainInfo, grid::BoundaryFace, grid::shell::SubdomainInfo, grid::BoundaryFace >,
208 grid::Grid2DDataVec< ScalarType, VecDim > >
209 buffers_face_;
210};
211
212/// @brief Packs, sends and recvs local subdomain boundaries using two sets of buffers.
213///
214/// Communication works like this:
215/// - data is packed from the boundaries of the grid data structure into send buffers
216/// - the send buffers are sent via MPI
217/// - the data is received in receive buffers
218/// - the receive buffers are unpacked into the grid data structure (and the data is potentially rotated if necessary)
219///
220/// If the sending and receiving subdomains are on the same process, the data is directly packed into the recv buffers.
221/// However, not yet directly written from subdomain A to subdomain B. This and further optimizations are obviously
222/// possible.
223///
224/// @note Must be complemented with `unpack_and_reduce_local_subdomain_boundaries()` to complete communication.
225/// This function waits until all recv buffers are filled - but does not unpack.
226///
227/// Performs "additive" communication. Nodes at the subdomain interfaces overlap and will be reduced using some
228/// reduction mode during the receiving phase. This is typically required for matrix-free matrix-vector multiplications
229/// in a finite element context: nodes that are shared by elements of two neighboring subdomains receive contributions
230/// from both subdomains that need to be added. In this case, the required reduction mode is `CommunicationReduction::SUM`.
231///
232/// The send buffers are only required until this function returns.
233/// The recv buffers must be passed to the corresponding unpacking function `recv_unpack_and_add_local_subdomain_boundaries()`.
234///
235/// @param domain the DistributedDomain that this works on
236/// @param data the data (Kokkos::View) to be communicated
237/// @param boundary_send_buffers SubdomainNeighborhoodSendRecvBuffer instance that serves for sending data - can be
238/// reused after this function returns
239/// @param boundary_recv_buffers SubdomainNeighborhoodSendRecvBuffer instance that serves for receiving data - must be
240/// passed to `unpack_and_reduce_local_subdomain_boundaries()`
// NOTE(review): This listing was extracted from generated documentation and is incomplete.
// The function signature (orig. line 242) is missing; per the call site further down in this
// file this is `pack_send_and_recv_local_subdomain_boundaries(...)`. Also missing: the second
// operand of the static_assert (orig. line 266), the MPI tag arguments (orig. lines 473 and
// 646, presumably MPI_TAG_BOUNDARY_DATA), and the local-communication copy calls (orig. lines
// 531, 572, 613). Code below is reproduced verbatim; the gaps are extraction artifacts.
241template < typename GridDataType >
243 const grid::shell::DistributedDomain& domain,
244 const GridDataType& data,
245 SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() >&
246 boundary_send_buffers,
247 SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() >&
248 boundary_recv_buffers )
249{
250 // Left this switch here (for debugging). Toggle to choose between
251 // - copying data directly into recv buffers if sender and receiver subdomains
252 // are on the same rank (enable_local_comm == true) and
253 // - still simply sending via MPI: first copy to send buffer, send, then copy from recv buffer to data
254 // (enable_local_comm == false)
255 // Further optimizations are possible ofc.
256 constexpr bool enable_local_comm = true;
257
258 // Since it is not clear whether a static last dimension of 1 impacts performance, we want to support both
259 // scalar and vector-valued grid data views. To simplify matters, we always use the vector-valued versions for the
260 // buffers.
261
262 static_assert(
263 std::is_same_v< GridDataType, grid::Grid4DDataScalar< typename GridDataType::value_type > > ||
264 std::is_same_v<
265 GridDataType,
// NOTE(review): orig. line 266 (the vector-valued grid type compared here, plus the
// static_assert closing and message) is missing from this listing.
267
268 using ScalarType = typename GridDataType::value_type;
269
270 // std::vector< MPI_Request > metadata_send_requests;
271 // std::vector< std::unique_ptr< std::array< int, 11 > > > metadata_send_buffers;
272
273 std::vector< MPI_Request > data_send_requests;
274
275 ////////////////////////////////////////////
276 // Collecting and sorting send-recv pairs //
277 ////////////////////////////////////////////
278
279 // First, we collect all the send-recv pairs and sort them.
280 // This ensures the same message order per process.
281 // We need to post the Isends and Irecvs in that correct order (per process pair).
282
283 struct SendRecvPair
284 {
285 int boundary_type = -1;
286 mpi::MPIRank local_rank;
287 grid::shell::SubdomainInfo local_subdomain;
288 int local_subdomain_boundary;
289 int local_subdomain_id;
290 mpi::MPIRank neighbor_rank;
291 grid::shell::SubdomainInfo neighbor_subdomain;
292 int neighbor_subdomain_boundary;
293
// Debug helper: pretty-prints the pair, casting the int-encoded boundary back to the
// vertex (0), edge (1), or face (2) enum for readability.
294 std::string to_string() const
295 {
296 std::stringstream ss;
297 ss << "boundary_type: " << boundary_type << ", ";
298 ss << "local_subdomain: " << local_subdomain << ", ";
299 if ( boundary_type == 0 )
300 {
301 ss << "local_subdomain_boundary: " << static_cast< grid::BoundaryVertex >( local_subdomain_boundary )
302 << ", ";
303 }
304 else if ( boundary_type == 1 )
305 {
306 ss << "local_subdomain_boundary: " << static_cast< grid::BoundaryEdge >( local_subdomain_boundary )
307 << ", ";
308 }
309 else if ( boundary_type == 2 )
310 {
311 ss << "local_subdomain_boundary: " << static_cast< grid::BoundaryFace >( local_subdomain_boundary )
312 << ", ";
313 }
314 ss << "neighbor_subdomain: " << neighbor_subdomain << ", ";
315 if ( boundary_type == 0 )
316 {
317 ss << "neighbor_subdomain_boundary: "
318 << static_cast< grid::BoundaryVertex >( neighbor_subdomain_boundary ) << ", ";
319 }
320 else if ( boundary_type == 1 )
321 {
322 ss << "neighbor_subdomain_boundary: "
323 << static_cast< grid::BoundaryEdge >( neighbor_subdomain_boundary ) << ", ";
324 }
325 else if ( boundary_type == 2 )
326 {
327 ss << "neighbor_subdomain_boundary: "
328 << static_cast< grid::BoundaryFace >( neighbor_subdomain_boundary ) << ", ";
329 }
330
331 return ss.str();
332 }
333 };
334
335 std::vector< SendRecvPair > send_recv_pairs;
336
337 for ( const auto& [local_subdomain_info, idx_and_neighborhood] : domain.subdomains() )
338 {
339 const auto& [local_subdomain_id, neighborhood] = idx_and_neighborhood;
340
341 for ( const auto& [local_vertex_boundary, neighbors] : neighborhood.neighborhood_vertex() )
342 {
343 // Multiple neighbor subdomains per vertex.
344 for ( const auto& neighbor : neighbors )
345 {
346 const auto& [neighbor_subdomain_info, neighbor_local_boundary, neighbor_rank] = neighbor;
347
348 SendRecvPair send_recv_pair{
349 .boundary_type = 0,
350 .local_rank = mpi::rank(),
351 .local_subdomain = local_subdomain_info,
352 .local_subdomain_boundary = static_cast< int >( local_vertex_boundary ),
353 .local_subdomain_id = local_subdomain_id,
354 .neighbor_rank = neighbor_rank,
355 .neighbor_subdomain = neighbor_subdomain_info,
356 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ) };
357 send_recv_pairs.push_back( send_recv_pair );
358 }
359 }
360
361 for ( const auto& [local_edge_boundary, neighbors] : neighborhood.neighborhood_edge() )
362 {
363 // Multiple neighbor subdomains per edge.
364 for ( const auto& neighbor : neighbors )
365 {
366 const auto& [neighbor_subdomain_info, neighbor_local_boundary, _, neighbor_rank] = neighbor;
367
368 SendRecvPair send_recv_pair{
369 .boundary_type = 1,
370 .local_rank = mpi::rank(),
371 .local_subdomain = local_subdomain_info,
372 .local_subdomain_boundary = static_cast< int >( local_edge_boundary ),
373 .local_subdomain_id = local_subdomain_id,
374 .neighbor_rank = neighbor_rank,
375 .neighbor_subdomain = neighbor_subdomain_info,
376 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ) };
377 send_recv_pairs.push_back( send_recv_pair );
378 }
379 }
380
381 for ( const auto& [local_face_boundary, neighbor] : neighborhood.neighborhood_face() )
382 {
383 // Single neighbor subdomain per facet.
384 const auto& [neighbor_subdomain_info, neighbor_local_boundary, _, neighbor_rank] = neighbor;
385
386 SendRecvPair send_recv_pair{
387 .boundary_type = 2,
388 .local_rank = mpi::rank(),
389 .local_subdomain = local_subdomain_info,
390 .local_subdomain_boundary = static_cast< int >( local_face_boundary ),
391 .local_subdomain_id = local_subdomain_id,
392 .neighbor_rank = neighbor_rank,
393 .neighbor_subdomain = neighbor_subdomain_info,
394 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ) };
395 send_recv_pairs.push_back( send_recv_pair );
396 }
397 }
398
399 ////////////////////
400 // Posting Irecvs //
401 ////////////////////
402
403 // Sort the pairs by sender subdomains.
// (For the recv phase the "sender" is the neighbor side: the keys below prioritize the
// neighbor subdomain so recvs are posted in the order the remote ranks post their sends.)
404 std::sort( send_recv_pairs.begin(), send_recv_pairs.end(), []( const SendRecvPair& a, const SendRecvPair& b ) {
405 if ( a.boundary_type != b.boundary_type )
406 return a.boundary_type < b.boundary_type;
407 if ( a.neighbor_subdomain != b.neighbor_subdomain )
408 return a.neighbor_subdomain < b.neighbor_subdomain;
409 if ( a.neighbor_subdomain_boundary != b.neighbor_subdomain_boundary )
410 return a.neighbor_subdomain_boundary < b.neighbor_subdomain_boundary;
411 if ( a.local_subdomain != b.local_subdomain )
412 return a.local_subdomain < b.local_subdomain;
413 return a.local_subdomain_boundary < b.local_subdomain_boundary;
414 } );
415
416 std::vector< MPI_Request > data_recv_requests;
417
418 for ( const auto& send_recv_pair : send_recv_pairs )
419 {
420 // We will handle local communication via direct copies in the send loop.
421 if ( enable_local_comm && send_recv_pair.local_rank == send_recv_pair.neighbor_rank )
422 {
423 continue;
424 }
425
426 ScalarType* recv_buffer_ptr = nullptr;
427 int recv_buffer_size = 0;
428
429 if ( send_recv_pair.boundary_type == 0 )
430 {
431 const auto& recv_buffer = boundary_recv_buffers.buffer_vertex(
432 send_recv_pair.local_subdomain,
433 static_cast< grid::BoundaryVertex >( send_recv_pair.local_subdomain_boundary ),
434 send_recv_pair.neighbor_subdomain,
435 static_cast< grid::BoundaryVertex >( send_recv_pair.neighbor_subdomain_boundary ) );
436
437 recv_buffer_ptr = recv_buffer.data();
438 recv_buffer_size = recv_buffer.span();
439 }
440 else if ( send_recv_pair.boundary_type == 1 )
441 {
442 const auto& recv_buffer = boundary_recv_buffers.buffer_edge(
443 send_recv_pair.local_subdomain,
444 static_cast< grid::BoundaryEdge >( send_recv_pair.local_subdomain_boundary ),
445 send_recv_pair.neighbor_subdomain,
446 static_cast< grid::BoundaryEdge >( send_recv_pair.neighbor_subdomain_boundary ) );
447
448 recv_buffer_ptr = recv_buffer.data();
449 recv_buffer_size = recv_buffer.span();
450 }
451 else if ( send_recv_pair.boundary_type == 2 )
452 {
453 const auto& recv_buffer = boundary_recv_buffers.buffer_face(
454 send_recv_pair.local_subdomain,
455 static_cast< grid::BoundaryFace >( send_recv_pair.local_subdomain_boundary ),
456 send_recv_pair.neighbor_subdomain,
457 static_cast< grid::BoundaryFace >( send_recv_pair.neighbor_subdomain_boundary ) );
458
459 recv_buffer_ptr = recv_buffer.data();
460 recv_buffer_size = recv_buffer.span();
461 }
462 else
463 {
464 Kokkos::abort( "Unknown boundary type" );
465 }
466
467 MPI_Request data_recv_request;
468 MPI_Irecv(
469 recv_buffer_ptr,
470 recv_buffer_size,
471 mpi::mpi_datatype< ScalarType >(),
472 send_recv_pair.neighbor_rank,
// NOTE(review): orig. line 473 (the MPI tag argument, presumably MPI_TAG_BOUNDARY_DATA) is
// missing from this listing.
474 MPI_COMM_WORLD,
475 &data_recv_request );
476 data_recv_requests.push_back( data_recv_request );
477 }
478
479 /////////////////////////////////////////////////
480 // Packing send data buffers and posting sends //
481 /////////////////////////////////////////////////
482
483 // Sort the pairs by sender subdomains.
// (Now the local side is the sender, so the keys prioritize the local subdomain - mirroring
// the neighbor-first ordering used when posting the Irecvs above.)
484 std::sort( send_recv_pairs.begin(), send_recv_pairs.end(), []( const SendRecvPair& a, const SendRecvPair& b ) {
485 if ( a.boundary_type != b.boundary_type )
486 return a.boundary_type < b.boundary_type;
487 if ( a.local_subdomain != b.local_subdomain )
488 return a.local_subdomain < b.local_subdomain;
489 if ( a.local_subdomain_boundary != b.local_subdomain_boundary )
490 return a.local_subdomain_boundary < b.local_subdomain_boundary;
491 if ( a.neighbor_subdomain != b.neighbor_subdomain )
492 return a.neighbor_subdomain < b.neighbor_subdomain;
493 return a.neighbor_subdomain_boundary < b.neighbor_subdomain_boundary;
494 } );
495
496 for ( const auto& send_recv_pair : send_recv_pairs )
497 {
498 const auto local_comm = enable_local_comm && send_recv_pair.local_rank == send_recv_pair.neighbor_rank;
499
500 // Packing buffer.
501
502 const auto local_subdomain_id = send_recv_pair.local_subdomain_id;
503
504 // Deep-copy into device-side send buffer.
505
506 ScalarType* send_buffer_ptr = nullptr;
507 int send_buffer_size = 0;
508
509 if ( send_recv_pair.boundary_type == 0 )
510 {
511 const auto local_vertex_boundary =
512 static_cast< grid::BoundaryVertex >( send_recv_pair.local_subdomain_boundary );
513
514 if ( local_comm )
515 {
516 // Handling local communication and moving on to next send_recv_pair afterward.
517 const auto& recv_buffer = boundary_recv_buffers.buffer_vertex(
518 send_recv_pair.local_subdomain,
519 static_cast< grid::BoundaryVertex >( send_recv_pair.local_subdomain_boundary ),
520 send_recv_pair.neighbor_subdomain,
521 static_cast< grid::BoundaryVertex >( send_recv_pair.neighbor_subdomain_boundary ) );
522
523 if ( !domain.subdomains().contains( send_recv_pair.neighbor_subdomain ) )
524 {
525 Kokkos::abort( "Subdomain not found locally - but it should be there..." );
526 }
527
528 const auto local_subdomain_id_of_neighboring_subdomain =
529 std::get< 0 >( domain.subdomains().at( send_recv_pair.neighbor_subdomain ) );
530
// NOTE(review): orig. line 531 (the call packing the neighbor's boundary directly into the
// recv buffer - presumably a copy_to_buffer overload) is missing from this listing.
532 recv_buffer,
533 data,
534 local_subdomain_id_of_neighboring_subdomain,
535 static_cast< grid::BoundaryVertex >( send_recv_pair.neighbor_subdomain_boundary ) );
536 continue;
537 }
538
539 auto& send_buffer = boundary_send_buffers.buffer_vertex(
540 send_recv_pair.local_subdomain,
541 static_cast< grid::BoundaryVertex >( send_recv_pair.local_subdomain_boundary ),
542 send_recv_pair.neighbor_subdomain,
543 static_cast< grid::BoundaryVertex >( send_recv_pair.neighbor_subdomain_boundary ) );
544
545 send_buffer_ptr = send_buffer.data();
546 send_buffer_size = send_buffer.span();
547
548 copy_to_buffer( send_buffer, data, local_subdomain_id, local_vertex_boundary );
549 }
550 else if ( send_recv_pair.boundary_type == 1 )
551 {
552 const auto local_edge_boundary =
553 static_cast< grid::BoundaryEdge >( send_recv_pair.local_subdomain_boundary );
554
555 if ( local_comm )
556 {
557 // Handling local communication and moving on to next send_recv_pair afterward.
558 const auto& recv_buffer = boundary_recv_buffers.buffer_edge(
559 send_recv_pair.local_subdomain,
560 static_cast< grid::BoundaryEdge >( send_recv_pair.local_subdomain_boundary ),
561 send_recv_pair.neighbor_subdomain,
562 static_cast< grid::BoundaryEdge >( send_recv_pair.neighbor_subdomain_boundary ) );
563
564 if ( !domain.subdomains().contains( send_recv_pair.neighbor_subdomain ) )
565 {
566 Kokkos::abort( "Subdomain not found locally - but it should be there..." );
567 }
568
569 const auto local_subdomain_id_of_neighboring_subdomain =
570 std::get< 0 >( domain.subdomains().at( send_recv_pair.neighbor_subdomain ) );
571
// NOTE(review): orig. line 572 (the local-copy call, as in the vertex branch) is missing
// from this listing.
573 recv_buffer,
574 data,
575 local_subdomain_id_of_neighboring_subdomain,
576 static_cast< grid::BoundaryEdge >( send_recv_pair.neighbor_subdomain_boundary ) );
577 continue;
578 }
579
580 auto& send_buffer = boundary_send_buffers.buffer_edge(
581 send_recv_pair.local_subdomain,
582 static_cast< grid::BoundaryEdge >( send_recv_pair.local_subdomain_boundary ),
583 send_recv_pair.neighbor_subdomain,
584 static_cast< grid::BoundaryEdge >( send_recv_pair.neighbor_subdomain_boundary ) );
585
586 send_buffer_ptr = send_buffer.data();
587 send_buffer_size = send_buffer.span();
588
589 copy_to_buffer( send_buffer, data, local_subdomain_id, local_edge_boundary );
590 }
591 else if ( send_recv_pair.boundary_type == 2 )
592 {
593 const auto local_face_boundary =
594 static_cast< grid::BoundaryFace >( send_recv_pair.local_subdomain_boundary );
595
596 if ( local_comm )
597 {
598 // Handling local communication and moving on to next send_recv_pair afterward.
599 const auto& recv_buffer = boundary_recv_buffers.buffer_face(
600 send_recv_pair.local_subdomain,
601 static_cast< grid::BoundaryFace >( send_recv_pair.local_subdomain_boundary ),
602 send_recv_pair.neighbor_subdomain,
603 static_cast< grid::BoundaryFace >( send_recv_pair.neighbor_subdomain_boundary ) );
604
605 if ( !domain.subdomains().contains( send_recv_pair.neighbor_subdomain ) )
606 {
607 Kokkos::abort( "Subdomain not found locally - but it should be there..." );
608 }
609
610 const auto local_subdomain_id_of_neighboring_subdomain =
611 std::get< 0 >( domain.subdomains().at( send_recv_pair.neighbor_subdomain ) );
612
// NOTE(review): orig. line 613 (the local-copy call, as in the vertex branch) is missing
// from this listing.
614 recv_buffer,
615 data,
616 local_subdomain_id_of_neighboring_subdomain,
617 static_cast< grid::BoundaryFace >( send_recv_pair.neighbor_subdomain_boundary ) );
618 continue;
619 }
620
621 const auto& send_buffer = boundary_send_buffers.buffer_face(
622 send_recv_pair.local_subdomain,
623 static_cast< grid::BoundaryFace >( send_recv_pair.local_subdomain_boundary ),
624 send_recv_pair.neighbor_subdomain,
625 static_cast< grid::BoundaryFace >( send_recv_pair.neighbor_subdomain_boundary ) );
626
627 send_buffer_ptr = send_buffer.data();
628 send_buffer_size = send_buffer.span();
629
630 copy_to_buffer( send_buffer, data, local_subdomain_id, local_face_boundary );
631 }
632 else
633 {
634 Kokkos::abort( "Unknown boundary type" );
635 }
636
// Ensure the device-side packing kernels have finished before MPI reads the buffer.
637 Kokkos::fence( "deep_copy_into_send_buffer" );
638
639 // Schedule Isend (non-local comm).
640 MPI_Request data_send_request;
641 MPI_Isend(
642 send_buffer_ptr,
643 send_buffer_size,
644 mpi::mpi_datatype< ScalarType >(),
645 send_recv_pair.neighbor_rank,
// NOTE(review): orig. line 646 (the MPI tag argument, presumably MPI_TAG_BOUNDARY_DATA) is
// missing from this listing.
647 MPI_COMM_WORLD,
648 &data_send_request );
649 data_send_requests.push_back( data_send_request );
650 }
651
652 /////////////////////////////////////
653 // Wait for all sends to complete. //
654 /////////////////////////////////////
655
656 MPI_Waitall( data_send_requests.size(), data_send_requests.data(), MPI_STATUSES_IGNORE );
657 MPI_Waitall( data_recv_requests.size(), data_recv_requests.data(), MPI_STATUSES_IGNORE );
658}
659
660/// @brief Unpacks and reduces local subdomain boundaries.
661///
662/// The recv buffers must be the same instances as used during sending in `pack_send_and_recv_local_subdomain_boundaries()`.
663///
664/// See `pack_send_and_recv_local_subdomain_boundaries()` for more details on how the communication works.
665///
666/// @param domain the DistributedDomain that this works on
667/// @param data the data (Kokkos::View) to be communicated
668/// @param boundary_recv_buffers SubdomainNeighborhoodSendRecvBuffer instance that serves for receiving data - must be
669/// the same that was previously populated by `pack_send_and_recv_local_subdomain_boundaries()`
670/// @param reduction reduction mode
// NOTE(review): This listing was extracted from generated documentation and is incomplete.
// The function signature (orig. line 672) is missing; per the call site further down in this
// file this is `unpack_and_reduce_local_subdomain_boundaries(...)`. Also missing: the final
// parameter (orig. line 677, presumably the reduction mode given the uses of `reduction`
// below), the end of the static_assert (orig. lines 687-688), and the unpack call names
// (orig. lines 703, 719, 733). Code below is reproduced verbatim.
671template < typename GridDataType >
673 const grid::shell::DistributedDomain& domain,
674 const GridDataType& data,
675 SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() >&
676 boundary_recv_buffers,
678{
679 // Since it is not clear whether a static last dimension of 1 impacts performance, we want to support both
680 // scalar and vector-valued grid data views. To simplify matters, we always use the vector-valued versions for the
681 // buffers.
682
683 static_assert(
684 std::is_same_v< GridDataType, grid::Grid4DDataScalar< typename GridDataType::value_type > > ||
685 std::is_same_v<
686 GridDataType,
// NOTE(review): orig. lines 687-688 (the vector-valued grid type compared here, plus the
// static_assert closing and message) are missing from this listing.
689 for ( const auto& [local_subdomain_info, idx_and_neighborhood] : domain.subdomains() )
690 {
691 const auto& [local_subdomain_id, neighborhood] = idx_and_neighborhood;
692
693 for ( const auto& [local_vertex_boundary, neighbors] : neighborhood.neighborhood_vertex() )
694 {
695 // Multiple neighbor subdomains per vertex.
696 for ( const auto& neighbor : neighbors )
697 {
698 const auto& [neighbor_subdomain_info, neighbor_local_boundary, neighbor_rank] = neighbor;
699
700 auto recv_buffer = boundary_recv_buffers.buffer_vertex(
701 local_subdomain_info, local_vertex_boundary, neighbor_subdomain_info, neighbor_local_boundary );
702
// NOTE(review): orig. line 703 (the vertex unpack/reduce call name) is missing from this listing.
704 recv_buffer, data, local_subdomain_id, local_vertex_boundary, reduction );
705 }
706 }
707
708 for ( const auto& [local_edge_boundary, neighbors] : neighborhood.neighborhood_edge() )
709 {
710 // Multiple neighbor subdomains per edge.
711 for ( const auto& neighbor : neighbors )
712 {
713 const auto& [neighbor_subdomain_info, neighbor_local_boundary, boundary_direction, neighbor_rank] =
714 neighbor;
715
716 auto recv_buffer = boundary_recv_buffers.buffer_edge(
717 local_subdomain_info, local_edge_boundary, neighbor_subdomain_info, neighbor_local_boundary );
718
// NOTE(review): orig. line 719 (the edge unpack/reduce call name) is missing from this listing.
720 recv_buffer, data, local_subdomain_id, local_edge_boundary, boundary_direction, reduction );
721 }
722 }
723
724 for ( const auto& [local_face_boundary, neighbor] : neighborhood.neighborhood_face() )
725 {
726 // Single neighbor subdomain per facet.
727 const auto& [neighbor_subdomain_info, neighbor_local_boundary, boundary_directions, neighbor_rank] =
728 neighbor;
729
730 auto recv_buffer = boundary_recv_buffers.buffer_face(
731 local_subdomain_info, local_face_boundary, neighbor_subdomain_info, neighbor_local_boundary );
732
// NOTE(review): orig. line 733 (the face unpack/reduce call name) is missing from this listing.
734 recv_buffer, data, local_subdomain_id, local_face_boundary, boundary_directions, reduction );
735 }
736 }
737
// Block until all device-side unpack kernels scheduled above have completed.
738 Kokkos::fence();
739}
740
741/// @brief Executes packing, sending, receiving, and unpacking operations for the shell.
742///
743/// @note THIS MAY COME WITH A PERFORMANCE PENALTY.
744/// This function (re-)allocates send and receive buffers for each call, which could be inefficient.
745/// Use only where performance does not matter (e.g. in tests).
746/// Better: reuse the buffers for subsequent send-recv calls through overloads of this function.
747///
748/// Essentially just calls `pack_send_and_recv_local_subdomain_boundaries()` and `unpack_and_reduce_local_subdomain_boundaries()`.
// NOTE(review): This listing was extracted from generated documentation and is incomplete.
// Missing here: the function name line (orig. 750), the remaining parameters (orig. 752-753,
// presumably the `grid` data view and the `reduction` mode used in the body), and the
// declarations of `send_buffers`/`recv_buffers` (orig. 755-756, presumably freshly
// constructed SubdomainNeighborhoodSendRecvBuffer instances - which is the per-call
// allocation cost the doc comment above warns about). Code below is reproduced verbatim.
749template < typename ScalarType >
751 const grid::shell::DistributedDomain& domain,
754{
757
758 shell::pack_send_and_recv_local_subdomain_boundaries( domain, grid, send_buffers, recv_buffers );
759 shell::unpack_and_reduce_local_subdomain_boundaries( domain, grid, recv_buffers, reduction );
760}
761
762/// @brief Executes packing, sending, receiving, and unpacking operations for the shell.
763///
764/// Send and receive buffers must be passed. This is the preferred way to execute communication since the buffers
765/// can be reused.
766///
767/// Essentially just calls `pack_send_and_recv_local_subdomain_boundaries()` and `unpack_and_reduce_local_subdomain_boundaries()`.
768template < typename ScalarType >
779
780} // namespace terra::communication::shell
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
grid::Grid1DDataVec< ScalarType, VecDim > & buffer_edge(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryEdge local_boundary_edge, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryEdge neighbor_boundary_edge)
Mutable reference to the view that is a buffer for an edge of a subdomain.
Definition communication.hpp:131
grid::Grid2DDataVec< ScalarType, VecDim > & buffer_face(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryFace local_boundary_face, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryFace neighbor_boundary_face)
Mutable reference to the view that is a buffer for a face of a subdomain.
Definition communication.hpp:141
const grid::Grid0DDataVec< ScalarType, VecDim > & buffer_vertex(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryVertex local_boundary_vertex, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryVertex neighbor_boundary_vertex) const
Const reference to the view that is a buffer for a vertex of a subdomain.
Definition communication.hpp:73
SubdomainNeighborhoodSendRecvBuffer(const grid::shell::DistributedDomain &domain)
Constructs a SubdomainNeighborhoodSendRecvBuffer for the passed distributed domain object.
Definition communication.hpp:59
grid::Grid0DDataVec< ScalarType, VecDim > & buffer_vertex(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryVertex local_boundary_vertex, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryVertex neighbor_boundary_vertex)
Mutable reference to the view that is a buffer for a vertex of a subdomain.
Definition communication.hpp:120
const grid::Grid2DDataVec< ScalarType, VecDim > & buffer_face(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryFace local_boundary_face, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryFace neighbor_boundary_face) const
Const reference to the view that is a buffer for a face of a subdomain.
Definition communication.hpp:110
const grid::Grid1DDataVec< ScalarType, VecDim > & buffer_edge(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryEdge local_boundary_edge, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryEdge neighbor_boundary_edge) const
Const reference to the view that is a buffer for an edge of a subdomain.
Definition communication.hpp:92
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2498
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2580
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2577
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
(Sortable) Globally unique identifier for a single subdomain of a diamond.
Definition spherical_shell.hpp:595
Neighborhood information of a single subdomain.
Definition spherical_shell.hpp:1031
auto make_unmanaged_like(typename GridViewT::value_type *ptr, int n0=0, int n1=0, int n2=0)
Definition communication.hpp:23
Definition communication.hpp:14
void send_recv(const grid::shell::DistributedDomain &domain, grid::Grid4DDataScalar< ScalarType > &grid, const CommunicationReduction reduction=CommunicationReduction::SUM)
Executes packing, sending, receiving, and unpacking operations for the shell.
Definition communication.hpp:750
constexpr int MPI_TAG_BOUNDARY_DATA
Definition communication.hpp:16
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
void copy_from_buffer_rotate_and_reduce(const grid::Grid0DDataVec< ScalarType, VecDim > &buffer, const ViewType &data, const int local_subdomain_id, const grid::BoundaryVertex boundary_vertex, const CommunicationReduction reduction)
Definition buffer_copy_kernels.hpp:404
std::enable_if_t< detail_view_constraints::is_kokkos_view_v< BufferView > &&detail_view_constraints::is_kokkos_view_v< ViewType > &&(std::decay_t< BufferView >::rank==1)> copy_to_buffer(const BufferView &buffer, const ViewType &data, const int local_subdomain_id, const grid::BoundaryVertex boundary_vertex)
Definition buffer_copy_kernels.hpp:125
CommunicationReduction
Communication reduction modes.
Definition buffer_copy_kernels.hpp:9
@ SUM
Sums up the node values during receive.
constexpr bool is_edge_boundary_radial(const BoundaryEdge id)
Definition grid_types.hpp:219
BoundaryVertex
Enum for identification of the 8 boundary vertices of a subdomain.
Definition grid_types.hpp:121
Kokkos::View< ScalarType ****[VecDim], Layout > Grid4DDataVec
Definition grid_types.hpp:43
Kokkos::View< ScalarType **[VecDim], Layout > Grid2DDataVec
Definition grid_types.hpp:37
Kokkos::View< ScalarType *[VecDim], Layout > Grid1DDataVec
Definition grid_types.hpp:34
constexpr bool is_face_boundary_normal_to_radial_direction(const BoundaryFace id)
Definition grid_types.hpp:225
Kokkos::View< ScalarType[VecDim], Layout > Grid0DDataVec
Definition grid_types.hpp:31
BoundaryFace
Enum for identification of the 6 boundary faces of a subdomain.
Definition grid_types.hpp:170
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:25
BoundaryEdge
Enum for identification of the 12 boundary edges of a subdomain.
Definition grid_types.hpp:142
int MPIRank
Definition mpi.hpp:8
MPIRank rank()
Definition mpi.hpp:10