Loading...
Searching...
No Matches
communication_plan.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <map>
5#include <tuple>
6#include <vector>
7
8#include "dense/vec.hpp"
9#include "grid/grid_types.hpp"
13#include "util/timer.hpp"
14
16
17
18// --------------------------------------------------------------------------------------
19// Reusable, precomputed plan
20// --------------------------------------------------------------------------------------
21//
22// Goal: avoid rebuilding send/recv pair lists, sorting, chunk layout, and per-rank buffer sizes
23// on every halo exchange. We do that once in the ctor, then `exchange_and_reduce(...)` just runs
24// the hot path: local copies, pack, fence, post isends/irecvs, wait, scatter, unpack.
25//
26// Notes:
27// - This keeps your existing per-boundary recv buffers intact and still used by unpack.
28// - It keeps your per-rank aggregation optimization.
29// - It does not depend on send_buffers_ (your argument is unused currently anyway).
30//
31template < class GridDataType >
33{
34 public:
35 using ScalarType = typename GridDataType::value_type;
36 static constexpr int VecDim = grid::grid_data_vec_dim< GridDataType >();
37 using memory_space = typename GridDataType::memory_space;
38 using rank_buffer_view = Kokkos::View< ScalarType*, memory_space >;
39
// Builds the full communication plan once at construction time (pair lists,
// sorted per-rank chunk layouts) and allocates the flat per-rank MPI buffers,
// so that exchange_and_reduce() only runs the hot path.
// NOTE(review): stores a raw pointer to `domain` — the domain object must
// outlive this plan. `enable_local_comm` makes same-rank pairs use direct
// copies instead of MPI self-messages.
 40 explicit ShellBoundaryCommPlan( const grid::shell::DistributedDomain& domain, bool enable_local_comm = true )
 41 : domain_( &domain ), enable_local_comm_( enable_local_comm )
 42 {
 43 build_plan_();
 44 allocate_rank_buffers_();
 45 }
46
 47 // Call this each timestep/iteration.
 48 //
 49 // Full signature (per generated docs):
 50 //   void exchange_and_reduce( const GridDataType& data,
 51 //                             SubdomainNeighborhoodSendRecvBuffer< ScalarType, VecDim >& boundary_recv_buffers,
 52 //                             CommunicationReduction reduction = CommunicationReduction::SUM ) const
 53 //
 54 // Hot path only — no plan rebuilding: post irecvs, serve same-rank pairs by
 55 // direct copy, pack + isend remote data, and overlap the local unpack with
 56 // MPI progress before draining remote messages.
 57 // NOTE(review): the extraction lost part of the signature lines below —
 58 // confirm against the header in the repository.
 49 const GridDataType& data,
 52 {
 53 util::Timer timer_all( "shell_boundary_exchange_and_reduce" );
 54
 55 post_irecvs_();
 56
 57 // Same-rank pairs: copy neighbor boundary data straight into the
 58 // per-boundary recv buffers, no MPI involved.
 57 local_comm_copy_into_recv_buffers_( data, boundary_recv_buffers );
 58
 59 pack_remote_sends_( data );
 60
 61 post_isends_();
 62
 63 // Unpack local pairs from boundary_recv_buffers while MPI progresses.
 64 unpack_local_( data, boundary_recv_buffers, reduction );
 65
 66 // Waitany loop: for each remote recv as it lands, unpack its chunks
 67 // directly from the flat per-rank recv buffer. Sends are drained at end.
 68 wait_and_unpack_remote_( data, reduction );
 69
 70 Kokkos::fence();
 71 }
72
73 // Optional: if domain topology changes (rare), rebuild everything.
 74 void rebuild()
 75 {
 76 // Re-derives pair lists / chunk layouts and re-allocates the flat
 77 // per-rank buffers; identical to what the constructor does. Only needed
 78 // if the domain's topology (subdomain/neighbor structure) changes.
 76 build_plan_();
 77 allocate_rank_buffers_();
 78 }
79
80 private:
 81 // One directed boundary relationship: data from `neighbor_subdomain`'s
 82 // boundary arrives at `local_subdomain`'s boundary. Used both to drive
 83 // same-rank direct copies and to lay out per-rank MPI chunk buffers.
 81 struct SendRecvPair
 82 {
 83 int boundary_type = -1; // 0 vertex, 1 edge, 2 face
 84 mpi::MPIRank local_rank;
 85 grid::shell::SubdomainInfo local_subdomain;
 86 int local_subdomain_boundary;  // enum value of BoundaryVertex/Edge/Face, stored type-erased as int
 87 int local_subdomain_id;
 88
 89 mpi::MPIRank neighbor_rank;
 90 grid::shell::SubdomainInfo neighbor_subdomain;
 91 int neighbor_subdomain_boundary;
 92
 93 // Orientation for rotate step in unpack. direction_0 is used for edges
 94 // and as the first component for faces; direction_1 only for faces.
 95 // NOTE(review): the declarations of direction_0 / direction_1 were lost
 96 // in extraction (they are initialized in build_plan_ and consumed by the
 97 // unpack paths); presumably grid::BoundaryDirection — confirm in repo.
 97 };
98
 99 // One contiguous slice [offset, offset + size) of a flat per-rank buffer,
100 // carrying the boundary piece described by `pair`.
 99 struct ChunkInfo
100 {
101 SendRecvPair pair;
102 int offset = 0; // in scalars
103 int size = 0; // in scalars
104 };
105
106 // --------------------------
107 // Plan build / layout
108 // --------------------------
109 // Number of scalars one boundary piece occupies in a flat buffer:
110 // vertex = VecDim, edge = n_nodes * VecDim, face = ni * nj * VecDim.
111 // Node counts are taken from the *local* boundary's orientation.
109 int piece_num_scalars_( const SendRecvPair& p ) const
110 {
111 const auto& domain = *domain_;
112
113 if ( p.boundary_type == 0 )
114 {
115 return VecDim;
116 }
117 else if ( p.boundary_type == 1 )
118 {
119 const auto local_edge_boundary = static_cast< grid::BoundaryEdge >( p.local_subdomain_boundary );
120 const int n_nodes = grid::is_edge_boundary_radial( local_edge_boundary ) ?
121 // NOTE(review): the true-branch line of this ternary (radial edge case)
122 // was lost in extraction; the matching code in pack_remote_sends_ uses
123 // domain.domain_info().subdomain_num_nodes_radially() here — confirm.
122 domain.domain_info().subdomain_num_nodes_per_side_laterally();
123 return n_nodes * VecDim;
124 }
125 else if ( p.boundary_type == 2 )
126 {
127 const auto local_face_boundary = static_cast< grid::BoundaryFace >( p.local_subdomain_boundary );
128 const int ni = domain.domain_info().subdomain_num_nodes_per_side_laterally();
129 const int nj = grid::is_face_boundary_normal_to_radial_direction( local_face_boundary ) ?
130 domain.domain_info().subdomain_num_nodes_per_side_laterally() :
131 domain.domain_info().subdomain_num_nodes_radially();
132 return ni * nj * VecDim;
133 }
134 Kokkos::abort( "Unknown boundary type" );
135 return 0; // unreachable; keeps compilers happy after abort
136 }
137
138 void build_plan_()
139 {
140 util::Timer timer( "ShellBoundaryCommPlan::build_plan" );
141
142 const auto& domain = *domain_;
143
144 send_recv_pairs_.clear();
145 send_recv_pairs_.reserve( 1024 );
146
147 // Build the full (unsorted) pair list once.
148 for ( const auto& [local_subdomain_info, idx_and_neighborhood] : domain.subdomains() )
149 {
150 const auto& [local_subdomain_id, neighborhood] = idx_and_neighborhood;
151
152 for ( const auto& [local_vertex_boundary, neighbors] : neighborhood.neighborhood_vertex() )
153 {
154 for ( const auto& neighbor : neighbors )
155 {
156 const auto& [neighbor_subdomain_info, neighbor_local_boundary, neighbor_rank] = neighbor;
157 send_recv_pairs_.push_back( SendRecvPair{
158 .boundary_type = 0,
159 .local_rank = mpi::rank( domain.comm() ),
160 .local_subdomain = local_subdomain_info,
161 .local_subdomain_boundary = static_cast< int >( local_vertex_boundary ),
162 .local_subdomain_id = local_subdomain_id,
163 .neighbor_rank = neighbor_rank,
164 .neighbor_subdomain = neighbor_subdomain_info,
165 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ) } );
166 }
167 }
168
169 for ( const auto& [local_edge_boundary, neighbors] : neighborhood.neighborhood_edge() )
170 {
171 for ( const auto& neighbor : neighbors )
172 {
173 const auto& [neighbor_subdomain_info, neighbor_local_boundary, edge_direction, neighbor_rank] =
174 neighbor;
175 send_recv_pairs_.push_back( SendRecvPair{
176 .boundary_type = 1,
177 .local_rank = mpi::rank( domain.comm() ),
178 .local_subdomain = local_subdomain_info,
179 .local_subdomain_boundary = static_cast< int >( local_edge_boundary ),
180 .local_subdomain_id = local_subdomain_id,
181 .neighbor_rank = neighbor_rank,
182 .neighbor_subdomain = neighbor_subdomain_info,
183 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ),
184 .direction_0 = edge_direction } );
185 }
186 }
187
188 for ( const auto& [local_face_boundary, neighbor] : neighborhood.neighborhood_face() )
189 {
190 const auto& [neighbor_subdomain_info, neighbor_local_boundary, face_directions, neighbor_rank] =
191 neighbor;
192 send_recv_pairs_.push_back( SendRecvPair{
193 .boundary_type = 2,
194 .local_rank = mpi::rank( domain.comm() ),
195 .local_subdomain = local_subdomain_info,
196 .local_subdomain_boundary = static_cast< int >( local_face_boundary ),
197 .local_subdomain_id = local_subdomain_id,
198 .neighbor_rank = neighbor_rank,
199 .neighbor_subdomain = neighbor_subdomain_info,
200 .neighbor_subdomain_boundary = static_cast< int >( neighbor_local_boundary ),
201 .direction_0 = std::get< 0 >( face_directions ),
202 .direction_1 = std::get< 1 >( face_directions ) } );
203 }
204 }
205
206 // Precompute local-comm subset (fixed list).
207 local_pairs_.clear();
208 local_pairs_.reserve( send_recv_pairs_.size() );
209 for ( const auto& p : send_recv_pairs_ )
210 {
211 if ( enable_local_comm_ && p.local_rank == p.neighbor_rank )
212 local_pairs_.push_back( p );
213 }
214
215 // SEND layout (sorted and chunked per rank, remote only)
216 {
217 auto send_pairs = send_recv_pairs_;
218 std::sort( send_pairs.begin(), send_pairs.end(), []( const SendRecvPair& a, const SendRecvPair& b ) {
219 if ( a.boundary_type != b.boundary_type ) return a.boundary_type < b.boundary_type;
220 if ( a.local_subdomain != b.local_subdomain ) return a.local_subdomain < b.local_subdomain;
221 if ( a.local_subdomain_boundary != b.local_subdomain_boundary )
222 return a.local_subdomain_boundary < b.local_subdomain_boundary;
223 if ( a.neighbor_subdomain != b.neighbor_subdomain ) return a.neighbor_subdomain < b.neighbor_subdomain;
224 return a.neighbor_subdomain_boundary < b.neighbor_subdomain_boundary;
225 } );
226
227 send_chunks_by_rank_.clear();
228 send_total_by_rank_.clear();
229
230 for ( const auto& p : send_pairs )
231 {
232 if ( enable_local_comm_ && p.local_rank == p.neighbor_rank )
233 continue;
234
235 const int sz = piece_num_scalars_( p );
236 auto& chunks = send_chunks_by_rank_[p.neighbor_rank];
237
238 const int off = send_total_by_rank_[p.neighbor_rank];
239 send_total_by_rank_[p.neighbor_rank] += sz;
240
241 chunks.push_back( ChunkInfo{ .pair = p, .offset = off, .size = sz } );
242 }
243 }
244
245 // RECV layout (sorted and chunked per rank, remote only)
246 {
247 auto recv_pairs = send_recv_pairs_;
248 std::sort( recv_pairs.begin(), recv_pairs.end(), []( const SendRecvPair& a, const SendRecvPair& b ) {
249 if ( a.boundary_type != b.boundary_type ) return a.boundary_type < b.boundary_type;
250 if ( a.neighbor_subdomain != b.neighbor_subdomain ) return a.neighbor_subdomain < b.neighbor_subdomain;
251 if ( a.neighbor_subdomain_boundary != b.neighbor_subdomain_boundary )
252 return a.neighbor_subdomain_boundary < b.neighbor_subdomain_boundary;
253 if ( a.local_subdomain != b.local_subdomain ) return a.local_subdomain < b.local_subdomain;
254 return a.local_subdomain_boundary < b.local_subdomain_boundary;
255 } );
256
257 recv_chunks_by_rank_.clear();
258 recv_total_by_rank_.clear();
259
260 for ( const auto& p : recv_pairs )
261 {
262 if ( enable_local_comm_ && p.local_rank == p.neighbor_rank )
263 continue;
264
265 const int sz = piece_num_scalars_( p );
266 auto& chunks = recv_chunks_by_rank_[p.neighbor_rank];
267
268 const int off = recv_total_by_rank_[p.neighbor_rank];
269 recv_total_by_rank_[p.neighbor_rank] += sz;
270
271 chunks.push_back( ChunkInfo{ .pair = p, .offset = off, .size = sz } );
272 }
273 }
274 }
275
276 void allocate_rank_buffers_()
277 {
278 util::Timer timer( "ShellBoundaryCommPlan::allocate_rank_buffers" );
279
280 send_rank_buffers_.clear();
281 recv_rank_buffers_.clear();
282
283 for ( const auto& [rank, total] : send_total_by_rank_ )
284 {
285 if ( total > 0 )
286 send_rank_buffers_[rank] = rank_buffer_view( "rank_send_buffer", total );
287 }
288 for ( const auto& [rank, total] : recv_total_by_rank_ )
289 {
290 if ( total > 0 )
291 recv_rank_buffers_[rank] = rank_buffer_view( "rank_recv_buffer", total );
292 }
293
294 data_send_requests_.resize( send_rank_buffers_.size() );
295 data_recv_requests_.resize( recv_rank_buffers_.size() );
296 recv_request_ranks_.resize( recv_rank_buffers_.size() );
297 }
298
299 // --------------------------
300 // Hot path
301 // --------------------------
302 // Posts one nonblocking receive per remote peer into that peer's flat
303 // buffer, recording which rank each request index belongs to so the
304 // Waitany loop can dispatch the right unpack. `const` but mutates only
305 // `mutable` request/count members (hot path is const for callers).
302 void post_irecvs_() const
303 {
304 util::Timer timer( "ShellBoundaryCommPlan::post_irecvs" );
305
306 int i = 0;
307 for ( const auto& [rank, buf] : recv_rank_buffers_ )
308 {
309 const int total_sz = static_cast< int >( buf.extent( 0 ) );
310 MPI_Irecv(
311 buf.data(),
312 total_sz,
313 mpi::mpi_datatype< ScalarType >(),
314 rank,
315 // NOTE(review): the tag argument line was lost in extraction —
315 // presumably MPI_TAG_BOUNDARY_DATA (declared in communication.hpp); confirm.
316 domain_->comm(),
317 &data_recv_requests_[i] );
318 recv_request_ranks_[i] = rank;
319 ++i;
320 }
321 recv_req_count_ = i;
322 }
323
324 // Serves all same-rank pairs without MPI: copies the *neighbor*
325 // subdomain's boundary data directly into the per-boundary recv buffer
326 // that unpack_local_ later reads, exactly as if it had arrived over MPI.
327 // NOTE(review): the second parameter line (the
328 // SubdomainNeighborhoodSendRecvBuffer& boundary_recv_buffers argument)
329 // was lost in extraction — confirm against the repository header.
324 void local_comm_copy_into_recv_buffers_(
325 const GridDataType& data,
327 {
328 util::Timer timer( "ShellBoundaryCommPlan::local_comm" );
329
330 const auto& domain = *domain_;
331
332 for ( const auto& p : local_pairs_ )
333 {
334 // Same rank, so the neighbor subdomain must be resident here.
334 if ( !domain.subdomains().contains( p.neighbor_subdomain ) )
335 Kokkos::abort( "Subdomain not found locally - but it should be there..." );
336
337 const auto neighbor_subdomain_id = std::get< 0 >( domain.subdomains().at( p.neighbor_subdomain ) );
338
339 if ( p.boundary_type == 0 )
340 {
341 auto& recv_buf = boundary_recv_buffers.buffer_vertex(
342 p.local_subdomain,
343 static_cast< grid::BoundaryVertex >( p.local_subdomain_boundary ),
344 p.neighbor_subdomain,
345 static_cast< grid::BoundaryVertex >( p.neighbor_subdomain_boundary ) );
346
347 // Copy from the neighbor subdomain's boundary into the recv buffer.
347 copy_to_buffer<VecDim>(
348 recv_buf,
349 data,
350 neighbor_subdomain_id,
351 static_cast< grid::BoundaryVertex >( p.neighbor_subdomain_boundary ) );
352 }
353 else if ( p.boundary_type == 1 )
354 {
355 auto& recv_buf = boundary_recv_buffers.buffer_edge(
356 p.local_subdomain,
357 static_cast< grid::BoundaryEdge >( p.local_subdomain_boundary ),
358 p.neighbor_subdomain,
359 static_cast< grid::BoundaryEdge >( p.neighbor_subdomain_boundary ) );
360
361 copy_to_buffer<VecDim>(
362 recv_buf,
363 data,
364 neighbor_subdomain_id,
365 static_cast< grid::BoundaryEdge >( p.neighbor_subdomain_boundary ) );
366 }
367 else if ( p.boundary_type == 2 )
368 {
369 auto& recv_buf = boundary_recv_buffers.buffer_face(
370 p.local_subdomain,
371 static_cast< grid::BoundaryFace >( p.local_subdomain_boundary ),
372 p.neighbor_subdomain,
373 static_cast< grid::BoundaryFace >( p.neighbor_subdomain_boundary ) );
374
375 copy_to_buffer<VecDim>(
376 recv_buf,
377 data,
378 neighbor_subdomain_id,
379 static_cast< grid::BoundaryFace >( p.neighbor_subdomain_boundary ) );
380 }
381 else
382 {
383 Kokkos::abort( "Unknown boundary type" );
384 }
385 }
386 }
387
388 void pack_remote_sends_( const GridDataType& data ) const
389 {
390 util::Timer timer( "ShellBoundaryCommPlan::pack_remote" );
391
392 const auto& domain = *domain_;
393
394 for ( const auto& [rank, chunks] : send_chunks_by_rank_ )
395 {
396 auto& rank_buf = send_rank_buffers_.at( rank );
397
398 for ( const auto& ch : chunks )
399 {
400 const auto& p = ch.pair;
401 ScalarType* base_ptr = rank_buf.data() + ch.offset;
402
403 if ( p.boundary_type == 0 )
404 {
405 using BufT = grid::Grid0DDataVec< ScalarType, VecDim >;
406 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr );
407
408 copy_to_buffer<VecDim>(
409 unmanaged,
410 data,
411 p.local_subdomain_id,
412 static_cast< grid::BoundaryVertex >( p.local_subdomain_boundary ) );
413 }
414 else if ( p.boundary_type == 1 )
415 {
416 using BufT = grid::Grid1DDataVec< ScalarType, VecDim >;
417 const auto local_edge_boundary = static_cast< grid::BoundaryEdge >( p.local_subdomain_boundary );
418 const int n_nodes = grid::is_edge_boundary_radial( local_edge_boundary ) ?
419 domain.domain_info().subdomain_num_nodes_radially() :
420 domain.domain_info().subdomain_num_nodes_per_side_laterally();
421
422 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr, n_nodes );
423 copy_to_buffer<VecDim>( unmanaged, data, p.local_subdomain_id, local_edge_boundary );
424 }
425 else if ( p.boundary_type == 2 )
426 {
427 using BufT = grid::Grid2DDataVec< ScalarType, VecDim >;
428 const auto local_face_boundary = static_cast< grid::BoundaryFace >( p.local_subdomain_boundary );
429 const int ni = domain.domain_info().subdomain_num_nodes_per_side_laterally();
430 const int nj = grid::is_face_boundary_normal_to_radial_direction( local_face_boundary ) ?
431 domain.domain_info().subdomain_num_nodes_per_side_laterally() :
432 domain.domain_info().subdomain_num_nodes_radially();
433
434 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr, ni, nj );
435 copy_to_buffer<VecDim>( unmanaged, data, p.local_subdomain_id, local_face_boundary );
436 }
437 else
438 {
439 Kokkos::abort( "Unknown boundary type" );
440 }
441 }
442 }
443
444 Kokkos::fence( "pack_rank_send_buffers" );
445 }
446
447 // Posts one nonblocking send per remote peer from that peer's flat,
448 // already-packed buffer (pack_remote_sends_ fenced before this runs).
447 void post_isends_() const
448 {
449 util::Timer timer( "ShellBoundaryCommPlan::post_isends" );
450
451 int i = 0;
452 for ( const auto& [rank, buf] : send_rank_buffers_ )
453 {
454 const int total_sz = static_cast< int >( buf.extent( 0 ) );
455 MPI_Isend(
456 buf.data(),
457 total_sz,
458 mpi::mpi_datatype< ScalarType >(),
459 rank,
460 // NOTE(review): the tag argument line was lost in extraction —
460 // presumably MPI_TAG_BOUNDARY_DATA, matching the irecv side; confirm.
461 domain_->comm(),
462 &data_send_requests_[i] );
463 ++i;
464 }
465 send_req_count_ = i;
466 }
467
468 // Unpack the per-boundary recv buffers for local pairs (filled by
469 // local_comm_copy_into_recv_buffers_). Runs before the MPI wait so the
470 // host/GPU are not idle while remote messages are still in flight.
471 // NOTE(review): extraction lost the buffers parameter line
472 // (SubdomainNeighborhoodSendRecvBuffer& boundary_recv_buffers) and the
473 // call-name lines below — the truncated calls are presumably
474 // copy_from_buffer_rotate_and_reduce(...) (buffer_copy_kernels.hpp);
475 // confirm against the repository header.
471 void unpack_local_(
472 const GridDataType& data,
474 CommunicationReduction reduction ) const
475 {
476 util::Timer timer( "ShellBoundaryCommPlan::unpack_local" );
477
478 for ( const auto& p : local_pairs_ )
479 {
480 if ( p.boundary_type == 0 )
481 {
482 const auto local_boundary = static_cast< grid::BoundaryVertex >( p.local_subdomain_boundary );
483 const auto neighbor_boundary = static_cast< grid::BoundaryVertex >( p.neighbor_subdomain_boundary );
484 const auto& recv_buffer = boundary_recv_buffers.buffer_vertex(
485 p.local_subdomain, local_boundary, p.neighbor_subdomain, neighbor_boundary );
486 // Reduce the received vertex values into `data` at the local boundary.
487 recv_buffer, data, p.local_subdomain_id, local_boundary, reduction );
488 }
489 else if ( p.boundary_type == 1 )
490 {
491 const auto local_boundary = static_cast< grid::BoundaryEdge >( p.local_subdomain_boundary );
492 const auto neighbor_boundary = static_cast< grid::BoundaryEdge >( p.neighbor_subdomain_boundary );
493 const auto& recv_buffer = boundary_recv_buffers.buffer_edge(
494 p.local_subdomain, local_boundary, p.neighbor_subdomain, neighbor_boundary );
495 // direction_0 orients (rotates) the received edge data before reducing.
496 recv_buffer, data, p.local_subdomain_id, local_boundary, p.direction_0, reduction );
497 }
498 else if ( p.boundary_type == 2 )
499 {
500 const auto local_boundary = static_cast< grid::BoundaryFace >( p.local_subdomain_boundary );
501 const auto neighbor_boundary = static_cast< grid::BoundaryFace >( p.neighbor_subdomain_boundary );
502 const auto& recv_buffer = boundary_recv_buffers.buffer_face(
503 p.local_subdomain, local_boundary, p.neighbor_subdomain, neighbor_boundary );
504 // Faces need both orientation components for the rotate step.
505 recv_buffer,
506 data,
507 p.local_subdomain_id,
508 local_boundary,
509 std::make_tuple( p.direction_0, p.direction_1 ),
510 reduction );
511 }
512 else
513 {
514 Kokkos::abort( "Unknown boundary type" );
515 }
516 }
517 }
518
519 // Unpack all chunks received from one remote rank. Reads directly from the
520 // per-rank flat recv buffer via inline unmanaged views.
521 // Unpacks all chunks received from one remote rank, reading directly
522 // from the per-rank flat recv buffer through inline unmanaged views.
523 // Chunk offsets/sizes come from the recv layout built in build_plan_.
524 // NOTE(review): the call-name lines below were lost in extraction —
525 // presumably copy_from_buffer_rotate_and_reduce(...); confirm in repo.
521 void unpack_remote_rank_(
522 const GridDataType& data,
523 const mpi::MPIRank rank,
524 CommunicationReduction reduction ) const
525 {
526 const auto& domain = *domain_;
527 auto& rank_buf = recv_rank_buffers_.at( rank );
528 const auto& chunks = recv_chunks_by_rank_.at( rank );
529
530 for ( const auto& ch : chunks )
531 {
532 const auto& p = ch.pair;
533 ScalarType* base_ptr = rank_buf.data() + ch.offset;
534
535 if ( p.boundary_type == 0 )
536 {
537 using BufT = grid::Grid0DDataVec< ScalarType, VecDim >;
538 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr );
539 // Reduce the received vertex values into `data`.
540 unmanaged,
541 data,
542 p.local_subdomain_id,
543 static_cast< grid::BoundaryVertex >( p.local_subdomain_boundary ),
544 reduction );
545 }
546 else if ( p.boundary_type == 1 )
547 {
548 using BufT = grid::Grid1DDataVec< ScalarType, VecDim >;
549 const auto local_edge = static_cast< grid::BoundaryEdge >( p.local_subdomain_boundary );
550 const int n_nodes = grid::is_edge_boundary_radial( local_edge ) ?
551 domain.domain_info().subdomain_num_nodes_radially() :
552 domain.domain_info().subdomain_num_nodes_per_side_laterally();
553 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr, n_nodes );
554 // direction_0 orients the edge data before reducing.
555 unmanaged, data, p.local_subdomain_id, local_edge, p.direction_0, reduction );
556 }
557 else if ( p.boundary_type == 2 )
558 {
559 using BufT = grid::Grid2DDataVec< ScalarType, VecDim >;
560 const auto local_face = static_cast< grid::BoundaryFace >( p.local_subdomain_boundary );
561 const int ni = domain.domain_info().subdomain_num_nodes_per_side_laterally();
562 const int nj = grid::is_face_boundary_normal_to_radial_direction( local_face ) ?
563 domain.domain_info().subdomain_num_nodes_per_side_laterally() :
564 domain.domain_info().subdomain_num_nodes_radially();
565 auto unmanaged = detail::make_unmanaged_like< BufT >( base_ptr, ni, nj );
566 // Faces need both orientation components for the rotate step.
567 unmanaged,
568 data,
569 p.local_subdomain_id,
570 local_face,
571 std::make_tuple( p.direction_0, p.direction_1 ),
572 reduction );
573 }
574 else
575 {
576 Kokkos::abort( "Unknown boundary type" );
577 }
578 }
579 }
580
581 // Wait for remote recvs, dispatching unpack_remote_rank_ as each message
582 // lands. Finally waits on pending sends.
583 //
584 // Sub-timers:
585 // - mpi_waitany_first: time from start until the first recv completes.
586 // - mpi_waitany_rest : per-call time for subsequent recv completions
587 // (count = (num_msgs - 1) * num_iters across runs).
588 // Comparing their aggregates tells us whether waitall is dominated by the
589 // initial handshake round-trip or by tail latency from stragglers.
590 void wait_and_unpack_remote_(
591 const GridDataType& data,
592 CommunicationReduction reduction ) const
593 {
594 util::Timer timer( "ShellBoundaryCommPlan::waitall" );
595
596 for ( int completed = 0; completed < recv_req_count_; ++completed )
597 {
598 int idx = MPI_UNDEFINED;
599 if ( completed == 0 )
600 {
601 util::Timer t( "ShellBoundaryCommPlan::mpi_waitany_first" );
602 MPI_Waitany( recv_req_count_, data_recv_requests_.data(), &idx, MPI_STATUS_IGNORE );
603 }
604 else
605 {
606 util::Timer t( "ShellBoundaryCommPlan::mpi_waitany_rest" );
607 MPI_Waitany( recv_req_count_, data_recv_requests_.data(), &idx, MPI_STATUS_IGNORE );
608 }
609 unpack_remote_rank_( data, recv_request_ranks_[idx], reduction );
610 }
611
612 if ( send_req_count_ > 0 )
613 {
614 util::Timer t( "ShellBoundaryCommPlan::mpi_waitall_sends" );
615 MPI_Waitall( send_req_count_, data_send_requests_.data(), MPI_STATUSES_IGNORE );
616 }
617 }
618
619 private:
620 // Non-owning; the domain must outlive this plan object.
620 const grid::shell::DistributedDomain* domain_ = nullptr;
621 bool enable_local_comm_ = true;
622
623 // Precomputed full list
624 std::vector< SendRecvPair > send_recv_pairs_;
625
626 // Precomputed local-only subset
627 std::vector< SendRecvPair > local_pairs_;
628
629 // Precomputed rank aggregation layouts
630 // (chunk order and totals must match between the send side on one rank
631 // and the recv side on its peer — see build_plan_).
630 std::map< mpi::MPIRank, std::vector< ChunkInfo > > send_chunks_by_rank_;
631 std::map< mpi::MPIRank, std::vector< ChunkInfo > > recv_chunks_by_rank_;
632 std::map< mpi::MPIRank, int > send_total_by_rank_;  // in scalars
633 std::map< mpi::MPIRank, int > recv_total_by_rank_;  // in scalars
634
635
636 // Reused rank buffers
637 // (mutable: the hot-path methods are const but reuse these buffers).
637 mutable std::map< mpi::MPIRank, rank_buffer_view > send_rank_buffers_;
638 mutable std::map< mpi::MPIRank, rank_buffer_view > recv_rank_buffers_;
639
640 // Reused request storage
641 mutable std::vector< MPI_Request > data_send_requests_;
642 mutable std::vector< MPI_Request > data_recv_requests_;
643 // Maps a recv-request index back to its source rank for the Waitany loop.
643 mutable std::vector< mpi::MPIRank > recv_request_ranks_;
644 mutable int send_req_count_ = 0;
645 mutable int recv_req_count_ = 0;
646};
647
648// --------------------------------------------------------------------------------------
649// Unified one-call routine (plan is built once, then just executed each call)
650// --------------------------------------------------------------------------------------
651// Convenience free function: executes a previously-built plan.
652// Full signature (per generated docs):
653//   void send_recv_with_plan( const ShellBoundaryCommPlan< GridDataType >& plan,
654//                             const GridDataType& data,
655//                             SubdomainNeighborhoodSendRecvBuffer< ... >& recv_buffers,
656//                             CommunicationReduction reduction = CommunicationReduction::SUM )
657// NOTE(review): the function-name/first-parameter and reduction-parameter
658// lines were lost in extraction — confirm against the repository header.
651template < typename GridDataType >
654 const GridDataType& data,
655 SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type,
656 grid::grid_data_vec_dim< GridDataType >() >& recv_buffers,
658{
659 plan.exchange_and_reduce( data, recv_buffers, reduction );
660}
661
662} // namespace terra::communication::shell
Definition communication_plan.hpp:33
ShellBoundaryCommPlan(const grid::shell::DistributedDomain &domain, bool enable_local_comm=true)
Definition communication_plan.hpp:40
void exchange_and_reduce(const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< ScalarType, VecDim > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM) const
Definition communication_plan.hpp:48
typename GridDataType::memory_space memory_space
Definition communication_plan.hpp:37
void rebuild()
Definition communication_plan.hpp:74
typename GridDataType::value_type ScalarType
Definition communication_plan.hpp:35
Kokkos::View< ScalarType *, memory_space > rank_buffer_view
Definition communication_plan.hpp:38
static constexpr int VecDim
Definition communication_plan.hpp:36
const grid::Grid0DDataVec< ScalarType, VecDim > & buffer_vertex(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryVertex local_boundary_vertex, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryVertex neighbor_boundary_vertex) const
Const reference to the view that is a buffer for a vertex of a subdomain.
Definition communication.hpp:73
const grid::Grid2DDataVec< ScalarType, VecDim > & buffer_face(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryFace local_boundary_face, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryFace neighbor_boundary_face) const
Const reference to the view that is a buffer for a face of a subdomain.
Definition communication.hpp:110
const grid::Grid1DDataVec< ScalarType, VecDim > & buffer_edge(const grid::shell::SubdomainInfo &local_subdomain, const grid::BoundaryEdge local_boundary_edge, const grid::shell::SubdomainInfo &neighbor_subdomain, const grid::BoundaryEdge neighbor_boundary_edge) const
Const reference to the view that is a buffer for an edge of a subdomain.
Definition communication.hpp:92
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
MPI_Comm comm() const
MPI communicator this domain lives on. Defaults to MPI_COMM_WORLD. For agglomerated multigrid,...
Definition spherical_shell.hpp:2599
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
(Sortable) Globally unique identifier for a single subdomain of a diamond.
Definition spherical_shell.hpp:595
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
local_subdomain_id
Definition EpsilonDivDiv_kernel_gen.py:23
constexpr int idx(const int loop_idx, const int size, const grid::BoundaryPosition position, const grid::BoundaryDirection direction)
Definition buffer_copy_kernels.hpp:73
DataView::value_type value(const DataView &data, int local_subdomain_id, int x, int y, int r, int d)
Definition buffer_copy_kernels.hpp:45
Definition communication.hpp:14
void send_recv_with_plan(const ShellBoundaryCommPlan< GridDataType > &plan, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Definition communication_plan.hpp:652
constexpr int MPI_TAG_BOUNDARY_DATA
Definition communication.hpp:16
void copy_from_buffer_rotate_and_reduce(const BufferView &buffer, const ViewType &data, const int local_subdomain_id, const grid::BoundaryVertex boundary_vertex, const CommunicationReduction reduction)
Definition buffer_copy_kernels.hpp:386
CommunicationReduction
Communication reduction modes.
Definition buffer_copy_kernels.hpp:9
@ SUM
Sums up the node values during receive.
constexpr bool is_edge_boundary_radial(const BoundaryEdge id)
Definition grid_types.hpp:346
BoundaryVertex
Enum for identification of the 8 boundary vertices of a subdomain.
Definition grid_types.hpp:248
BoundaryDirection
Enum for the iteration direction at a boundary.
Definition grid_types.hpp:311
constexpr bool is_face_boundary_normal_to_radial_direction(const BoundaryFace id)
Definition grid_types.hpp:352
BoundaryFace
Enum for identification of the 6 boundary faces of a subdomain.
Definition grid_types.hpp:297
BoundaryEdge
Enum for identification of the 12 boundary edges of a subdomain.
Definition grid_types.hpp:269
int MPIRank
Definition mpi.hpp:11
MPIRank rank()
Definition mpi.hpp:13