redistribute.hpp
1
2#pragma once
3
4#include <algorithm>
5#include <cassert>
6#include <map>
7#include <numeric>
8#include <stdexcept>
9#include <vector>
10
11#include <Kokkos_Core.hpp>
12
13#include "grid/grid_types.hpp"
14#include "grid/shell/spherical_shell.hpp" // assumed path: declares DistributedDomain & SubdomainToRankDistributionFunction
15#include "terra/mpi/mpi.hpp"
16#include "util/timer.hpp"
17
18namespace terra::communication::shell
19{
20/// @brief Move a distributed grid vector from a fine DistributedDomain to a coarse one
21/// whose comm is a subset of the fine domain's comm. Used by agglomerated multigrid to
22/// collapse the active rank set at the descent into a coarse V-cycle level, and then
23/// to broadcast the coarse correction back to the fine rank set on the way up.
24///
25/// Assumptions:
26/// - Both domains describe the same mesh (same num_global_subdomains, same per-subdomain
27/// node layout). Only the mapping of subdomains to owning ranks differs.
28/// - The coarse domain's comm is a subset of the fine domain's comm. Ranks that are not
29/// in the coarse comm get MPI_COMM_NULL for `domain_coarse.comm()` and own zero
30/// subdomains on the coarse side.
31/// - Data layout: grid data is a 4D/5D Kokkos view indexed by (local_sdr, i, j, k[, c]),
32/// with a fixed block size per subdomain. We use the fine domain's layout to determine
33/// that block size (same on both sides by the same-mesh assumption).
34///
35/// The class is stateful: it precomputes send/recv counts and displacements once at
36/// construction, then reuses them across solves. apply() and apply_transpose() are the
37/// hot paths; they pack/Alltoallv/unpack.
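///
/// A minimal usage sketch for one agglomerated level (the domain objects, distribution
/// functions, and vector names are illustrative placeholders, not part of this header):
/// @code
/// Redistribute< GridVector > redist( domain_fine, domain_coarse,
///                                    to_rank_fine, to_rank_coarse ); // plan built once
///
/// // Descent: move the restricted residual onto the reduced rank set.
/// redist.apply( residual_fine_layout, residual_coarse_layout );
///
/// // ... coarse-level work on the ranks that stay active ...
///
/// // Ascent: broadcast the coarse correction back to the full fine rank set.
/// redist.apply_transpose( correction_coarse_layout, correction_fine_layout );
/// @endcode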
38template < class GridDataType >
39class Redistribute
40{
41 public:
42 using ScalarType = typename GridDataType::value_type;
43 static constexpr int VecDim = grid::grid_data_vec_dim< GridDataType >();
44 using memory_space = typename GridDataType::memory_space;
45 using buffer_view = Kokkos::View< ScalarType*, memory_space >;
46 using host_buffer_view = Kokkos::View< ScalarType*, Kokkos::HostSpace >;
47
48 /// @brief Build a redistribute plan between two distributed domains.
49 /// @param domain_fine Source side; holds data before apply() and after apply_transpose().
50 /// @param domain_coarse Destination side; holds data after apply() and before apply_transpose().
51 /// @param subdomain_to_rank_fine Fine-side owner function (sub-comm-local ranks on fine comm).
52 /// @param subdomain_to_rank_coarse Coarse-side owner function (sub-comm-local ranks on coarse comm).
53 Redistribute(
54 const grid::shell::DistributedDomain& domain_fine,
55 const grid::shell::DistributedDomain& domain_coarse,
56 const grid::shell::SubdomainToRankDistributionFunction& subdomain_to_rank_fine,
57 const grid::shell::SubdomainToRankDistributionFunction& subdomain_to_rank_coarse )
58 : domain_fine_( &domain_fine )
59 , domain_coarse_( &domain_coarse )
60 , union_comm_( domain_fine.comm() )
61 {
62 build_plan_( subdomain_to_rank_fine, subdomain_to_rank_coarse );
63 }
64
65 /// @brief True when the fine and coarse domains have the same comm AND
66 /// every subdomain has the same owner on both sides. In that case there is
67 /// nothing to do — the caller can route restriction output directly to the
68 /// coarse-side buffer and skip calling apply/apply_transpose entirely.
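///
/// Caller-side sketch of that short-circuit (restrict_to(), the scratch buffer, and the
/// vector names are hypothetical, used only for illustration):
/// @code
/// if ( redist.is_identity() )
///     restrict_to( coarse_rhs );                 // write straight into the coarse-side buffer
/// else
/// {
///     restrict_to( fine_scratch );
///     redist.apply( fine_scratch, coarse_rhs );  // collective on the fine comm
/// }
/// @endcode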
69 bool is_identity() const { return identity_plan_; }
70
71 /// @brief Move data from fine-owned subdomains to coarse-owned subdomains.
72 /// Collective on the fine comm; every rank in it must call this.
73 void apply( const GridDataType& src_fine, GridDataType& dst_coarse )
74 {
75 if ( union_comm_ == MPI_COMM_NULL ) return; // rank not in this redistribute's comm
76 if ( identity_plan_ ) return; // factor=1 — caller already routed src/dst to the same buffer
77 util::Timer t( "redistribute_apply" );
78 run_alltoallv_( src_fine, dst_coarse, /*transpose=*/false );
79 }
80
81 /// @brief Move data back from coarse-owned subdomains to fine-owned subdomains.
82 /// Collective on the fine comm. Used on the way up in a V-cycle after the coarse
83 /// correction has been computed on the reduced rank set.
84 void apply_transpose( const GridDataType& src_coarse, GridDataType& dst_fine )
85 {
86 if ( union_comm_ == MPI_COMM_NULL ) return;
87 if ( identity_plan_ ) return;
88 util::Timer t( "redistribute_apply_transpose" );
89 run_alltoallv_( src_coarse, dst_fine, /*transpose=*/true );
90 }
91
92 private:
93 static constexpr int Ni_( const grid::shell::DistributedDomain& d )
94 {
95 return d.domain_info().subdomain_num_nodes_per_side_laterally();
96 }
97 static constexpr int Nr_( const grid::shell::DistributedDomain& d )
98 {
99 return d.domain_info().subdomain_num_nodes_radially();
100 }
101
102 int subdomain_block_size_() const
103 {
104 // Same for both domains by the same-mesh assumption; pick fine.
105 const int ni = Ni_( *domain_fine_ );
106 const int nr = Nr_( *domain_fine_ );
107 return ni * ni * nr * VecDim;
108 }
109
110 // A message: ships one subdomain's DoFs between a send and a receive rank.
111 struct Message
112 {
113 int send_rank; // rank on union_comm_
114 int recv_rank; // rank on union_comm_
115 int local_subdomain_on_fine; // >= 0 if the sender uses the fine domain's local index
116 int local_subdomain_on_coarse; // >= 0 if the receiver uses the coarse domain's local index
117 };
118
119 void build_plan_(
120 const grid::shell::SubdomainToRankDistributionFunction& fn_fine,
121 const grid::shell::SubdomainToRankDistributionFunction& fn_coarse )
122 {
123 // On ranks that are entirely outside this redistribute's union comm
124 // (common in a hierarchical agglomeration ladder: a rank that dropped
125 // out at descent Li+1 must still hold a per-level Redistribute slot for
126 // descent Li, but never participates in it), skip plan construction.
127 // The guards at the top of apply()/apply_transpose() early-return on such ranks as well.
128 if ( union_comm_ == MPI_COMM_NULL )
129 return;
130
131 const auto& dom_info = domain_fine_->domain_info();
132 const int n_diam = dom_info.num_subdomains_per_diamond_side();
133 const int n_rad = dom_info.num_subdomains_in_radial_direction();
134
135 // Translate coarse-comm ranks to union-comm ranks using MPI groups.
136 // Ranks not on the coarse comm still need to resolve its ownership so they
137 // know where to send their locally-owned subdomains.
138 MPI_Group union_group = MPI_GROUP_NULL;
139 MPI_Group coarse_group = MPI_GROUP_NULL;
140 MPI_Comm_group( union_comm_, &union_group );
141
142 const MPI_Comm coarse_comm = domain_coarse_->comm();
143 const bool have_coarse = ( coarse_comm != MPI_COMM_NULL );
144 if ( have_coarse )
145 {
146 MPI_Comm_group( coarse_comm, &coarse_group );
147 }
148
149 int coarse_size = 0;
150 if ( have_coarse )
151 {
152 MPI_Group_size( coarse_group, &coarse_size );
153 }
154 // Allreduce(MAX) coarse_size so ranks outside the coarse comm learn it too.
155 MPI_Allreduce( MPI_IN_PLACE, &coarse_size, 1, MPI_INT, MPI_MAX, union_comm_ );
156
157 // Build the coarse->union rank translation on every rank: every rank on the
158 // union comm needs to know, for each coarse-comm rank c, what the corresponding
159 // union-comm rank is. We do this by having the lowest union-rank coarse member
160 // broadcast the full translation table.
161 std::vector< int > coarse_to_union( coarse_size, MPI_PROC_NULL );
162 if ( have_coarse )
163 {
164 std::vector< int > coarse_ranks( coarse_size );
165 std::iota( coarse_ranks.begin(), coarse_ranks.end(), 0 );
166 MPI_Group_translate_ranks( coarse_group, coarse_size, coarse_ranks.data(),
167 union_group, coarse_to_union.data() );
168 }
169
170 // Elect the first coarse member to broadcast the table. That rank's union index is 0
171 // if we kept rank 0 on the coarse side (which build_level_comms always does by construction).
172 // Use Allreduce(MIN) over each rank's "I am coarse rank 0 → my union rank else INT_MAX" to discover the broadcaster.
173 int bcaster_union_rank = std::numeric_limits< int >::max();
174 if ( have_coarse )
175 {
176 int my_coarse_rank = mpi::rank( coarse_comm );
177 if ( my_coarse_rank == 0 )
178 {
179 bcaster_union_rank = mpi::rank( union_comm_ );
180 }
181 }
182 MPI_Allreduce( MPI_IN_PLACE, &bcaster_union_rank, 1, MPI_INT, MPI_MIN, union_comm_ );
183 MPI_Bcast( coarse_to_union.data(), coarse_size, MPI_INT, bcaster_union_rank, union_comm_ );
184
185 coarse_to_union_ = std::move( coarse_to_union );
186
187 if ( union_group != MPI_GROUP_NULL ) MPI_Group_free( &union_group );
188 if ( coarse_group != MPI_GROUP_NULL ) MPI_Group_free( &coarse_group );
189
190 // Compute local-subdomain index maps for fine and coarse (so pack/unpack know
191 // where each subdomain's DoFs live in the 4D view).
192 //
193 // The existing DistributedDomain caches subdomains() with their local indices.
194 std::map< grid::shell::SubdomainInfo, int > fine_local_idx;
195 for ( const auto& [sdr, info] : domain_fine_->subdomains() )
196 {
197 fine_local_idx[sdr] = std::get< 0 >( info );
198 }
199 std::map< grid::shell::SubdomainInfo, int > coarse_local_idx;
200 for ( const auto& [sdr, info] : domain_coarse_->subdomains() )
201 {
202 coarse_local_idx[sdr] = std::get< 0 >( info );
203 }
204
205 const int my_union_rank = mpi::rank( union_comm_ );
206 const int block = subdomain_block_size_();
207
208 // Walk all subdomains globally. For each one, determine send rank (fine owner
209 // translated to union) and recv rank (coarse owner translated to union).
210 // Record the message on every rank that participates in either side, and count
211 // the per-peer send/recv bytes on this rank.
212 int union_size = 0;
213 MPI_Comm_size( union_comm_, &union_size );
214 send_counts_.assign( union_size, 0 );
215 recv_counts_.assign( union_size, 0 );
216
217 send_messages_.clear();
218 recv_messages_.clear();
219
220 for ( int diamond_id = 0; diamond_id < 10; ++diamond_id )
221 {
222 for ( int x = 0; x < n_diam; ++x )
223 {
224 for ( int y = 0; y < n_diam; ++y )
225 {
226 for ( int r = 0; r < n_rad; ++r )
227 {
228 grid::shell::SubdomainInfo s( diamond_id, x, y, r );
229
230 const int fine_owner = fn_fine( s, n_diam, n_rad ); // union-comm rank
231 const int coarse_owner = fn_coarse( s, n_diam, n_rad ); // coarse-comm rank
232 if ( coarse_owner < 0 || coarse_owner >= coarse_size )
233 {
234 throw std::runtime_error(
235 "Redistribute: coarse subdomain_to_rank produced out-of-range rank" );
236 }
237 const int recv_union = coarse_to_union_[coarse_owner];
238
239 const bool i_send = ( fine_owner == my_union_rank );
240 const bool i_recv = ( recv_union == my_union_rank );
241
242 if ( i_send )
243 {
244 Message m;
245 m.send_rank = my_union_rank;
246 m.recv_rank = recv_union;
247 m.local_subdomain_on_fine = fine_local_idx.at( s );
248 m.local_subdomain_on_coarse = -1;
249 send_messages_.push_back( m );
250 send_counts_[recv_union] += block;
251 }
252 if ( i_recv )
253 {
254 Message m;
255 m.send_rank = fine_owner;
256 m.recv_rank = my_union_rank;
257 m.local_subdomain_on_fine = -1;
258 m.local_subdomain_on_coarse = coarse_local_idx.at( s );
259 recv_messages_.push_back( m );
260 recv_counts_[fine_owner] += block;
261 }
262 }
263 }
264 }
265 }
266
267 // Full message ordering (peer rank, then local subdomain index); kept for reference, the stable sorts below only need the peer key.
268 [[maybe_unused]] auto by_peer_and_subdomain = []( const Message& a, const Message& b ) {
269 if ( a.send_rank != b.send_rank ) return a.send_rank < b.send_rank;
270 if ( a.recv_rank != b.recv_rank ) return a.recv_rank < b.recv_rank;
271 // Use whichever local index is set as tie-breaker
272 const int ai = a.local_subdomain_on_fine >= 0 ? a.local_subdomain_on_fine : a.local_subdomain_on_coarse;
273 const int bi = b.local_subdomain_on_fine >= 0 ? b.local_subdomain_on_fine : b.local_subdomain_on_coarse;
274 return ai < bi;
275 };
276
277 // For sends we pack per recv_rank; for recvs we unpack per send_rank. For the bytes to
278 // land in the right place, both sides must order their messages by the SAME primary key
279 // (the peer rank across the wire) and the SAME secondary key (subdomain identity).
280 // Message no longer carries the SubdomainInfo, but no explicit secondary key is needed:
281 // the construction loop above visits subdomains in a globally fixed (diamond, x, y, r)
282 // order, so within each (sender, receiver) pair both ranks record them in the same order.
283
284 // Hence send_messages_ (which carry local_subdomain_on_fine) and recv_messages_ (which
285 // carry local_subdomain_on_coarse) are each stable-sorted by the peer rank only; the
286 // per-peer chunk order is whatever the loop produced, and that order is identical on the
287 // sending and receiving rank, so pack and unpack see the same sequence of subdomain
288 // blocks.
289 std::stable_sort( send_messages_.begin(), send_messages_.end(),
290 []( const Message& a, const Message& b ) { return a.recv_rank < b.recv_rank; } );
291 std::stable_sort( recv_messages_.begin(), recv_messages_.end(),
292 []( const Message& a, const Message& b ) { return a.send_rank < b.send_rank; } );
293
294 // Build Alltoallv displacements.
295 send_displs_.assign( union_size, 0 );
296 recv_displs_.assign( union_size, 0 );
297 for ( int r = 1; r < union_size; ++r )
298 {
299 send_displs_[r] = send_displs_[r - 1] + send_counts_[r - 1];
300 recv_displs_[r] = recv_displs_[r - 1] + recv_counts_[r - 1];
301 }
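// Example with a hypothetical 3-rank union comm: send_counts_ = {2*block, 0, block}
// yields send_displs_ = {0, 2*block, 2*block}; a zero-count peer occupies no space.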
302 const int total_send = send_displs_.back() + send_counts_.back();
303 const int total_recv = recv_displs_.back() + recv_counts_.back();
304
305 // Always allocate at least 1 element so .data() is a valid pointer even
306 // on ranks that have nothing to send or receive — nvpcx MPI_Alltoallv does
307 // not accept nullptr buffer arguments even when the per-rank count is 0.
308 const int send_alloc = std::max( total_send, 1 );
309 const int recv_alloc = std::max( total_recv, 1 );
310 send_buf_ = buffer_view( Kokkos::view_alloc( Kokkos::WithoutInitializing, "redistribute_send" ), send_alloc );
311 recv_buf_ = buffer_view( Kokkos::view_alloc( Kokkos::WithoutInitializing, "redistribute_recv" ), recv_alloc );
312 // Host staging buffers: nvpcx MPI_Alltoallv on GPU pointers hangs on this
313 // cluster (whereas Isend/Irecv is CUDA-aware). Stage through host memory.
314 send_host_ = host_buffer_view( Kokkos::view_alloc( Kokkos::WithoutInitializing, "redistribute_send_host" ),
315 send_alloc );
316 recv_host_ = host_buffer_view( Kokkos::view_alloc( Kokkos::WithoutInitializing, "redistribute_recv_host" ),
317 recv_alloc );
318
319 // Identity detection: if every subdomain I own stays on my rank (i.e.
320 // fine owner == coarse owner on this rank), and this holds on every
321 // rank, the Redistribute has nothing to do at runtime — the caller
322 // should just reuse the same buffer on both sides of the descent.
323 // Check locally and MPI_Allreduce LAND across union_comm_ to make sure
324 // every rank agrees before we claim identity. Otherwise one rank
325 // would skip apply while others wait in Alltoallv.
326 int local_identity = 1;
327 for ( const auto& m : send_messages_ )
328 if ( m.send_rank != m.recv_rank ) { local_identity = 0; break; }
329 if ( local_identity )
330 for ( const auto& m : recv_messages_ )
331 if ( m.send_rank != m.recv_rank ) { local_identity = 0; break; }
332 MPI_Allreduce( MPI_IN_PLACE, &local_identity, 1, MPI_INT, MPI_LAND, union_comm_ );
333 identity_plan_ = ( local_identity != 0 );
334 }
335
336 public:
337 // Pack / unpack are public so nvcc permits extended __host__ __device__ lambdas
338 // inside them (private/protected enclosing functions are disallowed for extended
339 // lambdas). Not part of the intended API surface — call apply()/apply_transpose().
340 void pack_( const GridDataType& src, const buffer_view& buf, const std::vector< Message >& messages,
341 bool use_fine_index ) const
342 {
343 const int ni = Ni_( *domain_fine_ );
344 const int nr = Nr_( *domain_fine_ );
345 const int block = subdomain_block_size_();
346
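// Buffer layout: message m occupies the contiguous range [m*block, (m+1)*block), with its
// nodes ordered (i, j, k, c) row-major; unpack_ on the receiving rank reads the same layout.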
347 // Copy message list to a device view so the kernel can index it.
348 Kokkos::View< int*, memory_space > msg_sdr( Kokkos::view_alloc( Kokkos::WithoutInitializing, "msg_sdr" ),
349 messages.size() );
350 auto msg_sdr_host = Kokkos::create_mirror_view( msg_sdr );
351 for ( size_t i = 0; i < messages.size(); ++i )
352 {
353 msg_sdr_host( i ) =
354 use_fine_index ? messages[i].local_subdomain_on_fine : messages[i].local_subdomain_on_coarse;
355 }
356 Kokkos::deep_copy( msg_sdr, msg_sdr_host );
357
358 const int num_messages = static_cast< int >( messages.size() );
359 if ( num_messages == 0 ) return;
360
361 // Dispatch at call site, not inside the lambda — nvcc rejects first-capture
362 // inside `if constexpr` bodies of extended __device__ lambdas.
363 if constexpr ( VecDim == 1 )
364 {
365 Kokkos::parallel_for(
366 "redistribute_pack",
367 Kokkos::MDRangePolicy< Kokkos::Rank< 5 > >( { 0, 0, 0, 0, 0 }, { num_messages, ni, ni, nr, VecDim } ),
368 KOKKOS_LAMBDA( int m, int i, int j, int k, int c ) {
369 const int local_sdr = msg_sdr( m );
370 const int flat = m * block + ( ( i * ni + j ) * nr + k ) * VecDim + c;
371 buf( flat ) = src( local_sdr, i, j, k );
372 } );
373 }
374 else
375 {
376 Kokkos::parallel_for(
377 "redistribute_pack",
378 Kokkos::MDRangePolicy< Kokkos::Rank< 5 > >( { 0, 0, 0, 0, 0 }, { num_messages, ni, ni, nr, VecDim } ),
379 KOKKOS_LAMBDA( int m, int i, int j, int k, int c ) {
380 const int local_sdr = msg_sdr( m );
381 const int flat = m * block + ( ( i * ni + j ) * nr + k ) * VecDim + c;
382 buf( flat ) = src( local_sdr, i, j, k, c );
383 } );
384 }
385 Kokkos::fence( "redistribute_pack" );
386 }
387
388 void unpack_( GridDataType& dst, const buffer_view& buf, const std::vector< Message >& messages,
389 bool use_fine_index ) const
390 {
391 const int ni = Ni_( *domain_fine_ );
392 const int nr = Nr_( *domain_fine_ );
393 const int block = subdomain_block_size_();
394
395 Kokkos::View< int*, memory_space > msg_sdr( Kokkos::view_alloc( Kokkos::WithoutInitializing, "msg_sdr" ),
396 messages.size() );
397 auto msg_sdr_host = Kokkos::create_mirror_view( msg_sdr );
398 for ( size_t i = 0; i < messages.size(); ++i )
399 {
400 msg_sdr_host( i ) =
401 use_fine_index ? messages[i].local_subdomain_on_fine : messages[i].local_subdomain_on_coarse;
402 }
403 Kokkos::deep_copy( msg_sdr, msg_sdr_host );
404
405 const int num_messages = static_cast< int >( messages.size() );
406 if ( num_messages == 0 ) return;
407
408 if constexpr ( VecDim == 1 )
409 {
410 Kokkos::parallel_for(
411 "redistribute_unpack",
412 Kokkos::MDRangePolicy< Kokkos::Rank< 5 > >( { 0, 0, 0, 0, 0 }, { num_messages, ni, ni, nr, VecDim } ),
413 KOKKOS_LAMBDA( int m, int i, int j, int k, int c ) {
414 const int local_sdr = msg_sdr( m );
415 const int flat = m * block + ( ( i * ni + j ) * nr + k ) * VecDim + c;
416 dst( local_sdr, i, j, k ) = buf( flat );
417 } );
418 }
419 else
420 {
421 Kokkos::parallel_for(
422 "redistribute_unpack",
423 Kokkos::MDRangePolicy< Kokkos::Rank< 5 > >( { 0, 0, 0, 0, 0 }, { num_messages, ni, ni, nr, VecDim } ),
424 KOKKOS_LAMBDA( int m, int i, int j, int k, int c ) {
425 const int local_sdr = msg_sdr( m );
426 const int flat = m * block + ( ( i * ni + j ) * nr + k ) * VecDim + c;
427 dst( local_sdr, i, j, k, c ) = buf( flat );
428 } );
429 }
430 Kokkos::fence( "redistribute_unpack" );
431 }
432
433 private:
434 void run_alltoallv_( const GridDataType& src, GridDataType& dst, bool transpose )
435 {
436 // Forward pass: pack from fine-indexed subdomains, send to coarse-indexed subdomains.
437 // Transpose: pack from coarse-indexed subdomains, send to fine-indexed subdomains.
438 const std::vector< Message >& pack_msgs = transpose ? recv_messages_ : send_messages_;
439 const std::vector< Message >& unpack_msgs = transpose ? send_messages_ : recv_messages_;
440 const std::vector< int >& s_counts = transpose ? recv_counts_ : send_counts_;
441 const std::vector< int >& s_displs = transpose ? recv_displs_ : send_displs_;
442 const std::vector< int >& r_counts = transpose ? send_counts_ : recv_counts_;
443 const std::vector< int >& r_displs = transpose ? send_displs_ : recv_displs_;
444
445 const bool pack_uses_fine_idx = !transpose; // forward: pack from fine-owned subdomains
446 const bool unpack_uses_fine_idx = transpose;
447
448 // Select staging buffers. "send" side of this direction is the buffer we pack into.
449 buffer_view& device_send = transpose ? recv_buf_ : send_buf_;
450 buffer_view& device_recv = transpose ? send_buf_ : recv_buf_;
451 host_buffer_view& host_send = transpose ? recv_host_ : send_host_;
452 host_buffer_view& host_recv = transpose ? send_host_ : recv_host_;
453
454 pack_( src, device_send, pack_msgs, pack_uses_fine_idx );
455
456 // Device -> host staging (Alltoallv is not GPU-aware on nvpcx).
457 Kokkos::deep_copy( host_send, device_send );
458
459 MPI_Alltoallv( host_send.data(),
460 s_counts.data(),
461 s_displs.data(),
462 mpi::mpi_datatype< ScalarType >(),
463 host_recv.data(),
464 r_counts.data(),
465 r_displs.data(),
466 mpi::mpi_datatype< ScalarType >(),
467 union_comm_ );
468
469 // Host -> device; then unpack.
470 Kokkos::deep_copy( device_recv, host_recv );
471
472 unpack_( dst, device_recv, unpack_msgs, unpack_uses_fine_idx );
473 }
474
475 const grid::shell::DistributedDomain* domain_fine_ = nullptr;
476 const grid::shell::DistributedDomain* domain_coarse_ = nullptr;
477 MPI_Comm union_comm_ = MPI_COMM_NULL;
478
479 std::vector< int > coarse_to_union_; // index = coarse-comm rank, value = union-comm rank
480
481 std::vector< Message > send_messages_;
482 std::vector< Message > recv_messages_;
483
484 std::vector< int > send_counts_;
485 std::vector< int > send_displs_;
486 std::vector< int > recv_counts_;
487 std::vector< int > recv_displs_;
488
489 buffer_view send_buf_;
490 buffer_view recv_buf_;
491 host_buffer_view send_host_;
492 host_buffer_view recv_host_;
493
494 /// True when every rank on union_comm_ owns the same subdomains on both
495 /// sides of the descent — apply/apply_transpose are pure no-ops. Set at
496 /// plan construction via a collective check across union_comm_.
497 bool identity_plan_ = false;
498};
499
500} // namespace terra::communication::shell