Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v05_shmem_src_k.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "impl/Kokkos_Profiling.hpp"
10#include "linalg/operator.hpp"
13#include "linalg/vector.hpp"
14#include "linalg/vector_q1.hpp"
15#include "util/timer.hpp"
16
18
29
// Matrix-free epsilon/div-div operator kernel for the spherical-shell wedge
// discretization. This "v05" variant stages the source vector and the coefficient k
// in Kokkos team shared memory (see operator() below). It can alternatively apply
// precomputed local matrices (see set_stored_matrix_mode / get_local_matrix).
30template < typename ScalarT, int VecDim = 3 >
32{
33 public:
 // Scalar type used for all field data and local matrices.
36 using ScalarType = ScalarT;
 // Local element matrices are 18x18: 6 wedge nodes x 3 vector components.
37 static constexpr int LocalMatrixDim = 18;
40
41 private:
 // Optional storage of precomputed local matrices; allocated in set_stored_matrix_mode().
42 LocalMatrixStorage local_matrix_storage_;
43
45
 // Boundary conditions for the two radial boundaries; copied element-wise in the ctor.
50 BoundaryConditions bcs_;
51
 // When true, only the diagonal (and Dirichlet-boundary) contribution is applied.
52 bool diagonal_;
53
54 linalg::OperatorApplyMode operator_apply_mode_;
55 linalg::OperatorCommunicationMode operator_communication_mode_;
56 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
57
60
63
64 // Quadrature points.
65 const int num_quad_points = quadrature::quad_felippa_1x1_num_quad_points;
66
69
 // Kernel launch geometry, derived from the domain in the constructor.
70 int local_subdomains_;
 // Lateral cells per subdomain side (nodes per side - 1).
71 int hex_lat_;
 // Radial cells per subdomain (radial nodes - 1).
72 int hex_rad_;
73 int lat_refinement_level_;
 // Team size (threads per radial-column block), capped at 128 in the ctor.
74 int block_size_;
75 int blocks_per_column_;
 // Total number of teams launched by apply_impl().
76 int blocks_;
77
 // Outermost / innermost shell radii (set from DomainInfo::radii() in the ctor).
78 ScalarT r_max_;
79 ScalarT r_min_;
81 public:
 // Constructor: stores the domain/grid/radii/mask/coefficient handles, copies the
 // boundary conditions, and derives the Kokkos launch geometry (team size, number
 // of teams) from the domain metadata. (Several signature lines are elided in this
 // listing; see the cross-reference below for the full parameter list.)
88 BoundaryConditions bcs,
89 bool diagonal,
91 linalg::OperatorCommunicationMode operator_communication_mode =
94 : domain_( domain )
95 , grid_( grid )
96 , radii_( radii )
97 , mask_( mask )
98 , k_( k )
99 , diagonal_( diagonal )
100 , operator_apply_mode_( operator_apply_mode )
101 , operator_communication_mode_( operator_communication_mode )
102 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
103 , send_buffers_( domain )
104 , recv_buffers_( domain )
105 {
 // Copy the two radial-boundary conditions (CMB and surface).
106 bcs_[0] = bcs[0];
107 bcs_[1] = bcs[1];
110 const grid::shell::DomainInfo& domain_info = domain_.domain_info();
111 local_subdomains_ = domain_.subdomains().size();
 // Cell counts are node counts minus one in each direction.
112 hex_lat_ = domain_info.subdomain_num_nodes_per_side_laterally() - 1;
113 hex_rad_ = domain_info.subdomain_num_nodes_radially() - 1;
114 lat_refinement_level_ = domain_info.diamond_lateral_refinement_level();
 // One thread per radial cell in a column; split columns into blocks of <= 128 threads.
115 const int threads_per_column = hex_rad_;
116 block_size_ = std::min( 128, threads_per_column );
117 blocks_per_column_ = ( threads_per_column + block_size_ - 1 ) / block_size_;
 // One team per (subdomain, lateral cell, radial block).
118 blocks_ = local_subdomains_ * hex_lat_ * hex_lat_ * blocks_per_column_;
119 r_min_ = domain_info.radii()[0];
120 r_max_ = domain_info.radii()[domain_info.radii().size() - 1];
 // NOTE(review): the label opens "(" but the values are printed without a matching
 // "(" before them, so the logged line has unbalanced parentheses. Cosmetic only.
121 util::logroot << "[EpsilonDivDiv] (threads_per_column, block_size_, blocks_per_column_) = "
122 << threads_per_column << ", " << block_size_ << ", " << blocks_per_column_ << ")" << std::endl;
123 }
124
 /// @brief Setter for the apply mode (Replace vs. accumulate) and the
 ///        communication mode used by apply_impl().
126 const linalg::OperatorApplyMode operator_apply_mode,
127 const linalg::OperatorCommunicationMode operator_communication_mode )
128 {
129 operator_apply_mode_ = operator_apply_mode;
130 operator_communication_mode_ = operator_communication_mode;
131 }
132
133 /// @brief Setter for the diagonal-only flag (see diagonal_).
134 void set_diagonal( bool v ) { diagonal_ = v; }
135
136 /// @brief Getter for coefficient
138
139 /// @brief Getter for domain member
140 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
141
142 /// @brief Getter for radii member
144
145 /// @brief Getter for grid member
147
148 /// @brief Returns true if the given boundary flag is set in the mask at the given cell.
149 KOKKOS_INLINE_FUNCTION
151 const int local_subdomain_id,
152 const int x_cell,
153 const int y_cell,
154 const int r_cell,
156 {
157 return util::has_flag( mask_( local_subdomain_id, x_cell, y_cell, r_cell ), flag );
158 }
159
160 /// @brief Allocates memory for the local matrices (when the mode is not Off)
161 /// and records the stored-matrix mode used by get_local_matrix().
162 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode,
163 int level_range,
165 {
166 operator_stored_matrix_mode_ = operator_stored_matrix_mode;
167
168 // allocate storage if necessary
169 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
170 {
172 domain_, operator_stored_matrix_mode_, level_range, GCAElements );
173 }
174 }
175
 /// @brief Returns the currently configured stored-matrix mode.
176 linalg::OperatorStoredMatrixMode get_stored_matrix_mode() { return operator_stored_matrix_mode_; }
177
178 /// @brief Set the local matrix stored in the operator for the given element/wedge.
179 /// Requires a stored-matrix mode other than Off (asserted below).
179 KOKKOS_INLINE_FUNCTION
181 const int local_subdomain_id,
182 const int x_cell,
183 const int y_cell,
184 const int r_cell,
185 const int wedge,
187 {
188 KOKKOS_ASSERT( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off );
189 local_matrix_storage_.set_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge, mat );
190 }
191
192 /// @brief Retrieves the local matrix
193 /// if there are stored local matrices, the desired local matrix is loaded and returned
194 /// if not, the local matrix is assembled on-the-fly
195 KOKKOS_INLINE_FUNCTION
197 const int local_subdomain_id,
198 const int x_cell,
199 const int y_cell,
200 const int r_cell,
201 const int wedge ) const
202 {
203 // request from storage
204 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
205 {
 // Fail hard if the matrix was never stored for this element: device-side abort.
206 if ( !local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge ) )
207 {
208 Kokkos::abort( "No matrix found at that spatial index." );
209 }
210 return local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
211 }
212 else
213 {
 // No storage configured: assemble the element matrix on-the-fly.
214 return assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
215 }
216 }
217
218 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
219 {
220 util::Timer timer_apply( "epsilon_divdiv_apply" );
221
222 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
223 {
224 assign( dst, 0 );
225 }
226
227 src_ = src.grid_data();
228 dst_ = dst.grid_data();
229
230 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
231 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
232 {
233 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
234 }
235
236 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
237 {
238 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
239 }
240
241 util::Timer timer_kernel( "epsilon_divdiv_kernel" );
242 Kokkos::TeamPolicy<> policy( blocks_, block_size_ );
243 Kokkos::parallel_for( "matvec", policy, *this );
244 // grid::shell::local_domain_md_range_policy_cells( domain_ ),
245 //s *this );
246 Kokkos::fence();
247 timer_kernel.stop();
248
249 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
250 {
251 util::Timer timer_comm( "epsilon_divdiv_comm" );
252
254 domain_, dst_, send_buffers_, recv_buffers_ );
256 }
257 }
258
 /// @brief Builds the non-zero entries of the symmetric gradient of a vector-valued
 ///        basis function whose only non-zero component is `dim`, given the physical
 ///        gradient (g0, g1, g2) of the scalar shape function.
 ///        Outputs the diagonal entries E00/E11/E22, the symmetric off-diagonal
 ///        entries sym01/sym02/sym12 (each 0.5*(E_ij + E_ji)), and gdd = E[dim][dim],
 ///        which is the divergence contribution used in the div-div term.
259 KOKKOS_INLINE_FUNCTION
261 const int dim,
262 const double g0,
263 const double g1,
264 const double g2,
265 double& E00,
266 double& E11,
267 double& E22,
268 double& sym01,
269 double& sym02,
270 double& sym12,
271 double& gdd ) const
272 {
 // Reset all outputs; only the entries relevant for `dim` are filled below.
273 E00 = 0.0;
274 E11 = 0.0;
275 E22 = 0.0;
276 sym01 = 0.0;
277 sym02 = 0.0;
278 sym12 = 0.0;
279 gdd = 0.0;
280
281 // dim selects which COLUMN is populated:
282 // dim==0: E[0][0]=g0, E[1][0]=g1, E[2][0]=g2
283 // dim==1: E[0][1]=g0, E[1][1]=g1, E[2][1]=g2
284 // dim==2: E[0][2]=g0, E[1][2]=g1, E[2][2]=g2
285 switch ( dim )
286 {
287 case 0:
288 E00 = g0;
289 gdd = g0; // E[0][0]
290 sym01 = 0.5 * g1; // 0.5*(E[0][1]=0 + E[1][0]=g1)
291 sym02 = 0.5 * g2; // 0.5*(E[0][2]=0 + E[2][0]=g2)
292 sym12 = 0.0;
293 break;
294
295 case 1:
296 E11 = g1;
297 gdd = g1; // E[1][1]
298 sym01 = 0.5 * g0; // 0.5*(E[0][1]=g0 + E[1][0]=0)
299 sym02 = 0.0;
300 sym12 = 0.5 * g2; // 0.5*(E[1][2]=0 + E[2][1]=g2)
301 break;
302
303 default: // case 2
304 E22 = g2;
305 gdd = g2; // E[2][2]
306 sym01 = 0.0;
307 sym02 = 0.5 * g0; // 0.5*(E[0][2]=g0 + E[2][0]=0)
308 sym12 = 0.5 * g1; // 0.5*(E[1][2]=g1 + E[2][1]=0)
309 break;
310 }
311 }
312
 // Kokkos team handle type used by operator() and team_shmem_size().
313 using Team = Kokkos::TeamPolicy<>::member_type;
314
 /// @brief Per-team scratch (shared-memory) requirement in bytes, given team size ts:
 ///        18 doubles of wedge surface coordinates ([2 wedges][3 nodes][3 coords])
 ///        plus (ts + 1) radial levels of 4*3 src components and 4 k values.
 ///        Must match the scratch layout carved out in operator().
315 KOKKOS_INLINE_FUNCTION
316 static size_t team_shmem_size( const int ts/*team_size*/ )
317 {
318 // We store wedge_surf_phy_coords[2][3][3], src, k
319 return sizeof( double ) * (2 * 3 * 3 + (ts + 1) * (12 + 4));
320 }
321
 /// @brief Team-parallel matvec kernel body. Each team handles one lateral cell
 ///        column block of one subdomain; each thread handles one radial cell.
 ///        The source dofs and coefficient k for the team's radial slab are staged
 ///        in team shared memory once, then each thread assembles and applies the
 ///        two wedge element contributions and scatters them with atomics.
322 KOKKOS_INLINE_FUNCTION
323 void operator()( const Team& team ) const
324 {
325 int local_subdomain_id, x_cell, y_cell, r_cell;
326
 // Decode (subdomain, x_cell, y_cell, radial block) from the league rank.
 // The bitmask/shift decode assumes hex_lat_ == 2^lat_refinement_level_
 // (a power of two) — consistent with the diamond refinement; verify if changed.
327 {
328 int tmp = team.league_rank();
329 const int r_block_index = tmp % blocks_per_column_;
330 tmp /= blocks_per_column_;
331 y_cell = tmp & ( hex_lat_ - 1 );
332 tmp >>= lat_refinement_level_;
333 x_cell = tmp & ( hex_lat_ - 1 );
334 tmp >>= lat_refinement_level_;
335 local_subdomain_id = tmp;
336
 // NOTE(review): no guard against r_cell >= hex_rad_; this relies on
 // hex_rad_ being an exact multiple of the team size — TODO confirm.
337 r_cell = r_block_index * team.team_size() + team.team_rank();
338 }
339
 // Boundary flags of the lower (r_cell) and upper (r_cell+1) radial node layers.
340 const bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
341 const bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
342
343 // ----- FAST PATH (DCA) -----
344 // - Load ALL source dofs (and k) for the team's r-slab into TEAM SHARED MEMORY once.
345 // - Each thread then reads the dofs for its corresponding r_cell (team_rank) from the shared arrays.
346 //
347 // Team covers radial levels: r_base ... r_base + team_size
348 // (need team_size+1 levels because each thread needs r and r+1).
349 //
 // Node offsets (dx, dy, dr) of the 6 nodes of each of the two wedges in a hex cell.
350 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
351 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
352 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
353
354 // Map wedge-local node (w, node) -> unique node id in [0..7] that matches your final scatter pattern
355 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
356 { 0, 1, 2, 3, 4, 5 }, // wedge 0
357 { 6, 2, 1, 7, 5, 4 } // wedge 1
358 };
359
360 // ---- single quadrature point collapsed: qp0=qp1=1/3, qp2=0, qw=1 ----
361 constexpr double ONE_THIRD = 1.0 / 3.0;
362 constexpr double ONE_SIXTH = 1.0 / 6.0;
363 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
364
365 // Reference gradients at qp0=qp1=1/3, qp2=0 (constexpr)
366 static constexpr double dN_ref[6][3] = {
367 { -0.5, -0.5, -ONE_SIXTH },
368 { 0.5, 0.0, -ONE_SIXTH },
369 { 0.0, 0.5, -ONE_SIXTH },
370 { -0.5, -0.5, ONE_SIXTH },
371 { 0.5, 0.0, ONE_SIXTH },
372 { 0.0, 0.5, ONE_SIXTH } };
373
374 // ---------------------------------------------------------
375 // TEAM SCRATCH LAYOUT
376 // wedge_surf_phy_coords: [2][3][3]
377 // src_sh: [team_size+1][4][3] (levels, xy, dim)
378 // k_sh: [team_size+1][4] (levels, xy)
379 // ---------------------------------------------------------
 // Sizes below must stay in sync with team_shmem_size().
380 double* shmem =
381 reinterpret_cast< double* >( team.team_shmem().get_shmem( team_shmem_size( team.team_size() ) ) );
382
383 using Scratch3D = Kokkos::
384 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
385 using Scratch3DLevels = Kokkos::
386 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
387 using Scratch2DLevels =
388 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
389
390 // wedge coords
391 Scratch3D wedge_surf_phy_coords( shmem, 2, 3, 3 );
392 shmem += 2 * 3 * 3;
393
394 const int ts = team.team_size();
395 const int nlev = ts + 1;
396
397 // src_sh(level, xy, dim)
398 Scratch3DLevels src_sh( shmem, nlev, 4, 3 );
399 shmem += nlev * 4 * 3;
400
401 // k_sh(level, xy)
402 Scratch2DLevels k_sh( shmem, nlev, 4 );
403 shmem += nlev * 4;
404
405 // ---------------------------------------------------------
406 // (1) Load surface xy geometry (once per team) as before
407 // ---------------------------------------------------------
408 Kokkos::single( Kokkos::PerTeam( team ), [&]() {
409 const double q00x = grid_( local_subdomain_id, x_cell, y_cell, 0 );
410 const double q00y = grid_( local_subdomain_id, x_cell, y_cell, 1 );
411 const double q00z = grid_( local_subdomain_id, x_cell, y_cell, 2 );
412
413 const double q01x = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
414 const double q01y = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
415 const double q01z = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
416
417 const double q10x = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
418 const double q10y = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
419 const double q10z = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
420
421 const double q11x = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
422 const double q11y = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
423 const double q11z = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
424
425 // wedge 0: (q00,q10,q01)
426 wedge_surf_phy_coords( 0, 0, 0 ) = q00x;
427 wedge_surf_phy_coords( 0, 0, 1 ) = q00y;
428 wedge_surf_phy_coords( 0, 0, 2 ) = q00z;
429 wedge_surf_phy_coords( 0, 1, 0 ) = q10x;
430 wedge_surf_phy_coords( 0, 1, 1 ) = q10y;
431 wedge_surf_phy_coords( 0, 1, 2 ) = q10z;
432 wedge_surf_phy_coords( 0, 2, 0 ) = q01x;
433 wedge_surf_phy_coords( 0, 2, 1 ) = q01y;
434 wedge_surf_phy_coords( 0, 2, 2 ) = q01z;
435
436 // wedge 1: (q11,q01,q10)
437 wedge_surf_phy_coords( 1, 0, 0 ) = q11x;
438 wedge_surf_phy_coords( 1, 0, 1 ) = q11y;
439 wedge_surf_phy_coords( 1, 0, 2 ) = q11z;
440 wedge_surf_phy_coords( 1, 1, 0 ) = q01x;
441 wedge_surf_phy_coords( 1, 1, 1 ) = q01y;
442 wedge_surf_phy_coords( 1, 1, 2 ) = q01z;
443 wedge_surf_phy_coords( 1, 2, 0 ) = q10x;
444 wedge_surf_phy_coords( 1, 2, 1 ) = q10y;
445 wedge_surf_phy_coords( 1, 2, 2 ) = q10z;
446 } );
447
448 // ---------------------------------------------------------
449 // (2) Load ALL k/src dofs for the team's radial slab into shared memory
450 //
451 // Layout:
452 // level = 0..team_size (absolute r = r_base + level)
453 // xy = 0..3 mapped by (dx + 2*dy)
454 //
455 // Each thread loads its own level = team_rank
456 // and the last thread additionally loads level = team_size.
457 // ---------------------------------------------------------
458 const int r_base = ( ( team.league_rank() % blocks_per_column_ ) * ts ); // same as r_block_index * ts
459
460 auto load_level = [&]( const int level ) {
461 const int r_abs = r_base + level;
462
463#pragma unroll
464 for ( int dy = 0; dy <= 1; ++dy )
465 {
466#pragma unroll
467 for ( int dx = 0; dx <= 1; ++dx )
468 {
469 const int xy = dx + 2 * dy;
470
471 const int xi = x_cell + dx;
472 const int yi = y_cell + dy;
473
474 k_sh( level, xy ) = k_( local_subdomain_id, xi, yi, r_abs );
475
476 src_sh( level, xy, 0 ) = src_( local_subdomain_id, xi, yi, r_abs, 0 );
477 src_sh( level, xy, 1 ) = src_( local_subdomain_id, xi, yi, r_abs, 1 );
478 src_sh( level, xy, 2 ) = src_( local_subdomain_id, xi, yi, r_abs, 2 );
479 }
480 }
481 };
482
483 // each thread loads its own level
484 load_level( team.team_rank() );
485
486 // one extra level (team_size) needed for r+1 of last thread
487 if ( team.team_rank() == ts - 1 )
488 {
489 load_level( ts );
490 }
491
 // All threads must see the fully populated scratch views before reading them.
492 team.team_barrier();
493
494 // ---------------------------------------------------------
495 // Thread-private radii (depends on r_cell)
496 // ---------------------------------------------------------
497 const double r_0 = radii_( local_subdomain_id, r_cell );
498 const double r_1 = radii_( local_subdomain_id, r_cell + 1 );
499
500 // Boundary treatment flags (guard the BC query)
501 const bool at_boundary = at_cmb || at_surface;
502 bool treat_boundary = false;
503 if ( at_boundary )
504 {
505 const ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
506 treat_boundary = ( get_boundary_condition_flag( bcs_, sbf ) == DIRICHLET );
507 }
508
 // A shift of 3 skips the three wedge nodes on the Dirichlet radial layer
 // (nodes 0..2 sit at r_cell, nodes 3..5 at r_cell+1; see WEDGE_NODE_OFF).
509 const int cmb_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
510 const int surface_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
511
512 // (1a) unique-node accumulation: 8 nodes per dim
513 double dst8[3][8] = { 0.0 };
514
515 // Local level index for this thread
516 const int lvl0 = team.team_rank(); // corresponds to r_cell
517 // lvl1 = lvl0 + 1 corresponds to r_cell+1, always valid because we loaded nlev = ts+1
518
519#pragma unroll
520 for ( int w = 0; w < 2; ++w )
521 {
522 // -------------------------
523 // (A) k_eval collapsed from shared memory
524 // -------------------------
 // Coefficient evaluated at the single quadrature point: mean of the 6 node values.
525 double k_sum = 0.0;
526#pragma unroll
527 for ( int node = 0; node < 6; ++node )
528 {
529 const int dx = WEDGE_NODE_OFF[w][node][0];
530 const int dy = WEDGE_NODE_OFF[w][node][1];
531 const int dr = WEDGE_NODE_OFF[w][node][2];
532
533 const int xy = dx + 2 * dy;
534 const int lvl = lvl0 + dr;
535
536 k_sum += k_sh( lvl, xy );
537 }
538 const double k_eval = ONE_SIXTH * k_sum;
539
540 // -------------------------
541 // (B) Jacobian + inv(J)^T
542 // -------------------------
543 double wJ = 0.0;
544
545 double i00, i01, i02;
546 double i10, i11, i12;
547 double i20, i21, i22;
548
549 {
550 const double half_dr = 0.5 * ( r_1 - r_0 );
551 const double r_mid = 0.5 * ( r_0 + r_1 );
552
553 const double J_0_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) );
554 const double J_0_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 2, 0 ) );
555 const double J_0_2 =
556 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) +
557 wedge_surf_phy_coords( w, 2, 0 ) ) );
558
559 const double J_1_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) );
560 const double J_1_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 2, 1 ) );
561 const double J_1_2 =
562 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) +
563 wedge_surf_phy_coords( w, 2, 1 ) ) );
564
565 const double J_2_0 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) );
566 const double J_2_1 = r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 2, 2 ) );
567 const double J_2_2 =
568 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) +
569 wedge_surf_phy_coords( w, 2, 2 ) ) );
570
571 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
572 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
573
574 const double invJ = 1.0 / J_det;
575
576 // inv(J)^T
577 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
578 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
579 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
580
581 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
582 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
583 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
584
585 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
586 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
587 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
588
589 wJ = Kokkos::abs( J_det ); // qw=1
590 }
591
592 const double kwJ = k_eval * wJ;
593
594 // -------------------------
595 // (C) grad_u + div_u as scalars (src from shared memory)
596 // -------------------------
 // Only the lower-triangular entries of the symmetrized gradient are tracked;
 // the doubled off-diagonal terms in the pairing below account for symmetry.
597 double gu00 = 0.0;
598 double gu10 = 0.0, gu11 = 0.0;
599 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
600 double div_u = 0.0;
601
602 if ( !diagonal_ )
603 {
604 // Assemble gu** and div_u
605#pragma unroll
606 for ( int dimj = 0; dimj < 3; ++dimj )
607 {
608#pragma unroll
 // Dirichlet node ranges: skip nodes on the treated boundary layer.
609 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
610 {
611 const double gx = dN_ref[node_idx][0];
612 const double gy = dN_ref[node_idx][1];
613 const double gz = dN_ref[node_idx][2];
614
615 const double g0 = i00 * gx + i01 * gy + i02 * gz;
616 const double g1 = i10 * gx + i11 * gy + i12 * gz;
617 const double g2 = i20 * gx + i21 * gy + i22 * gz;
618
619 double E00, E11, E22, sym01, sym02, sym12, gdd;
620 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
621
622 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
623 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
624 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
625
626 const int xy = dx + 2 * dy;
627 const int lvl = lvl0 + dr;
628
629 const double s = src_sh( lvl, xy, dimj );
630
631 gu00 += E00 * s;
632 gu10 += sym01 * s;
633 gu11 += E11 * s;
634 gu20 += sym02 * s;
635 gu21 += sym12 * s;
636 gu22 += E22 * s;
637
638 div_u += gdd * s;
639 }
640 }
641
642 // Pairing -> accumulate into unique-node array dst8
643#pragma unroll
644 for ( int dimi = 0; dimi < 3; ++dimi )
645 {
646#pragma unroll
647 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
648 {
649 const double gx = dN_ref[node_idx][0];
650 const double gy = dN_ref[node_idx][1];
651 const double gz = dN_ref[node_idx][2];
652
653 const double g0 = i00 * gx + i01 * gy + i02 * gz;
654 const double g1 = i10 * gx + i11 * gy + i12 * gz;
655 const double g2 = i20 * gx + i21 * gy + i22 * gz;
656
657 double E00, E11, E22, sym01, sym02, sym12, gdd;
658 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
659
660 const double pairing0 = 2.0 * sym01;
661 const double pairing1 = 2.0 * sym02;
662 const double pairing2 = 2.0 * sym12;
663
664 const int u = WEDGE_TO_UNIQUE[w][node_idx];
665
 // Each off-diagonal term appears twice (E:E symmetry), hence the repeated
 // pairing*gu terms; the diagonal terms carry the explicit factor 2.
666 dst8[dimi][u] += kwJ * ( NEG_TWO_THIRDS * div_u * gdd + pairing0 * gu10 + pairing0 * gu10 +
667 pairing1 * gu20 + pairing1 * gu20 + pairing2 * gu21 + pairing2 * gu21 +
668 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
669 }
670 }
671 }
672
673 // Diagonal / BC loop -> also accumulate into dst8
 // Note the complementary node range (surface_shift .. 6 - cmb_shift): on a treated
 // Dirichlet boundary this covers exactly the nodes skipped above, so those rows
 // receive only their diagonal contribution.
674 if ( diagonal_ || ( treat_boundary && at_boundary ) )
675 {
676#pragma unroll
677 for ( int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
678 {
679#pragma unroll
680 for ( int node_idx = surface_shift; node_idx < 6 - cmb_shift; ++node_idx )
681 {
682 const double gx = dN_ref[node_idx][0];
683 const double gy = dN_ref[node_idx][1];
684 const double gz = dN_ref[node_idx][2];
685
686 const double g0 = i00 * gx + i01 * gy + i02 * gz;
687 const double g1 = i10 * gx + i11 * gy + i12 * gz;
688 const double g2 = i20 * gx + i21 * gy + i22 * gz;
689
690 double E00, E11, E22, sym01, sym02, sym12, gdd;
691 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
692
693 const int dx = WEDGE_NODE_OFF[w][node_idx][0];
694 const int dy = WEDGE_NODE_OFF[w][node_idx][1];
695 const int dr = WEDGE_NODE_OFF[w][node_idx][2];
696
697 const int xy = dx + 2 * dy;
698 const int lvl = lvl0 + dr;
699
700 const double s = src_sh( lvl, xy, dim_diagBC );
701
702 const double pairing0 = 4.0 * s;
703 const double pairing1 = 2.0 * s;
704
705 const int u = WEDGE_TO_UNIQUE[w][node_idx];
706
707 dst8[dim_diagBC][u] +=
708 kwJ * ( pairing0 * ( sym01 * sym01 ) + pairing0 * ( sym02 * sym02 ) +
709 pairing0 * ( sym12 * sym12 ) + pairing1 * ( E00 * E00 ) + pairing1 * ( E11 * E11 ) +
710 pairing1 * ( E22 * E22 ) + NEG_TWO_THIRDS * ( gdd * gdd ) * s );
711 }
712 }
713 }
714 } // w
715
716 // Final scatter: 8 unique nodes per dim (same result as original merged scatter)
 // Atomics are required: neighboring cells (handled by other threads/teams)
 // accumulate into the same shared grid nodes.
717 for ( int dim_add = 0; dim_add < 3; ++dim_add )
718 {
719 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
720 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
721 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
722 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
723 Kokkos::atomic_add(
724 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
725 Kokkos::atomic_add(
726 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
727 Kokkos::atomic_add(
728 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
729 Kokkos::atomic_add(
730 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );
731 }
732 }
733
734 /// @brief: For both trial and test space this function sets up a vector:
735 /// each vector element holds the symmetric gradient (a 3x3 matrix) of the shape function of the corresponding dof
736 /// (if dimi == dimj, these are the same and we are on the diagonal of the vectorial diffusion operator)
737 /// Additionally, we compute the scalar factor for the numerical integral comp: determinant of the jacobian,
738 /// evaluation of the coefficient k on the element and the quadrature weight of the current quad-point.
739
740 /// The idea of this function is that the two vectors can be:
741 /// - accumulated to the result of the local matvec with 2 * num_nodes_per_wedge complexity
742 /// by scaling the dot product of the trial vec and local src dofs with each element of the test vec
743 /// (and adding to the dst dofs, this is the fused local matvec).
744 /// - propagated to the local matrix by an outer product of the two vectors
745 /// (without applying it to dofs). This is e.g. required to assemble the finest grid local
746 /// matrix on-the-fly during GCA/Galerkin coarsening.
747
748 ///
749 KOKKOS_INLINE_FUNCTION void assemble_trial_test_vecs(
750 const int wedge,
751 const dense::Vec< ScalarType, VecDim >& quad_point,
752 const ScalarType quad_weight,
753 const ScalarT r_1,
754 const ScalarT r_2,
755 dense::Vec< ScalarT, 3 > ( *wedge_phy_surf )[3],
756 const dense::Vec< ScalarT, 6 >* k_local_hex,
757 const int dimi,
758 const int dimj,
761 ScalarType& jdet_keval_quadweight ) const
762 {
 // Element Jacobian at the quadrature point, its determinant and inverse transpose.
763 dense::Mat< ScalarType, VecDim, VecDim > J = jac( wedge_phy_surf[wedge], r_1, r_2, quad_point );
764 const auto det = J.det();
765 const auto abs_det = Kokkos::abs( det );
766 const dense::Mat< ScalarType, VecDim, VecDim > J_inv_transposed = J.inv_transposed( det );
767
768 // dot of coeff dofs and element-local shape functions to evaluate the coefficient on the current element
769 ScalarType k_eval = 0.0;
770 for ( int k = 0; k < num_nodes_per_wedge; k++ )
771 {
772 k_eval += shape( k, quad_point ) * k_local_hex[wedge]( k );
773 }
774
 // Symmetric gradients of trial (dimi) and test (dimj) shape functions per node.
775 for ( int k = 0; k < num_nodes_per_wedge; k++ )
776 {
777 sym_grad_i[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimi );
778 sym_grad_j[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimj );
779 }
 // Combined scalar factor for the quadrature contribution: w_q * k(x_q) * |det J|.
780 jdet_keval_quadweight = quad_weight * k_eval * abs_det;
781 }
782
783 /// @brief assemble the local matrix and return it for a given element, wedge, and vectorial component
784 /// (determined by dimi, dimj)
785 KOKKOS_INLINE_FUNCTION
787 const int local_subdomain_id,
788 const int x_cell,
789 const int y_cell,
790 const int r_cell,
791 const int wedge ) const
792 {
793 // Gather surface points for each wedge.
794 // TODO gather this for only 1 wedge
796 wedge_surface_physical_coords( wedge_phy_surf, grid_, local_subdomain_id, x_cell, y_cell );
797
798 // Gather wedge radii.
799 const ScalarT r_1 = radii_( local_subdomain_id, r_cell );
800 const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );
801
 // Gather the coefficient k at the element's nodes.
803 extract_local_wedge_scalar_coefficients( k_local_hex, local_subdomain_id, x_cell, y_cell, r_cell, k_ );
804
805 // Compute the local element matrix.
 // Loop over the 3x3 vector-component blocks of the 18x18 local matrix.
807 for ( int dimi = 0; dimi < 3; ++dimi )
808 {
809 for ( int dimj = 0; dimj < 3; ++dimj )
810 {
811 // spatial dimensions: quadrature points and wedge
812 for ( int q = 0; q < num_quad_points; q++ )
813 {
816 ScalarType jdet_keval_quadweight = 0;
818 wedge,
819 quad_points[q],
820 quad_weights[q],
821 r_1,
822 r_2,
823 wedge_phy_surf,
824 k_local_hex,
825 dimi,
826 dimj,
827 sym_grad_i,
828 sym_grad_j,
829 jdet_keval_quadweight );
830
831 // propagate on local matrix by outer product of test and trial vecs
832 for ( int i = 0; i < num_nodes_per_wedge; i++ )
833 {
834 for ( int j = 0; j < num_nodes_per_wedge; j++ )
835 {
 // Entry = w * ( 2 eps_j : eps_i - 2/3 div_j * div_i ).
836 A( i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge ) +=
837 jdet_keval_quadweight *
838 ( 2 * sym_grad_j[j].double_contract( sym_grad_i[i] ) -
839 2.0 / 3.0 * sym_grad_j[j]( dimj, dimj ) * sym_grad_i[i]( dimi, dimi ) );
840 // for the div, we just extract the component from the gradient vector
841 }
842 }
843 }
844 }
845 }
846
847 return A;
848 }
849};
850
853
854} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
void set_stored_matrix_mode(linalg::OperatorStoredMatrixMode operator_stored_matrix_mode, int level_range, grid::Grid4DDataScalar< ScalarType > GCAElements)
allocates memory for the local matrices
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:161
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > assemble_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
assemble the local matrix and return it for a given element, wedge, and vectorial component (determin...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:786
void set_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > &mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:180
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Getter for coefficient.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:137
linalg::OperatorStoredMatrixMode get_stored_matrix_mode()
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:176
void assemble_trial_test_vecs(const int wedge, const dense::Vec< ScalarType, VecDim > &quad_point, const ScalarType quad_weight, const ScalarT r_1, const ScalarT r_2, dense::Vec< ScalarT, 3 >(*wedge_phy_surf)[3], const dense::Vec< ScalarT, 6 > *k_local_hex, const int dimi, const int dimj, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_i, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_j, ScalarType &jdet_keval_quadweight) const
: For both trial and test space this function sets up a vector: each vector element holds the symmetr...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:749
grid::Grid3DDataVec< ScalarT, 3 > get_grid()
Getter for grid member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:146
Kokkos::TeamPolicy<>::member_type Team
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:313
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:143
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:140
terra::grid::Grid4DDataMatrices< ScalarType, LocalMatrixDim, LocalMatrixDim, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:38
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:125
void set_diagonal(bool v)
S/Getter for diagonal member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:134
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:36
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix: if there are stored local matrices, the desired local matrix is loaded and r...
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:196
EpsilonDivDivKerngenV05ShmemSrcK(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:82
bool has_flag(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, grid::shell::ShellBoundaryFlag flag) const
Getter for mask member.
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:150
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:218
void column_grad_to_sym(const int dim, const double g0, const double g1, const double g2, double &E00, double &E11, double &E22, double &sym01, double &sym02, double &sym12, double &gdd) const
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:260
void operator()(const Team &team) const
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:323
static size_t team_shmem_size(const int ts)
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:316
static constexpr int LocalMatrixDim
Definition epsilon_divdiv_kerngen_v05_shmem_src_k.hpp:37
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
Information about the thick spherical shell mesh.
Definition spherical_shell.hpp:780
const std::vector< double > & radii() const
Definition spherical_shell.hpp:845
int diamond_lateral_refinement_level() const
Definition spherical_shell.hpp:843
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
bool has_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Checks for presence of a local matrix for a certain element.
Definition local_matrix_storage.hpp:223
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix; if there are stored local matrices, the desired local matrix is loaded and r...
Definition local_matrix_storage.hpp:175
void set_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > mat) const
Set the local matrix stored in the operator.
Definition local_matrix_storage.hpp:118
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that can be used as Galerkin coarse-grid operators in a multigrid hierarchy....
Definition operator.hpp:81
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
constexpr void quad_felippa_1x1_quad_points(dense::Vec< T, 3 >(&quad_points)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:36
constexpr void quad_felippa_1x1_quad_weights(T(&quad_weights)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:43
constexpr int quad_felippa_1x1_num_quad_points
Definition wedge/quadrature/quadrature.hpp:32
constexpr int num_nodes_per_wedge_surface
Definition kernel_helpers.hpp:6
constexpr dense::Mat< T, 3, 3 > symmetric_grad(const dense::Mat< T, 3, 3 > &J_inv_transposed, const dense::Vec< T, 3 > &quad_point, const int dof, const int dim)
Returns the symmetric gradient of the shape function of a dof at a quadrature point.
Definition integrands.hpp:685
void wedge_surface_physical_coords(dense::Vec< T, 3 >(&wedge_surf_phy_coords)[num_wedges_per_hex_cell][num_nodes_per_wedge_surface], const grid::Grid3DDataVec< T, 3 > &lateral_grid, const int local_subdomain_id, const int x_cell, const int y_cell)
Extracts the (unit sphere) surface vertex coords of the two wedges of a hex cell.
Definition kernel_helpers.hpp:26
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_scalar_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const grid::Grid4DDataScalar< T > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:306
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
constexpr T shape(const int node_idx, const T xi, const T eta, const T zeta)
(Tensor-product) Shape function.
Definition integrands.hpp:146
constexpr dense::Mat< T, 3, 3 > jac(const dense::Vec< T, 3 > &p1_phy, const dense::Vec< T, 3 > &p2_phy, const dense::Vec< T, 3 > &p3_phy, const T r_1, const T r_2, const T xi, const T eta, const T zeta)
Definition integrands.hpp:657
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
BoundaryConditionFlag get_boundary_condition_flag(const BoundaryConditions bcs, ShellBoundaryFlag sbf)
Retrieve the boundary condition flag that is associated with a location in the shell e....
Definition shell/bit_masks.hpp:42
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
dense::Mat< ScalarType, 3, 3 > trafo_mat_cartesian_to_normal_tangential(const dense::Vec< ScalarType, 3 > &n_input)
Constructs a robust orthonormal transformation matrix from Cartesian to (normal–tangential–tangential...
Definition local_basis_trafo_normal_tangential.hpp:36
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
constexpr bool has_flag(E mask_value, E flag) noexcept
Checks if a bitmask value contains a specific flag.
Definition bit_masking.hpp:43
detail::PrefixCout logroot([]() { return detail::log_prefix();})
std::ostream subclass that just logs on root and adds a timestamp for each line.
Definition mat.hpp:10
T double_contract(const Mat &mat)
Definition mat.hpp:226
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75