Loading...
Searching...
No Matches
epsilon_divdiv_kerngen_v04_shmem_coords.hpp
Go to the documentation of this file.
1#pragma once
2
3#include "../../../quadrature/quadrature.hpp"
5#include "dense/vec.hpp"
9#include "impl/Kokkos_Profiling.hpp"
10#include "linalg/operator.hpp"
13#include "linalg/vector.hpp"
14#include "linalg/vector_q1.hpp"
15#include "util/timer.hpp"
16
18
29
// NOTE(review): the class name line (Doxygen line 31) and several member
// declarations (lines 34-35, 38-39, 44-49, 58-62, 67-68: type aliases, the
// domain/grid/radii/mask/coefficient views, src_/dst_ grid-data views,
// send/recv communication buffers and the quadrature tables) were lost in
// this extraction; the surviving lines below are kept verbatim.
30template < typename ScalarT, int VecDim = 3 >
32{
33 public:
// Scalar type and the element-matrix dimension:
// 6 wedge nodes x 3 vector components = 18x18 local matrices.
36 using ScalarType = ScalarT;
37 static constexpr int LocalMatrixDim = 18;
40
41 private:
// Optional storage of precomputed 18x18 wedge matrices (stored-matrix/GCA path).
42 LocalMatrixStorage local_matrix_storage_;
43
45
// Boundary conditions for the two shell boundaries (index 0/1, copied in the ctor).
50 BoundaryConditions bcs_;
51
// When true, only the diagonal of the local matrices is applied (see operator()).
52 bool diagonal_;
53
// Replace-vs-accumulate, communication, and stored-matrix behaviour switches.
54 linalg::OperatorApplyMode operator_apply_mode_;
55 linalg::OperatorCommunicationMode operator_communication_mode_;
56 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode_;
57
60
63
64 // Quadrature points.
65 const int num_quad_points = quadrature::quad_felippa_1x1_num_quad_points;
66
69
// Launch geometry, derived from the domain in the constructor:
// one team per (subdomain, x_cell, y_cell, radial block); team threads
// cover the radial column in chunks of block_size_.
70 int local_subdomains_;
71 int hex_lat_;
72 int hex_rad_;
73 int lat_refinement_level_;
74 int block_size_;
75 int blocks_per_column_;
76 int blocks_;
77
// Innermost / outermost shell radii (read from the domain info).
78 ScalarT r_max_;
79 ScalarT r_min_;
80
81 public:
// Constructor: stores the domain/grid/coefficient views, copies the two
// shell boundary conditions, and derives the Kokkos launch geometry.
// NOTE(review): the constructor name line and several parameter lines
// (Doxygen 82-87, 90, 92-93) were lost in this extraction; the surviving
// tail of the parameter list and the body are kept verbatim.
88 BoundaryConditions bcs,
89 bool diagonal,
91 linalg::OperatorCommunicationMode operator_communication_mode =
94 : domain_( domain )
95 , grid_( grid )
96 , radii_( radii )
97 , mask_( mask )
98 , k_( k )
99 , diagonal_( diagonal )
100 , operator_apply_mode_( operator_apply_mode )
101 , operator_communication_mode_( operator_communication_mode )
102 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
103 , send_buffers_( domain )
104 , recv_buffers_( domain )
105 {
// Copy the two shell boundary conditions (CMB and surface).
106 bcs_[0] = bcs[0];
107 bcs_[1] = bcs[1];
110 const grid::shell::DomainInfo& domain_info = domain_.domain_info();
111 local_subdomains_ = domain_.subdomains().size();
// Cells per subdomain: hex_lat_^2 laterally, hex_rad_ radially.
112 hex_lat_ = domain_info.subdomain_num_nodes_per_side_laterally() - 1;
113 hex_rad_ = domain_info.subdomain_num_nodes_radially() - 1;
114 lat_refinement_level_ = domain_info.diamond_lateral_refinement_level();
// Launch geometry: at most 128 threads per team, ceil-divided over the
// radial column; one team per (subdomain, x, y, radial block).
115 const int threads_per_column = hex_rad_;
116 block_size_ = std::min( 128, threads_per_column );
117 blocks_per_column_ = ( threads_per_column + block_size_ - 1 ) / block_size_;
118 blocks_ = local_subdomains_ * hex_lat_ * hex_lat_ * blocks_per_column_;
119 r_min_ = domain_info.radii()[0];
120 r_max_ = domain_info.radii()[domain_info.radii().size() - 1];
// NOTE(review): the log format opens with '(' before the names but the
// printed values end with a stray ')' — cosmetic mismatch in the output.
121 util::logroot << "[EpsilonDivDiv] (threads_per_column, block_size_, blocks_per_column_) = "
122 << threads_per_column << ", " << block_size_ << ", " << blocks_per_column_ << ")" << std::endl;
123 }
124
// Setter for both the apply mode (replace vs. accumulate) and the
// communication mode in one call.
// NOTE(review): the function name line (Doxygen line 125) was lost in this
// extraction; the parameter list and body below are kept verbatim.
126 const linalg::OperatorApplyMode operator_apply_mode,
127 const linalg::OperatorCommunicationMode operator_communication_mode )
128 {
129 operator_apply_mode_ = operator_apply_mode;
130 operator_communication_mode_ = operator_communication_mode;
131 }
132
133 /// @brief Setter for the diagonal member (apply only the operator diagonal).
134 void set_diagonal( bool v ) { diagonal_ = v; }
135
136 /// @brief Getter for coefficient
// NOTE(review): the getter bodies originally on Doxygen lines 137, 143 and
// 146 (k_grid_data(), get_radii(), get_grid() per the generated index) were
// lost in this extraction; only their doc comments remain.
138
139 /// @brief Getter for domain member
140 const grid::shell::DistributedDomain& get_domain() const { return domain_; }
141
142 /// @brief Getter for radii member
144
145 /// @brief Getter for grid member
147
148 /// @brief Returns whether the given boundary flag is set in the node mask at (subdomain, x, y, r).
// NOTE(review): the signature line (Doxygen 150, bool has_flag(...)) and the
// flag parameter line (155) were lost in this extraction; the body is kept
// verbatim.
149 KOKKOS_INLINE_FUNCTION
151 const int local_subdomain_id,
152 const int x_cell,
153 const int y_cell,
154 const int r_cell,
156 {
157 return util::has_flag( mask_( local_subdomain_id, x_cell, y_cell, r_cell ), flag );
158 }
159
160 /// @brief allocates memory for the local matrices
// NOTE(review): the signature line (Doxygen 161) and the GCAElements
// parameter line (164) were lost in this extraction; the generated index
// gives the full signature as set_stored_matrix_mode( mode, level_range,
// GCAElements ). Line 171 (the storage-construction callee) was lost too.
162 linalg::OperatorStoredMatrixMode operator_stored_matrix_mode,
163 int level_range,
165 {
166 operator_stored_matrix_mode_ = operator_stored_matrix_mode;
167
168 // allocate storage if necessary
169 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
170 {
172 domain_, operator_stored_matrix_mode_, level_range, GCAElements );
173 }
174 }
175
/// @brief Getter for the current stored-matrix mode.
176 linalg::OperatorStoredMatrixMode get_stored_matrix_mode() { return operator_stored_matrix_mode_; }
177
178 /// @brief Set the local matrix stored in the operator
// NOTE(review): the signature line (Doxygen 180, void set_local_matrix(...))
// and the mat parameter line (186) were lost in this extraction.
179 KOKKOS_INLINE_FUNCTION
181 const int local_subdomain_id,
182 const int x_cell,
183 const int y_cell,
184 const int r_cell,
185 const int wedge,
187 {
// Storing a matrix is only legal when a stored-matrix mode is active.
188 KOKKOS_ASSERT( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off );
189 local_matrix_storage_.set_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge, mat );
190 }
191
192 /// @brief Retrieves the local matrix:
193 /// if there are stored local matrices, the desired local matrix is loaded and returned;
194 /// if not, the local matrix is assembled on-the-fly
// NOTE(review): the return-type/name line (Doxygen 196) was lost in this
// extraction; the parameter list and body are kept verbatim.
195 KOKKOS_INLINE_FUNCTION
197 const int local_subdomain_id,
198 const int x_cell,
199 const int y_cell,
200 const int r_cell,
201 const int wedge ) const
202 {
203 // request from storage
204 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
205 {
// NOTE(review): a missing matrix aborts here, while operator()'s Selective
// branch instead falls back to on-the-fly assembly — confirm the asymmetry
// is intended.
206 if ( !local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge ) )
207 {
208 Kokkos::abort( "No matrix found at that spatial index." );
209 }
210 return local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
211 }
212 else
213 {
214 return assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
215 }
216 }
217
218 void apply_impl( const SrcVectorType& src, DstVectorType& dst )
219 {
// Applies the operator: dst (+)= A * src. Launches the team-based kernel
// and, depending on the communication mode, exchanges subdomain-boundary
// contributions afterwards.
220 util::Timer timer_apply( "epsilon_divdiv_apply" );
221
// Replace mode zeroes dst first; otherwise contributions accumulate.
222 if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
223 {
224 assign( dst, 0 );
225 }
226
227 src_ = src.grid_data();
228 dst_ = dst.grid_data();
229
230 if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
231 src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
232 {
233 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
234 }
235
// NOTE(review): this second check compares src_ against grid_ extents but
// reuses the "src/dst mismatch" message — misleading when it fires; TODO
// give it its own message ("src/grid mismatch").
236 if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
237 {
238 throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
239 }
240
241 util::Timer timer_kernel( "epsilon_divdiv_kernel" );
// One team per (subdomain, x, y, radial block); threads walk the column.
242 Kokkos::TeamPolicy<> policy( blocks_, block_size_ );
243 Kokkos::parallel_for( "matvec", policy, *this );
244 // grid::shell::local_domain_md_range_policy_cells( domain_ ),
245 //s *this );
246 Kokkos::fence();
247 timer_kernel.stop();
248
249 if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
250 {
251 util::Timer timer_comm( "epsilon_divdiv_comm" );
252
// NOTE(review): the callee name on Doxygen line 253 (the additive boundary
// communication helper, see communication.hpp) was lost in this extraction;
// only its argument list remains.
254 domain_, dst_, send_buffers_, recv_buffers_ );
256 }
257 }
258
259 KOKKOS_INLINE_FUNCTION
260 void wedge_node_offsets( const int w, const int node, int& dx, int& dy, int& dr ) const
261 {
262 // w=0 nodes: (0,0,0),(1,0,0),(0,1,0),(0,0,1),(1,0,1),(0,1,1)
263 // w=1 nodes: (1,1,0),(0,1,0),(1,0,0),(1,1,1),(0,1,1),(1,0,1)
264 // Must match your original src_local_hex and k_local_hex assembly.
265 constexpr int off[2][6][3] = {
266 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } }, // w=0
267 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } // w=1
268 };
269 dx = off[w][node][0];
270 dy = off[w][node][1];
271 dr = off[w][node][2];
272 }
273
274 // Read source DoF at (dim, wedge, local node).
275 KOKKOS_INLINE_FUNCTION
276 double src_dof(
277 const int local_subdomain_id,
278 const int x_cell,
279 const int y_cell,
280 const int r_cell,
281 const int dim,
282 const int w,
283 const int node ) const
284 {
285 int dx, dy, dr;
286 wedge_node_offsets( w, node, dx, dy, dr );
287 return src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, dim );
288 }
289
290 // Read k coefficient at (wedge, local node).
291 KOKKOS_INLINE_FUNCTION
292 double k_dof(
293 const int local_subdomain_id,
294 const int x_cell,
295 const int y_cell,
296 const int r_cell,
297 const int w,
298 const int node ) const
299 {
300 int dx, dy, dr;
301 wedge_node_offsets( w, node, dx, dy, dr );
302 return k_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr );
303 }
304
305 // Atomic add wedge-local contribution directly to the correct global dst_ entry.
306 // This matches your original final scatter mapping exactly.
307 KOKKOS_INLINE_FUNCTION
309 const int local_subdomain_id,
310 const int x_cell,
311 const int y_cell,
312 const int r_cell,
313 const int dim,
314 const int w,
315 const int node,
316 const double val ) const
317 {
318 int gx = x_cell;
319 int gy = y_cell;
320 int gr = r_cell;
321
322 if ( w == 0 )
323 {
324 switch ( node )
325 {
326 case 0:
327 gx = x_cell;
328 gy = y_cell;
329 gr = r_cell;
330 break;
331 case 1:
332 gx = x_cell + 1;
333 gy = y_cell;
334 gr = r_cell;
335 break; // shared with (w=1,node=2)
336 case 2:
337 gx = x_cell;
338 gy = y_cell + 1;
339 gr = r_cell;
340 break; // shared with (w=1,node=1)
341 case 3:
342 gx = x_cell;
343 gy = y_cell;
344 gr = r_cell + 1;
345 break;
346 case 4:
347 gx = x_cell + 1;
348 gy = y_cell;
349 gr = r_cell + 1;
350 break; // shared with (w=1,node=5)
351 case 5:
352 gx = x_cell;
353 gy = y_cell + 1;
354 gr = r_cell + 1;
355 break; // shared with (w=1,node=4)
356 }
357 }
358 else
359 {
360 switch ( node )
361 {
362 case 0:
363 gx = x_cell + 1;
364 gy = y_cell + 1;
365 gr = r_cell;
366 break;
367 case 1:
368 gx = x_cell;
369 gy = y_cell + 1;
370 gr = r_cell;
371 break; // shared with (w=0,node=2)
372 case 2:
373 gx = x_cell + 1;
374 gy = y_cell;
375 gr = r_cell;
376 break; // shared with (w=0,node=1)
377 case 3:
378 gx = x_cell + 1;
379 gy = y_cell + 1;
380 gr = r_cell + 1;
381 break;
382 case 4:
383 gx = x_cell;
384 gy = y_cell + 1;
385 gr = r_cell + 1;
386 break; // shared with (w=0,node=5)
387 case 5:
388 gx = x_cell + 1;
389 gy = y_cell;
390 gr = r_cell + 1;
391 break; // shared with (w=0,node=4)
392 }
393 }
394
395 Kokkos::atomic_add( &dst_( local_subdomain_id, gx, gy, gr, dim ), val );
396 }
397
398 KOKKOS_INLINE_FUNCTION
400 const int dim,
401 const double g0,
402 const double g1,
403 const double g2,
404 double& E00,
405 double& E11,
406 double& E22,
407 double& sym01,
408 double& sym02,
409 double& sym12,
410 double& gdd ) const
411 {
412 E00 = 0.0;
413 E11 = 0.0;
414 E22 = 0.0;
415 sym01 = 0.0;
416 sym02 = 0.0;
417 sym12 = 0.0;
418 gdd = 0.0;
419
420 // dim selects which COLUMN is populated:
421 // dim==0: E[0][0]=g0, E[1][0]=g1, E[2][0]=g2
422 // dim==1: E[0][1]=g0, E[1][1]=g1, E[2][1]=g2
423 // dim==2: E[0][2]=g0, E[1][2]=g1, E[2][2]=g2
424 switch ( dim )
425 {
426 case 0:
427 E00 = g0;
428 gdd = g0; // E[0][0]
429 sym01 = 0.5 * g1; // 0.5*(E[0][1]=0 + E[1][0]=g1)
430 sym02 = 0.5 * g2; // 0.5*(E[0][2]=0 + E[2][0]=g2)
431 sym12 = 0.0;
432 break;
433
434 case 1:
435 E11 = g1;
436 gdd = g1; // E[1][1]
437 sym01 = 0.5 * g0; // 0.5*(E[0][1]=g0 + E[1][0]=0)
438 sym02 = 0.0;
439 sym12 = 0.5 * g2; // 0.5*(E[1][2]=0 + E[2][1]=g2)
440 break;
441
442 default: // case 2
443 E22 = g2;
444 gdd = g2; // E[2][2]
445 sym01 = 0.0;
446 sym02 = 0.5 * g0; // 0.5*(E[0][2]=g0 + E[2][0]=0)
447 sym12 = 0.5 * g1; // 0.5*(E[1][2]=g1 + E[2][1]=0)
448 break;
449 }
450 }
451
452 using Team = Kokkos::TeamPolicy<>::member_type;
453
454 // Add this helper in your functor (or nearby) so the policy can request enough team scratch.
455 // Example usage when launching:
456 // TeamPolicy pol(num_leagues, team_size);
457 // pol.set_scratch_size(0, Kokkos::PerTeam(Functor::team_shmem_size(team_size)));
458 // Kokkos::parallel_for("...", pol, functor);
459 KOKKOS_INLINE_FUNCTION
460 static size_t team_shmem_size( const int /*team_size*/ )
461 {
462 // We store wedge_surf_phy_coords[2][3][3] as doubles in team scratch.
463 return sizeof( double ) * 2 * 3 * 3;
464 }
465
466 KOKKOS_INLINE_FUNCTION
467 void operator()( const Team& team ) const
468 {
// Kernel body: one league rank encodes a (subdomain, x, y, radial-block)
// column chunk; each team thread handles a single radial cell. Two paths
// follow: a stored-matrix (GCA) path and a matrix-free fast path (DCA).
// NOTE(review): declarations originally on Doxygen lines 490, 517, 520-521,
// 533, 537, 570, 589, 604, 664, 670, 679-680, 685, 689 (local matrices A,
// A_tmp, rotation matrices R, src/dst wedge vectors, boundary normal, and
// two gather/scatter helper callee names) were lost in this extraction; the
// surviving lines are kept verbatim.
469 int local_subdomain_id, x_cell, y_cell, r_cell;
470
// Decode league_rank() into grid coordinates.
// NOTE(review): the mask/shift decode assumes hex_lat_ is a power of two
// (hex_lat_ == 2^lat_refinement_level_) — TODO confirm.
471 {
472 int tmp = team.league_rank();
473 const int r_block_index = tmp % blocks_per_column_;
474 tmp /= blocks_per_column_;
475 y_cell = tmp & ( hex_lat_ - 1 );
476 tmp >>= lat_refinement_level_;
477 x_cell = tmp & ( hex_lat_ - 1 );
478 tmp >>= lat_refinement_level_;
479 local_subdomain_id = tmp;
480
481 r_cell = r_block_index * team.team_size() + team.team_rank();
// NOTE(review): r_cell is not bounds-checked; this relies on hex_rad_ being
// an exact multiple of block_size_ — verify for non-power-of-two columns.
482 }
483
484 bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
485 bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
486
487 if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )
488 {
489 // ---- GCA path unchanged ----
491
// Load both wedge matrices from storage, or assemble them on-the-fly when
// the mode (or Selective availability) requires it.
492 if ( operator_stored_matrix_mode_ == linalg::OperatorStoredMatrixMode::Full )
493 {
494 A[0] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
495 A[1] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
496 }
497 else if ( operator_stored_matrix_mode_ == linalg::OperatorStoredMatrixMode::Selective )
498 {
499 if ( local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) &&
500 local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) )
501 {
502 A[0] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
503 A[1] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
504 }
505 else
506 {
507 A[0] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
508 A[1] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
509 }
510 }
511 else
512 {
513 A[0] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
514 A[1] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
515 }
516
// Gather the element-local source dofs, per vector component.
518 for ( int dimj = 0; dimj < 3; dimj++ )
519 {
522 src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
523
524 for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )
525 {
526 for ( int i = 0; i < num_nodes_per_wedge; i++ )
527 {
528 src[wedge]( dimj * num_nodes_per_wedge + i ) = src_d[wedge]( i );
529 }
530 }
531 }
532
534 boundary_mask.fill( 1.0 );
535
536 bool freeslip_reorder = false;
538
// Boundary handling: Dirichlet zeroes the coupling rows/cols via the mask;
// free-slip rotates boundary dofs into a normal/tangential frame via R.
539 if ( at_cmb || at_surface )
540 {
541 ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
542 BoundaryConditionFlag bcf = get_boundary_condition_flag( bcs_, sbf );
543
544 if ( bcf == DIRICHLET )
545 {
546 for ( int dimi = 0; dimi < 3; ++dimi )
547 {
548 for ( int dimj = 0; dimj < 3; ++dimj )
549 {
550 for ( int i = 0; i < num_nodes_per_wedge; i++ )
551 {
552 for ( int j = 0; j < num_nodes_per_wedge; j++ )
553 {
554 if ( ( at_cmb && ( ( dimi == dimj && i != j && ( i < 3 || j < 3 ) ) ||
555 ( dimi != dimj && ( i < 3 || j < 3 ) ) ) ) ||
556 ( at_surface && ( ( dimi == dimj && i != j && ( i >= 3 || j >= 3 ) ) ||
557 ( dimi != dimj && ( i >= 3 || j >= 3 ) ) ) ) )
558 {
559 boundary_mask(
560 i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge ) = 0.0;
561 }
562 }
563 }
564 }
565 }
566 }
567 else if ( bcf == FREESLIP )
568 {
569 freeslip_reorder = true;
571
// Reorder A from component-major into node-major layout for the rotation.
572 for ( int wedge = 0; wedge < 2; ++wedge )
573 {
574 for ( int dimi = 0; dimi < 3; ++dimi )
575 {
576 for ( int node_idxi = 0; node_idxi < num_nodes_per_wedge; node_idxi++ )
577 {
578 for ( int dimj = 0; dimj < 3; ++dimj )
579 {
580 for ( int node_idxj = 0; node_idxj < num_nodes_per_wedge; node_idxj++ )
581 {
582 A_tmp[wedge]( node_idxi * 3 + dimi, node_idxj * 3 + dimj ) = A[wedge](
583 node_idxi + dimi * num_nodes_per_wedge,
584 node_idxj + dimj * num_nodes_per_wedge );
585 }
586 }
587 }
588 }
590 }
591
592 constexpr int layer_hex_offset_x[2][3] = { { 0, 1, 0 }, { 1, 0, 1 } };
593 constexpr int layer_hex_offset_y[2][3] = { { 0, 0, 1 }, { 1, 1, 0 } };
594
// Build per-wedge rotation R: identity everywhere except the 3x3 blocks of
// the three boundary nodes, which get the cartesian->normal/tangential map.
595 for ( int wedge = 0; wedge < 2; ++wedge )
596 {
597 for ( int i = 0; i < LocalMatrixDim; ++i )
598 {
599 R[wedge]( i, i ) = 1.0;
600 }
601
602 for ( int boundary_node_idx = 0; boundary_node_idx < 3; boundary_node_idx++ )
603 {
605 local_subdomain_id,
606 x_cell + layer_hex_offset_x[wedge][boundary_node_idx],
607 y_cell + layer_hex_offset_y[wedge][boundary_node_idx],
608 r_cell + ( at_cmb ? 0 : 1 ),
609 grid_,
610 radii_ );
611
612 auto R_i = trafo_mat_cartesian_to_normal_tangential( normal );
613
614 int offset_in_R = at_cmb ? 0 : 9;
615 for ( int dimi = 0; dimi < 3; ++dimi )
616 {
617 for ( int dimj = 0; dimj < 3; ++dimj )
618 {
619 R[wedge](
620 offset_in_R + boundary_node_idx * 3 + dimi,
621 offset_in_R + boundary_node_idx * 3 + dimj ) = R_i( dimi, dimj );
622 }
623 }
624 }
625
626 A[wedge] = R[wedge] * A_tmp[wedge] * R[wedge].transposed();
627
628 auto src_tmp = R[wedge] * src[wedge];
629 for ( int i = 0; i < 18; ++i )
630 {
631 src[wedge]( i ) = src_tmp( i );
632 }
633
634 int node_start = at_surface ? 3 : 0;
635 int node_end = at_surface ? 6 : 3;
636
// Decouple the normal component of each boundary node (free-slip).
637 for ( int node_idx = node_start; node_idx < node_end; node_idx++ )
638 {
639 int idx = node_idx * 3;
640 for ( int k = 0; k < 18; ++k )
641 {
642 if ( k != idx )
643 {
644 boundary_mask( idx, k ) = 0.0;
645 boundary_mask( k, idx ) = 0.0;
646 }
647 }
648 }
649 }
650 }
651 }
652
653 for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )
654 {
655 A[wedge].hadamard_product( boundary_mask );
656 }
657
658 if ( diagonal_ )
659 {
660 A[0] = A[0].diagonal();
661 A[1] = A[1].diagonal();
662 }
663
// Local matvec for both wedges.
665 dst[0] = A[0] * src[0];
666 dst[1] = A[1] * src[1];
667
// Rotate results back into cartesian components where free-slip applied R.
668 if ( freeslip_reorder )
669 {
671 dst_tmp[0] = R[0].transposed() * dst[0];
672 dst_tmp[1] = R[1].transposed() * dst[1];
673 for ( int i = 0; i < 18; ++i )
674 {
675 dst[0]( i ) = dst_tmp[0]( i );
676 dst[1]( i ) = dst_tmp[1]( i );
677 }
678
681 }
682
// Scatter per-component slices back to the global dst_ view.
683 for ( int dimi = 0; dimi < 3; dimi++ )
684 {
686 dst_d[0] = dst[0].template slice< 6 >( dimi * num_nodes_per_wedge );
687 dst_d[1] = dst[1].template slice< 6 >( dimi * num_nodes_per_wedge );
688
690 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
691 }
692 }
693 else
694 {
695 // ----- FAST PATH (DCA) -----
696 // Implements:
697 // (1a) shrink dst storage to 8 unique nodes (dst8[3][8])
698 // (1b) compute scalar_grad on-the-fly (no scalar_grad[6][3] array)
699 // Keeps: per-wedge register loads for src/k, constant-qp collapse, kwJ folding.
700
701 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
702 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
703 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
704
705
706 // Map wedge-local node (w, node) -> unique node id in [0..7] that matches your final scatter pattern
707 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
708 // w0: (0,0,0),(1,0,0),(0,1,0),(0,0,1),(1,0,1),(0,1,1)
709 { 0, 1, 2, 3, 4, 5 },
710 // w1: (1,1,0),(0,1,0),(1,0,0),(1,1,1),(0,1,1),(1,0,1)
711 { 6, 2, 1, 7, 5, 4 } };
712
713 // ---- single quadrature point collapsed: qp0=qp1=1/3, qp2=0, qw=1 ----
714 constexpr double ONE_THIRD = 1.0 / 3.0;
715 constexpr double ONE_SIXTH = 1.0 / 6.0;
716 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
717
718 // Reference gradients at qp0=qp1=1/3, qp2=0 (constexpr)
719 static constexpr double dN_ref[6][3] = {
720 { -0.5, -0.5, -ONE_SIXTH },
721 { 0.5, 0.0, -ONE_SIXTH },
722 { 0.0, 0.5, -ONE_SIXTH },
723 { -0.5, -0.5, ONE_SIXTH },
724 { 0.5, 0.0, ONE_SIXTH },
725 { 0.0, 0.5, ONE_SIXTH } };
726
727 // Team scratch: wedge_surf_phy_coords[2][3][3]
728 double* shmem =
729 reinterpret_cast< double* >( team.team_shmem().get_shmem( team_shmem_size( team.team_size() ) ) );
730
731 using Scratch3D = Kokkos::
732 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
733
734 Scratch3D wedge_surf_phy_coords( shmem, 2, 3, 3 );
735
// The lateral surface coordinates are shared by the whole column, so one
// thread fills the scratch copy and the team synchronises afterwards.
736 Kokkos::single( Kokkos::PerTeam( team ), [&]() {
737 const double q00x = grid_( local_subdomain_id, x_cell, y_cell, 0 );
738 const double q00y = grid_( local_subdomain_id, x_cell, y_cell, 1 );
739 const double q00z = grid_( local_subdomain_id, x_cell, y_cell, 2 );
740
741 const double q01x = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
742 const double q01y = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
743 const double q01z = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
744
745 const double q10x = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
746 const double q10y = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
747 const double q10z = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
748
749 const double q11x = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
750 const double q11y = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
751 const double q11z = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
752
753 // wedge 0
754 wedge_surf_phy_coords( 0, 0, 0 ) = q00x;
755 wedge_surf_phy_coords( 0, 0, 1 ) = q00y;
756 wedge_surf_phy_coords( 0, 0, 2 ) = q00z;
757
758 wedge_surf_phy_coords( 0, 1, 0 ) = q10x;
759 wedge_surf_phy_coords( 0, 1, 1 ) = q10y;
760 wedge_surf_phy_coords( 0, 1, 2 ) = q10z;
761
762 wedge_surf_phy_coords( 0, 2, 0 ) = q01x;
763 wedge_surf_phy_coords( 0, 2, 1 ) = q01y;
764 wedge_surf_phy_coords( 0, 2, 2 ) = q01z;
765
766 // wedge 1
767 wedge_surf_phy_coords( 1, 0, 0 ) = q11x;
768 wedge_surf_phy_coords( 1, 0, 1 ) = q11y;
769 wedge_surf_phy_coords( 1, 0, 2 ) = q11z;
770
771 wedge_surf_phy_coords( 1, 1, 0 ) = q01x;
772 wedge_surf_phy_coords( 1, 1, 1 ) = q01y;
773 wedge_surf_phy_coords( 1, 1, 2 ) = q01z;
774
775 wedge_surf_phy_coords( 1, 2, 0 ) = q10x;
776 wedge_surf_phy_coords( 1, 2, 1 ) = q10y;
777 wedge_surf_phy_coords( 1, 2, 2 ) = q10z;
778 } );
779 team.team_barrier();
780
781 // Thread-private (depends on r_cell)
782 const double r_0 = radii_( local_subdomain_id, r_cell );
783 const double r_1 = radii_( local_subdomain_id, r_cell + 1 );
784
785 // Boundary treatment flags (guard the BC query)
786 const bool at_boundary = at_cmb || at_surface;
787 bool treat_boundary = false;
788 if ( at_boundary )
789 {
790 const ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
791 treat_boundary = ( get_boundary_condition_flag( bcs_, sbf ) == DIRICHLET );
792 }
793
794 const int cmb_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
795 const int surface_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
796
797 // (1a) unique-node accumulation: 8 nodes per dim
798 double dst8[3][8] = { 0.0 };
799
800#pragma unroll
801 for ( int w = 0; w < 2; ++w )
802 {
803 // --------------------------------------------
804 // Load k + src for THIS wedge into registers
805 // --------------------------------------------
806 double k_w[6];
807 double src_w[3][6];
808
809#pragma unroll
810 for ( int node = 0; node < 6; ++node )
811 {
812 const int dx = WEDGE_NODE_OFF[w][node][0];
813 const int dy = WEDGE_NODE_OFF[w][node][1];
814 const int dr = WEDGE_NODE_OFF[w][node][2];
815
816 k_w[node] = k_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr );
817
818 src_w[0][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 0 );
819 src_w[1][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 1 );
820 src_w[2][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 2 );
821 }
822
823 // -------------------------
824 // (A) k_eval collapsed
825 // -------------------------
826 const double k_eval = ONE_SIXTH * ( k_w[0] + k_w[1] + k_w[2] + k_w[3] + k_w[4] + k_w[5] );
827
828 // -------------------------
829 // (B) Jacobian + invJT entries
830 // (1b) scalar_grad computed on-the-fly from invJT and constexpr dN_ref
831 // -------------------------
832 double wJ = 0.0;
833
834 // inv(J)^T entries kept as scalars for on-the-fly gradients
835 double i00, i01, i02;
836 double i10, i11, i12;
837 double i20, i21, i22;
838
839 {
840 const double half_dr = 0.5 * ( r_1 - r_0 );
841 const double r_mid = 0.5 * ( r_0 + r_1 );
842
843 const double J_0_0 =
844 r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) );
845 const double J_0_1 =
846 r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 2, 0 ) );
847 const double J_0_2 =
848 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) +
849 wedge_surf_phy_coords( w, 2, 0 ) ) );
850
851 const double J_1_0 =
852 r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) );
853 const double J_1_1 =
854 r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 2, 1 ) );
855 const double J_1_2 =
856 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) +
857 wedge_surf_phy_coords( w, 2, 1 ) ) );
858
859 const double J_2_0 =
860 r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) );
861 const double J_2_1 =
862 r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 2, 2 ) );
863 const double J_2_2 =
864 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) +
865 wedge_surf_phy_coords( w, 2, 2 ) ) );
866
867 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
868 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
869
870 const double invJ = 1.0 / J_det;
871
872 // inv(J)^T
873 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
874 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
875 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
876
877 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
878 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
879 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
880
881 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
882 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
883 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
884
885 wJ = Kokkos::abs( J_det ); // qw=1
886 }
887
888 // Fold the wJ multiply once
889 const double kwJ = k_eval * wJ;
890
891 // -------------------------
892 // (C) grad_u + div_u as scalars
893 // -------------------------
894 double gu00 = 0.0, gu01 = 0.0, gu02 = 0.0;
895 double gu10 = 0.0, gu11 = 0.0, gu12 = 0.0;
896 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
897 double div_u = 0.0;
898
899 if ( !diagonal_ )
900 {
901// Assemble gu** and div_u
902#pragma unroll
903 for ( int dimj = 0; dimj < 3; ++dimj )
904 {
905#pragma unroll
906 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
907 {
908 // (1b) compute scalar_grad(node_idx) on-the-fly
909 const double gx = dN_ref[node_idx][0];
910 const double gy = dN_ref[node_idx][1];
911 const double gz = dN_ref[node_idx][2];
912
913 const double g0 = i00 * gx + i01 * gy + i02 * gz;
914 const double g1 = i10 * gx + i11 * gy + i12 * gz;
915 const double g2 = i20 * gx + i21 * gy + i22 * gz;
916
917 double E00, E11, E22, sym01, sym02, sym12, gdd;
918 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
919
920 const double s = src_w[dimj][node_idx];
921
922 gu00 += E00 * s;
923 gu01 += sym01 * s;
924 gu02 += sym02 * s;
925 gu10 += sym01 * s;
926 gu11 += E11 * s;
927 gu12 += sym12 * s;
928 gu20 += sym02 * s;
929 gu21 += sym12 * s;
930 gu22 += E22 * s;
931
932 div_u += gdd * s;
933 }
934 }
935
936// Pairing -> accumulate into unique-node array dst8
937#pragma unroll
938 for ( int dimi = 0; dimi < 3; ++dimi )
939 {
940#pragma unroll
941 for ( int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
942 {
943 // (1b) compute scalar_grad(node_idx) on-the-fly
944 const double gx = dN_ref[node_idx][0];
945 const double gy = dN_ref[node_idx][1];
946 const double gz = dN_ref[node_idx][2];
947
948 const double g0 = i00 * gx + i01 * gy + i02 * gz;
949 const double g1 = i10 * gx + i11 * gy + i12 * gz;
950 const double g2 = i20 * gx + i21 * gy + i22 * gz;
951
952 double E00, E11, E22, sym01, sym02, sym12, gdd;
953 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
954
955 const double pairing0 = 2.0 * sym01;
956 const double pairing1 = 2.0 * sym02;
957 const double pairing2 = 2.0 * sym12;
958
959 const int u = WEDGE_TO_UNIQUE[w][node_idx];
960
961 dst8[dimi][u] +=
962 kwJ * ( NEG_TWO_THIRDS * div_u * gdd + pairing0 * gu01 + pairing0 * gu10 +
963 pairing1 * gu02 + pairing1 * gu20 + pairing2 * gu12 + pairing2 * gu21 +
964 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
965 }
966 }
967 }
968
969 // Diagonal / BC loop -> also accumulate into dst8
970 if ( diagonal_ || ( treat_boundary && at_boundary ) )
971 {
972#pragma unroll
973 for ( int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
974 {
975#pragma unroll
976 for ( int node_idx = surface_shift; node_idx < 6 - cmb_shift; ++node_idx )
977 {
978 // (1b) compute scalar_grad(node_idx) on-the-fly
979 const double gx = dN_ref[node_idx][0];
980 const double gy = dN_ref[node_idx][1];
981 const double gz = dN_ref[node_idx][2];
982
983 const double g0 = i00 * gx + i01 * gy + i02 * gz;
984 const double g1 = i10 * gx + i11 * gy + i12 * gz;
985 const double g2 = i20 * gx + i21 * gy + i22 * gz;
986
987 double E00, E11, E22, sym01, sym02, sym12, gdd;
988 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
989
990 const double s = src_w[dim_diagBC][node_idx];
991
992 const double pairing0 = 4.0 * s;
993 const double pairing1 = 2.0 * s;
994
995 const int u = WEDGE_TO_UNIQUE[w][node_idx];
996
997 dst8[dim_diagBC][u] += kwJ * ( pairing0 * ( sym01 * sym01 ) + pairing0 * ( sym02 * sym02 ) +
998 pairing0 * ( sym12 * sym12 ) + pairing1 * ( E00 * E00 ) +
999 pairing1 * ( E11 * E11 ) + pairing1 * ( E22 * E22 ) +
1000 NEG_TWO_THIRDS * ( gdd * gdd ) * s );
1001 }
1002 }
1003 }
1004 } // w
1005
1006 // Final scatter: 8 unique nodes per dim (same result as your original merged scatter)
1007 for ( int dim_add = 0; dim_add < 3; ++dim_add )
1008 {
1009 // u0: (x, y, r)
1010 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
1011
1012 // u1: (x+1, y, r)
1013 Kokkos::atomic_add(
1014 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
1015
1016 // u2: (x, y+1, r)
1017 Kokkos::atomic_add(
1018 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
1019
1020 // u3: (x, y, r+1)
1021 Kokkos::atomic_add(
1022 &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
1023
1024 // u4: (x+1, y, r+1)
1025 Kokkos::atomic_add(
1026 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
1027
1028 // u5: (x, y+1, r+1)
1029 Kokkos::atomic_add(
1030 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
1031
1032 // u6: (x+1, y+1, r)
1033 Kokkos::atomic_add(
1034 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
1035
1036 // u7: (x+1, y+1, r+1)
1037 Kokkos::atomic_add(
1038 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );
1039 }
1040 }
1041 }
1042
1043 /// @brief: For both trial and test space this function sets up a vector:
1044 /// each vector element holds the symmetric gradient (a 3x3 matrix) of the shape function of the corresponding dof
1045 /// (if dimi == dimj, these are the same and we are on the diagonal of the vectorial diffusion operator)
1046 /// Additionally, we compute the scalar factor for the numerical integral comp: determinant of the jacobian,
1047 /// evaluation of the coefficient k on the element and the quadrature weight of the current quad-point.
1048
1049 /// The idea of this function is that the two vectors can be:
1050 /// - accumulated to the result of the local matvec with 2 * num_nodes_per_wedge complexity
1051 /// by scaling the dot product of the trial vec and local src dofs with each element of the test vec
1052 /// (and adding to the dst dofs, this is the fused local matvec).
1053 /// - propagated to the local matrix by an outer product of the two vectors
1054 /// (without applying it to dofs). This is e.g. required to assemble the finest grid local
1055 /// matrix on-the-fly during GCA/Galerkin coarsening.
1056
1057 ///
// NOTE(review): the two output-array parameters originally on Doxygen lines
// 1068-1069 (sym_grad_i / sym_grad_j, see the generated signature in the
// index below the class) were lost in this extraction; all other lines are
// kept verbatim.
1058 KOKKOS_INLINE_FUNCTION void assemble_trial_test_vecs(
1059 const int wedge,
1060 const dense::Vec< ScalarType, VecDim >& quad_point,
1061 const ScalarType quad_weight,
1062 const ScalarT r_1,
1063 const ScalarT r_2,
1064 dense::Vec< ScalarT, 3 > ( *wedge_phy_surf )[3],
1065 const dense::Vec< ScalarT, 6 >* k_local_hex,
1066 const int dimi,
1067 const int dimj,
1070 ScalarType& jdet_keval_quadweight ) const
1071 {
// Jacobian of the wedge mapping at this quadrature point; its determinant
// and inverse-transpose drive the physical-space gradients below.
1072 dense::Mat< ScalarType, VecDim, VecDim > J = jac( wedge_phy_surf[wedge], r_1, r_2, quad_point );
1073 const auto det = J.det();
1074 const auto abs_det = Kokkos::abs( det );
1075 const dense::Mat< ScalarType, VecDim, VecDim > J_inv_transposed = J.inv_transposed( det );
1076
1077 // dot of coeff dofs and element-local shape functions to evaluate the coefficient on the current element
1078 ScalarType k_eval = 0.0;
1079 for ( int k = 0; k < num_nodes_per_wedge; k++ )
1080 {
1081 k_eval += shape( k, quad_point ) * k_local_hex[wedge]( k );
1082 }
1083
// Per-node symmetric gradients for the trial (dimi) and test (dimj) component.
1084 for ( int k = 0; k < num_nodes_per_wedge; k++ )
1085 {
1086 sym_grad_i[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimi );
1087 sym_grad_j[k] = symmetric_grad( J_inv_transposed, quad_point, k, dimj );
1088 }
// Combined scalar factor: quadrature weight * coefficient * |det J|.
1089 jdet_keval_quadweight = quad_weight * k_eval * abs_det;
1090 }
1091
1092 /// @brief assemble the local matrix and return it for a given element, wedge, and vectorial component
1093 /// (determined by dimi, dimj)
// NOTE(review): the return-type/name line (Doxygen 1095) and the local
// declarations on lines 1104, 1111, 1115 and 1123-1126 (wedge_phy_surf,
// k_local_hex, the accumulated matrix A, the quadrature tables, the
// sym_grad_i/sym_grad_j scratch arrays, and the assemble_trial_test_vecs
// callee name) were lost in this extraction; remaining lines are verbatim.
1094 KOKKOS_INLINE_FUNCTION
1096 const int local_subdomain_id,
1097 const int x_cell,
1098 const int y_cell,
1099 const int r_cell,
1100 const int wedge ) const
1101 {
1102 // Gather surface points for each wedge.
1103 // TODO gather this for only 1 wedge
1105 wedge_surface_physical_coords( wedge_phy_surf, grid_, local_subdomain_id, x_cell, y_cell );
1106
1107 // Gather wedge radii.
1108 const ScalarT r_1 = radii_( local_subdomain_id, r_cell );
1109 const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );
1110
// Element-local coefficient dofs for both wedges of this hex cell.
1112 extract_local_wedge_scalar_coefficients( k_local_hex, local_subdomain_id, x_cell, y_cell, r_cell, k_ );
1113
1114 // Compute the local element matrix.
// 3x3 vector-component blocks; each (dimi, dimj) block is accumulated over
// the quadrature points as the outer product of trial/test gradient vecs.
1116 for ( int dimi = 0; dimi < 3; ++dimi )
1117 {
1118 for ( int dimj = 0; dimj < 3; ++dimj )
1119 {
1120 // spatial dimensions: quadrature points and wedge
1121 for ( int q = 0; q < num_quad_points; q++ )
1122 {
1125 ScalarType jdet_keval_quadweight = 0;
1127 wedge,
1128 quad_points[q],
1129 quad_weights[q],
1130 r_1,
1131 r_2,
1132 wedge_phy_surf,
1133 k_local_hex,
1134 dimi,
1135 dimj,
1136 sym_grad_i,
1137 sym_grad_j,
1138 jdet_keval_quadweight );
1139
1140 // propagate on local matrix by outer product of test and trial vecs
1141 for ( int i = 0; i < num_nodes_per_wedge; i++ )
1142 {
1143 for ( int j = 0; j < num_nodes_per_wedge; j++ )
1144 {
1145 A( i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge ) +=
1146 jdet_keval_quadweight *
1147 ( 2 * sym_grad_j[j].double_contract( sym_grad_i[i] ) -
1148 2.0 / 3.0 * sym_grad_j[j]( dimj, dimj ) * sym_grad_i[i]( dimi, dimi ) );
1149 // for the div, we just extract the component from the gradient vector
1150 }
1151 }
1152 }
1153 }
1154 }
1155
1156 return A;
1157 }
1158};
1159
1162
1163} // namespace terra::fe::wedge::operators::shell::epsdivdiv_history
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56
terra::grid::Grid4DDataMatrices< ScalarType, LocalMatrixDim, LocalMatrixDim, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:38
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Getter for coefficient.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:137
void operator()(const Team &team) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:467
void atomic_add_dst_wedge_node(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int dim, const int w, const int node, const double val) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:308
void assemble_trial_test_vecs(const int wedge, const dense::Vec< ScalarType, VecDim > &quad_point, const ScalarType quad_weight, const ScalarT r_1, const ScalarT r_2, dense::Vec< ScalarT, 3 >(*wedge_phy_surf)[3], const dense::Vec< ScalarT, 6 > *k_local_hex, const int dimi, const int dimj, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_i, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_j, ScalarType &jdet_keval_quadweight) const
For both trial and test spaces, this function sets up a vector: each vector element holds the symmetr...
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:1058
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:143
grid::Grid3DDataVec< ScalarT, 3 > get_grid()
Getter for grid member.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:146
ScalarT ScalarType
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:36
void set_stored_matrix_mode(linalg::OperatorStoredMatrixMode operator_stored_matrix_mode, int level_range, grid::Grid4DDataScalar< ScalarType > GCAElements)
allocates memory for the local matrices
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:161
void column_grad_to_sym(const int dim, const double g0, const double g1, const double g2, double &E00, double &E11, double &E22, double &sym01, double &sym02, double &sym12, double &gdd) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:399
static size_t team_shmem_size(const int)
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:460
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix; if there are stored local matrices, the desired local matrix is loaded and r...
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:196
double k_dof(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int w, const int node) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:292
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:125
EpsilonDivDivKerngenV04ShmemCoords(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, BoundaryConditions bcs, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:82
void wedge_node_offsets(const int w, const int node, int &dx, int &dy, int &dr) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:260
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > assemble_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
assemble the local matrix and return it for a given element, wedge, and vectorial component (determin...
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:1095
Kokkos::TeamPolicy<>::member_type Team
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:452
bool has_flag(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, grid::shell::ShellBoundaryFlag flag) const
Getter for mask member.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:150
void set_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > &mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:180
void set_diagonal(bool v)
Setter/getter for the diagonal member.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:134
double src_dof(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int dim, const int w, const int node) const
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:276
linalg::OperatorStoredMatrixMode get_stored_matrix_mode()
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:176
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:140
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:218
static constexpr int LocalMatrixDim
Definition epsilon_divdiv_kerngen_v04_shmem_coords.hpp:37
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518
const std::map< SubdomainInfo, std::tuple< LocalSubdomainIdx, SubdomainNeighborhood > > & subdomains() const
Definition spherical_shell.hpp:2650
const DomainInfo & domain_info() const
Returns a const reference.
Definition spherical_shell.hpp:2647
Information about the thick spherical shell mesh.
Definition spherical_shell.hpp:780
const std::vector< double > & radii() const
Definition spherical_shell.hpp:845
int diamond_lateral_refinement_level() const
Definition spherical_shell.hpp:843
int subdomain_num_nodes_radially() const
Equivalent to calling subdomain_num_nodes_radially( subdomain_refinement_level() )
Definition spherical_shell.hpp:861
int subdomain_num_nodes_per_side_laterally() const
Equivalent to calling subdomain_num_nodes_per_side_laterally( subdomain_refinement_level() )
Definition spherical_shell.hpp:852
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288
bool has_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Checks for presence of a local matrix for a certain element.
Definition local_matrix_storage.hpp:223
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix; if there are stored local matrices, the desired local matrix is loaded and r...
Definition local_matrix_storage.hpp:175
void set_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > mat) const
Set the local matrix stored in the operator.
Definition local_matrix_storage.hpp:118
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364
Concept for types that can be used as Galerkin coarse-grid operators in a multigrid hierarchy....
Definition operator.hpp:81
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242
Definition epsilon_divdiv_kerngen_v01_initial.hpp:16
constexpr void quad_felippa_1x1_quad_points(dense::Vec< T, 3 >(&quad_points)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:36
constexpr void quad_felippa_1x1_quad_weights(T(&quad_weights)[quad_felippa_1x1_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:43
constexpr int quad_felippa_1x1_num_quad_points
Definition wedge/quadrature/quadrature.hpp:32
constexpr int num_nodes_per_wedge_surface
Definition kernel_helpers.hpp:6
constexpr dense::Mat< T, 3, 3 > symmetric_grad(const dense::Mat< T, 3, 3 > &J_inv_transposed, const dense::Vec< T, 3 > &quad_point, const int dof, const int dim)
Returns the symmetric gradient of the shape function of a dof at a quadrature point.
Definition integrands.hpp:685
void wedge_surface_physical_coords(dense::Vec< T, 3 >(&wedge_surf_phy_coords)[num_wedges_per_hex_cell][num_nodes_per_wedge_surface], const grid::Grid3DDataVec< T, 3 > &lateral_grid, const int local_subdomain_id, const int x_cell, const int y_cell)
Extracts the (unit sphere) surface vertex coords of the two wedges of a hex cell.
Definition kernel_helpers.hpp:26
constexpr void reorder_local_dofs(const DoFOrdering doo_from, const DoFOrdering doo_to, dense::Vec< ScalarT, 18 > &dofs)
Definition kernel_helpers.hpp:619
void atomically_add_local_wedge_vector_coefficients(const grid::Grid4DDataVec< T, VecDim > &global_coefficients, const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const dense::Vec< T, 6 > local_coefficients[2])
Performs an atomic add of the two local wedge coefficient vectors of a hex cell into the global coeff...
Definition kernel_helpers.hpp:465
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5
void extract_local_wedge_scalar_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const grid::Grid4DDataScalar< T > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:306
void extract_local_wedge_vector_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const grid::Grid4DDataVec< T, VecDim > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:356
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7
constexpr T shape(const int node_idx, const T xi, const T eta, const T zeta)
(Tensor-product) Shape function.
Definition integrands.hpp:146
constexpr dense::Mat< T, 3, 3 > jac(const dense::Vec< T, 3 > &p1_phy, const dense::Vec< T, 3 > &p2_phy, const dense::Vec< T, 3 > &p3_phy, const T r_1, const T r_2, const T xi, const T eta, const T zeta)
Definition integrands.hpp:657
dense::Vec< typename CoordsShellType::value_type, 3 > coords(const int subdomain, const int x, const int y, const int r, const CoordsShellType &coords_shell, const CoordsRadiiType &coords_radii)
Definition spherical_shell.hpp:2871
BoundaryConditionMapping[2] BoundaryConditions
Definition shell/bit_masks.hpp:37
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12
BoundaryConditionFlag get_boundary_condition_flag(const BoundaryConditions bcs, ShellBoundaryFlag sbf)
Retrieve the boundary condition flag that is associated with a location in the shell e....
Definition shell/bit_masks.hpp:42
BoundaryConditionFlag
FlagLike that indicates the type of boundary condition
Definition shell/bit_masks.hpp:25
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21
dense::Mat< ScalarType, 3, 3 > trafo_mat_cartesian_to_normal_tangential(const dense::Vec< ScalarType, 3 > &n_input)
Constructs a robust orthonormal transformation matrix from Cartesian to (normal–tangential–tangential...
Definition local_basis_trafo_normal_tangential.hpp:36
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30
@ Replace
Overwrite the destination vector.
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47
@ Selective
Use stored matrices on selected, marked elements only, assemble on all others.
@ Full
Use stored matrices on all elements.
@ Off
Do not use stored matrices.
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40
@ CommunicateAdditively
Communicate and add results.
constexpr bool has_flag(E mask_value, E flag) noexcept
Checks if a bitmask value contains a specific flag.
Definition bit_masking.hpp:43
detail::PrefixCout logroot([]() { return detail::log_prefix();})
std::ostream subclass that just logs on root and adds a timestamp for each line.
Definition mat.hpp:10
void fill(const T value)
Definition mat.hpp:201
constexpr Mat diagonal() const
Definition mat.hpp:377
constexpr Mat< T, Cols, Rows > transposed() const
Definition mat.hpp:187
Mat & hadamard_product(const Mat &mat)
Definition mat.hpp:213
T double_contract(const Mat &mat)
Definition mat.hpp:226
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51
auto extent(int i) const
Definition grid_types.hpp:75