66 using Team = Kokkos::TeamPolicy<>::member_type;
78 BoundaryConditions bcs_;
99 int local_subdomains_;
102 int lat_refinement_level_;
121 enum class KernelPath
124 FastDirichletNeumann,
127 KernelPath kernel_path_ = KernelPath::FastDirichletNeumann;
138 void update_kernel_path_flag_host_only()
140 const BoundaryConditionFlag cmb_bc = get_boundary_condition_flag( bcs_, CMB );
141 const BoundaryConditionFlag surface_bc = get_boundary_condition_flag( bcs_, SURFACE );
143 const bool has_freeslip = ( cmb_bc == FREESLIP ) || ( surface_bc == FREESLIP );
146 if ( has_stored_matrices )
147 kernel_path_ = KernelPath::Slow;
148 else if ( has_freeslip )
149 kernel_path_ = KernelPath::FastFreeslip;
151 kernel_path_ = KernelPath::FastDirichletNeumann;
161 BoundaryConditions bcs,
172 , diagonal_( diagonal )
173 , operator_apply_mode_( operator_apply_mode )
174 , operator_communication_mode_( operator_communication_mode )
175 , operator_stored_matrix_mode_( operator_stored_matrix_mode )
176 , recv_buffers_( domain )
177 , comm_plan_( domain )
186 local_subdomains_ = domain_.
subdomains().size();
194 lat_tiles_ = ( hex_lat_ + lat_tile_ - 1 ) / lat_tile_;
195 r_tiles_ = ( hex_rad_ + r_tile_ - 1 ) / r_tile_;
197 team_size_ = lat_tile_ * lat_tile_ * r_tile_;
198 blocks_ = local_subdomains_ * lat_tiles_ * lat_tiles_ * r_tiles_;
200 r_min_ = domain_info.
radii()[0];
201 r_max_ = domain_info.
radii()[domain_info.
radii().size() - 1];
204 update_kernel_path_flag_host_only();
206 util::logroot <<
"[EpsilonDivDiv] tile size (x,y,r)=(" << lat_tile_ <<
"," << lat_tile_ <<
"," << r_tile_ <<
")"
208 util::logroot <<
"[EpsilonDivDiv] number of tiles (x,y,r)=(" << lat_tiles_ <<
"," << lat_tiles_ <<
","
209 << r_tiles_ <<
"), team_size=" << team_size_ <<
", blocks=" << blocks_ << std::endl;
210 const char* path_name = ( kernel_path_ == KernelPath::Slow ) ?
"slow" :
211 ( kernel_path_ == KernelPath::FastFreeslip ) ?
"fast-freeslip" :
212 "fast-dirichlet-neumann";
213 util::logroot <<
"[EpsilonDivDiv] kernel path = " << path_name << std::endl;
220 operator_apply_mode_ = operator_apply_mode;
221 operator_communication_mode_ = operator_communication_mode;
232 update_kernel_path_flag_host_only();
240 KOKKOS_INLINE_FUNCTION
242 const int local_subdomain_id,
248 return util::has_flag( mask_( local_subdomain_id, x_cell, y_cell, r_cell ), flag );
256 operator_stored_matrix_mode_ = operator_stored_matrix_mode;
261 domain_, operator_stored_matrix_mode_, level_range, GCAElements );
265 update_kernel_path_flag_host_only();
267 util::logroot <<
"[EpsilonDivDiv] (set_stored_matrix_mode) kernel path = "
268 << (( kernel_path_ == KernelPath::Slow ) ?
"slow" :
269 ( kernel_path_ == KernelPath::FastFreeslip ) ?
"fast-freeslip" :
270 "fast-dirichlet-neumann")
276 KOKKOS_INLINE_FUNCTION
278 const int local_subdomain_id,
286 local_matrix_storage_.
set_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge, mat );
289 KOKKOS_INLINE_FUNCTION
291 const int local_subdomain_id,
295 const int wedge )
const
299 if ( !local_matrix_storage_.
has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge ) )
301 Kokkos::abort(
"No matrix found at that spatial index." );
303 return local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
333 util::Timer timer_kernel(
"epsilon_divdiv_kernel" );
334 Kokkos::TeamPolicy<> policy( blocks_, team_size_ );
337 if ( kernel_path_ != KernelPath::Slow )
339 policy.set_scratch_size( 0, Kokkos::PerTeam(
team_shmem_size( team_size_ ) ) );
343 if ( kernel_path_ == KernelPath::Slow )
345 Kokkos::parallel_for(
346 "epsilon_divdiv_apply_kernel_slow", policy, KOKKOS_CLASS_LAMBDA(
const Team& team ) {
347 this->run_team_slow( team );
350 else if ( kernel_path_ == KernelPath::FastFreeslip )
352 Kokkos::parallel_for(
353 "epsilon_divdiv_apply_kernel_fast_freeslip", policy, KOKKOS_CLASS_LAMBDA(
const Team& team ) {
354 this->run_team_fast_freeslip( team );
360 Kokkos::TeamPolicy< > dn_policy( blocks_, team_size_ );
361 dn_policy.set_scratch_size( 0, Kokkos::PerTeam(
team_shmem_size( team_size_ ) ) );
362 Kokkos::parallel_for(
363 "epsilon_divdiv_apply_kernel_fast_dirichlet_neumann",
365 KOKKOS_CLASS_LAMBDA(
const Team& team ) { this->run_team_fast_dirichlet_neumann( team ); } );
388 KOKKOS_INLINE_FUNCTION
402 E00 = E11 = E22 = sym01 = sym02 = sym12 = gdd = 0.0;
433 KOKKOS_INLINE_FUNCTION
436 const int nlev = r_tile_ + 1;
437 const int n = lat_tile_ + 1;
438 const int nxy = n * n;
440 const size_t nscalars =
441 size_t( nxy ) * 3 + size_t( nxy ) * 3 * nlev + size_t( nxy ) * nlev + size_t( nlev ) + 1;
453 KOKKOS_INLINE_FUNCTION
454 void decode_team_indices(
456 int& local_subdomain_id,
467 int tmp = team.league_rank();
469 const int r_tile_id = tmp % r_tiles_;
472 const int lat_y_id = tmp % lat_tiles_;
475 const int lat_x_id = tmp % lat_tiles_;
478 local_subdomain_id = tmp;
480 x0 = lat_x_id * lat_tile_;
481 y0 = lat_y_id * lat_tile_;
482 r0 = r_tile_id * r_tile_;
484 const int tid = team.team_rank();
486 tx = ( tid / r_tile_ ) % lat_tile_;
487 ty = tid / ( r_tile_ * lat_tile_ );
509 KOKKOS_INLINE_FUNCTION
510 void run_team_slow(
const Team& team )
const
512 int local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell;
513 decode_team_indices( team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell );
518 const bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
519 const bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
522 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
528 KOKKOS_INLINE_FUNCTION
529 void run_team_fast_dirichlet_neumann(
const Team& team )
const
531 int local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell;
532 decode_team_indices( team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell );
537 const bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
538 const bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
540 operator_fast_dirichlet_neumann_path(
541 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
547 KOKKOS_INLINE_FUNCTION
548 void run_team_fast_freeslip(
const Team& team )
const
550 int local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell;
551 decode_team_indices( team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell );
556 const bool at_cmb = has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
557 const bool at_surface = has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
559 operator_fast_freeslip_path(
560 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
564 KOKKOS_INLINE_FUNCTION
565 void operator_slow_path(
567 const int local_subdomain_id,
578 const bool at_surface )
const
588 if ( x_cell >= hex_lat_ || y_cell >= hex_lat_ || r_cell >= hex_rad_ )
595 A[0] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
596 A[1] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
600 if ( local_matrix_storage_.
has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) &&
601 local_matrix_storage_.
has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) )
603 A[0] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
604 A[1] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
619 for (
int dimj = 0; dimj < 3; dimj++ )
633 dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > boundary_mask;
634 boundary_mask.fill( 1.0 );
636 bool freeslip_reorder =
false;
639 if ( at_cmb || at_surface )
644 if ( bcf == DIRICHLET )
646 for (
int dimi = 0; dimi < 3; ++dimi )
648 for (
int dimj = 0; dimj < 3; ++dimj )
654 if ( ( at_cmb && ( ( dimi == dimj && i != j && ( i < 3 || j < 3 ) ) ||
655 ( dimi != dimj && ( i < 3 || j < 3 ) ) ) ) ||
656 ( at_surface && ( ( dimi == dimj && i != j && ( i >= 3 || j >= 3 ) ) ||
657 ( dimi != dimj && ( i >= 3 || j >= 3 ) ) ) ) )
667 else if ( bcf == FREESLIP )
669 freeslip_reorder =
true;
672 for (
int wedge = 0; wedge < 2; ++wedge )
674 for (
int dimi = 0; dimi < 3; ++dimi )
678 for (
int dimj = 0; dimj < 3; ++dimj )
682 A_tmp[wedge]( node_idxi * 3 + dimi, node_idxj * 3 + dimj ) = A[wedge](
692 constexpr int layer_hex_offset_x[2][3] = { { 0, 1, 0 }, { 1, 0, 1 } };
693 constexpr int layer_hex_offset_y[2][3] = { { 0, 0, 1 }, { 1, 1, 0 } };
695 for (
int wedge = 0; wedge < 2; ++wedge )
699 R[wedge]( i, i ) = 1.0;
702 for (
int boundary_node_idx = 0; boundary_node_idx < 3; boundary_node_idx++ )
706 x_cell + layer_hex_offset_x[wedge][boundary_node_idx],
707 y_cell + layer_hex_offset_y[wedge][boundary_node_idx],
708 r_cell + ( at_cmb ? 0 : 1 ),
714 const int offset_in_R = at_cmb ? 0 : 9;
715 for (
int dimi = 0; dimi < 3; ++dimi )
717 for (
int dimj = 0; dimj < 3; ++dimj )
720 offset_in_R + boundary_node_idx * 3 + dimi,
721 offset_in_R + boundary_node_idx * 3 + dimj ) = R_i( dimi, dimj );
726 A[wedge] = R[wedge] * A_tmp[wedge] * R[wedge].
transposed();
728 auto src_tmp = R[wedge] * src[wedge];
729 for (
int i = 0; i < 18; ++i )
731 src[wedge]( i ) = src_tmp( i );
734 const int node_start = at_surface ? 3 : 0;
735 const int node_end = at_surface ? 6 : 3;
737 for (
int node_idx = node_start; node_idx < node_end; node_idx++ )
739 const int idx = node_idx * 3;
740 for (
int k = 0; k < 18; ++k )
744 boundary_mask( idx, k ) = 0.0;
745 boundary_mask( k, idx ) = 0.0;
765 dst[0] = A[0] * src[0];
766 dst[1] = A[1] * src[1];
768 if ( freeslip_reorder )
772 dst_tmp[1] = R[1].transposed() * dst[1];
774 for (
int i = 0; i < 18; ++i )
776 dst[0]( i ) = dst_tmp[0]( i );
777 dst[1]( i ) = dst_tmp[1]( i );
784 for (
int dimi = 0; dimi < 3; dimi++ )
791 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
796 KOKKOS_INLINE_FUNCTION
797 void operator_fast_dirichlet_neumann_path(
799 const int local_subdomain_id,
810 const bool at_surface )
const
812 const int nlev = r_tile_ + 1;
813 const int nxy = ( lat_tile_ + 1 ) * ( lat_tile_ + 1 );
816 reinterpret_cast< double*
>( team.team_shmem().get_shmem(
team_shmem_size( team.team_size() ) ) );
818 using ScratchCoords =
819 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
820 using ScratchSrc = Kokkos::
821 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
823 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
825 ScratchCoords coords_sh( shmem, nxy, 3 );
828 ScratchSrc src_sh( shmem, nxy, 3, nlev );
829 shmem += nxy * 3 * nlev;
831 ScratchK k_sh( shmem, nxy, nlev );
835 Kokkos::View< double*, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >(
838 auto node_id = [&](
int nx,
int ny ) ->
int {
return nx + ( lat_tile_ + 1 ) * ny; };
840 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, nxy ), [&](
int n ) {
841 const int dxn = n % ( lat_tile_ + 1 );
842 const int dyn = n / ( lat_tile_ + 1 );
843 const int xi = x0 + dxn;
844 const int yi = y0 + dyn;
846 if ( xi <= hex_lat_ && yi <= hex_lat_ )
848 coords_sh( n, 0 ) = grid_( local_subdomain_id, xi, yi, 0 );
849 coords_sh( n, 1 ) = grid_( local_subdomain_id, xi, yi, 1 );
850 coords_sh( n, 2 ) = grid_( local_subdomain_id, xi, yi, 2 );
854 coords_sh( n, 0 ) = coords_sh( n, 1 ) = coords_sh( n, 2 ) = 0.0;
858 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, nlev ), [&](
int lvl ) {
859 const int rr = r0 + lvl;
860 r_sh( lvl ) = ( rr <= hex_rad_ ) ? radii_( local_subdomain_id, rr ) : 0.0;
863 const int total_pairs = nxy * nlev;
864 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, total_pairs ), [&](
int t ) {
865 const int node = t / nlev;
866 const int lvl = t - node * nlev;
868 const int dxn = node % ( lat_tile_ + 1 );
869 const int dyn = node / ( lat_tile_ + 1 );
871 const int xi = x0 + dxn;
872 const int yi = y0 + dyn;
873 const int rr = r0 + lvl;
875 if ( xi <= hex_lat_ && yi <= hex_lat_ && rr <= hex_rad_ )
877 k_sh( node, lvl ) = k_( local_subdomain_id, xi, yi, rr );
878 src_sh( node, 0, lvl ) = src_( local_subdomain_id, xi, yi, rr, 0 );
879 src_sh( node, 1, lvl ) = src_( local_subdomain_id, xi, yi, rr, 1 );
880 src_sh( node, 2, lvl ) = src_( local_subdomain_id, xi, yi, rr, 2 );
884 k_sh( node, lvl ) = 0.0;
885 src_sh( node, 0, lvl ) = src_sh( node, 1, lvl ) = src_sh( node, 2, lvl ) = 0.0;
891 if ( x_cell >= hex_lat_ || y_cell >= hex_lat_ || r_cell >= hex_rad_ )
895 const double r_0 = r_sh( lvl0 );
896 const double r_1 = r_sh( lvl0 + 1 );
898 const bool at_boundary = at_cmb || at_surface;
899 bool treat_boundary_dirichlet =
false;
906 const int cmb_shift = ( ( at_boundary && treat_boundary_dirichlet && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
908 ( ( at_boundary && treat_boundary_dirichlet && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
910 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
911 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
912 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
914 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
915 { 0, 1, 2, 3, 4, 5 },
919 constexpr double ONE_THIRD = 1.0 / 3.0;
920 constexpr double ONE_SIXTH = 1.0 / 6.0;
921 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
923 static constexpr double dN_ref[6][3] = {
924 { -0.5, -0.5, -ONE_SIXTH },
925 { 0.5, 0.0, -ONE_SIXTH },
926 { 0.0, 0.5, -ONE_SIXTH },
927 { -0.5, -0.5, ONE_SIXTH },
928 { 0.5, 0.0, ONE_SIXTH },
929 { 0.0, 0.5, ONE_SIXTH } };
931 const int n00 = node_id( tx, ty );
932 const int n01 = node_id( tx, ty + 1 );
933 const int n10 = node_id( tx + 1, ty );
934 const int n11 = node_id( tx + 1, ty + 1 );
936 for (
int w = 0; w < 2; ++w )
938 const int v0 = w == 0 ? n00 : n11;
939 const int v1 = w == 0 ? n10 : n01;
940 const int v2 = w == 0 ? n01 : n10;
944 for (
int node = 0; node < 6; ++node )
946 const int ddx = WEDGE_NODE_OFF[w][node][0];
947 const int ddy = WEDGE_NODE_OFF[w][node][1];
948 const int ddr = WEDGE_NODE_OFF[w][node][2];
950 const int nid = node_id( tx + ddx, ty + ddy );
951 const int lvl = lvl0 + ddr;
953 k_sum += k_sh( nid, lvl );
955 const double k_eval = ONE_SIXTH * k_sum;
958 double i00, i01, i02;
959 double i10, i11, i12;
960 double i20, i21, i22;
963 const double half_dr = 0.5 * ( r_1 - r_0 );
964 const double r_mid = 0.5 * ( r_0 + r_1 );
966 const double J_0_0 = r_mid * ( -coords_sh( v0, 0 ) + coords_sh( v1, 0 ) );
967 const double J_0_1 = r_mid * ( -coords_sh( v0, 0 ) + coords_sh( v2, 0 ) );
968 const double J_0_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 0 ) + coords_sh( v1, 0 ) + coords_sh( v2, 0 ) ) );
970 const double J_1_0 = r_mid * ( -coords_sh( v0, 1 ) + coords_sh( v1, 1 ) );
971 const double J_1_1 = r_mid * ( -coords_sh( v0, 1 ) + coords_sh( v2, 1 ) );
972 const double J_1_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 1 ) + coords_sh( v1, 1 ) + coords_sh( v2, 1 ) ) );
974 const double J_2_0 = r_mid * ( -coords_sh( v0, 2 ) + coords_sh( v1, 2 ) );
975 const double J_2_1 = r_mid * ( -coords_sh( v0, 2 ) + coords_sh( v2, 2 ) );
976 const double J_2_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 2 ) + coords_sh( v1, 2 ) + coords_sh( v2, 2 ) ) );
978 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
979 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
981 const double invJ = 1.0 /
J_det;
983 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
984 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
985 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
986 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
987 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
988 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
989 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
990 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
991 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
993 wJ = Kokkos::abs( J_det );
996 const double kwJ =
k_eval * wJ;
999 double dst_w[3][6] = { 0.0 };
1002 double gu10 = 0.0, gu11 = 0.0;
1003 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
1013 const double gx = dN_ref[n][0];
1014 const double gy = dN_ref[n][1];
1015 const double gz = dN_ref[n][2];
1016 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1017 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1018 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1020 const int ddx = WEDGE_NODE_OFF[w][n][0];
1021 const int ddy = WEDGE_NODE_OFF[w][n][1];
1022 const int ddr = WEDGE_NODE_OFF[w][n][2];
1023 const int nid = node_id( tx + ddx, ty + ddy );
1024 const int lvl = lvl0 + ddr;
1026 const double s0 = src_sh( nid, 0, lvl );
1027 const double s1 = src_sh( nid, 1, lvl );
1028 const double s2 = src_sh( nid, 2, lvl );
1033 gu10 += 0.5 * (
g1 * s0 + g0 * s1 );
1034 gu20 += 0.5 * (
g2 * s0 + g0 * s2 );
1035 gu21 += 0.5 * (
g2 * s1 +
g1 * s2 );
1044 const double gx = dN_ref[n][0];
1045 const double gy = dN_ref[n][1];
1046 const double gz = dN_ref[n][2];
1047 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1048 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1049 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1052 kwJ * ( 2.0 * ( g0 * gu00 +
g1 * gu10 +
g2 * gu20 ) + NEG_TWO_THIRDS * g0 * div_u );
1054 kwJ * ( 2.0 * ( g0 * gu10 +
g1 * gu11 +
g2 * gu21 ) + NEG_TWO_THIRDS * g1 * div_u );
1056 kwJ * ( 2.0 * ( g0 * gu20 +
g1 * gu21 +
g2 * gu22 ) + NEG_TWO_THIRDS * g2 * div_u );
1060 if ( diagonal_ || ( treat_boundary_dirichlet && at_boundary ) )
1064 for (
int n = surface_shift; n < 6 -
cmb_shift; ++n )
1066 const double gx = dN_ref[n][0];
1067 const double gy = dN_ref[n][1];
1068 const double gz = dN_ref[n][2];
1069 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1070 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1071 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1072 const double gg = g0 * g0 +
g1 *
g1 +
g2 *
g2;
1074 const int ddx = WEDGE_NODE_OFF[w][n][0];
1075 const int ddy = WEDGE_NODE_OFF[w][n][1];
1076 const int ddr = WEDGE_NODE_OFF[w][n][2];
1077 const int nid = node_id( tx + ddx, ty + ddy );
1078 const int lvl = lvl0 + ddr;
1080 const double s0 = src_sh( nid, 0, lvl );
1081 const double s1 = src_sh( nid, 1, lvl );
1082 const double s2 = src_sh( nid, 2, lvl );
1083 dst_w[0][n] += kwJ * s0 * ( gg + ONE_THIRD * g0 * g0 );
1084 dst_w[1][n] += kwJ * s1 * ( gg + ONE_THIRD *
g1 *
g1 );
1085 dst_w[2][n] += kwJ * s2 * ( gg + ONE_THIRD *
g2 *
g2 );
1091 for (
int n = 0; n < 6; ++n )
1093 const int ddx = WEDGE_NODE_OFF[w][n][0];
1094 const int ddy = WEDGE_NODE_OFF[w][n][1];
1095 const int ddr = WEDGE_NODE_OFF[w][n][2];
1096 for (
int dim_add = 0; dim_add < 3; ++dim_add )
1099 &dst_( local_subdomain_id, x_cell + ddx, y_cell + ddy, r_cell + ddr, dim_add ),
1100 dst_w[dim_add][n] );
1108 KOKKOS_INLINE_FUNCTION
1109 void normalize3(
double& x,
double& y,
double& z )
const
1111 const double n2 = x * x + y * y + z * z;
1114 const double invn = 1.0 / Kokkos::sqrt( n2 );
1121 KOKKOS_INLINE_FUNCTION
1122 void project_tangential_inplace(
1130 const double dot = nx * ux + ny * uy + nz * uz;
1136 KOKKOS_INLINE_FUNCTION
1137 void operator_fast_freeslip_path(
1139 const int local_subdomain_id,
1150 const bool at_surface )
const
1152 const int nlev = r_tile_ + 1;
1153 const int nxy = ( lat_tile_ + 1 ) * ( lat_tile_ + 1 );
1156 reinterpret_cast< double*
>( team.team_shmem().get_shmem(
team_shmem_size( team.team_size() ) ) );
1158 using ScratchCoords =
1159 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
1160 using ScratchSrc = Kokkos::
1161 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
1163 Kokkos::View< double**, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
1165 ScratchCoords coords_sh( shmem, nxy, 3 );
1168 ScratchSrc src_sh( shmem, nxy, 3, nlev );
1169 shmem += nxy * 3 * nlev;
1171 ScratchK k_sh( shmem, nxy, nlev );
1172 shmem += nxy * nlev;
1175 Kokkos::View< double*, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >(
1178 auto node_id = [&](
int nx,
int ny ) ->
int {
return nx + ( lat_tile_ + 1 ) * ny; };
1180 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, nxy ), [&](
int n ) {
1181 const int dxn = n % ( lat_tile_ + 1 );
1182 const int dyn = n / ( lat_tile_ + 1 );
1183 const int xi = x0 + dxn;
1184 const int yi = y0 + dyn;
1186 if ( xi <= hex_lat_ && yi <= hex_lat_ )
1188 coords_sh( n, 0 ) = grid_( local_subdomain_id, xi, yi, 0 );
1189 coords_sh( n, 1 ) = grid_( local_subdomain_id, xi, yi, 1 );
1190 coords_sh( n, 2 ) = grid_( local_subdomain_id, xi, yi, 2 );
1194 coords_sh( n, 0 ) = coords_sh( n, 1 ) = coords_sh( n, 2 ) = 0.0;
1198 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, nlev ), [&](
int lvl ) {
1199 const int rr = r0 + lvl;
1200 r_sh( lvl ) = ( rr <= hex_rad_ ) ? radii_( local_subdomain_id, rr ) : 0.0;
1203 const int total_pairs = nxy * nlev;
1204 Kokkos::parallel_for( Kokkos::TeamThreadRange( team, total_pairs ), [&](
int t ) {
1205 const int node = t / nlev;
1206 const int lvl = t - node * nlev;
1208 const int dxn = node % ( lat_tile_ + 1 );
1209 const int dyn = node / ( lat_tile_ + 1 );
1211 const int xi = x0 + dxn;
1212 const int yi = y0 + dyn;
1213 const int rr = r0 + lvl;
1215 if ( xi <= hex_lat_ && yi <= hex_lat_ && rr <= hex_rad_ )
1217 k_sh( node, lvl ) = k_( local_subdomain_id, xi, yi, rr );
1218 src_sh( node, 0, lvl ) = src_( local_subdomain_id, xi, yi, rr, 0 );
1219 src_sh( node, 1, lvl ) = src_( local_subdomain_id, xi, yi, rr, 1 );
1220 src_sh( node, 2, lvl ) = src_( local_subdomain_id, xi, yi, rr, 2 );
1224 k_sh( node, lvl ) = 0.0;
1225 src_sh( node, 0, lvl ) = src_sh( node, 1, lvl ) = src_sh( node, 2, lvl ) = 0.0;
1229 team.team_barrier();
1231 if ( x_cell >= hex_lat_ || y_cell >= hex_lat_ || r_cell >= hex_rad_ )
1234 const int lvl0 = tr;
1235 const double r_0 = r_sh( lvl0 );
1236 const double r_1 = r_sh( lvl0 + 1 );
1241 const bool cmb_freeslip = at_cmb && ( cmb_bc ==
FREESLIP );
1242 const bool surf_freeslip = at_surface && ( surface_bc ==
FREESLIP );
1243 const bool cmb_dirichlet = at_cmb && ( cmb_bc ==
DIRICHLET );
1244 const bool surface_dirichlet = at_surface && ( surface_bc ==
DIRICHLET );
1246 const int cmb_shift = ( ( cmb_dirichlet && ( !diagonal_ ) ) ? 3 : 0 );
1247 const int surface_shift = ( ( surface_dirichlet && ( !diagonal_ ) ) ? 3 : 0 );
1249 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
1250 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
1251 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
1253 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
1254 { 0, 1, 2, 3, 4, 5 },
1255 { 6, 2, 1, 7, 5, 4 }
1258 constexpr double ONE_THIRD = 1.0 / 3.0;
1259 constexpr double ONE_SIXTH = 1.0 / 6.0;
1260 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
1262 static constexpr double dN_ref[6][3] = {
1263 { -0.5, -0.5, -ONE_SIXTH },
1264 { 0.5, 0.0, -ONE_SIXTH },
1265 { 0.0, 0.5, -ONE_SIXTH },
1266 { -0.5, -0.5, ONE_SIXTH },
1267 { 0.5, 0.0, ONE_SIXTH },
1268 { 0.0, 0.5, ONE_SIXTH } };
1270 const int n00 = node_id( tx, ty );
1271 const int n01 = node_id( tx, ty + 1 );
1272 const int n10 = node_id( tx + 1, ty );
1273 const int n11 = node_id( tx + 1, ty + 1 );
1277 src8[0][0] = src_sh( n00, 0, lvl0 );
1278 src8[1][0] = src_sh( n00, 1, lvl0 );
1279 src8[2][0] = src_sh( n00, 2, lvl0 );
1280 src8[0][1] = src_sh( n10, 0, lvl0 );
1281 src8[1][1] = src_sh( n10, 1, lvl0 );
1282 src8[2][1] = src_sh( n10, 2, lvl0 );
1283 src8[0][2] = src_sh( n01, 0, lvl0 );
1284 src8[1][2] = src_sh( n01, 1, lvl0 );
1285 src8[2][2] = src_sh( n01, 2, lvl0 );
1286 src8[0][6] = src_sh( n11, 0, lvl0 );
1287 src8[1][6] = src_sh( n11, 1, lvl0 );
1288 src8[2][6] = src_sh( n11, 2, lvl0 );
1290 src8[0][3] = src_sh( n00, 0, lvl0 + 1 );
1291 src8[1][3] = src_sh( n00, 1, lvl0 + 1 );
1292 src8[2][3] = src_sh( n00, 2, lvl0 + 1 );
1293 src8[0][4] = src_sh( n10, 0, lvl0 + 1 );
1294 src8[1][4] = src_sh( n10, 1, lvl0 + 1 );
1295 src8[2][4] = src_sh( n10, 2, lvl0 + 1 );
1296 src8[0][5] = src_sh( n01, 0, lvl0 + 1 );
1297 src8[1][5] = src_sh( n01, 1, lvl0 + 1 );
1298 src8[2][5] = src_sh( n01, 2, lvl0 + 1 );
1299 src8[0][7] = src_sh( n11, 0, lvl0 + 1 );
1300 src8[1][7] = src_sh( n11, 1, lvl0 + 1 );
1301 src8[2][7] = src_sh( n11, 2, lvl0 + 1 );
1303 double nx00 = coords_sh( n00, 0 ), ny00 = coords_sh( n00, 1 ), nz00 = coords_sh( n00, 2 );
1304 double nx10 = coords_sh( n10, 0 ), ny10 = coords_sh( n10, 1 ), nz10 = coords_sh( n10, 2 );
1305 double nx01 = coords_sh( n01, 0 ), ny01 = coords_sh( n01, 1 ), nz01 = coords_sh( n01, 2 );
1306 double nx11 = coords_sh( n11, 0 ), ny11 = coords_sh( n11, 1 ), nz11 = coords_sh( n11, 2 );
1308 normalize3( nx00, ny00, nz00 );
1309 normalize3( nx10, ny10, nz10 );
1310 normalize3( nx01, ny01, nz01 );
1311 normalize3( nx11, ny11, nz11 );
1320 double u_n_cmb[4] = {};
1321 double u_n_surf[4] = {};
1324 u_n_cmb[0] = nx00 * src8[0][0] + ny00 * src8[1][0] + nz00 * src8[2][0];
1325 u_n_cmb[1] = nx10 * src8[0][1] + ny10 * src8[1][1] + nz10 * src8[2][1];
1326 u_n_cmb[2] = nx01 * src8[0][2] + ny01 * src8[1][2] + nz01 * src8[2][2];
1327 u_n_cmb[3] = nx11 * src8[0][6] + ny11 * src8[1][6] + nz11 * src8[2][6];
1329 if ( surf_freeslip )
1331 u_n_surf[0] = nx00 * src8[0][3] + ny00 * src8[1][3] + nz00 * src8[2][3];
1332 u_n_surf[1] = nx10 * src8[0][4] + ny10 * src8[1][4] + nz10 * src8[2][4];
1333 u_n_surf[2] = nx01 * src8[0][5] + ny01 * src8[1][5] + nz01 * src8[2][5];
1334 u_n_surf[3] = nx11 * src8[0][7] + ny11 * src8[1][7] + nz11 * src8[2][7];
1339 project_tangential_inplace( nx00, ny00, nz00, src8[0][0], src8[1][0], src8[2][0] );
1340 project_tangential_inplace( nx10, ny10, nz10, src8[0][1], src8[1][1], src8[2][1] );
1341 project_tangential_inplace( nx01, ny01, nz01, src8[0][2], src8[1][2], src8[2][2] );
1342 project_tangential_inplace( nx11, ny11, nz11, src8[0][6], src8[1][6], src8[2][6] );
1344 if ( surf_freeslip )
1346 project_tangential_inplace( nx00, ny00, nz00, src8[0][3], src8[1][3], src8[2][3] );
1347 project_tangential_inplace( nx10, ny10, nz10, src8[0][4], src8[1][4], src8[2][4] );
1348 project_tangential_inplace( nx01, ny01, nz01, src8[0][5], src8[1][5], src8[2][5] );
1349 project_tangential_inplace( nx11, ny11, nz11, src8[0][7], src8[1][7], src8[2][7] );
1352 double dst8[3][8] = { { 0.0 } };
1353 double normal_corr[3][8] = {};
1357 for (
int w = 0; w < 2; ++w )
1359 const int v0 = w == 0 ? n00 : n11;
1360 const int v1 = w == 0 ? n10 : n01;
1361 const int v2 = w == 0 ? n01 : n10;
1365 for (
int node = 0; node < 6; ++node )
1367 const int ddx = WEDGE_NODE_OFF[w][node][0];
1368 const int ddy = WEDGE_NODE_OFF[w][node][1];
1369 const int ddr = WEDGE_NODE_OFF[w][node][2];
1371 const int nid = node_id( tx + ddx, ty + ddy );
1372 const int lvl = lvl0 + ddr;
1373 k_sum += k_sh( nid, lvl );
1375 const double k_eval = ONE_SIXTH * k_sum;
1378 double i00, i01, i02;
1379 double i10, i11, i12;
1380 double i20, i21, i22;
1383 const double half_dr = 0.5 * ( r_1 - r_0 );
1384 const double r_mid = 0.5 * ( r_0 + r_1 );
1386 const double J_0_0 = r_mid * ( -coords_sh( v0, 0 ) + coords_sh( v1, 0 ) );
1387 const double J_0_1 = r_mid * ( -coords_sh( v0, 0 ) + coords_sh( v2, 0 ) );
1388 const double J_0_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 0 ) + coords_sh( v1, 0 ) + coords_sh( v2, 0 ) ) );
1390 const double J_1_0 = r_mid * ( -coords_sh( v0, 1 ) + coords_sh( v1, 1 ) );
1391 const double J_1_1 = r_mid * ( -coords_sh( v0, 1 ) + coords_sh( v2, 1 ) );
1392 const double J_1_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 1 ) + coords_sh( v1, 1 ) + coords_sh( v2, 1 ) ) );
1394 const double J_2_0 = r_mid * ( -coords_sh( v0, 2 ) + coords_sh( v1, 2 ) );
1395 const double J_2_1 = r_mid * ( -coords_sh( v0, 2 ) + coords_sh( v2, 2 ) );
1396 const double J_2_2 = half_dr * ( ONE_THIRD * ( coords_sh( v0, 2 ) + coords_sh( v1, 2 ) + coords_sh( v2, 2 ) ) );
1398 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
1399 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
1401 const double invJ = 1.0 /
J_det;
1403 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
1404 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
1405 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
1407 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
1408 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
1409 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
1411 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
1412 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
1413 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
1415 wJ = Kokkos::abs( J_det );
1418 const double kwJ =
k_eval * wJ;
1429 if ( !diagonal_ && ( cmb_freeslip || surf_freeslip ) )
1431 static constexpr int CMB_NODE_TO_CORNER[2][3] = { { 0, 1, 2 }, { 3, 2, 1 } };
1432 const double cn[4][3] = { { nx00, ny00, nz00 },
1433 { nx10, ny10, nz10 },
1434 { nx01, ny01, nz01 },
1435 { nx11, ny11, nz11 } };
1438 for (
int ni = 0; ni < 3; ++ni )
1440 const int corner = CMB_NODE_TO_CORNER[w][ni];
1441 const double nxu = cn[corner][0], nyu = cn[corner][1], nzu = cn[corner][2];
1443 const double gx = dN_ref[ni][0];
1444 const double gy = dN_ref[ni][1];
1445 const double gz = dN_ref[ni][2];
1446 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1447 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1448 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1449 const double gg = g0 * g0 +
g1 *
g1 +
g2 *
g2;
1450 const double ng = nxu * g0 + nyu *
g1 + nzu *
g2;
1451 const double Ann = kwJ * ( gg + ONE_THIRD * ng * ng );
1452 const double c = Ann * u_n_cmb[corner];
1453 const int u = WEDGE_TO_UNIQUE[w][ni];
1455 normal_corr[0][u] += c * nxu;
1456 normal_corr[1][u] += c * nyu;
1457 normal_corr[2][u] += c * nzu;
1460 if ( surf_freeslip )
1462 for (
int ni = 0; ni < 3; ++ni )
1464 const int corner = CMB_NODE_TO_CORNER[w][ni];
1465 const double nxu = cn[corner][0], nyu = cn[corner][1], nzu = cn[corner][2];
1467 const double gx = dN_ref[ni + 3][0];
1468 const double gy = dN_ref[ni + 3][1];
1469 const double gz = dN_ref[ni + 3][2];
1470 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1471 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1472 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1473 const double gg = g0 * g0 +
g1 *
g1 +
g2 *
g2;
1474 const double ng = nxu * g0 + nyu *
g1 + nzu *
g2;
1475 const double Ann = kwJ * ( gg + ONE_THIRD * ng * ng );
1476 const double c = Ann * u_n_surf[corner];
1477 const int u = WEDGE_TO_UNIQUE[w][ni + 3];
1479 normal_corr[0][u] += c * nxu;
1480 normal_corr[1][u] += c * nyu;
1481 normal_corr[2][u] += c * nzu;
1487 double gu10 = 0.0, gu11 = 0.0;
1488 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
1493 for (
int dimj = 0; dimj < 3; ++dimj )
1496 for (
int node_idx = cmb_shift; node_idx < 6 -
surface_shift; ++node_idx )
1498 const double gx = dN_ref[node_idx][0];
1499 const double gy = dN_ref[node_idx][1];
1500 const double gz = dN_ref[node_idx][2];
1502 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1503 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1504 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1506 double E00, E11, E22, sym01, sym02, sym12, gdd;
1507 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
1509 const int u = WEDGE_TO_UNIQUE[w][node_idx];
1510 const double s = src8[dimj][u];
1522 for (
int dimi = 0; dimi < 3; ++dimi )
1525 for (
int node_idx = cmb_shift; node_idx < 6 -
surface_shift; ++node_idx )
1527 const double gx = dN_ref[node_idx][0];
1528 const double gy = dN_ref[node_idx][1];
1529 const double gz = dN_ref[node_idx][2];
1531 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1532 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1533 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1535 double E00, E11, E22, sym01, sym02, sym12, gdd;
1536 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
1538 const int u = WEDGE_TO_UNIQUE[w][node_idx];
1541 kwJ * ( NEG_TWO_THIRDS *
div_u * gdd + 4.0 * sym01 * gu10 + 4.0 * sym02 * gu20 +
1542 4.0 * sym12 * gu21 + 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
1548 if ( diagonal_ || cmb_dirichlet || surface_dirichlet )
1550 for (
int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
1552 for (
int node_idx = surface_shift; node_idx < 6 -
cmb_shift; ++node_idx )
1556 if ( diagonal_ && cmb_freeslip && node_idx < 3 )
1558 if ( diagonal_ && surf_freeslip && node_idx >= 3 )
1561 const double gx = dN_ref[node_idx][0];
1562 const double gy = dN_ref[node_idx][1];
1563 const double gz = dN_ref[node_idx][2];
1565 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1566 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1567 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1569 double E00, E11, E22, sym01, sym02, sym12, gdd;
1570 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
1572 const int u = WEDGE_TO_UNIQUE[w][node_idx];
1573 const double s = src8[dim_diagBC][u];
1575 dst8[dim_diagBC][u] += kwJ * ( 4.0 * s * ( sym01 * sym01 + sym02 * sym02 + sym12 * sym12 ) +
1576 2.0 * s * ( E00 * E00 + E11 * E11 + E22 * E22 ) +
1577 NEG_TWO_THIRDS * ( gdd * gdd ) * s );
1588 static constexpr int FS_CORNER_MAP[2][3] = { { 0, 1, 2 }, { 3, 2, 1 } };
1589 const double ncoords[4][3] = { { nx00, ny00, nz00 },
1590 { nx10, ny10, nz10 },
1591 { nx01, ny01, nz01 },
1592 { nx11, ny11, nz11 } };
1594 auto apply_rotated_diag =
1595 [&](
const int ni,
const int node_idx,
const double* u_n_arr ) {
1596 const int corner = FS_CORNER_MAP[w][ni];
1597 const double nxu = ncoords[corner][0];
1598 const double nyu = ncoords[corner][1];
1599 const double nzu = ncoords[corner][2];
1600 const int u = WEDGE_TO_UNIQUE[w][node_idx];
1602 const double gx = dN_ref[node_idx][0];
1603 const double gy = dN_ref[node_idx][1];
1604 const double gz = dN_ref[node_idx][2];
1606 const double g0 = i00 * gx + i01 * gy + i02 * gz;
1607 const double g1 = i10 * gx + i11 * gy + i12 * gz;
1608 const double g2 = i20 * gx + i21 * gy + i22 * gz;
1609 const double gg_loc = g0 * g0 +
g1 *
g1 +
g2 *
g2;
1611 dense::Vec< double, 3 > n_vec;
1615 const auto R_rot = trafo_mat_cartesian_to_normal_tangential< double >( n_vec );
1619 const double s0 = src8[0][u] + u_n_arr[corner] * nxu;
1620 const double s1 = src8[1][u] + u_n_arr[corner] * nyu;
1621 const double s2 = src8[2][u] + u_n_arr[corner] * nzu;
1624 const double Rg0 = R_rot( 0, 0 ) * g0 + R_rot( 0, 1 ) *
g1 + R_rot( 0, 2 ) *
g2;
1625 const double Rg1 = R_rot( 1, 0 ) * g0 + R_rot( 1, 1 ) *
g1 + R_rot( 1, 2 ) *
g2;
1626 const double Rg2 = R_rot( 2, 0 ) * g0 + R_rot( 2, 1 ) *
g1 + R_rot( 2, 2 ) *
g2;
1627 const double Rs0 = R_rot( 0, 0 ) * s0 + R_rot( 0, 1 ) * s1 + R_rot( 0, 2 ) * s2;
1628 const double Rs1 = R_rot( 1, 0 ) * s0 + R_rot( 1, 1 ) * s1 + R_rot( 1, 2 ) * s2;
1629 const double Rs2 = R_rot( 2, 0 ) * s0 + R_rot( 2, 1 ) * s1 + R_rot( 2, 2 ) * s2;
1633 const double v0 = kwJ * ( gg_loc + ONE_THIRD * Rg0 * Rg0 ) * Rs0;
1634 const double v1 = kwJ * ( gg_loc + ONE_THIRD * Rg1 * Rg1 ) * Rs1;
1635 const double v2 = kwJ * ( gg_loc + ONE_THIRD * Rg2 * Rg2 ) * Rs2;
1638 dst8[0][u] += R_rot( 0, 0 ) * v0 + R_rot( 1, 0 ) * v1 + R_rot( 2, 0 ) * v2;
1639 dst8[1][u] += R_rot( 0, 1 ) * v0 + R_rot( 1, 1 ) * v1 + R_rot( 2, 1 ) * v2;
1640 dst8[2][u] += R_rot( 0, 2 ) * v0 + R_rot( 1, 2 ) * v1 + R_rot( 2, 2 ) * v2;
1645 for (
int ni = 0; ni < 3; ++ni )
1646 apply_rotated_diag( ni, ni, u_n_cmb );
1648 if ( surf_freeslip )
1650 for (
int ni = 0; ni < 3; ++ni )
1651 apply_rotated_diag( ni, ni + 3, u_n_surf );
1660 if ( !diagonal_ && cmb_freeslip )
1662 project_tangential_inplace( nx00, ny00, nz00, dst8[0][0], dst8[1][0], dst8[2][0] );
1663 project_tangential_inplace( nx10, ny10, nz10, dst8[0][1], dst8[1][1], dst8[2][1] );
1664 project_tangential_inplace( nx01, ny01, nz01, dst8[0][2], dst8[1][2], dst8[2][2] );
1665 project_tangential_inplace( nx11, ny11, nz11, dst8[0][6], dst8[1][6], dst8[2][6] );
1667 if ( !diagonal_ && surf_freeslip )
1669 project_tangential_inplace( nx00, ny00, nz00, dst8[0][3], dst8[1][3], dst8[2][3] );
1670 project_tangential_inplace( nx10, ny10, nz10, dst8[0][4], dst8[1][4], dst8[2][4] );
1671 project_tangential_inplace( nx01, ny01, nz01, dst8[0][5], dst8[1][5], dst8[2][5] );
1672 project_tangential_inplace( nx11, ny11, nz11, dst8[0][7], dst8[1][7], dst8[2][7] );
1678 if ( !diagonal_ && ( cmb_freeslip || surf_freeslip ) )
1680 for (
int dim = 0;
dim < 3; ++
dim )
1681 for (
int u = 0; u < 8; ++u )
1682 dst8[dim][u] += normal_corr[dim][u];
1685 for (
int dim_add = 0; dim_add < 3; ++dim_add )
1687 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
1688 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
1689 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
1690 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
1692 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
1694 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
1696 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
1698 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );
1712 KOKKOS_INLINE_FUNCTION
1715 int local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell;
1716 decode_team_indices( team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell );
1718 if ( tr >= r_tile_ )
1721 const bool at_cmb =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
1722 const bool at_surface =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
1724 if ( kernel_path_ == KernelPath::Slow )
1727 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
1729 else if ( kernel_path_ == KernelPath::FastFreeslip )
1731 operator_fast_freeslip_path(
1732 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
1736 operator_fast_dirichlet_neumann_path(
1737 team, local_subdomain_id, x0, y0, r0, tx, ty, tr, x_cell, y_cell, r_cell, at_cmb, at_surface );
1756 const auto det = J.det();
1757 const auto abs_det = Kokkos::abs( det );
1763 k_eval +=
shape( k, quad_point ) * k_local_hex[wedge]( k );
1768 sym_grad_i[k] =
symmetric_grad( J_inv_transposed, quad_point, k, dimi );
1769 sym_grad_j[k] =
symmetric_grad( J_inv_transposed, quad_point, k, dimj );
1772 jdet_keval_quadweight = quad_weight * k_eval * abs_det;
1778 KOKKOS_INLINE_FUNCTION
1780 const int local_subdomain_id,
1784 const int wedge )
const
1789 const ScalarT r_1 = radii_( local_subdomain_id, r_cell );
1790 const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );
1797 for (
int dimi = 0; dimi < 3; ++dimi )
1799 for (
int dimj = 0; dimj < 3; ++dimj )
1801 for (
int q = 0; q < num_quad_points; q++ )
1819 jdet_keval_quadweight );
1826 jdet_keval_quadweight *
1828 2.0 / 3.0 * sym_grad_j[j]( dimj, dimj ) * sym_grad_i[i]( dimi, dimi ) );