469 int local_subdomain_id, x_cell, y_cell, r_cell;
472 int tmp = team.league_rank();
473 const int r_block_index = tmp % blocks_per_column_;
474 tmp /= blocks_per_column_;
475 y_cell = tmp & ( hex_lat_ - 1 );
476 tmp >>= lat_refinement_level_;
477 x_cell = tmp & ( hex_lat_ - 1 );
478 tmp >>= lat_refinement_level_;
479 local_subdomain_id = tmp;
481 r_cell = r_block_index * team.team_size() + team.team_rank();
484 bool at_cmb =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell, CMB );
485 bool at_surface =
has_flag( local_subdomain_id, x_cell, y_cell, r_cell + 1, SURFACE );
494 A[0] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
495 A[1] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
499 if ( local_matrix_storage_.
has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) &&
500 local_matrix_storage_.
has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) )
502 A[0] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );
503 A[1] = local_matrix_storage_.
get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );
518 for (
int dimj = 0; dimj < 3; dimj++ )
522 src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );
534 boundary_mask.
fill( 1.0 );
536 bool freeslip_reorder =
false;
539 if ( at_cmb || at_surface )
541 ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
542 BoundaryConditionFlag bcf = get_boundary_condition_flag( bcs_, sbf );
544 if ( bcf == DIRICHLET )
546 for (
int dimi = 0; dimi < 3; ++dimi )
548 for (
int dimj = 0; dimj < 3; ++dimj )
554 if ( ( at_cmb && ( ( dimi == dimj && i != j && ( i < 3 || j < 3 ) ) ||
555 ( dimi != dimj && ( i < 3 || j < 3 ) ) ) ) ||
556 ( at_surface && ( ( dimi == dimj && i != j && ( i >= 3 || j >= 3 ) ) ||
557 ( dimi != dimj && ( i >= 3 || j >= 3 ) ) ) ) )
567 else if ( bcf == FREESLIP )
569 freeslip_reorder =
true;
572 for (
int wedge = 0; wedge < 2; ++wedge )
574 for (
int dimi = 0; dimi < 3; ++dimi )
578 for (
int dimj = 0; dimj < 3; ++dimj )
582 A_tmp[wedge]( node_idxi * 3 + dimi, node_idxj * 3 + dimj ) = A[wedge](
592 constexpr int layer_hex_offset_x[2][3] = { { 0, 1, 0 }, { 1, 0, 1 } };
593 constexpr int layer_hex_offset_y[2][3] = { { 0, 0, 1 }, { 1, 1, 0 } };
595 for (
int wedge = 0; wedge < 2; ++wedge )
599 R[wedge]( i, i ) = 1.0;
602 for (
int boundary_node_idx = 0; boundary_node_idx < 3; boundary_node_idx++ )
606 x_cell + layer_hex_offset_x[wedge][boundary_node_idx],
607 y_cell + layer_hex_offset_y[wedge][boundary_node_idx],
608 r_cell + ( at_cmb ? 0 : 1 ),
612 auto R_i = trafo_mat_cartesian_to_normal_tangential( normal );
614 int offset_in_R = at_cmb ? 0 : 9;
615 for (
int dimi = 0; dimi < 3; ++dimi )
617 for (
int dimj = 0; dimj < 3; ++dimj )
620 offset_in_R + boundary_node_idx * 3 + dimi,
621 offset_in_R + boundary_node_idx * 3 + dimj ) = R_i( dimi, dimj );
626 A[wedge] = R[wedge] * A_tmp[wedge] * R[wedge].
transposed();
628 auto src_tmp = R[wedge] * src[wedge];
629 for (
int i = 0; i < 18; ++i )
631 src[wedge]( i ) = src_tmp( i );
634 int node_start = at_surface ? 3 : 0;
635 int node_end = at_surface ? 6 : 3;
637 for (
int node_idx = node_start; node_idx < node_end; node_idx++ )
639 int idx = node_idx * 3;
640 for (
int k = 0; k < 18; ++k )
644 boundary_mask( idx, k ) = 0.0;
645 boundary_mask( k, idx ) = 0.0;
665 dst[0] = A[0] * src[0];
666 dst[1] = A[1] * src[1];
668 if ( freeslip_reorder )
673 for (
int i = 0; i < 18; ++i )
675 dst[0]( i ) = dst_tmp[0]( i );
676 dst[1]( i ) = dst_tmp[1]( i );
683 for (
int dimi = 0; dimi < 3; dimi++ )
690 dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );
701 static constexpr int WEDGE_NODE_OFF[2][6][3] = {
702 { { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 1 }, { 0, 1, 1 } },
703 { { 1, 1, 0 }, { 0, 1, 0 }, { 1, 0, 0 }, { 1, 1, 1 }, { 0, 1, 1 }, { 1, 0, 1 } } };
707 static constexpr int WEDGE_TO_UNIQUE[2][6] = {
709 { 0, 1, 2, 3, 4, 5 },
711 { 6, 2, 1, 7, 5, 4 } };
714 constexpr double ONE_THIRD = 1.0 / 3.0;
715 constexpr double ONE_SIXTH = 1.0 / 6.0;
716 constexpr double NEG_TWO_THIRDS = -0.66666666666666663;
719 static constexpr double dN_ref[6][3] = {
720 { -0.5, -0.5, -ONE_SIXTH },
721 { 0.5, 0.0, -ONE_SIXTH },
722 { 0.0, 0.5, -ONE_SIXTH },
723 { -0.5, -0.5, ONE_SIXTH },
724 { 0.5, 0.0, ONE_SIXTH },
725 { 0.0, 0.5, ONE_SIXTH } };
729 reinterpret_cast< double*
>( team.team_shmem().get_shmem(
team_shmem_size( team.team_size() ) ) );
731 using Scratch3D = Kokkos::
732 View< double***, Kokkos::LayoutRight, typename Team::scratch_memory_space, Kokkos::MemoryUnmanaged >;
734 Scratch3D wedge_surf_phy_coords( shmem, 2, 3, 3 );
736 Kokkos::single( Kokkos::PerTeam( team ), [&]() {
737 const double q00x = grid_( local_subdomain_id, x_cell, y_cell, 0 );
738 const double q00y = grid_( local_subdomain_id, x_cell, y_cell, 1 );
739 const double q00z = grid_( local_subdomain_id, x_cell, y_cell, 2 );
741 const double q01x = grid_( local_subdomain_id, x_cell, y_cell + 1, 0 );
742 const double q01y = grid_( local_subdomain_id, x_cell, y_cell + 1, 1 );
743 const double q01z = grid_( local_subdomain_id, x_cell, y_cell + 1, 2 );
745 const double q10x = grid_( local_subdomain_id, x_cell + 1, y_cell, 0 );
746 const double q10y = grid_( local_subdomain_id, x_cell + 1, y_cell, 1 );
747 const double q10z = grid_( local_subdomain_id, x_cell + 1, y_cell, 2 );
749 const double q11x = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 0 );
750 const double q11y = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 1 );
751 const double q11z = grid_( local_subdomain_id, x_cell + 1, y_cell + 1, 2 );
754 wedge_surf_phy_coords( 0, 0, 0 ) = q00x;
755 wedge_surf_phy_coords( 0, 0, 1 ) = q00y;
756 wedge_surf_phy_coords( 0, 0, 2 ) = q00z;
758 wedge_surf_phy_coords( 0, 1, 0 ) = q10x;
759 wedge_surf_phy_coords( 0, 1, 1 ) = q10y;
760 wedge_surf_phy_coords( 0, 1, 2 ) = q10z;
762 wedge_surf_phy_coords( 0, 2, 0 ) = q01x;
763 wedge_surf_phy_coords( 0, 2, 1 ) = q01y;
764 wedge_surf_phy_coords( 0, 2, 2 ) = q01z;
767 wedge_surf_phy_coords( 1, 0, 0 ) = q11x;
768 wedge_surf_phy_coords( 1, 0, 1 ) = q11y;
769 wedge_surf_phy_coords( 1, 0, 2 ) = q11z;
771 wedge_surf_phy_coords( 1, 1, 0 ) = q01x;
772 wedge_surf_phy_coords( 1, 1, 1 ) = q01y;
773 wedge_surf_phy_coords( 1, 1, 2 ) = q01z;
775 wedge_surf_phy_coords( 1, 2, 0 ) = q10x;
776 wedge_surf_phy_coords( 1, 2, 1 ) = q10y;
777 wedge_surf_phy_coords( 1, 2, 2 ) = q10z;
782 const double r_0 = radii_( local_subdomain_id, r_cell );
783 const double r_1 = radii_( local_subdomain_id, r_cell + 1 );
786 const bool at_boundary = at_cmb || at_surface;
787 bool treat_boundary =
false;
790 const ShellBoundaryFlag sbf = at_cmb ? CMB : SURFACE;
791 treat_boundary = ( get_boundary_condition_flag( bcs_, sbf ) == DIRICHLET );
794 const int cmb_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_cmb ) ? 3 : 0 );
795 const int surface_shift = ( ( at_boundary && treat_boundary && ( !diagonal_ ) && at_surface ) ? 3 : 0 );
798 double dst8[3][8] = { 0.0 };
801 for (
int w = 0; w < 2; ++w )
810 for (
int node = 0; node < 6; ++node )
812 const int dx = WEDGE_NODE_OFF[w][node][0];
813 const int dy = WEDGE_NODE_OFF[w][node][1];
814 const int dr = WEDGE_NODE_OFF[w][node][2];
816 k_w[node] = k_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr );
818 src_w[0][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 0 );
819 src_w[1][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 1 );
820 src_w[2][node] = src_( local_subdomain_id, x_cell + dx, y_cell + dy, r_cell + dr, 2 );
826 const double k_eval = ONE_SIXTH * ( k_w[0] + k_w[1] + k_w[2] + k_w[3] + k_w[4] + k_w[5] );
835 double i00, i01, i02;
836 double i10, i11, i12;
837 double i20, i21, i22;
840 const double half_dr = 0.5 * ( r_1 - r_0 );
841 const double r_mid = 0.5 * ( r_0 + r_1 );
844 r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) );
846 r_mid * ( -wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 2, 0 ) );
848 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 0 ) + wedge_surf_phy_coords( w, 1, 0 ) +
849 wedge_surf_phy_coords( w, 2, 0 ) ) );
852 r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) );
854 r_mid * ( -wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 2, 1 ) );
856 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 1 ) + wedge_surf_phy_coords( w, 1, 1 ) +
857 wedge_surf_phy_coords( w, 2, 1 ) ) );
860 r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) );
862 r_mid * ( -wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 2, 2 ) );
864 half_dr * ( ONE_THIRD * ( wedge_surf_phy_coords( w, 0, 2 ) + wedge_surf_phy_coords( w, 1, 2 ) +
865 wedge_surf_phy_coords( w, 2, 2 ) ) );
867 const double J_det = J_0_0 * J_1_1 * J_2_2 - J_0_0 * J_1_2 * J_2_1 - J_0_1 * J_1_0 * J_2_2 +
868 J_0_1 * J_1_2 * J_2_0 + J_0_2 * J_1_0 * J_2_1 - J_0_2 * J_1_1 * J_2_0;
870 const double invJ = 1.0 / J_det;
873 i00 = invJ * ( J_1_1 * J_2_2 - J_1_2 * J_2_1 );
874 i01 = invJ * ( -J_1_0 * J_2_2 + J_1_2 * J_2_0 );
875 i02 = invJ * ( J_1_0 * J_2_1 - J_1_1 * J_2_0 );
877 i10 = invJ * ( -J_0_1 * J_2_2 + J_0_2 * J_2_1 );
878 i11 = invJ * ( J_0_0 * J_2_2 - J_0_2 * J_2_0 );
879 i12 = invJ * ( -J_0_0 * J_2_1 + J_0_1 * J_2_0 );
881 i20 = invJ * ( J_0_1 * J_1_2 - J_0_2 * J_1_1 );
882 i21 = invJ * ( -J_0_0 * J_1_2 + J_0_2 * J_1_0 );
883 i22 = invJ * ( J_0_0 * J_1_1 - J_0_1 * J_1_0 );
885 wJ = Kokkos::abs( J_det );
889 const double kwJ = k_eval * wJ;
894 double gu00 = 0.0, gu01 = 0.0, gu02 = 0.0;
895 double gu10 = 0.0, gu11 = 0.0, gu12 = 0.0;
896 double gu20 = 0.0, gu21 = 0.0, gu22 = 0.0;
903 for (
int dimj = 0; dimj < 3; ++dimj )
906 for (
int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
909 const double gx = dN_ref[node_idx][0];
910 const double gy = dN_ref[node_idx][1];
911 const double gz = dN_ref[node_idx][2];
913 const double g0 = i00 * gx + i01 * gy + i02 * gz;
914 const double g1 = i10 * gx + i11 * gy + i12 * gz;
915 const double g2 = i20 * gx + i21 * gy + i22 * gz;
917 double E00, E11, E22, sym01, sym02, sym12, gdd;
918 column_grad_to_sym( dimj, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
920 const double s = src_w[dimj][node_idx];
938 for (
int dimi = 0; dimi < 3; ++dimi )
941 for (
int node_idx = cmb_shift; node_idx < 6 - surface_shift; ++node_idx )
944 const double gx = dN_ref[node_idx][0];
945 const double gy = dN_ref[node_idx][1];
946 const double gz = dN_ref[node_idx][2];
948 const double g0 = i00 * gx + i01 * gy + i02 * gz;
949 const double g1 = i10 * gx + i11 * gy + i12 * gz;
950 const double g2 = i20 * gx + i21 * gy + i22 * gz;
952 double E00, E11, E22, sym01, sym02, sym12, gdd;
953 column_grad_to_sym( dimi, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
955 const double pairing0 = 2.0 * sym01;
956 const double pairing1 = 2.0 * sym02;
957 const double pairing2 = 2.0 * sym12;
959 const int u = WEDGE_TO_UNIQUE[w][node_idx];
962 kwJ * ( NEG_TWO_THIRDS * div_u * gdd + pairing0 * gu01 + pairing0 * gu10 +
963 pairing1 * gu02 + pairing1 * gu20 + pairing2 * gu12 + pairing2 * gu21 +
964 2.0 * E00 * gu00 + 2.0 * E11 * gu11 + 2.0 * E22 * gu22 );
970 if ( diagonal_ || ( treat_boundary && at_boundary ) )
973 for (
int dim_diagBC = 0; dim_diagBC < 3; ++dim_diagBC )
976 for (
int node_idx = surface_shift; node_idx < 6 - cmb_shift; ++node_idx )
979 const double gx = dN_ref[node_idx][0];
980 const double gy = dN_ref[node_idx][1];
981 const double gz = dN_ref[node_idx][2];
983 const double g0 = i00 * gx + i01 * gy + i02 * gz;
984 const double g1 = i10 * gx + i11 * gy + i12 * gz;
985 const double g2 = i20 * gx + i21 * gy + i22 * gz;
987 double E00, E11, E22, sym01, sym02, sym12, gdd;
988 column_grad_to_sym( dim_diagBC, g0, g1, g2, E00, E11, E22, sym01, sym02, sym12, gdd );
990 const double s = src_w[dim_diagBC][node_idx];
992 const double pairing0 = 4.0 * s;
993 const double pairing1 = 2.0 * s;
995 const int u = WEDGE_TO_UNIQUE[w][node_idx];
997 dst8[dim_diagBC][u] += kwJ * ( pairing0 * ( sym01 * sym01 ) + pairing0 * ( sym02 * sym02 ) +
998 pairing0 * ( sym12 * sym12 ) + pairing1 * ( E00 * E00 ) +
999 pairing1 * ( E11 * E11 ) + pairing1 * ( E22 * E22 ) +
1000 NEG_TWO_THIRDS * ( gdd * gdd ) * s );
1007 for (
int dim_add = 0; dim_add < 3; ++dim_add )
1010 Kokkos::atomic_add( &dst_( local_subdomain_id, x_cell, y_cell, r_cell, dim_add ), dst8[dim_add][0] );
1014 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell, dim_add ), dst8[dim_add][1] );
1018 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell, dim_add ), dst8[dim_add][2] );
1022 &dst_( local_subdomain_id, x_cell, y_cell, r_cell + 1, dim_add ), dst8[dim_add][3] );
1026 &dst_( local_subdomain_id, x_cell + 1, y_cell, r_cell + 1, dim_add ), dst8[dim_add][4] );
1030 &dst_( local_subdomain_id, x_cell, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][5] );
1034 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell, dim_add ), dst8[dim_add][6] );
1038 &dst_( local_subdomain_id, x_cell + 1, y_cell + 1, r_cell + 1, dim_add ), dst8[dim_add][7] );