terraneo/epsilon__divdiv_8hpp_source.html

#pragma once


#include "../../quadrature/quadrature.hpp"

#include "communication/shell/communication.hpp"

#include "dense/vec.hpp"

#include "fe/wedge/integrands.hpp"

#include "fe/wedge/kernel_helpers.hpp"

#include "grid/shell/spherical_shell.hpp"

#include "linalg/operator.hpp"

#include "linalg/solvers/gca/local_matrix_storage.hpp"

#include "linalg/vector.hpp"

#include "linalg/vector_q1.hpp"

#include "util/timer.hpp"


namespace terra::fe::wedge::operators::shell {


/// @brief Matrix-free / stored-matrix operator on wedge elements of the spherical shell.
///
/// The element matrices built in assemble_local_matrix() have entries of the form
/// `w * k * ( 2 * sym_grad_j : sym_grad_i - 2/3 * div_j * div_i )`, i.e. a symmetric-gradient
/// ("epsilon") term minus a div-div term, weighted by the coefficient `k`.
/// The object itself is used as the Kokkos functor in apply_impl() (see operator()).
///
/// @tparam ScalarT scalar type of all grid data and local matrices
/// @tparam VecDim  number of vector components of the src/dst vectors
///                 (NOTE(review): several loops are hard-coded to 3 components - confirm VecDim != 3 is unsupported)
template < typename ScalarT, int VecDim = 3 >


class EpsilonDivDiv

{

  public:

    using SrcVectorType                 = linalg::VectorQ1Vec< ScalarT, VecDim >;

    using DstVectorType                 = linalg::VectorQ1Vec< ScalarT, VecDim >;

    using ScalarType                    = ScalarT;

    // Size of one wedge-local matrix; rows/cols are indexed as `node + component * num_nodes_per_wedge`
    // in assemble_local_matrix().
    static constexpr int LocalMatrixDim = 18;

    using Grid4DDataLocalMatrices = terra::grid::Grid4DDataMatrices< ScalarType, LocalMatrixDim, LocalMatrixDim, 2 >;

    using LocalMatrixStorage      = linalg::solvers::LocalMatrixStorage< ScalarType, LocalMatrixDim >;


  private:
    /// Storage for precomputed local matrices. Default-constructed here; it is only (re)built by
    /// set_stored_matrix_mode() when a mode other than Off is selected.
    LocalMatrixStorage local_matrix_storage_;

    /// Distributed domain the operator iterates over and communicates on.
    grid::shell::DistributedDomain domain_;

    /// Geometry handed to wedge_surface_physical_coords().
    grid::Grid3DDataVec< ScalarT, 3 > grid_;
    /// Radii indexed by (local_subdomain_id, radial node).
    grid::Grid2DDataScalar< ScalarT > radii_;
    /// Nodal coefficient that scales the element integrals.
    grid::Grid4DDataScalar< ScalarType > k_;
    /// Boundary flags, queried through has_flag().
    grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > mask_;

    /// If true, couplings with boundary nodes are eliminated at the innermost/outermost radial cell layer.
    bool treat_boundary_;
    /// If true, only the diagonal of the operator is applied.
    bool diagonal_;

    linalg::OperatorApplyMode         operator_apply_mode_;
    linalg::OperatorCommunicationMode operator_communication_mode_;
    linalg::OperatorStoredMatrixMode  operator_stored_matrix_mode_;

    /// Buffers used for the additive boundary exchange in apply_impl().
    communication::shell::SubdomainNeighborhoodSendRecvBuffer< ScalarT, VecDim > send_buffers_;
    communication::shell::SubdomainNeighborhoodSendRecvBuffer< ScalarT, VecDim > recv_buffers_;

    /// Grid data of the vectors of the current apply_impl() call; read/written by operator().
    grid::Grid4DDataVec< ScalarType, VecDim > src_;
    grid::Grid4DDataVec< ScalarType, VecDim > dst_;

    // Quadrature rule (quad_felippa_3x2). The point count is a compile-time constant, so it is a
    // static constexpr member (like LocalMatrixDim) rather than a per-instance const member, which
    // would also implicitly delete the copy-assignment operator of the class.
    static constexpr int num_quad_points = quadrature::quad_felippa_3x2_num_quad_points;

    dense::Vec< ScalarT, 3 > quad_points[quadrature::quad_felippa_3x2_num_quad_points];
    ScalarT                  quad_weights[quadrature::quad_felippa_3x2_num_quad_points];


  public:


    /// @brief Constructs the operator and loads the quadrature rule.
    ///
    /// @param domain                       distributed domain to iterate over / communicate on
    /// @param grid                         geometry data passed to wedge_surface_physical_coords()
    /// @param radii                        radii, indexed by (local_subdomain_id, radial node)
    /// @param mask                         boundary flags (see has_flag())
    /// @param k                            nodal coefficient
    /// @param treat_boundary               eliminate couplings with boundary nodes on the first/last radial cell layer
    /// @param diagonal                     apply only the diagonal of the operator
    /// @param operator_apply_mode          whether apply_impl() zeroes dst first (Replace) or accumulates
    /// @param operator_communication_mode  whether apply_impl() performs the additive boundary exchange
    /// @param operator_stored_matrix_mode  initial stored-matrix mode
    ///
    /// @note The local matrix storage is NOT allocated here, even if `operator_stored_matrix_mode`
    ///       is not Off; allocation only happens in set_stored_matrix_mode().
    EpsilonDivDiv(
        const grid::shell::DistributedDomain&                           domain,
        const grid::Grid3DDataVec< ScalarT, 3 >&                        grid,
        const grid::Grid2DDataScalar< ScalarT >&                        radii,
        const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag >& mask,
        const grid::Grid4DDataScalar< ScalarT >&                        k,
        bool                                                            treat_boundary,
        bool                                                            diagonal,
        linalg::OperatorApplyMode         operator_apply_mode = linalg::OperatorApplyMode::Replace,
        linalg::OperatorCommunicationMode operator_communication_mode =
            linalg::OperatorCommunicationMode::CommunicateAdditively,
        linalg::OperatorStoredMatrixMode operator_stored_matrix_mode = linalg::OperatorStoredMatrixMode::Off )
    // Initializers are listed in member declaration order (k_ is declared before mask_). Members are
    // always initialized in declaration order anyway; matching it here avoids -Wreorder warnings.
    : domain_( domain )
    , grid_( grid )
    , radii_( radii )
    , k_( k )
    , mask_( mask )
    , treat_boundary_( treat_boundary )
    , diagonal_( diagonal )
    , operator_apply_mode_( operator_apply_mode )
    , operator_communication_mode_( operator_communication_mode )
    , operator_stored_matrix_mode_( operator_stored_matrix_mode )
    // TODO: we can reuse the send and recv buffers and pass in from the outside somehow
    , send_buffers_( domain )
    , recv_buffers_( domain )
    {
        // Fill the per-instance copies of the quadrature points and weights.
        quadrature::quad_felippa_3x2_quad_points( quad_points );
        quadrature::quad_felippa_3x2_quad_weights( quad_weights );
    }


    /// @brief Replaces both the apply mode and the communication mode used by apply_impl().
    /// @param operator_apply_mode          new apply mode (dst is zeroed first when this is Replace)
    /// @param operator_communication_mode  new communication mode (boundary data is exchanged when this
    ///                                     is CommunicateAdditively)
    void set_operator_apply_and_communication_modes(
        const linalg::OperatorApplyMode         operator_apply_mode,
        const linalg::OperatorCommunicationMode operator_communication_mode )
    {
        this->operator_communication_mode_ = operator_communication_mode;
        this->operator_apply_mode_         = operator_apply_mode;
    }


    /// @brief S/Getter for diagonal member

    void set_diagonal( bool v ) { diagonal_ = v; }


    /// @brief Getter for the coefficient grid data.
    /// @return const reference to the stored coefficient `k_`
    /// @note const-qualified (like get_domain() / get_radii()) so it can be called on const operators.
    const grid::Grid4DDataScalar< ScalarType >& k_grid_data() const { return k_; }


    /// @brief Read-only access to the distributed domain this operator was built on.
    /// @return const reference to the stored domain
    const grid::shell::DistributedDomain& get_domain() const
    {
        return this->domain_;
    }


    /// @brief Returns the radii grid data (returned by value).
    grid::Grid2DDataScalar< ScalarT > get_radii() const
    {
        return this->radii_;
    }


    /// @brief Getter for the grid (geometry) member, returned by value.
    /// @note const-qualified (like get_radii()) so it can be called on const operators.
    grid::Grid3DDataVec< ScalarT, 3 > get_grid() const { return grid_; }


    /// @brief Tests whether the boundary mask entry at the given index carries `flag`.
    /// @param local_subdomain_id first index into the mask
    /// @param x_cell             second index into the mask
    /// @param y_cell             third index into the mask
    /// @param r_cell             fourth index into the mask
    /// @param flag               flag to test for
    /// @return result of util::has_flag() for the mask entry and `flag`
    KOKKOS_INLINE_FUNCTION
    bool has_flag(
        const int                      local_subdomain_id,
        const int                      x_cell,
        const int                      y_cell,
        const int                      r_cell,
        grid::shell::ShellBoundaryFlag flag ) const
    {
        const auto& mask_entry = mask_( local_subdomain_id, x_cell, y_cell, r_cell );
        return util::has_flag( mask_entry, flag );
    }


    /// @brief Sets the stored-matrix mode and, unless the mode is Off, (re)creates the local matrix storage.
    /// @param operator_stored_matrix_mode new mode
    /// @param level_range                 forwarded to the LocalMatrixStorage constructor
    /// @param GCAElements                 forwarded to the LocalMatrixStorage constructor
    /// @note Switching to Off leaves an existing storage object untouched.
    void set_stored_matrix_mode(
        linalg::OperatorStoredMatrixMode     operator_stored_matrix_mode,
        int                                  level_range,
        grid::Grid4DDataScalar< ScalarType > GCAElements )
    {
        operator_stored_matrix_mode_ = operator_stored_matrix_mode;

        // Nothing to allocate when matrices are not stored.
        if ( operator_stored_matrix_mode_ == linalg::OperatorStoredMatrixMode::Off )
        {
            return;
        }

        local_matrix_storage_ =
            LocalMatrixStorage( domain_, operator_stored_matrix_mode_, level_range, GCAElements );
    }


    /// @brief Getter for the current stored-matrix mode.
    /// @note const-qualified so it can be called on const operators.
    linalg::OperatorStoredMatrixMode get_stored_matrix_mode() const { return operator_stored_matrix_mode_; }


    /// @brief Writes `mat` into the local matrix storage for one wedge of one cell.
    ///
    /// Asserts (KOKKOS_ASSERT) that the stored-matrix mode is not Off.
    ///
    /// @param local_subdomain_id subdomain index of the cell
    /// @param x_cell             cell index (x)
    /// @param y_cell             cell index (y)
    /// @param r_cell             cell index (radial)
    /// @param wedge              wedge index within the cell
    /// @param mat                matrix to store
    KOKKOS_INLINE_FUNCTION
    void set_local_matrix(
        const int                                                    local_subdomain_id,
        const int                                                    x_cell,
        const int                                                    y_cell,
        const int                                                    r_cell,
        const int                                                    wedge,
        const dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim >& mat ) const
    {
        KOKKOS_ASSERT( linalg::OperatorStoredMatrixMode::Off != operator_stored_matrix_mode_ );

        local_matrix_storage_.set_matrix(
            local_subdomain_id,
            x_cell,
            y_cell,
            r_cell,
            wedge,
            mat );
    }


    /// @brief Retrieves the local matrix of one wedge of one cell.
    ///
    /// With the stored-matrix mode Off the matrix is assembled on the fly. Otherwise it is loaded
    /// from the storage; if the storage has no matrix at that index, Kokkos::abort() is called.
    ///
    /// @param local_subdomain_id subdomain index of the cell
    /// @param x_cell             cell index (x)
    /// @param y_cell             cell index (y)
    /// @param r_cell             cell index (radial)
    /// @param wedge              wedge index within the cell
    /// @return the stored or freshly assembled local matrix
    KOKKOS_INLINE_FUNCTION
    dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_local_matrix(
        const int local_subdomain_id,
        const int x_cell,
        const int y_cell,
        const int r_cell,
        const int wedge ) const
    {
        const bool matrices_are_stored = operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off;

        // No storage in use: assemble on the fly.
        if ( !matrices_are_stored )
        {
            return assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
        }

        // Storage in use: the requested matrix must exist.
        const bool matrix_available =
            local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
        if ( !matrix_available )
        {
            Kokkos::abort( "No matrix found at that spatial index." );
        }

        return local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, wedge );
    }


    /// @brief Applies the operator: dst = A * src (Replace mode) or dst += A * src (otherwise).
    ///
    /// Steps: validate extents, optionally zero `dst`, run operator() over all cells of the domain,
    /// and - if the communication mode is CommunicateAdditively - exchange and reduce the
    /// subdomain boundary data of `dst`.
    ///
    /// @param src input vector
    /// @param dst output vector
    /// @throws std::runtime_error if the extents of src and dst differ, or if the x/y extents of src
    ///         differ from those of the grid. Validation happens before `dst` is modified.
    void apply_impl( const SrcVectorType& src, DstVectorType& dst )
    {
        util::Timer timer_apply( "epsilon_divdiv_apply" );

        // The kernel (operator()) reads/writes through these members.
        src_ = src.grid_data();
        dst_ = dst.grid_data();

        // Validate first so that an invalid call does not clobber dst.
        if ( src_.extent( 0 ) != dst_.extent( 0 ) || src_.extent( 1 ) != dst_.extent( 1 ) ||
             src_.extent( 2 ) != dst_.extent( 2 ) || src_.extent( 3 ) != dst_.extent( 3 ) )
        {
            throw std::runtime_error( "EpsilonDivDiv: src/dst mismatch" );
        }

        if ( src_.extent( 1 ) != grid_.extent( 1 ) || src_.extent( 2 ) != grid_.extent( 2 ) )
        {
            throw std::runtime_error( "EpsilonDivDiv: src/grid mismatch" );
        }

        if ( operator_apply_mode_ == linalg::OperatorApplyMode::Replace )
        {
            assign( dst, 0 );
        }

        // Cell-wise kernel; the fence makes sure the kernel timer measures the full execution.
        util::Timer timer_kernel( "epsilon_divdiv_kernel" );
        Kokkos::parallel_for( "matvec", grid::shell::local_domain_md_range_policy_cells( domain_ ), *this );
        Kokkos::fence();
        timer_kernel.stop();

        if ( operator_communication_mode_ == linalg::OperatorCommunicationMode::CommunicateAdditively )
        {
            util::Timer timer_comm( "epsilon_divdiv_comm" );

            communication::shell::pack_send_and_recv_local_subdomain_boundaries(
                domain_, dst_, send_buffers_, recv_buffers_ );
            communication::shell::unpack_and_reduce_local_subdomain_boundaries( domain_, dst_, recv_buffers_ );
        }
    }


    /// @brief Kernel body: applies the operator on one hex cell (both wedges) and scatters the
    /// result into dst_ with atomic adds.
    ///
    /// Two paths:
    /// - stored-matrix mode != Off: the 18x18 wedge matrices are loaded from the storage (or, in
    ///   Selective mode, assembled if not both wedges are stored), optionally reduced to their
    ///   diagonal, and multiplied with the gathered local src coefficients.
    /// - stored-matrix mode == Off: matrix-free evaluation per component pair (dimi, dimj) via
    ///   assemble_trial_test_vecs() and fused_local_mv().
    ///
    /// @param local_subdomain_id subdomain index of the cell
    /// @param x_cell             cell index (x)
    /// @param y_cell             cell index (y)
    /// @param r_cell             cell index (radial)
    KOKKOS_INLINE_FUNCTION void


        operator()( const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell ) const

    {

        // If we have stored local matrices, use them.

        // It's the user's responsibility to write meaningful matrices via set_local_matrix()

        // We probably never want to assemble local matrices with DCA and store,

        // so GCA should be the actor storing matrices.

        // use stored matrices (at least on some elements)

        if ( operator_stored_matrix_mode_ != linalg::OperatorStoredMatrixMode::Off )

        {

            // One local matrix per wedge. If the mode is neither Full nor Selective, A keeps this initial value.
            dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > A[num_wedges_per_hex_cell] = { 0 };


            if ( operator_stored_matrix_mode_ == linalg::OperatorStoredMatrixMode::Full )

            {

                // Full: both wedge matrices are read from the storage without an existence check.
                A[0] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );

                A[1] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );

            }

            else if ( operator_stored_matrix_mode_ == linalg::OperatorStoredMatrixMode::Selective )

            {

                // Selective: use the stored matrices only if both wedges of this cell are stored.
                if ( local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 ) &&

                     local_matrix_storage_.has_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 ) )

                {

                    A[0] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );

                    A[1] = local_matrix_storage_.get_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );

                }

                else

                {

                    // Fall back to on-the-fly assembly (which applies treat_boundary_ itself).
                    // Kokkos::abort("Matrix not found.");

                    A[0] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 0 );

                    A[1] = assemble_local_matrix( local_subdomain_id, x_cell, y_cell, r_cell, 1 );

                }

            }

            // BCs are applied by GCA ... to be discussed for free-slip
            // NOTE(review): treat_boundary_ is not consulted in this branch for matrices loaded from storage.


            if ( diagonal_ )

            {

                A[0] = A[0].diagonal();

                A[1] = A[1].diagonal();

            }


            // Gather the local src coefficients: component dimj occupies entries
            // [dimj * num_nodes_per_wedge, (dimj + 1) * num_nodes_per_wedge) of the per-wedge vector.
            dense::Vec< ScalarT, 18 > src[num_wedges_per_hex_cell];

            for ( int dimj = 0; dimj < 3; dimj++ )

            {

                dense::Vec< ScalarT, 6 > src_d[num_wedges_per_hex_cell];

                extract_local_wedge_vector_coefficients(

                    src_d, local_subdomain_id, x_cell, y_cell, r_cell, dimj, src_ );


                for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )

                {

                    for ( int i = 0; i < num_nodes_per_wedge; i++ )

                    {

                        src[wedge]( dimj * num_nodes_per_wedge + i ) = src_d[wedge]( i );

                    }

                }

            }


            dense::Vec< ScalarT, LocalMatrixDim > dst[num_wedges_per_hex_cell];


            // Local matvec per wedge.
            dst[0] = A[0] * src[0];

            dst[1] = A[1] * src[1];


            // Scatter: split the local result by component and add it atomically to dst_.
            for ( int dimi = 0; dimi < 3; dimi++ )

            {

                dense::Vec< ScalarT, 6 > dst_d[num_wedges_per_hex_cell];

                dst_d[0] = dst[0].template slice< 6 >( dimi * num_nodes_per_wedge );

                dst_d[1] = dst[1].template slice< 6 >( dimi * num_nodes_per_wedge );


                atomically_add_local_wedge_vector_coefficients(

                    dst_, local_subdomain_id, x_cell, y_cell, r_cell, dimi, dst_d );

            }

        }

        else

        {

            // Gather surface points for each wedge.

            dense::Vec< ScalarT, 3 > wedge_phy_surf[num_wedges_per_hex_cell][num_nodes_per_wedge_surface] = {};

            wedge_surface_physical_coords( wedge_phy_surf, grid_, local_subdomain_id, x_cell, y_cell );


            // Gather wedge radii.

            const ScalarT r_1 = radii_( local_subdomain_id, r_cell );

            const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );


            // Nodal coefficient values for both wedges of this cell.
            dense::Vec< ScalarT, 6 > k_local_hex[num_wedges_per_hex_cell];

            extract_local_wedge_scalar_coefficients( k_local_hex, local_subdomain_id, x_cell, y_cell, r_cell, k_ );


            // Offsets of the 8 hex nodes relative to (x_cell, y_cell, r_cell); hex node i = x + 2 * y + 4 * r.
            constexpr int hex_offset_x[8] = { 0, 1, 0, 1, 0, 1, 0, 1 };

            constexpr int hex_offset_y[8] = { 0, 0, 1, 1, 0, 0, 1, 1 };

            constexpr int hex_offset_r[8] = { 0, 0, 0, 0, 1, 1, 1, 1 };


            // FE dimensions: velocity coupling components of epsilon operator


            for ( int dimi = 0; dimi < 3; ++dimi )

            {

                for ( int dimj = 0; dimj < 3; ++dimj )

                {

                    // The diagonal of the operator only lives in the dimi == dimj blocks.
                    if ( diagonal_ and dimi != dimj )

                        continue;


                    ScalarType src_local_hex[8] = {};

                    ScalarType dst_local_hex[8] = {};


                    // Gather component dimi of src at the 8 hex nodes.
                    for ( int i = 0; i < 8; i++ )

                    {

                        src_local_hex[i] = src_(

                            local_subdomain_id,

                            x_cell + hex_offset_x[i],

                            y_cell + hex_offset_y[i],

                            r_cell + hex_offset_r[i],

                            dimi );

                    }


                    // spatial dimensions: quadrature points and wedge

                    for ( int q = 0; q < num_quad_points; q++ )

                    {

                        for ( int wedge = 0; wedge < num_wedges_per_hex_cell; wedge++ )

                        {

                            dense::Mat< ScalarType, VecDim, VecDim > sym_grad_i[num_nodes_per_wedge];

                            dense::Mat< ScalarType, VecDim, VecDim > sym_grad_j[num_nodes_per_wedge];

                            ScalarType                               jdet_keval_quadweight = 0;


                            // Symmetric gradients of all shape functions for components dimi / dimj at this
                            // quadrature point, plus the scalar weight |det J| * k * quadrature weight.
                            assemble_trial_test_vecs(

                                wedge,

                                quad_points[q],

                                quad_weights[q],

                                r_1,

                                r_2,

                                wedge_phy_surf,

                                k_local_hex,

                                dimi,

                                dimj,

                                sym_grad_i,

                                sym_grad_j,

                                jdet_keval_quadweight );


                            // Accumulate this wedge's / quadrature point's contribution into dst_local_hex.
                            fused_local_mv(

                                src_local_hex,

                                dst_local_hex,

                                wedge,

                                jdet_keval_quadweight,

                                sym_grad_i,

                                sym_grad_j,

                                dimi,

                                dimj,

                                r_cell );

                        }

                    }


                    // Scatter into component dimj of dst_; atomic because the hex nodes are shared
                    // between neighboring cells that are processed concurrently.
                    for ( int i = 0; i < 8; i++ )

                    {

                        Kokkos::atomic_add(

                            &dst_(

                                local_subdomain_id,

                                x_cell + hex_offset_x[i],

                                y_cell + hex_offset_y[i],

                                r_cell + hex_offset_r[i],

                                dimj ),

                            dst_local_hex[i] );

                    }

                }

            }

        }

    }


    /// @brief Evaluates, at one quadrature point of one wedge, the ingredients of the element integral.
    ///
    /// For every wedge node `n` the symmetric gradient (a VecDim x VecDim matrix) of its shape
    /// function is computed twice: once for vector component `dimi` (written to `sym_grad_i[n]`) and
    /// once for component `dimj` (written to `sym_grad_j[n]`). For dimi == dimj both arrays hold the
    /// same values. In addition the scalar factor
    /// `quad_weight * k(quad_point) * |det J|` is written to `jdet_keval_quadweight`, where k is
    /// interpolated from the nodal values `k_local_hex[wedge]` with the shape functions.
    ///
    /// The two arrays can then either be
    /// - contracted with the local src dofs and scaled per test function (fused local matvec, see
    ///   fused_local_mv()), or
    /// - combined via an outer product into the local matrix (see assemble_local_matrix()), e.g. for
    ///   on-the-fly assembly of the finest-grid local matrix during GCA/Galerkin coarsening.
    ///
    /// @param wedge                  wedge index within the hex cell
    /// @param quad_point             quadrature point
    /// @param quad_weight            quadrature weight belonging to `quad_point`
    /// @param r_1                    radius passed to jac() (first radial node of the cell)
    /// @param r_2                    radius passed to jac() (second radial node of the cell)
    /// @param wedge_phy_surf         physical surface coordinates of both wedges
    /// @param k_local_hex            nodal coefficient values of both wedges
    /// @param dimi                   vector component for `sym_grad_i`
    /// @param dimj                   vector component for `sym_grad_j`
    /// @param[out] sym_grad_i        per-node symmetric gradients for component `dimi`
    /// @param[out] sym_grad_j        per-node symmetric gradients for component `dimj`
    /// @param[out] jdet_keval_quadweight scalar integration factor
    KOKKOS_INLINE_FUNCTION void assemble_trial_test_vecs(
        const int                               wedge,
        const dense::Vec< ScalarType, VecDim >& quad_point,
        const ScalarType                        quad_weight,
        const ScalarT                           r_1,
        const ScalarT                           r_2,
        dense::Vec< ScalarT, 3 > ( *wedge_phy_surf )[3],
        const dense::Vec< ScalarT, 6 >*           k_local_hex,
        const int                                 dimi,
        const int                                 dimj,
        dense::Mat< ScalarType, VecDim, VecDim >* sym_grad_i,
        dense::Mat< ScalarType, VecDim, VecDim >* sym_grad_j,
        ScalarType&                               jdet_keval_quadweight ) const
    {
        // Jacobian of the wedge mapping, its determinant and inverse transpose.
        dense::Mat< ScalarType, VecDim, VecDim >       jacobian     = jac( wedge_phy_surf[wedge], r_1, r_2, quad_point );
        const auto                                     jacobian_det = jacobian.det();
        const dense::Mat< ScalarType, VecDim, VecDim > jacobian_inv_t = jacobian.inv_transposed( jacobian_det );

        // Single pass over the nodes: interpolate the coefficient and evaluate both gradient sets.
        ScalarType coefficient_at_qp = 0.0;
        for ( int node = 0; node < num_nodes_per_wedge; ++node )
        {
            coefficient_at_qp += shape( node, quad_point ) * k_local_hex[wedge]( node );

            sym_grad_i[node] = symmetric_grad( jacobian_inv_t, quad_point, node, dimi );
            sym_grad_j[node] = symmetric_grad( jacobian_inv_t, quad_point, node, dimj );
        }

        const auto jacobian_abs_det = Kokkos::abs( jacobian_det );
        jdet_keval_quadweight       = quad_weight * coefficient_at_qp * jacobian_abs_det;
    }


    /// @brief Assembles and returns the full 18x18 local matrix of one wedge of one cell.
    ///
    /// Entry (i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge) accumulates, over all
    /// quadrature points, `w * ( 2 * sym_grad_j[j] : sym_grad_i[i] - 2/3 * div_j * div_i )`, where
    /// the divergence of a shape function is read off the (dim, dim) entry of its symmetric gradient.
    ///
    /// If treat_boundary_ is set, entries coupling with boundary nodes are zeroed on the innermost
    /// (r_cell == 0, wedge nodes 0..2) and outermost (wedge nodes 3..5) radial cell layer; only the
    /// diagonal entries of the matrix are kept for those nodes.
    ///
    /// @param local_subdomain_id subdomain index of the cell
    /// @param x_cell             cell index (x)
    /// @param y_cell             cell index (y)
    /// @param r_cell             cell index (radial)
    /// @param wedge              wedge index within the cell
    /// @return the assembled local matrix
    KOKKOS_INLINE_FUNCTION
    dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > assemble_local_matrix(
        const int local_subdomain_id,
        const int x_cell,
        const int y_cell,
        const int r_cell,
        const int wedge ) const
    {
        // Surface coordinates of both wedges of the cell.
        // TODO gather this for only 1 wedge
        dense::Vec< ScalarT, 3 > wedge_phy_surf[num_wedges_per_hex_cell][num_nodes_per_wedge_surface] = {};
        wedge_surface_physical_coords( wedge_phy_surf, grid_, local_subdomain_id, x_cell, y_cell );

        // Radii bounding the cell.
        const ScalarT r_1 = radii_( local_subdomain_id, r_cell );
        const ScalarT r_2 = radii_( local_subdomain_id, r_cell + 1 );

        // Nodal coefficient values.
        dense::Vec< ScalarT, 6 > k_local_hex[num_wedges_per_hex_cell];
        extract_local_wedge_scalar_coefficients( k_local_hex, local_subdomain_id, x_cell, y_cell, r_cell, k_ );

        // Accumulate the element matrix block by block (dimi, dimj) and quadrature point by quadrature point.
        dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > A = {};
        for ( int dimi = 0; dimi < 3; ++dimi )
        {
            for ( int dimj = 0; dimj < 3; ++dimj )
            {
                for ( int q = 0; q < num_quad_points; q++ )
                {
                    dense::Mat< ScalarType, VecDim, VecDim > sym_grad_i[num_nodes_per_wedge];
                    dense::Mat< ScalarType, VecDim, VecDim > sym_grad_j[num_nodes_per_wedge];
                    ScalarType                               jdet_keval_quadweight = 0;

                    assemble_trial_test_vecs(
                        wedge,
                        quad_points[q],
                        quad_weights[q],
                        r_1,
                        r_2,
                        wedge_phy_surf,
                        k_local_hex,
                        dimi,
                        dimj,
                        sym_grad_i,
                        sym_grad_j,
                        jdet_keval_quadweight );

                    // Outer product of the two gradient arrays.
                    for ( int i = 0; i < num_nodes_per_wedge; i++ )
                    {
                        const int row = i + dimi * num_nodes_per_wedge;
                        for ( int j = 0; j < num_nodes_per_wedge; j++ )
                        {
                            const int col = j + dimj * num_nodes_per_wedge;
                            // The divergence part only needs the (dim, dim) entry of each gradient.
                            A( row, col ) +=
                                jdet_keval_quadweight *
                                ( 2 * sym_grad_j[j].double_contract( sym_grad_i[i] ) -
                                  2.0 / 3.0 * sym_grad_j[j]( dimj, dimj ) * sym_grad_i[i]( dimi, dimi ) );
                        }
                    }
                }
            }
        }

        if ( treat_boundary_ )
        {
            // Which boundaries does this cell touch?
            const bool at_cmb     = r_cell == 0;
            const bool at_surface = r_cell + 1 == radii_.extent( 1 ) - 1;

            // Entry-wise mask: 1 keeps an entry, 0 eliminates it.
            dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > boundary_mask;
            boundary_mask.fill( 1.0 );

            for ( int dimi = 0; dimi < 3; ++dimi )
            {
                for ( int dimj = 0; dimj < 3; ++dimj )
                {
                    for ( int i = 0; i < 6; i++ )
                    {
                        for ( int j = 0; j < 6; j++ )
                        {
                            // Diagonal entries of the matrix are never eliminated.
                            const bool is_matrix_diagonal = ( dimi == dimj ) && ( i == j );
                            // Inner boundary (CMB): wedge nodes 0..2.
                            const bool couples_cmb_node = at_cmb && ( i < 3 || j < 3 );
                            // Outer boundary (surface): wedge nodes 3..5.
                            const bool couples_surface_node = at_surface && ( i >= 3 || j >= 3 );

                            if ( !is_matrix_diagonal && ( couples_cmb_node || couples_surface_node ) )
                            {
                                boundary_mask( i + dimi * num_nodes_per_wedge, j + dimj * num_nodes_per_wedge ) =
                                    0.0;
                            }
                        }
                    }
                }
            }

            A.hadamard_product( boundary_mask );
        }

        return A;
    }


    /// @brief Fused local matvec for one wedge at one quadrature point.
    ///
    /// Adds to `dst_local_hex` the contribution of the (dimi, dimj) block, given the symmetric
    /// gradients of all shape functions (`sym_grad_i` for the src component `dimi`, `sym_grad_j` for
    /// the dst component `dimj`) and the scalar integration factor.
    ///
    /// Boundary handling (only if treat_boundary_ is set and the cell is in the first/last radial
    /// layer): the boundary nodes are left out of the full matvec, and for dimi == dimj only their
    /// diagonal entries are added back. With diagonal_ set, only diagonal entries are applied at all.
    ///
    /// @param src_local_hex         src values at the 8 hex nodes (index = x + 2 * y + 4 * r)
    /// @param dst_local_hex         accumulator for the 8 hex nodes
    /// @param wedge                 wedge index within the hex cell
    /// @param jdet_keval_quadweight scalar integration factor from assemble_trial_test_vecs()
    /// @param sym_grad_i            per-node symmetric gradients for component `dimi`
    /// @param sym_grad_j            per-node symmetric gradients for component `dimj`
    /// @param dimi                  src vector component
    /// @param dimj                  dst vector component
    /// @param r_cell                radial cell index (used to detect the boundary layers)
    KOKKOS_INLINE_FUNCTION void fused_local_mv(
        ScalarType                      src_local_hex[8],
        ScalarType                      dst_local_hex[8],
        const int                       wedge,
        const ScalarType                jdet_keval_quadweight,
        dense::Mat< ScalarType, 3, 3 >* sym_grad_i,
        dense::Mat< ScalarType, 3, 3 >* sym_grad_j,
        const int                       dimi,
        const int                       dimj,
        int                             r_cell ) const
    {
        constexpr int offset_x[2][6] = { { 0, 1, 0, 0, 1, 0 }, { 1, 0, 1, 1, 0, 1 } };
        constexpr int offset_y[2][6] = { { 0, 0, 1, 0, 0, 1 }, { 1, 1, 0, 1, 1, 0 } };
        constexpr int offset_r[2][6] = { { 0, 0, 0, 1, 1, 1 }, { 0, 0, 0, 1, 1, 1 } };

        // Map every wedge-local node to its position in the 8-entry hex arrays.
        int hex_node[num_nodes_per_wedge];
        for ( int node = 0; node < num_nodes_per_wedge; node++ )
        {
            hex_node[node] = 4 * offset_r[wedge][node] + 2 * offset_y[wedge][node] + offset_x[wedge][node];
        }

        const bool at_cmb     = r_cell == 0;
        const bool at_surface = r_cell + 1 == radii_.extent( 1 ) - 1;

        // Node ranges are only shifted for the full (non-diagonal) matvec. The CMB takes precedence
        // if a cell touches both boundaries.
        const bool skip_cmb_nodes     = !diagonal_ && treat_boundary_ && at_cmb;
        const bool skip_surface_nodes = !diagonal_ && treat_boundary_ && !at_cmb && at_surface;
        // at the core-mantle boundary, the lower-indexed (boundary) dofs are excluded
        const int cmb_shift = skip_cmb_nodes ? 3 : 0;
        // at the surface boundary, the higher-indexed (boundary) dofs are excluded
        const int surface_shift = skip_surface_nodes ? 3 : 0;

        if ( !diagonal_ )
        {
            const int first_node = 0 + cmb_shift;
            const int end_node   = num_nodes_per_wedge - surface_shift;

            // Trial side: gradient and divergence of the local src function at this quadrature
            // point (dot of the trial vector with the src dofs).
            dense::Mat< ScalarType, 3, 3 > grad_u;
            grad_u.fill( 0.0 );
            ScalarType divu = 0.0;

            for ( int col = first_node; col < end_node; col++ )
            {
                grad_u = grad_u + sym_grad_i[col] * src_local_hex[hex_node[col]];
                divu += sym_grad_i[col]( dimi, dimi ) * src_local_hex[hex_node[col]];
            }

            // Test side: one contribution per row of the local matrix; accumulated because other
            // wedges / quadrature points add to the same dofs.
            for ( int row = first_node; row < end_node; row++ )
            {
                // for the div, we just extract the component from the gradient
                dst_local_hex[hex_node[row]] +=
                    jdet_keval_quadweight * ( 2 * ( sym_grad_j[row] ).double_contract( grad_u ) -
                                              2.0 / 3.0 * sym_grad_j[row]( dimj, dimj ) * divu );
            }
        }

        // Dirichlet DoFs are only to be eliminated on diagonal blocks of epsilon
        const bool add_diagonal_entries =
            diagonal_ || ( dimi == dimj && ( treat_boundary_ && ( at_cmb || at_surface ) ) );

        if ( add_diagonal_entries )
        {
            // The shifts are swapped here so that exactly the nodes skipped above are visited
            // (all nodes if diagonal_ is set, since both shifts are zero then).
            for ( int node = 0 + surface_shift; node < num_nodes_per_wedge - cmb_shift; node++ )
            {
                const auto grad_u_diag = sym_grad_i[node] * src_local_hex[hex_node[node]];
                const auto div_u_diag  = sym_grad_i[node]( dimi, dimi ) * src_local_hex[hex_node[node]];

                dst_local_hex[hex_node[node]] +=
                    jdet_keval_quadweight * ( 2 * ( sym_grad_j[node] ).double_contract( grad_u_diag ) -
                                              2.0 / 3.0 * sym_grad_j[node]( dimj, dimj ) * div_u_diag );
            }
        }
    }


};


// Compile-time check that the operator satisfies the linalg::GCACapable requirement for both
// scalar types it is instantiated with here.
static_assert( linalg::GCACapable< EpsilonDivDiv< float > > );

static_assert( linalg::GCACapable< EpsilonDivDiv< double > > );


} // namespace terra::fe::wedge::operators::shell

terra::communication::shell::SubdomainNeighborhoodSendRecvBuffer
Send and receive buffers for all process-local subdomain boundaries.
Definition communication.hpp:56

terra::fe::wedge::operators::shell::EpsilonDivDiv
Definition epsilon_divdiv.hpp:19

terra::fe::wedge::operators::shell::EpsilonDivDiv::operator()
void operator()(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell) const
Definition epsilon_divdiv.hpp:218

terra::fe::wedge::operators::shell::EpsilonDivDiv::set_stored_matrix_mode
void set_stored_matrix_mode(linalg::OperatorStoredMatrixMode operator_stored_matrix_mode, int level_range, grid::Grid4DDataScalar< ScalarType > GCAElements)
allocates memory for the local matrices
Definition epsilon_divdiv.hpp:122

terra::fe::wedge::operators::shell::EpsilonDivDiv::get_stored_matrix_mode
linalg::OperatorStoredMatrixMode get_stored_matrix_mode()
Definition epsilon_divdiv.hpp:137

terra::fe::wedge::operators::shell::EpsilonDivDiv::has_flag
bool has_flag(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, grid::shell::ShellBoundaryFlag flag) const
Getter for mask member.
Definition epsilon_divdiv.hpp:113

terra::fe::wedge::operators::shell::EpsilonDivDiv::LocalMatrixDim
static constexpr int LocalMatrixDim
Definition epsilon_divdiv.hpp:24

terra::fe::wedge::operators::shell::EpsilonDivDiv::assemble_trial_test_vecs
void assemble_trial_test_vecs(const int wedge, const dense::Vec< ScalarType, VecDim > &quad_point, const ScalarType quad_weight, const ScalarT r_1, const ScalarT r_2, dense::Vec< ScalarT, 3 >(*wedge_phy_surf)[3], const dense::Vec< ScalarT, 6 > *k_local_hex, const int dimi, const int dimj, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_i, dense::Mat< ScalarType, VecDim, VecDim > *sym_grad_j, ScalarType &jdet_keval_quadweight) const
: For both trial and test space this function sets up a vector: each vector element holds the symmetr...
Definition epsilon_divdiv.hpp:394

terra::fe::wedge::operators::shell::EpsilonDivDiv::ScalarType
ScalarT ScalarType
Definition epsilon_divdiv.hpp:23

terra::fe::wedge::operators::shell::EpsilonDivDiv::get_radii
grid::Grid2DDataScalar< ScalarT > get_radii() const
Getter for radii member.
Definition epsilon_divdiv.hpp:106

terra::fe::wedge::operators::shell::EpsilonDivDiv::EpsilonDivDiv
EpsilonDivDiv(const grid::shell::DistributedDomain &domain, const grid::Grid3DDataVec< ScalarT, 3 > &grid, const grid::Grid2DDataScalar< ScalarT > &radii, const grid::Grid4DDataScalar< grid::shell::ShellBoundaryFlag > &mask, const grid::Grid4DDataScalar< ScalarT > &k, bool treat_boundary, bool diagonal, linalg::OperatorApplyMode operator_apply_mode=linalg::OperatorApplyMode::Replace, linalg::OperatorCommunicationMode operator_communication_mode=linalg::OperatorCommunicationMode::CommunicateAdditively, linalg::OperatorStoredMatrixMode operator_stored_matrix_mode=linalg::OperatorStoredMatrixMode::Off)
Definition epsilon_divdiv.hpp:58

terra::fe::wedge::operators::shell::EpsilonDivDiv::fused_local_mv
void fused_local_mv(ScalarType src_local_hex[8], ScalarType dst_local_hex[8], const int wedge, const ScalarType jdet_keval_quadweight, dense::Mat< ScalarType, 3, 3 > *sym_grad_i, dense::Mat< ScalarType, 3, 3 > *sym_grad_j, const int dimi, const int dimj, int r_cell) const
Definition epsilon_divdiv.hpp:545

terra::fe::wedge::operators::shell::EpsilonDivDiv::set_operator_apply_and_communication_modes
void set_operator_apply_and_communication_modes(const linalg::OperatorApplyMode operator_apply_mode, const linalg::OperatorCommunicationMode operator_communication_mode)
Definition epsilon_divdiv.hpp:88

terra::fe::wedge::operators::shell::EpsilonDivDiv::get_domain
const grid::shell::DistributedDomain & get_domain() const
Getter for domain member.
Definition epsilon_divdiv.hpp:103

terra::fe::wedge::operators::shell::EpsilonDivDiv::get_grid
grid::Grid3DDataVec< ScalarT, 3 > get_grid()
Getter for grid member.
Definition epsilon_divdiv.hpp:109

terra::fe::wedge::operators::shell::EpsilonDivDiv::set_local_matrix
void set_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, const dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > &mat) const
Set the local matrix stored in the operator.
Definition epsilon_divdiv.hpp:141

terra::fe::wedge::operators::shell::EpsilonDivDiv::assemble_local_matrix
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > assemble_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Assembles the local matrix and returns it for a given element, wedge, and vectorial component (determin...
Definition epsilon_divdiv.hpp:431

terra::fe::wedge::operators::shell::EpsilonDivDiv::apply_impl
void apply_impl(const SrcVectorType &src, DstVectorType &dst)
Definition epsilon_divdiv.hpp:179

terra::fe::wedge::operators::shell::EpsilonDivDiv::get_local_matrix
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_local_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix: if there are stored local matrices, the desired local matrix is loaded and r...
Definition epsilon_divdiv.hpp:157

terra::fe::wedge::operators::shell::EpsilonDivDiv::Grid4DDataLocalMatrices
terra::grid::Grid4DDataMatrices< ScalarType, LocalMatrixDim, LocalMatrixDim, 2 > Grid4DDataLocalMatrices
Definition epsilon_divdiv.hpp:25

terra::fe::wedge::operators::shell::EpsilonDivDiv::set_diagonal
void set_diagonal(bool v)
Setter for diagonal member.
Definition epsilon_divdiv.hpp:97

terra::fe::wedge::operators::shell::EpsilonDivDiv::k_grid_data
const grid::Grid4DDataScalar< ScalarType > & k_grid_data()
Getter for coefficient.
Definition epsilon_divdiv.hpp:100

terra::grid::shell::DistributedDomain
Parallel data structure organizing the thick spherical shell metadata for distributed (MPI parallel) ...
Definition spherical_shell.hpp:2518

terra::linalg::VectorQ1Vec
Static assertion: VectorQ1Scalar satisfies VectorLike concept.
Definition vector_q1.hpp:168

terra::linalg::VectorQ1Vec::grid_data
const grid::Grid4DDataVec< ScalarType, VecDim > & grid_data() const
Get const reference to grid data.
Definition vector_q1.hpp:288

terra::linalg::solvers::LocalMatrixStorage< ScalarType, LocalMatrixDim >

terra::linalg::solvers::LocalMatrixStorage::has_matrix
bool has_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Checks for presence of a local matrix for a certain element.
Definition local_matrix_storage.hpp:223

terra::linalg::solvers::LocalMatrixStorage::get_matrix
dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > get_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge) const
Retrieves the local matrix: if there are stored local matrices, the desired local matrix is loaded and r...
Definition local_matrix_storage.hpp:175

terra::linalg::solvers::LocalMatrixStorage::set_matrix
void set_matrix(const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int wedge, dense::Mat< ScalarT, LocalMatrixDim, LocalMatrixDim > mat) const
Set the local matrix stored in the operator.
Definition local_matrix_storage.hpp:118

terra::util::Timer
Timer supporting RAII scope or manual stop.
Definition timer.hpp:342

terra::util::Timer::stop
void stop()
Stop the timer and record elapsed time.
Definition timer.hpp:364

communication.hpp

terra::linalg::GCACapable
Concept for types that can be used as Galerkin coarse-grid operators in a multigrid hierarchy....
Definition operator.hpp:81

integrands.hpp

kernel_helpers.hpp

local_matrix_storage.hpp

terra::communication::shell::unpack_and_reduce_local_subdomain_boundaries
void unpack_and_reduce_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers, CommunicationReduction reduction=CommunicationReduction::SUM)
Unpacks and reduces local subdomain boundaries.
Definition communication.hpp:672

terra::communication::shell::pack_send_and_recv_local_subdomain_boundaries
void pack_send_and_recv_local_subdomain_boundaries(const grid::shell::DistributedDomain &domain, const GridDataType &data, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_send_buffers, SubdomainNeighborhoodSendRecvBuffer< typename GridDataType::value_type, grid::grid_data_vec_dim< GridDataType >() > &boundary_recv_buffers)
Packs, sends and recvs local subdomain boundaries using two sets of buffers.
Definition communication.hpp:242

terra::fe::wedge::operators::shell
Definition boundary_mass.hpp:14

terra::fe::wedge::quadrature::quad_felippa_3x2_quad_weights
constexpr void quad_felippa_3x2_quad_weights(T(&quad_weights)[quad_felippa_3x2_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:93

terra::fe::wedge::quadrature::quad_felippa_3x2_num_quad_points
constexpr int quad_felippa_3x2_num_quad_points
Definition wedge/quadrature/quadrature.hpp:66

terra::fe::wedge::quadrature::quad_felippa_3x2_quad_points
constexpr void quad_felippa_3x2_quad_points(dense::Vec< T, 3 >(&quad_points)[quad_felippa_3x2_num_quad_points])
Definition wedge/quadrature/quadrature.hpp:70

terra::fe::wedge::num_nodes_per_wedge_surface
constexpr int num_nodes_per_wedge_surface
Definition kernel_helpers.hpp:6

terra::fe::wedge::symmetric_grad
constexpr dense::Mat< T, 3, 3 > symmetric_grad(const dense::Mat< T, 3, 3 > &J_inv_transposed, const dense::Vec< T, 3 > &quad_point, const int dof, const int dim)
Returns the symmetric gradient of the shape function of a dof at a quadrature point.
Definition integrands.hpp:685

terra::fe::wedge::wedge_surface_physical_coords
void wedge_surface_physical_coords(dense::Vec< T, 3 >(&wedge_surf_phy_coords)[num_wedges_per_hex_cell][num_nodes_per_wedge_surface], const grid::Grid3DDataVec< T, 3 > &lateral_grid, const int local_subdomain_id, const int x_cell, const int y_cell)
Extracts the (unit sphere) surface vertex coords of the two wedges of a hex cell.
Definition kernel_helpers.hpp:26

terra::fe::wedge::atomically_add_local_wedge_vector_coefficients
void atomically_add_local_wedge_vector_coefficients(const grid::Grid4DDataVec< T, VecDim > &global_coefficients, const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const dense::Vec< T, 6 > local_coefficients[2])
Performs an atomic add of the two local wedge coefficient vectors of a hex cell into the global coeff...
Definition kernel_helpers.hpp:465

terra::fe::wedge::num_wedges_per_hex_cell
constexpr int num_wedges_per_hex_cell
Definition kernel_helpers.hpp:5

terra::fe::wedge::extract_local_wedge_scalar_coefficients
void extract_local_wedge_scalar_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const grid::Grid4DDataScalar< T > &global_coefficients)
Extracts the local scalar coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:306

terra::fe::wedge::extract_local_wedge_vector_coefficients
void extract_local_wedge_vector_coefficients(dense::Vec< T, 6 >(&local_coefficients)[2], const int local_subdomain_id, const int x_cell, const int y_cell, const int r_cell, const int d, const grid::Grid4DDataVec< T, VecDim > &global_coefficients)
Extracts the local vector coefficients for the two wedges of a hex cell from the global coefficient v...
Definition kernel_helpers.hpp:356

terra::fe::wedge::num_nodes_per_wedge
constexpr int num_nodes_per_wedge
Definition kernel_helpers.hpp:7

terra::fe::wedge::shape
constexpr T shape(const int node_idx, const T xi, const T eta, const T zeta)
(Tensor-product) Shape function.
Definition integrands.hpp:146

terra::fe::wedge::jac
constexpr dense::Mat< T, 3, 3 > jac(const dense::Vec< T, 3 > &p1_phy, const dense::Vec< T, 3 > &p2_phy, const dense::Vec< T, 3 > &p3_phy, const T r_1, const T r_2, const T xi, const T eta, const T zeta)
Definition integrands.hpp:657

terra::grid::shell::ShellBoundaryFlag
ShellBoundaryFlag
FlagLike that indicates boundary types for the thick spherical shell.
Definition shell/bit_masks.hpp:12

terra::grid::shell::local_domain_md_range_policy_cells
Kokkos::MDRangePolicy< Kokkos::Rank< 4 > > local_domain_md_range_policy_cells(const DistributedDomain &distributed_domain)
Definition spherical_shell.hpp:2739

terra::grid::Grid4DDataMatrices
Kokkos::View< dense::Mat< ScalarType, Rows, Cols > ****[NumMatrices], Layout > Grid4DDataMatrices
Definition grid_types.hpp:173

terra::grid::Grid3DDataVec
Kokkos::View< ScalarType ***[VecDim], Layout > Grid3DDataVec
Definition grid_types.hpp:42

terra::grid::Grid4DDataScalar
Kokkos::View< ScalarType ****, Layout > Grid4DDataScalar
Definition grid_types.hpp:27

terra::grid::Grid2DDataScalar
Kokkos::View< ScalarType **, Layout > Grid2DDataScalar
Definition grid_types.hpp:21

terra::linalg::OperatorApplyMode
OperatorApplyMode
Modes for applying an operator to a vector.
Definition operator.hpp:30

terra::linalg::OperatorApplyMode::Replace
@ Replace
Overwrite the destination vector.

terra::linalg::OperatorStoredMatrixMode
OperatorStoredMatrixMode
Modes for applying stored matrices.
Definition operator.hpp:47

terra::linalg::OperatorStoredMatrixMode::Selective
@ Selective
Use stored matrices on selected, marked elements only, assemble on all others.

terra::linalg::OperatorStoredMatrixMode::Full
@ Full
Use stored matrices on all elements.

terra::linalg::OperatorStoredMatrixMode::Off
@ Off
Do not use stored matrices.

terra::linalg::OperatorCommunicationMode
OperatorCommunicationMode
Modes for communication during operator application.
Definition operator.hpp:40

terra::linalg::OperatorCommunicationMode::CommunicateAdditively
@ CommunicateAdditively
Communicate and add results.

terra::util::has_flag
constexpr bool has_flag(E mask_value, E flag) noexcept
Checks if a bitmask value contains a specific flag.
Definition bit_masking.hpp:43

operator.hpp

spherical_shell.hpp

terra::dense::Mat
Definition mat.hpp:10

terra::dense::Mat::fill
void fill(const T value)
Definition mat.hpp:201

terra::dense::Mat::diagonal
constexpr Mat diagonal() const
Definition mat.hpp:377

terra::dense::Mat::hadamard_product
Mat & hadamard_product(const Mat &mat)
Definition mat.hpp:213

terra::dense::Mat::double_contract
T double_contract(const Mat &mat)
Definition mat.hpp:226

terra::dense::Vec< ScalarT, 3 >

terra::grid::Grid4DDataVec
SoA (Structure-of-Arrays) 4D vector grid data.
Definition grid_types.hpp:51

terra::grid::Grid4DDataVec::extent
auto extent(int i) const
Definition grid_types.hpp:75

timer.hpp

vec.hpp

vector.hpp

vector_q1.hpp