  template <int dim>
  class Solution : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    virtual double value(const Point<dim> &p,
                         const unsigned int = 0) const override
    {
      double sum = 0;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
          sum +=
            std::exp(-x_minus_xi.norm_square() / (this->width * this->width));
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }

    virtual Tensor<1, dim> gradient(const Point<dim> &p,
                                    const unsigned int = 0) const override
    {
      Tensor<1, dim> sum;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];

          sum +=
            (-2 / (this->width * this->width) *
             std::exp(-x_minus_xi.norm_square() / (this->width * this->width)) *
             x_minus_xi);
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }
  };
This class implements a function where the scalar solution and its negative
gradient are collected together. This function is used when computing the
error of the HDG approximation and its implementation is to simply call the
value and gradient functions of the Solution class.
  template <int dim>
  class SolutionAndGradient : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    SolutionAndGradient()
      : Function<dim>(dim + 1)
    {}

    virtual void vector_value(const Point<dim> &p,
                              Vector<double>   &v) const override
    {
      AssertDimension(v.size(), dim + 1);
      Solution<dim>  solution;
      Tensor<1, dim> grad = solution.gradient(p);
      for (unsigned int d = 0; d < dim; ++d)
        v[d] = -grad[d];
      v[dim] = solution.value(p);
    }
  };
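As a quick usage check of the packing convention (a hypothetical snippet, not
part of the tutorial): components <code>0..dim-1</code> hold the negative
gradient and component @p dim holds the value itself.

@code
SolutionAndGradient<2> solution_and_gradient;
Vector<double>         v(2 + 1); // dim components for -grad u, one for u
solution_and_gradient.vector_value(Point<2>(0.25, 0.5), v);

// The last component must agree with the scalar solution:
Solution<2> exact;
AssertThrow(std::abs(v[2] - exact.value(Point<2>(0.25, 0.5))) < 1e-12,
            ExcInternalError());
@endcode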
Next comes the implementation of the convection velocity. As described in the
introduction, we choose a velocity field that is @f$(y, -x)@f$ in 2d and
@f$(y, -x, 1)@f$ in 3d. This gives a divergence-free velocity field.
  template <int dim>
  class ConvectionVelocity : public TensorFunction<1, dim>
  {
  public:
    ConvectionVelocity()
      : TensorFunction<1, dim>()
    {}

    virtual Tensor<1, dim> value(const Point<dim> &p) const override
    {
      Tensor<1, dim> convection;
      switch (dim)
        {
          case 1:
            convection[0] = 1;
            break;
          case 2:
            convection[0] = p[1];
            convection[1] = -p[0];
            break;
          case 3:
            convection[0] = p[1];
            convection[1] = -p[0];
            convection[2] = 1;
            break;
          default:
            Assert(false, ExcNotImplemented());
        }
      return convection;
    }
  };
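A one-line check that this field is indeed divergence-free, restating the
claim above:
@f[
\nabla \cdot \mathbf{c}
= \frac{\partial y}{\partial x} + \frac{\partial (-x)}{\partial y} = 0
\quad \text{in 2d},
\qquad
\nabla \cdot \mathbf{c}
= \frac{\partial y}{\partial x} + \frac{\partial (-x)}{\partial y}
  + \frac{\partial 1}{\partial z} = 0
\quad \text{in 3d}.
@f]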
The last function we implement is the right hand side for the manufactured
solution. It is very similar to @ref step_7 "step-7", with the exception that
we now have a convection term instead of the reaction term. Since the velocity
field is incompressible, i.e., @f$\nabla \cdot \mathbf{c} = 0@f$, the
advection term simply reads @f$\mathbf{c} \nabla u@f$.
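To see where the bracket in the code below comes from, apply the operator to a
single (unnormalized) Gaussian @f$G_i(\mathbf{x}) =
e^{-|\mathbf{x}-\mathbf{x}_i|^2/w^2}@f$; the common normalization constant of
the solution cancels since it multiplies both @f$u@f$ and @f$f@f$:
@f[
\nabla G_i = -\frac{2}{w^2}\,(\mathbf{x}-\mathbf{x}_i)\, G_i,
\qquad
-\Delta G_i + \mathbf{c}\cdot\nabla G_i
= \frac{2\,\text{dim} - 2\,\mathbf{c}\cdot(\mathbf{x}-\mathbf{x}_i)
        - 4\,|\mathbf{x}-\mathbf{x}_i|^2/w^2}{w^2}\; G_i.
@f]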
  template <int dim>
  class RightHandSide : public Function<dim>, protected SolutionBase<dim>
  {
  public:
    virtual double value(const Point<dim> &p,
                         const unsigned int = 0) const override
    {
      ConvectionVelocity<dim> convection_velocity;
      Tensor<1, dim> convection = convection_velocity.value(p);
      double sum = 0;
      for (unsigned int i = 0; i < this->n_source_centers; ++i)
        {
          const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];

          sum +=
            ((2 * dim - 2 * convection * x_minus_xi -
              4 * x_minus_xi.norm_square() / (this->width * this->width)) /
             (this->width * this->width) *
             std::exp(-x_minus_xi.norm_square() / (this->width * this->width)));
        }

      return sum /
             std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
    }
  };
<a name="step_51-TheHDGsolverclass"></a>
<h3>The HDG solver class</h3>

The HDG solution procedure follows closely that of @ref step_7 "step-7". The
major difference is the use of three different sets of DoFHandler and FE
objects, along with the corresponding matrices and vectors. We also use
WorkStream to enable a multithreaded local solution process which exploits the
embarrassingly parallel nature of the local solver. For WorkStream, we define
the local operations on a cell and a copy function into the global matrix and
vector. We do this both for the assembly (which is run twice, once when we
generate the system matrix and once when we compute the element-interior
solutions from the skeleton values) and for the postprocessing where we
extract a solution that converges at higher order.

  template <int dim>
  class HDG
  {
  public:
    enum RefinementMode
    {
      global_refinement,
      adaptive_refinement
    };

    HDG(const unsigned int degree, const RefinementMode refinement_mode);
    void run();

  private:
    void setup_system();
    void assemble_system(const bool reconstruct_trace = false);
    void solve();
    void postprocess();
    void refine_grid(const unsigned int cycle);
    void output_results(const unsigned int cycle);

Data for the assembly and solution of the primal variables.

    struct PerTaskData;
    struct ScratchData;

Post-processing the solution to obtain @f$u^*@f$ is an element-by-element
procedure; as such, we do not need to assemble any global data and do not
declare any 'task data' for WorkStream to use.

    struct PostProcessScratchData;

The following three functions are used by WorkStream to do the actual work of
the program.

    void assemble_system_one_cell(
      const typename DoFHandler<dim>::active_cell_iterator &cell,
      ScratchData                                          &scratch,
      PerTaskData                                          &task_data);

    void copy_local_to_global(const PerTaskData &data);

    void postprocess_one_cell(
      const typename DoFHandler<dim>::active_cell_iterator &cell,
      PostProcessScratchData                               &scratch,
      unsigned int                                         &empty_data);

    Triangulation<dim> triangulation;

The 'local' solutions are interior to each element. These represent the
primal solution field @f$u@f$ as well as the auxiliary field @f$\mathbf{q}@f$.

    const FESystem<dim> fe_local;
    DoFHandler<dim>     dof_handler_local;
    Vector<double>      solution_local;

The new finite element type and corresponding <code>DoFHandler</code> are used
for the global skeleton solution that couples the element-level local
solutions.

    const FE_FaceQ<dim> fe;
    DoFHandler<dim>     dof_handler;
    Vector<double>      solution;
    Vector<double>      system_rhs;

As stated in the introduction, HDG solutions can be post-processed to attain
superconvergence rates of @f$\mathcal{O}(h^{p+2})@f$. The post-processed
solution is a discontinuous finite element solution representing the primal
variable on the interior of each cell. We define a FE type of degree
@f$p+1@f$ to represent this post-processed solution, which we only use for
output after constructing it.

    const FE_DGQ<dim> fe_u_post;
    DoFHandler<dim>   dof_handler_u_post;
    Vector<double>    solution_u_post;

The degrees of freedom corresponding to the skeleton strongly enforce
Dirichlet boundary conditions, just as in a continuous Galerkin finite
element method. We can enforce the boundary conditions in an analogous manner
via an AffineConstraints object. In addition, hanging nodes are handled in
the same way as for continuous finite elements: For the face elements which
only define degrees of freedom on the face, this process sets the solution on
the refined side to coincide with the representation on the coarse side.

Note that for HDG, the elimination of hanging nodes is not the only
possibility: in terms of the HDG theory, one could also use the unknowns from
the refined side and express the local solution on the coarse side through
the trace values on the refined side. However, such a setup is not as easily
implemented in terms of deal.II loops and is not further analyzed here.

    AffineConstraints<double> constraints;

The usage of the ChunkSparseMatrix class is similar to the usual sparse
matrices: we need a sparsity pattern of type ChunkSparsityPattern and the
actual matrix object. When creating the sparsity pattern, we just have to
additionally pass the size of local blocks.

    ChunkSparsityPattern      sparsity_pattern;
    ChunkSparseMatrix<double> system_matrix;

Same as @ref step_7 "step-7":

    const RefinementMode refinement_mode;
    ConvergenceTable     convergence_table;
  };
<a name="step_51-TheHDGclassimplementation"></a>
<h3>The HDG class implementation</h3>

<a name="step_51-Constructor"></a>
<h4>Constructor</h4>
The constructor is similar to those in other examples, with the exception of
handling multiple DoFHandler and FiniteElement objects. Note that we create a
system of finite elements for the local DG part, including the gradient/flux
part and the scalar part.

  template <int dim>
  HDG<dim>::HDG(const unsigned int degree, const RefinementMode refinement_mode)
    : fe_local(FE_DGQ<dim>(degree) ^ dim, FE_DGQ<dim>(degree))
    , dof_handler_local(triangulation)
    , fe(degree)
    , dof_handler(triangulation)
    , fe_u_post(degree + 1)
    , dof_handler_u_post(triangulation)
    , refinement_mode(refinement_mode)
  {}

<a name="step_51-HDGsetup_system"></a>
<h4>HDG::setup_system</h4>
The system for an HDG solution is set up in an analogous manner to most of
the other tutorial programs. We are careful to distribute dofs with all of
our DoFHandler objects. The @p solution and @p system_matrix objects go with
the global skeleton solution.

  template <int dim>
  void HDG<dim>::setup_system()
  {
    dof_handler_local.distribute_dofs(fe_local);
    dof_handler.distribute_dofs(fe);
    dof_handler_u_post.distribute_dofs(fe_u_post);

    std::cout << "   Number of degrees of freedom: " << dof_handler.n_dofs()
              << std::endl;

    solution.reinit(dof_handler.n_dofs());
    system_rhs.reinit(dof_handler.n_dofs());

    solution_local.reinit(dof_handler_local.n_dofs());
    solution_u_post.reinit(dof_handler_u_post.n_dofs());

    constraints.clear();
    DoFTools::make_hanging_node_constraints(dof_handler, constraints);
    std::map<types::boundary_id, const Function<dim> *> boundary_functions;
    Solution<dim> solution_function;
    boundary_functions[0] = &solution_function;
    VectorTools::project_boundary_values(dof_handler,
                                         boundary_functions,
                                         QGauss<dim - 1>(fe.degree + 1),
                                         constraints);
    constraints.close();

When creating the chunk sparsity pattern, we first create the usual dynamic
sparsity pattern and then set the chunk size, which is equal to the number of
dofs on a face, when copying this into the final sparsity pattern.

    {
      DynamicSparsityPattern dsp(dof_handler.n_dofs());
      DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
      sparsity_pattern.copy_from(dsp, fe.n_dofs_per_face());
    }
    system_matrix.reinit(sparsity_pattern);
  }
<a name="step_51-HDGPerTaskData"></a>
<h4>HDG::PerTaskData</h4>
Next comes the definition of the local data structures for the parallel
assembly. The first structure @p PerTaskData contains the local vector and
matrix that are written into the global matrix, whereas the ScratchData
contains all data that we need for the local assembly. There is one variable
worth noting here, namely the boolean variable @p trace_reconstruct. As
mentioned in the introduction, we solve the HDG system in two steps. First,
we create a linear system for the skeleton system where we condense the local
part into it via the Schur complement @f$D-CA^{-1}B@f$. Then, we solve for
the local part using the skeleton solution. For these two steps, we need the
same matrices on the elements twice, which we want to compute by two assembly
steps. Since most of the code is similar, we do this with the same function
but only switch between the two based on a flag that we set when starting the
assembly. Since we need to pass this information on to the local worker
routines, we store it once in the task data.
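Spelled out, the block elimination that the two assembly passes implement is
the following (this merely restates the formulas from the introduction):
@f[
\begin{pmatrix} A & B \\ C & D \end{pmatrix}
\begin{pmatrix} U \\ \Lambda \end{pmatrix}
=
\begin{pmatrix} F \\ G \end{pmatrix}
\quad\Longrightarrow\quad
\left(D - C A^{-1} B\right) \Lambda = G - C A^{-1} F,
\qquad
U = A^{-1} \left(F - B \Lambda\right).
@f]
The first pass (@p trace_reconstruct = false) assembles the condensed system
for @f$\Lambda@f$; the second pass (@p trace_reconstruct = true) evaluates the
second formula element by element.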
  template <int dim>
  struct HDG<dim>::PerTaskData
  {
    FullMatrix<double>                   cell_matrix;
    Vector<double>                       cell_vector;
    std::vector<types::global_dof_index> dof_indices;

    bool trace_reconstruct;

    PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct)
      : cell_matrix(n_dofs, n_dofs)
      , cell_vector(n_dofs)
      , dof_indices(n_dofs)
      , trace_reconstruct(trace_reconstruct)
    {}
  };

<a name="step_51-HDGScratchData"></a>
<h4>HDG::ScratchData</h4>
@p ScratchData contains persistent data for each thread within WorkStream.
The FEValues, matrix, and vector objects should be familiar by now. There are
two objects that need to be discussed:
`std::vector<std::vector<unsigned int>> fe_local_support_on_face` and
`std::vector<std::vector<unsigned int>> fe_support_on_face`. These are used
to indicate whether or not the finite elements chosen have support (non-zero
values) on a given face of the reference cell for the local part associated
to @p fe_local and the skeleton part @p fe. We extract this information in
the constructor and store it once for all cells that we work on. Had we not
stored this information, we would be forced to assemble a large number of
zero terms on each cell, which would significantly slow the program.
  template <int dim>
  struct HDG<dim>::ScratchData
  {
    FEValues<dim>     fe_values_local;
    FEFaceValues<dim> fe_face_values_local;
    FEFaceValues<dim> fe_face_values;

    FullMatrix<double> ll_matrix;
    FullMatrix<double> lf_matrix;
    FullMatrix<double> fl_matrix;
    FullMatrix<double> tmp_matrix;
    Vector<double>     l_rhs;
    Vector<double>     tmp_rhs;

    std::vector<Tensor<1, dim>> q_phi;
    std::vector<double>         q_phi_div;
    std::vector<double>         u_phi;
    std::vector<Tensor<1, dim>> u_phi_grad;
    std::vector<double>         tr_phi;
    std::vector<double>         trace_values;

    std::vector<std::vector<unsigned int>> fe_local_support_on_face;
    std::vector<std::vector<unsigned int>> fe_support_on_face;

    ConvectionVelocity<dim> convection_velocity;
    RightHandSide<dim>      right_hand_side;
    const Solution<dim>     exact_solution;

    ScratchData(const FiniteElement<dim> &fe,
                const FiniteElement<dim> &fe_local,
                const QGauss<dim>        &quadrature_formula,
                const QGauss<dim - 1>    &face_quadrature_formula,
                const UpdateFlags         local_flags,
                const UpdateFlags         local_face_flags,
                const UpdateFlags         flags)
      : fe_values_local(fe_local, quadrature_formula, local_flags)
      , fe_face_values_local(fe_local,
                             face_quadrature_formula,
                             local_face_flags)
      , fe_face_values(fe, face_quadrature_formula, flags)
      , ll_matrix(fe_local.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , lf_matrix(fe_local.n_dofs_per_cell(), fe.n_dofs_per_cell())
      , fl_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , tmp_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
      , l_rhs(fe_local.n_dofs_per_cell())
      , tmp_rhs(fe_local.n_dofs_per_cell())
      , q_phi(fe_local.n_dofs_per_cell())
      , q_phi_div(fe_local.n_dofs_per_cell())
      , u_phi(fe_local.n_dofs_per_cell())
      , u_phi_grad(fe_local.n_dofs_per_cell())
      , tr_phi(fe.n_dofs_per_cell())
      , trace_values(face_quadrature_formula.size())
      , fe_local_support_on_face(GeometryInfo<dim>::faces_per_cell)
      , fe_support_on_face(GeometryInfo<dim>::faces_per_cell)
      , exact_solution()
    {
      for (const unsigned int face_no : GeometryInfo<dim>::face_indices())
        for (unsigned int i = 0; i < fe_local.n_dofs_per_cell(); ++i)
          if (fe_local.has_support_on_face(i, face_no))
            fe_local_support_on_face[face_no].push_back(i);

      for (const unsigned int face_no : GeometryInfo<dim>::face_indices())
        for (unsigned int i = 0; i < fe.n_dofs_per_cell(); ++i)
          if (fe.has_support_on_face(i, face_no))
            fe_support_on_face[face_no].push_back(i);
    }

    ScratchData(const ScratchData &sd)
      : fe_values_local(sd.fe_values_local.get_fe(),
                        sd.fe_values_local.get_quadrature(),
                        sd.fe_values_local.get_update_flags())
      , fe_face_values_local(sd.fe_face_values_local.get_fe(),
                             sd.fe_face_values_local.get_quadrature(),
                             sd.fe_face_values_local.get_update_flags())
      , fe_face_values(sd.fe_face_values.get_fe(),
                       sd.fe_face_values.get_quadrature(),
                       sd.fe_face_values.get_update_flags())
      , ll_matrix(sd.ll_matrix)
      , lf_matrix(sd.lf_matrix)
      , fl_matrix(sd.fl_matrix)
      , tmp_matrix(sd.tmp_matrix)
      , l_rhs(sd.l_rhs)
      , tmp_rhs(sd.tmp_rhs)
      , q_phi(sd.q_phi)
      , q_phi_div(sd.q_phi_div)
      , u_phi(sd.u_phi)
      , u_phi_grad(sd.u_phi_grad)
      , tr_phi(sd.tr_phi)
      , trace_values(sd.trace_values)
      , fe_local_support_on_face(sd.fe_local_support_on_face)
      , fe_support_on_face(sd.fe_support_on_face)
      , exact_solution()
    {}
  };
<a name="step_51-HDGPostProcessScratchData"></a>
<h4>HDG::PostProcessScratchData</h4>
@p PostProcessScratchData contains the data used by WorkStream when
post-processing the local solution @f$u^*@f$. It is similar to, but much
simpler than, @p ScratchData.

  template <int dim>
  struct HDG<dim>::PostProcessScratchData
  {
    FEValues<dim> fe_values_local;
    FEValues<dim> fe_values;

    std::vector<double>         u_values;
    std::vector<Tensor<1, dim>> u_gradients;
    FullMatrix<double>          cell_matrix;

    Vector<double> cell_rhs;
    Vector<double> cell_sol;

    PostProcessScratchData(const FiniteElement<dim> &fe,
                           const FiniteElement<dim> &fe_local,
                           const QGauss<dim>        &quadrature_formula,
                           const UpdateFlags         local_flags,
                           const UpdateFlags         flags)
      : fe_values_local(fe_local, quadrature_formula, local_flags)
      , fe_values(fe, quadrature_formula, flags)
      , u_values(quadrature_formula.size())
      , u_gradients(quadrature_formula.size())
      , cell_matrix(fe.n_dofs_per_cell(), fe.n_dofs_per_cell())
      , cell_rhs(fe.n_dofs_per_cell())
      , cell_sol(fe.n_dofs_per_cell())
    {}

    PostProcessScratchData(const PostProcessScratchData &sd)
      : fe_values_local(sd.fe_values_local.get_fe(),
                        sd.fe_values_local.get_quadrature(),
                        sd.fe_values_local.get_update_flags())
      , fe_values(sd.fe_values.get_fe(),
                  sd.fe_values.get_quadrature(),
                  sd.fe_values.get_update_flags())
      , u_values(sd.u_values)
      , u_gradients(sd.u_gradients)
      , cell_matrix(sd.cell_matrix)
      , cell_rhs(sd.cell_rhs)
      , cell_sol(sd.cell_sol)
    {}
  };
<a name="step_51-HDGassemble_system"></a>
<h4>HDG::assemble_system</h4>
The @p assemble_system function is similar to the one on @ref step_32
"step-32", where the quadrature formula and the update flags are set up, and
then <code>WorkStream</code> is used to do the work in a multi-threaded
manner. The @p trace_reconstruct input parameter is used to decide whether we
are solving for the global skeleton solution (false) or the local solution
(true).

One thing worth noting for the multi-threaded execution of assembly is the
fact that the local computations in `assemble_system_one_cell()` call into
BLAS and LAPACK functions if those are available in deal.II. Thus, the
underlying BLAS/LAPACK library must support calls from multiple threads at
the same time. Most implementations do support this, but some libraries need
to be built in a specific way to avoid problems. For example, OpenBLAS
compiled without multithreading inside the BLAS/LAPACK calls needs to be
built with a flag called `USE_LOCKING` set to true.

  template <int dim>
  void HDG<dim>::assemble_system(const bool trace_reconstruct)
  {
    const QGauss<dim>     quadrature_formula(fe.degree + 1);
    const QGauss<dim - 1> face_quadrature_formula(fe.degree + 1);

    const UpdateFlags local_flags(update_values | update_gradients |
                                  update_JxW_values | update_quadrature_points);
    const UpdateFlags local_face_flags(update_values);
    const UpdateFlags flags(update_values | update_normal_vectors |
                            update_quadrature_points | update_JxW_values);

    PerTaskData task_data(fe.n_dofs_per_cell(), trace_reconstruct);
    ScratchData scratch(fe,
                        fe_local,
                        quadrature_formula,
                        face_quadrature_formula,
                        local_flags,
                        local_face_flags,
                        flags);

    WorkStream::run(dof_handler.begin_active(),
                    dof_handler.end(),
                    *this,
                    &HDG<dim>::assemble_system_one_cell,
                    &HDG<dim>::copy_local_to_global,
                    scratch,
                    task_data);
  }

<a name="step_51-HDGassemble_system_one_cell"></a>
<h4>HDG::assemble_system_one_cell</h4>
The real work of the HDG program is done by @p assemble_system_one_cell.
Assembling the local matrices @f$A, B, C@f$ is done here, along with the
local contributions of the global matrix @f$D@f$.
  template <int dim>
  void HDG<dim>::assemble_system_one_cell(
    const typename DoFHandler<dim>::active_cell_iterator &cell,
    ScratchData                                          &scratch,
    PerTaskData                                          &task_data)
  {
    // Construct iterator for dof_handler_local for FEValues reinit function.
    const typename DoFHandler<dim>::active_cell_iterator loc_cell =
      cell->as_dof_handler_iterator(dof_handler_local);

    const unsigned int n_q_points =
      scratch.fe_values_local.get_quadrature().size();
    const unsigned int n_face_q_points =
      scratch.fe_face_values_local.get_quadrature().size();

    const unsigned int loc_dofs_per_cell =
      scratch.fe_values_local.get_fe().n_dofs_per_cell();

    const FEValuesExtractors::Vector fluxes(0);
    const FEValuesExtractors::Scalar scalar(dim);

    scratch.ll_matrix = 0;
    scratch.l_rhs     = 0;
    if (!task_data.trace_reconstruct)
      {
        scratch.lf_matrix     = 0;
        scratch.fl_matrix     = 0;
        task_data.cell_matrix = 0;
        task_data.cell_vector = 0;
      }
    scratch.fe_values_local.reinit(loc_cell);
We first compute the cell-interior contribution to @p ll_matrix (referred to
as matrix @f$A@f$ in the introduction) corresponding to local-local coupling,
as well as the local right-hand-side vector. We store the values at each
quadrature point for the basis functions, the right-hand-side value, and the
convection velocity, in order to have quick access to these fields.
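Written out, the integrand accumulated into @p ll_matrix at each quadrature
point in the loop below is
@f[
\mathbf{v}_i \cdot \mathbf{q}_j
- (\nabla \cdot \mathbf{v}_i)\, u_j
+ w_i\, (\nabla \cdot \mathbf{q}_j)
- (\mathbf{c} \cdot \nabla w_i)\, u_j,
@f]
where @f$(\mathbf{v}_i, w_i)@f$ and @f$(\mathbf{q}_j, u_j)@f$ denote the flux
and scalar parts of the @f$i@f$-th test and @f$j@f$-th trial function, stored
in the arrays @p q_phi, @p q_phi_div, @p u_phi, and @p u_phi_grad.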
    for (unsigned int q = 0; q < n_q_points; ++q)
      {
        const double rhs_value = scratch.right_hand_side.value(
          scratch.fe_values_local.quadrature_point(q));
        const Tensor<1, dim> convection = scratch.convection_velocity.value(
          scratch.fe_values_local.quadrature_point(q));
        const double JxW = scratch.fe_values_local.JxW(q);
        for (unsigned int k = 0; k < loc_dofs_per_cell; ++k)
          {
            scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k, q);
            scratch.q_phi_div[k] =
              scratch.fe_values_local[fluxes].divergence(k, q);
            scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k, q);
            scratch.u_phi_grad[k] =
              scratch.fe_values_local[scalar].gradient(k, q);
          }
        for (unsigned int i = 0; i < loc_dofs_per_cell; ++i)
          {
            for (unsigned int j = 0; j < loc_dofs_per_cell; ++j)
              scratch.ll_matrix(i, j) +=
                (scratch.q_phi[i] * scratch.q_phi[j] -
                 scratch.q_phi_div[i] * scratch.u_phi[j] +
                 scratch.u_phi[i] * scratch.q_phi_div[j] -
                 (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]) *
                JxW;
            scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW;
          }
      }

Face terms are assembled on all faces of all elements. This is in contrast to
more traditional DG methods, where each face is only visited once in the
assembly procedure.

    for (const auto face_no : cell->face_indices())
      {
        scratch.fe_face_values_local.reinit(loc_cell, face_no);
        scratch.fe_face_values.reinit(cell, face_no);

        // The already obtained @f$\hat{u}@f$ values are needed when
        // solving for the local variables.
        if (task_data.trace_reconstruct)
          scratch.fe_face_values.get_function_values(solution,
                                                     scratch.trace_values);

        for (unsigned int q = 0; q < n_face_q_points; ++q)
          {
            const double     JxW = scratch.fe_face_values.JxW(q);
            const Point<dim> quadrature_point =
              scratch.fe_face_values.quadrature_point(q);
            const Tensor<1, dim> normal =
              scratch.fe_face_values.normal_vector(q);
            const Tensor<1, dim> convection =
              scratch.convection_velocity.value(quadrature_point);
Here we compute the stabilization parameter discussed in the introduction:
since the diffusion is one and the diffusion length scale is set to 1/5, it
simply results in a contribution of 5 for the diffusion part and the
magnitude of convection through the element boundary in a centered scheme for
the convection part.

            const double tau_stab = (5. + std::abs(convection * normal));
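In formula form, with diffusion coefficient @f$\kappa = 1@f$ and diffusion
length scale @f$\ell = 1/5@f$ as just stated:
@f[
\tau = \frac{\kappa}{\ell} + |\mathbf{c} \cdot \mathbf{n}|
     = 5 + |\mathbf{c} \cdot \mathbf{n}|.
@f]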
We store the non-zero flux and scalar values, making use of the
support_on_face information we created in @p ScratchData.

            for (unsigned int k = 0;
                 k < scratch.fe_local_support_on_face[face_no].size();
                 ++k)
              {
                const unsigned int kk =
                  scratch.fe_local_support_on_face[face_no][k];
                scratch.q_phi[k] =
                  scratch.fe_face_values_local[fluxes].value(kk, q);
                scratch.u_phi[k] =
                  scratch.fe_face_values_local[scalar].value(kk, q);
              }

When @p trace_reconstruct=false, we are preparing to assemble the system for
the skeleton variable @f$\hat{u}@f$. If this is the case, we must assemble
all local matrices associated with the problem: local-local, local-face,
face-local, and face-face. The face-face matrix is stored as
@p TaskData::cell_matrix, so that it can be assembled into the global system
by @p copy_local_to_global.
            if (!task_data.trace_reconstruct)
              {
                for (unsigned int k = 0;
                     k < scratch.fe_support_on_face[face_no].size();
                     ++k)
                  scratch.tr_phi[k] = scratch.fe_face_values.shape_value(
                    scratch.fe_support_on_face[face_no][k], q);
                for (unsigned int i = 0;
                     i < scratch.fe_local_support_on_face[face_no].size();
                     ++i)
                  for (unsigned int j = 0;
                       j < scratch.fe_support_on_face[face_no].size();
                       ++j)
                    {
                      const unsigned int ii =
                        scratch.fe_local_support_on_face[face_no][i];
                      const unsigned int jj =
                        scratch.fe_support_on_face[face_no][j];
                      scratch.lf_matrix(ii, jj) +=
                        ((scratch.q_phi[i] * normal +
                          (convection * normal - tau_stab) *
                            scratch.u_phi[i]) *
                         scratch.tr_phi[j]) *
                        JxW;

                      // Note the sign of the face-local matrix: we negate
                      // it during assembly here so that we can use
                      // FullMatrix::mmult with addition when computing the
                      // Schur complement.
                      scratch.fl_matrix(jj, ii) -=
                        ((scratch.q_phi[i] * normal +
                          tau_stab * scratch.u_phi[i]) *
                         scratch.tr_phi[j]) *
                        JxW;
                    }

                for (unsigned int i = 0;
                     i < scratch.fe_support_on_face[face_no].size();
                     ++i)
                  for (unsigned int j = 0;
                       j < scratch.fe_support_on_face[face_no].size();
                       ++j)
                    {
                      const unsigned int ii =
                        scratch.fe_support_on_face[face_no][i];
                      const unsigned int jj =
                        scratch.fe_support_on_face[face_no][j];
                      task_data.cell_matrix(ii, jj) +=
                        ((convection * normal - tau_stab) *
                         scratch.tr_phi[i] * scratch.tr_phi[j]) *
                        JxW;
                    }

                if (cell->face(face_no)->at_boundary() &&
                    (cell->face(face_no)->boundary_id() == 1))
                  {
                    const double neumann_value =
                      -scratch.exact_solution.gradient(quadrature_point) *
                        normal +
                      convection * normal *
                        scratch.exact_solution.value(quadrature_point);
                    for (unsigned int i = 0;
                         i < scratch.fe_support_on_face[face_no].size();
                         ++i)
                      {
                        const unsigned int ii =
                          scratch.fe_support_on_face[face_no][i];
                        task_data.cell_vector(ii) +=
                          scratch.tr_phi[i] * neumann_value * JxW;
                      }
                  }
              }

This last term adds the contribution of the term
@f$\left<w,\tau u_h\right>_{\partial \mathcal{T}}@f$ to the local matrix. As
opposed to the face matrices above, we need it in both assembly stages.

            for (unsigned int i = 0;
                 i < scratch.fe_local_support_on_face[face_no].size();
                 ++i)
              for (unsigned int j = 0;
                   j < scratch.fe_local_support_on_face[face_no].size();
                   ++j)
                {
                  const unsigned int ii =
                    scratch.fe_local_support_on_face[face_no][i];
                  const unsigned int jj =
                    scratch.fe_local_support_on_face[face_no][j];
                  scratch.ll_matrix(ii, jj) +=
                    tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW;
                }

When @p trace_reconstruct=true, we are solving for the local solutions on an
element-by-element basis. The local right-hand-side is calculated by
replacing the basis functions @p tr_phi in the @p lf_matrix computation by
the computed values @p trace_values. Of course, the sign of the matrix is now
minus since we have moved everything to the other side of the equation.

            if (task_data.trace_reconstruct)
              for (unsigned int i = 0;
                   i < scratch.fe_local_support_on_face[face_no].size();
                   ++i)
                {
                  const unsigned int ii =
                    scratch.fe_local_support_on_face[face_no][i];
                  scratch.l_rhs(ii) -=
                    (scratch.q_phi[i] * normal +
                     scratch.u_phi[i] * (convection * normal - tau_stab)) *
                    scratch.trace_values[q] * JxW;
                }
          }
      }

Once assembly of all of the local contributions is complete, we must either:
(1) assemble the global system, or (2) compute the local solution values and
save them. In either case, the first step is to invert the local-local
matrix.

    scratch.ll_matrix.gauss_jordan();

For (1), we compute the Schur complement and add it to the @p cell_matrix,
matrix @f$D@f$ in the introduction.

    if (task_data.trace_reconstruct == false)
      {
        scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix);
        scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs);
        scratch.tmp_matrix.mmult(task_data.cell_matrix,
                                 scratch.lf_matrix,
                                 true);
        cell->get_dof_indices(task_data.dof_indices);
      }
For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs). Hence,
we multiply @p l_rhs by our already inverted local-local matrix and store the
result using the <code>set_dof_values</code> function.

    else
      {
        scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs);
        loc_cell->set_dof_values(scratch.tmp_rhs, solution_local);
      }
  }
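To connect the code to the algebra: after <code>gauss_jordan()</code>,
@p ll_matrix holds @f$A^{-1}@f$, and @p fl_matrix was assembled as @f$-C@f$.
The calls above therefore compute @f$-CA^{-1}@f$ (stored in @p tmp_matrix),
then @f$G - CA^{-1}F@f$ (@p cell_vector) and @f$D - CA^{-1}B@f$
(@p cell_matrix, using the addition flag of FullMatrix::mmult). The local
reconstruction branch evaluates @f$U = A^{-1}(F - B\Lambda)@f$, with
@f$-B\Lambda@f$ already folded into @p l_rhs during the face loop.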
<a name="step_51-HDGcopy_local_to_global"></a>
<h4>HDG::copy_local_to_global</h4>
If we are in the first step of the solution, i.e. @p trace_reconstruct=false,
then we assemble the local matrices into the global system.

  template <int dim>
  void HDG<dim>::copy_local_to_global(const PerTaskData &data)
  {
    if (data.trace_reconstruct == false)
      constraints.distribute_local_to_global(data.cell_matrix,
                                             data.cell_vector,
                                             data.dof_indices,
                                             system_matrix,
                                             system_rhs);
  }

<a name="step_51-HDGsolve"></a>
<h4>HDG::solve</h4>
The skeleton solution is solved for by using a BiCGStab solver with identity
preconditioner.

  template <int dim>
  void HDG<dim>::solve()
  {
    SolverControl solver_control(system_matrix.m() * 10,
                                 1e-11 * system_rhs.l2_norm());
    SolverBicgstab<Vector<double>> solver(solver_control);
    solver.solve(system_matrix, solution, system_rhs, PreconditionIdentity());

    std::cout << "   Number of BiCGStab iterations: "
              << solver_control.last_step() << std::endl;

    system_matrix.clear();
    sparsity_pattern.reinit(0, 0, 0, 1);

    constraints.distribute(solution);

Once we have solved for the skeleton solution, we can solve for the local
solutions in an element-by-element fashion. We do this by re-using the same
@p assemble_system function but switching @p trace_reconstruct to true.

    assemble_system(true);
  }

<a name="step_51-HDGpostprocess"></a>
<h4>HDG::postprocess</h4>

The postprocess method serves two purposes. First, we want to construct a
post-processed scalar variable in the element space of degree @f$p+1@f$ that
we hope will converge at order @f$p+2@f$. This is again an element-by-element
process and only involves the scalar solution as well as the gradient on the
local cell. To do this, we introduce the already defined scratch data
together with some update flags and run the work stream to do this in
parallel.

Secondly, we want to compute discretization errors just as we did in
@ref step_7 "step-7". The overall procedure is similar with calls to
VectorTools::integrate_difference. The difference is in how we compute the
errors for the scalar variable and the gradient variable. In @ref step_7
"step-7", we did this by computing @p L2_norm or @p H1_seminorm
contributions. Here, we have a DoFHandler with these two contributions
computed and sorted by their vector component, <code>[0, dim)</code> for the
gradient and @p dim for the scalar. To compute their value, we hence use a
ComponentSelectFunction for either of them, together with the
@p SolutionAndGradient class introduced above that contains the analytic
parts of either of them. Eventually, we also compute the L2-error of the
post-processed solution and add the results into the convergence table.
  template <int dim>
  void HDG<dim>::postprocess()
  {
    {
      const QGauss<dim> quadrature_formula(fe_u_post.degree + 1);
      const UpdateFlags local_flags(update_values);
      const UpdateFlags flags(update_values | update_gradients |
                              update_JxW_values);

      PostProcessScratchData scratch(
        fe_u_post, fe_local, quadrature_formula, local_flags, flags);

      WorkStream::run(
        dof_handler_u_post.begin_active(),
        dof_handler_u_post.end(),
        [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
               PostProcessScratchData &scratch,
               unsigned int &data) {
          this->postprocess_one_cell(cell, scratch, data);
        },
        std::function<void(const unsigned int &)>(),
        scratch,
        0U);
    }

    Vector<float> difference_per_cell(triangulation.n_active_cells());

    ComponentSelectFunction<dim> value_select(dim, dim + 1);
    VectorTools::integrate_difference(dof_handler_local,
                                      solution_local,
                                      SolutionAndGradient<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 2),
                                      VectorTools::L2_norm,
                                      &value_select);
    const double L2_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    ComponentSelectFunction<dim> gradient_select(
      std::pair<unsigned int, unsigned int>(0, dim), dim + 1);
    VectorTools::integrate_difference(dof_handler_local,
                                      solution_local,
                                      SolutionAndGradient<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 2),
                                      VectorTools::L2_norm,
                                      &gradient_select);
    const double grad_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    VectorTools::integrate_difference(dof_handler_u_post,
                                      solution_u_post,
                                      Solution<dim>(),
                                      difference_per_cell,
                                      QGauss<dim>(fe.degree + 3),
                                      VectorTools::L2_norm);
    const double post_error =
      VectorTools::compute_global_error(triangulation,
                                        difference_per_cell,
                                        VectorTools::L2_norm);

    convergence_table.add_value("cells", triangulation.n_active_cells());
    convergence_table.add_value("dofs", dof_handler.n_dofs());

    convergence_table.add_value("val L2", L2_error);
    convergence_table.set_scientific("val L2", true);
    convergence_table.set_precision("val L2", 3);

    convergence_table.add_value("grad L2", grad_error);
    convergence_table.set_scientific("grad L2", true);
    convergence_table.set_precision("grad L2", 3);

    convergence_table.add_value("val L2-post", post_error);
    convergence_table.set_scientific("val L2-post", true);
    convergence_table.set_precision("val L2-post", 3);
  }
<a name="step_51-HDGpostprocess_one_cell"></a>
<h4>HDG::postprocess_one_cell</h4>

This is the actual work done for the postprocessing. According to the
discussion in the introduction, we need to set up a system that projects the
gradient part of the DG solution onto the gradient of the post-processed
variable. Moreover, we need to set the average of the new post-processed
variable to equal the average of the scalar DG solution on the cell.

More technically speaking, the projection of the gradient is a system that
would potentially fill our @p dofs_per_cell times @p dofs_per_cell matrix but
is singular (the sum of all rows would be zero because the constant function
has zero gradient). Therefore, we take one row away and use it for imposing
the average of the scalar value. We pick the first row for the scalar part,
even though we could pick any row for @f$\mathcal{Q}_{-p}@f$ elements.
However, had we used FE_DGP elements instead, the first row would correspond
to the constant part already and deleting e.g. the last row would give us a
singular system. This way, our program can also be used for those elements.
  template <int dim>
  void HDG<dim>::postprocess_one_cell(
    const typename DoFHandler<dim>::active_cell_iterator &cell,
    PostProcessScratchData                               &scratch,
    unsigned int &)
  {
    const typename DoFHandler<dim>::active_cell_iterator loc_cell =
      cell->as_dof_handler_iterator(dof_handler_local);

    scratch.fe_values_local.reinit(loc_cell);
    scratch.fe_values.reinit(cell);

    const FEValuesExtractors::Vector fluxes(0);
    const FEValuesExtractors::Scalar scalar(dim);

    const unsigned int n_q_points = scratch.fe_values.get_quadrature().size();
    const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell;

    scratch.fe_values_local[scalar].get_function_values(solution_local,
                                                        scratch.u_values);
    scratch.fe_values_local[fluxes].get_function_values(solution_local,
                                                        scratch.u_gradients);

    double sum = 0;
    for (unsigned int i = 1; i < dofs_per_cell; ++i)
      {
        for (unsigned int j = 0; j < dofs_per_cell; ++j)
          {
            sum = 0;
            for (unsigned int q = 0; q < n_q_points; ++q)
              sum += (scratch.fe_values.shape_grad(i, q) *
                      scratch.fe_values.shape_grad(j, q)) *
                     scratch.fe_values.JxW(q);
            scratch.cell_matrix(i, j) = sum;
          }

        sum = 0;
        for (unsigned int q = 0; q < n_q_points; ++q)
          sum -= (scratch.fe_values.shape_grad(i, q) *
                  scratch.u_gradients[q]) *
                 scratch.fe_values.JxW(q);
        scratch.cell_rhs(i) = sum;
      }
    for (unsigned int j = 0; j < dofs_per_cell; ++j)
      {
        sum = 0;
        for (unsigned int q = 0; q < n_q_points; ++q)
          sum += scratch.fe_values.shape_value(j, q) *
                 scratch.fe_values.JxW(q);
        scratch.cell_matrix(0, j) = sum;
      }
    sum = 0;
    for (unsigned int q = 0; q < n_q_points; ++q)
      sum += scratch.u_values[q] * scratch.fe_values.JxW(q);
    scratch.cell_rhs(0) = sum;

Having assembled all terms, we can again go on and solve the linear system.
We invert the matrix and then multiply the inverse by the right hand side. An
alternative (and more numerically stable) method would have been to only
factorize the matrix and apply the factorization.

    scratch.cell_matrix.gauss_jordan();
    scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs);
    cell->distribute_local_to_global(scratch.cell_sol, solution_u_post);
  }
<a name="step_51-HDGoutput_results"></a>
<h4>HDG::output_results</h4>
We have 3 sets of results that we would like to output: the local solution,
the post-processed local solution, and the skeleton solution. The former two
both 'live' on element volumes, whereas the latter lives on codimension-1
surfaces of the triangulation. Our @p output_results function writes all
local solutions to the same vtk file, even though they correspond to
different DoFHandler objects. The graphical output for the skeleton variable
is done through use of the DataOutFaces class.

  template <int dim>
  void HDG<dim>::output_results(const unsigned int cycle)
  {
    std::string filename;
    switch (refinement_mode)
      {
        case global_refinement:
          filename = "solution-global";
          break;
        case adaptive_refinement:
          filename = "solution-adaptive";
          break;
        default:
          Assert(false, ExcNotImplemented());
      }

    std::string face_out(filename);
    face_out += "-face";

    filename += "-q" + Utilities::int_to_string(fe.degree, 1);
    filename += "-" + Utilities::int_to_string(cycle, 2);
    filename += ".vtk";
    std::ofstream output(filename);

    DataOut<dim> data_out;

We first define the names and types of the local solution, and add the data
to @p data_out.

    std::vector<std::string> names(dim, "gradient");
    names.emplace_back("solution");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      component_interpretation(
        dim + 1, DataComponentInterpretation::component_is_part_of_vector);
    component_interpretation[dim] =
      DataComponentInterpretation::component_is_scalar;
    data_out.add_data_vector(dof_handler_local,
                             solution_local,
                             names,
                             component_interpretation);

The second data item we add is the post-processed solution. In this case, it
is a single scalar variable belonging to a different DoFHandler.

    std::vector<std::string> post_name(1, "u_post");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      post_comp_type(1, DataComponentInterpretation::component_is_scalar);
    data_out.add_data_vector(dof_handler_u_post,
                             solution_u_post,
                             post_name,
                             post_comp_type);

    data_out.build_patches(fe.degree);
    data_out.write_vtk(output);

    face_out += "-q" + Utilities::int_to_string(fe.degree, 1);
    face_out += "-" + Utilities::int_to_string(cycle, 2);
    face_out += ".vtk";
    std::ofstream face_output(face_out);

The <code>DataOutFaces</code> class works analogously to the
<code>DataOut</code> class when we have a <code>DoFHandler</code> that
defines the solution on the skeleton of the triangulation. We treat it as
such here, and the code is similar to that above.

    DataOutFaces<dim>        data_out_face(false);
    std::vector<std::string> face_name(1, "u_hat");
    std::vector<DataComponentInterpretation::DataComponentInterpretation>
      face_component_type(1, DataComponentInterpretation::component_is_scalar);

    data_out_face.add_data_vector(dof_handler,
                                  solution,
                                  face_name,
                                  face_component_type);

    data_out_face.build_patches(fe.degree);
    data_out_face.write_vtk(face_output);
  }
<a name="step_51-HDGrefine_grid"></a>
<h4>HDG::refine_grid</h4>

We implement two different refinement cases for HDG, just as in
<code>@ref step_7 "step-7"</code>: adaptive_refinement and global_refinement.
The global_refinement option recreates the entire triangulation every time.
This is because we want to use a finer sequence of meshes than what we would
get with one refinement step, namely 2, 3, 4, 6, 8, 12, 16, ... elements per
direction. (A short back-of-envelope check of this sequence follows below.)

The adaptive_refinement mode uses the <code>KellyErrorEstimator</code> to
give a decent indication of the non-regular regions in the scalar local
solutions.
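A back-of-envelope check (standalone snippet, not part of the tutorial) of
the resulting 2d element counts per direction, combining the
<code>2 + cycle % 2</code> initial subdivisions with the
<code>3 - dim + cycle / 2</code> global refinements used in the code below:

@code
#include <iostream>

// Prints the elements per direction of the 2d global-refinement sequence:
// (2 + cycle % 2) subdivisions, each global refinement doubling the count.
int main()
{
  for (unsigned int cycle = 0; cycle < 10; ++cycle)
    std::cout << (2 + cycle % 2) * (1u << (1 + cycle / 2)) << ' ';
  std::cout << std::endl; // prints: 4 6 8 12 16 24 32 48 64 96
}
@endcode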
  template <int dim>
  void HDG<dim>::refine_grid(const unsigned int cycle)
  {
    if (cycle == 0)
      {
        GridGenerator::subdivided_hyper_cube(triangulation, 2, -1, 1);
        triangulation.refine_global(3 - dim);
      }
    else
      switch (refinement_mode)
        {
          case global_refinement:
            {
              triangulation.clear();
              GridGenerator::subdivided_hyper_cube(triangulation,
                                                   2 + (cycle % 2),
                                                   -1,
                                                   1);
              triangulation.refine_global(3 - dim + cycle / 2);
              break;
            }

          case adaptive_refinement:
            {
              Vector<float> estimated_error_per_cell(
                triangulation.n_active_cells());

              const FEValuesExtractors::Scalar scalar(dim);
              std::map<types::boundary_id, const Function<dim> *>
                neumann_boundary;
              KellyErrorEstimator<dim>::estimate(dof_handler_local,
                                                 QGauss<dim - 1>(fe.degree + 1),
                                                 neumann_boundary,
                                                 solution_local,
                                                 estimated_error_per_cell,
                                                 fe_local.component_mask(
                                                   scalar));

              GridRefinement::refine_and_coarsen_fixed_number(
                triangulation, estimated_error_per_cell, 0.3, 0.);

              triangulation.execute_coarsening_and_refinement();

              break;
            }

          default:
            {
              Assert(false, ExcNotImplemented());
              break;
            }
        }

Just as in @ref step_7 "step-7", we set the boundary indicator of two of the
faces to 1 where we want to specify Neumann boundary conditions instead of
Dirichlet conditions. Since we re-create the triangulation every time for
global refinement, the flags are set in every refinement step, not just at
the beginning.

    for (const auto &cell : triangulation.cell_iterators())
      for (const auto &face : cell->face_iterators())
        if (face->at_boundary())
          if ((std::fabs(face->center()[0] - (-1)) < 1e-12) ||
              (std::fabs(face->center()[1] - (-1)) < 1e-12))
            face->set_boundary_id(1);
  }
<a name="step_51-HDGrun"></a>
<h4>HDG::run</h4>
The functionality here is basically the same as
<code>@ref step_7 "step-7"</code>. We loop over 10 cycles, refining the grid
on each one. At the end, convergence tables are created.

  template <int dim>
  void HDG<dim>::run()
  {
    for (unsigned int cycle = 0; cycle < 10; ++cycle)
      {
        std::cout << "Cycle " << cycle << ':' << std::endl;

        refine_grid(cycle);
        setup_system();
        assemble_system(false);
        solve();
        postprocess();
        output_results(cycle);
      }
There is one minor change for the convergence table compared to @ref step_7
"step-7": Since we did not refine our mesh by a factor two in each cycle (but
rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the
convergence rate evaluation about this. We do this by setting the number of
cells as a reference column and additionally specifying the dimension of the
problem, which gives the necessary information for the relation between
number of cells and mesh size.

    if (refinement_mode == global_refinement)
      {
        convergence_table.evaluate_convergence_rates(
          "val L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
        convergence_table.evaluate_convergence_rates(
          "grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
        convergence_table.evaluate_convergence_rates(
          "val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim);
      }
    convergence_table.write_text(std::cout);
  }
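With @p cells as the reference column and the dimension passed along,
ConvergenceTable computes each rate from consecutive rows as
@f[
\text{rate} = \text{dim} \cdot \frac{\ln(e_{k-1}/e_k)}{\ln(N_k/N_{k-1})},
@f]
using that the mesh size behaves like @f$h \propto N^{-1/\text{dim}}@f$ for
@f$N@f$ cells. As a spot check against the Q1 global-refinement table further
below: the last two rows give
@f$2\ln(1.043\cdot 10^{-2}/4.690\cdot 10^{-3})/\ln(9216/4096) \approx 1.97@f$,
exactly the printed rate.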
int main()
{
  const unsigned int dim = 2;

Now for the three calls to the main class in complete analogy to
@ref step_7 "step-7".

  try
    {
      {
        std::cout << "Solving with Q1 elements, adaptive refinement"
                  << std::endl
                  << "============================================="
                  << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::adaptive_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }

      {
        std::cout << "Solving with Q1 elements, global refinement" << std::endl
                  << "===========================================" << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::global_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }

      {
        std::cout << "Solving with Q3 elements, global refinement" << std::endl
                  << "===========================================" << std::endl
                  << std::endl;

        Step51::HDG<dim> hdg_problem(3, Step51::HDG<dim>::global_refinement);
        hdg_problem.run();

        std::cout << std::endl;
      }
    }
  catch (std::exception &exc)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Exception on processing: " << std::endl
                << exc.what() << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }
  catch (...)
    {
      std::cerr << std::endl
                << std::endl
                << "----------------------------------------------------"
                << std::endl;
      std::cerr << "Unknown exception!" << std::endl
                << "Aborting!" << std::endl
                << "----------------------------------------------------"
                << std::endl;
      return 1;
    }

  return 0;
}
<a name="step_51-Results"></a><h1>Results</h1>

<a name="step_51-Programoutput"></a><h3>Program output</h3>

We first have a look at the output generated by the program when run in 2D.
In the four images below, we show the solution for polynomial degree
@f$p=1@f$ and cycles 2, 3, 4, and 8 of the program. In the plots, we overlay
the data generated from the internal data (DG part) with the skeleton part
(@f$\hat{u}@f$) into the same plot. We had to generate two different data
sets because cells and faces represent different geometric entities, the
combination of which (in the same file) is not supported in the VTK output of
deal.II.

The images show the distinctive features of HDG: The cell solution (colored
surfaces) is discontinuous between the cells. The solution on the skeleton
variable sits on the faces and ties together the local parts. The skeleton
solution is not continuous on the vertices where the faces meet, even though
its values are quite close along lines in the same coordinate direction. The
skeleton solution can be interpreted as a rubber spring between the two sides
that balances the jumps in the solution (or rather, the flux @f$\kappa \nabla
u + \mathbf{c} u@f$). From the picture at the top left, it is clear that the
bulk solution frequently over- and undershoots and that the skeleton variable
is indeed a better approximation to the exact solution; this explains why we
can get a better solution using a postprocessing step.

As the mesh is refined, the jumps between the cells get small (we represent a
smooth solution), and the skeleton solution approaches the interior parts.
For cycle 8, there is no visible difference in the two variables. We also see
how boundary conditions are implemented weakly and that the interior
variables do not exactly satisfy boundary conditions. On the lower and left
boundaries, we set Neumann boundary conditions, whereas we set Dirichlet
conditions on the right and top boundaries.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_2.png" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_3.png" alt=""></td>
  </tr>
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_4.png" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_8.png" alt=""></td>
  </tr>
</table>
Next, we have a look at the post-processed solution, again at cycles 2, 3, 4,
and 8. This is a discontinuous solution that is locally described by second
order polynomials. While the solution does not look very good on the mesh of
cycle two, it looks much better for cycles three and four. As shown by the
convergence table below, we find that it also converges more quickly to the
analytical solution.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_2.png" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_3.png" alt=""></td>
  </tr>
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_4.png" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_8.png" alt=""></td>
  </tr>
</table>

Finally, we look at the solution for @f$p=3@f$ at cycle 2. Despite the coarse
mesh with only 64 cells, the post-processed solution is similar in quality to
the linear solution (not post-processed) at cycle 8 with 4,096 cells. This
clearly shows the superiority of high order methods for smooth solutions.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_q3_2.png" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_q3_2.png" alt=""></td>
  </tr>
</table>
<a name="step_51-Convergencetables"></a><h4>Convergence tables</h4>

When the program is run, it also outputs information about the respective
steps and convergence tables with errors in the various components in the
end. In 2D, the convergence tables look the following:

@code
Q1 elements, adaptive refinement:
cells dofs    val L2    grad L2  val L2-post
   16    80 1.804e+01 2.207e+01   1.798e+01
   31   170 9.874e+00 1.322e+01   9.798e+00
   61   314 7.452e-01 3.793e+00   4.891e-01
  121   634 3.240e-01 1.511e+00   2.616e-01
  238  1198 8.585e-02 8.212e-01   1.808e-02
  454  2290 4.802e-02 5.178e-01   2.195e-02
  898  4378 2.561e-02 2.947e-01   4.318e-03
 1720  7864 1.306e-02 1.664e-01   2.978e-03
 3271 14638 7.025e-03 9.815e-02   1.075e-03
 6217 27214 4.119e-03 6.407e-02   9.975e-04

Q1 elements, global refinement:
cells dofs      val L2        grad L2      val L2-post
   16    80 1.804e+01    - 2.207e+01    - 1.798e+01    -
   36   168 6.125e+00 2.66 9.472e+00 2.09 6.084e+00 2.67
   64   288 9.785e-01 6.38 4.260e+00 2.78 7.102e-01 7.47
  144   624 2.730e-01 3.15 1.866e+00 2.04 6.115e-02 6.05
  256  1088 1.493e-01 2.10 1.046e+00 2.01 2.880e-02 2.62
  576  2400 6.965e-02 1.88 4.846e-01 1.90 9.204e-03 2.81
 1024  4224 4.018e-02 1.91 2.784e-01 1.93 4.027e-03 2.87
 2304  9408 1.831e-02 1.94 1.264e-01 1.95 1.236e-03 2.91
 4096 16640 1.043e-02 1.96 7.185e-02 1.96 5.306e-04 2.94
 9216 37248 4.690e-03 1.97 3.228e-02 1.97 1.599e-04 2.96

Q3 elements, global refinement:
cells dofs      val L2        grad L2      val L2-post
   16   160 3.613e-01    - 1.891e+00    - 3.020e-01    -
   36   336 6.411e-02 4.26 5.081e-01 3.24 3.238e-02 5.51
   64   576 3.480e-02 2.12 2.533e-01 2.42 5.277e-03 6.31
  144  1248 8.297e-03 3.54 5.924e-02 3.58 6.330e-04 5.23
  256  2176 2.254e-03 4.53 1.636e-02 4.47 1.403e-04 5.24
  576  4800 4.558e-04 3.94 3.277e-03 3.96 1.844e-05 5.01
 1024  8448 1.471e-04 3.93 1.052e-03 3.95 4.378e-06 5.00
 2304 18816 2.956e-05 3.96 2.104e-04 3.97 5.750e-07 5.01
 4096 33280 9.428e-06 3.97 6.697e-05 3.98 1.362e-07 5.01
 9216 74496 1.876e-06 3.98 1.330e-05 3.99 1.788e-08 5.01
@endcode
One can see the error reduction upon grid refinement, and for the cases where
global refinement was performed, also the convergence rates. The quadratic
convergence rates of Q1 elements in the @f$L_2@f$ norm for both the scalar
variable and the gradient variable are apparent, as is the cubic rate for the
postprocessed scalar variable in the @f$L_2@f$ norm. Note this distinctive
feature of an HDG solution. In typical continuous finite elements, the
gradient of the solution of order @f$p@f$ converges at rate @f$p@f$ only, as
opposed to @f$p+1@f$ for the actual solution. Even though superconvergence
results for finite elements are also available (e.g. superconvergent patch
recovery first introduced by Zienkiewicz and Zhu), these are typically
limited to structured meshes and other special cases. For Q3 HDG variables,
the scalar variable and gradient converge at fourth order and the
postprocessed scalar variable at fifth order.

The same convergence rates are observed in 3d.
@code
Q1 elements, adaptive refinement:
 cells    dofs    val L2    grad L2  val L2-post
     8     144 7.122e+00 1.941e+01   6.102e+00
    29     500 3.309e+00 1.023e+01   2.145e+00
   113    1792 2.204e+00 1.023e+01   1.912e+00
   379    5732 6.085e-01 5.008e+00   2.233e-01
  1317   19412 1.543e-01 1.464e+00   4.196e-02
  4579   64768 5.058e-02 5.611e-01   9.521e-03
 14596  199552 2.129e-02 3.122e-01   4.569e-03
 46180  611400 1.033e-02 1.622e-01   1.684e-03
144859 1864212 5.007e-03 8.371e-02   7.364e-04
451060 5684508 2.518e-03 4.562e-02   3.070e-04

Q1 elements, global refinement:
 cells    dofs      val L2         grad L2       val L2-post
     8     144 7.122e+00    - 1.941e+01     - 6.102e+00    -
    27     432 5.491e+00 0.64 2.184e+01 -0.29 4.448e+00 0.78
    64     960 3.646e+00 1.42 1.299e+01  1.81 3.306e+00 1.03
   216    3024 1.595e+00 2.04 8.550e+00  1.03 1.441e+00 2.05
   512    6912 6.922e-01 2.90 5.306e+00  1.66 2.511e-01 6.07
  1728   22464 2.915e-01 2.13 2.490e+00  1.87 8.588e-02 2.65
  4096   52224 1.684e-01 1.91 1.453e+00  1.87 4.055e-02 2.61
 13824  172800 7.972e-02 1.84 6.861e-01  1.85 1.335e-02 2.74
 32768  405504 4.637e-02 1.88 3.984e-01  1.89 5.932e-03 2.82
110592 1354752 2.133e-02 1.92 1.830e-01  1.92 1.851e-03 2.87

Q3 elements, global refinement:
 cells    dofs      val L2        grad L2      val L2-post
     8     576 5.670e+00    - 1.868e+01    - 5.462e+00    -
    27    1728 1.048e+00 4.16 6.988e+00 2.42 8.011e-01 4.73
    64    3840 2.831e-01 4.55 2.710e+00 3.29 1.363e-01 6.16
   216   12096 7.883e-02 3.15 7.721e-01 3.10 2.158e-02 4.55
   512   27648 3.642e-02 2.68 3.305e-01 2.95 5.231e-03 4.93
  1728   89856 8.546e-03 3.58 7.581e-02 3.63 7.640e-04 4.74
  4096  208896 2.598e-03 4.14 2.313e-02 4.13 1.783e-04 5.06
 13824  691200 5.314e-04 3.91 4.697e-03 3.93 2.355e-05 4.99
 32768 1622016 1.723e-04 3.91 1.517e-03 3.93 5.602e-06 4.99
110592 5419008 3.482e-05 3.94 3.055e-04 3.95 7.374e-07 5.00
@endcode
<a name="step_51-Comparisonwithcontinuousfiniteelements"></a><h3>Comparison with continuous finite elements</h3>

<a name="step_51-Resultsfor2D"></a><h4>Results for 2D</h4>

The convergence tables verify the expected convergence rates stated in the
introduction. Now, we want to show a quick comparison of the computational
efficiency of the HDG method compared to a usual finite element (continuous
Galerkin) method on the problem of this tutorial. Of course, stability
aspects of the HDG method compared to continuous finite elements for
transport-dominated problems are also important in practice, which is an
aspect not seen on a problem with smooth analytic solution. In the picture
below, we compare the @f$L_2@f$ error as a function of the number of degrees
of freedom (left) and of the computing time spent in the linear solver
(right) for two space dimensions of continuous finite elements (CG) and the
hybridized discontinuous Galerkin method presented in this tutorial. As
opposed to the tutorial where we only use unpreconditioned BiCGStab, the
times shown in the figures below use the Trilinos algebraic multigrid
preconditioner in TrilinosWrappers::PreconditionAMG. For the HDG part, a
wrapper around ChunkSparseMatrix for the trace variable has been used in
order to utilize the block structure in the matrix on the finest level.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_plain.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_plain.png" width="400" alt=""></td>
  </tr>
</table>
The results in the graphs show that the HDG method is slower than continuous
finite elements at @f$p=1@f$, about equally fast for cubic elements and
faster for sixth order elements. However, we have seen above that the HDG
method actually produces solutions which are more accurate than what is
represented in the original variables. Therefore, in the next two plots below
we instead display the error of the post-processed solution for HDG (denoted
by @f$p=1^*@f$ for example). We now see a clear advantage of HDG for the same
amount of work for both @f$p=3@f$ and @f$p=6@f$, and about the same quality
for @f$p=1@f$.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_post.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_post.png" width="400" alt=""></td>
  </tr>
</table>
Since the HDG method actually produces results converging as
@f$h^{p+2}@f$, we should compare it to a continuous Galerkin solution with
the same asymptotic convergence behavior, i.e., FE_Q with degree @f$p+1@f$.
If we do this, we get the convergence curves below. We see that CG with
second order polynomials is again clearly better than HDG with linears.
However, the advantage of HDG for higher orders remains.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_postb.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_postb.png" width="400" alt=""></td>
  </tr>
</table>
The results are in line with properties of DG methods in general: Best
performance is typically not achieved for linear elements, but rather at
somewhat higher order, usually around @f$p=3@f$. This is because of a
volume-to-surface effect for discontinuous solutions with too much of the
solution living on the surfaces and hence duplicating work when the elements
are linear. Put in other words, DG methods are often most efficient when used
at relatively high order, despite their focus on a discontinuous (and hence,
seemingly low accurate) representation of solutions.
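A rough count supporting this (our back-of-envelope estimate, not from a
reference): in 2d each cell has four faces, each shared between two cells, so
@f[
\text{HDG trace dofs per cell} \approx \frac{4}{2}\,(p+1) = 2(p+1),
\qquad
\text{continuous } Q_p \text{ dofs per cell} \approx p^2 .
@f]
At @f$p=1@f$ this is 4 versus 1, while at @f$p=3@f$ it is 8 versus 9, which
is why the balance tips in favor of HDG at higher orders.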
<a name="step_51-Resultsfor3D"></a><h4>Results for 3D</h4>

We now show the same figures in 3D: The first row shows the number of degrees
of freedom and computing time versus the @f$L_2@f$ error in the scalar
variable @f$u@f$ for CG and HDG at order @f$p@f$, the second row shows the
post-processed HDG solution instead of the original one, and the third row
compares the post-processed HDG solution with CG at order @f$p+1@f$. In 3D,
the volume-to-surface effect makes the cost of HDG somewhat higher and the CG
solution is clearly better than HDG for linears by any metric. For cubics,
HDG and CG are of similar quality, whereas HDG is again more efficient for
sixth order polynomials. One can alternatively also use the combination of
FE_DGP and FE_FaceP instead of (FE_DGQ, FE_FaceQ), which do not use tensor
product polynomials of degree @f$p@f$ but Legendre polynomials of
<i>complete</i> degree @f$p@f$. There are fewer degrees of freedom on the
skeleton variable for FE_FaceP for a given mesh size, but the solution
quality (error vs. number of DoFs) is very similar to the results for
FE_FaceQ.

<table align="center">
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_plain.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_plain.png" width="400" alt=""></td>
  </tr>
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_post.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_post.png" width="400" alt=""></td>
  </tr>
  <tr>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_postb.png" width="400" alt=""></td>
    <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_postb.png" width="400" alt=""></td>
  </tr>
</table>

One final note on the efficiency comparison: We tried to use general-purpose
sparse matrix structures and similar solvers (optimal AMG preconditioners for
both without particular tuning of the AMG parameters on any of them) to give
a fair picture of the cost versus accuracy of two methods, on a toy example.
It should be noted however that geometric multigrid (GMG) for continuous
finite elements is about a factor four to five faster for @f$p=3@f$ and
@f$p=6@f$. As of 2019, optimal-complexity iterative solvers for HDG are still
under development in the research community. Also, there are other
implementation aspects for CG available such as fast matrix-free approaches
as shown in @ref step_37 "step-37" that make higher order continuous elements
more competitive. Again, it is not clear to the authors of the tutorial
whether similar improvements could be made for HDG. We refer to
<a href="https://dx.doi.org/10.1137/16M110455X">Kronbichler and Wall
(2018)</a> for a recent efficiency evaluation.
<a name="step_51-Possibilitiesforimprovements"></a><h3>Possibilities for improvements</h3>

As already mentioned in the introduction, one possibility is to implement
another post-processing technique as discussed in the literature.

A second item that is not done optimally relates to the performance of this
program, which is of course an issue in practical applications (weighing in
also the better solution quality of (H)DG methods for transport-dominated
problems). Let us look at the computing time of the tutorial program and the
share of the individual components:
<table align="center" class="doxtable">
  <tr>
    <th>&nbsp;</th>
    <th>&nbsp;</th>
    <th>Setup</th>
    <th>Assemble</th>
    <th>Solve</th>
    <th>Trace reconstruct</th>
    <th>Post-processing</th>
    <th>Output</th>
  </tr>
  <tr>
    <th>&nbsp;</th>
    <th>Total time</th>
    <th colspan="6">Relative share</th>
  </tr>
  <tr>
    <td align="left">2D, Q1, cycle 9, 37,248 dofs</td>
    <td align="center">5.34s</td>
    <td align="center">0.7%</td>
    <td align="center">1.2%</td>
    <td align="center">89.5%</td>
    <td align="center">0.9%</td>
    <td align="center">2.3%</td>
    <td align="center">5.4%</td>
  </tr>
  <tr>
    <td align="left">2D, Q3, cycle 9, 74,496 dofs</td>
    <td align="center">22.2s</td>
    <td align="center">0.4%</td>
    <td align="center">4.3%</td>
    <td align="center">84.1%</td>
    <td align="center">4.1%</td>
    <td align="center">3.5%</td>
    <td align="center">3.6%</td>
  </tr>
  <tr>
    <td align="left">3D, Q1, cycle 7, 172,800 dofs</td>
    <td align="center">9.06s</td>
    <td align="center">3.1%</td>
    <td align="center">8.9%</td>
    <td align="center">42.7%</td>
    <td align="center">7.0%</td>
    <td align="center">20.6%</td>
    <td align="center">17.7%</td>
  </tr>
  <tr>
    <td align="left">3D, Q3, cycle 7, 691,200 dofs</td>
    <td align="center">516s</td>
    <td align="center">0.6%</td>
    <td align="center">34.5%</td>
    <td align="center">13.4%</td>
    <td align="center">32.8%</td>
    <td align="center">17.1%</td>
    <td align="center">1.5%</td>
  </tr>
</table>
As can be seen from the table, the solver and assembly calls dominate the
runtime of the program. This also gives a clear indication of where
improvements would make the most sense:

<ol>
  <li> Better linear solvers: We use a BiCGStab iterative solver without
  preconditioner, where the number of iterations increases with increasing
  problem size (the number of iterations for Q1 elements and global
  refinements starts at 35 for the small sizes but increases up to 701 for
  the largest size). To do better, one could for example use an algebraic
  multigrid preconditioner from Trilinos, or some more advanced variants as
  the one discussed in <a
  href="https://dx.doi.org/10.1137/16M110455X">Kronbichler and Wall
  (2018)</a>; a minimal preconditioning sketch follows after this list. For
  diffusion-dominated problems such as the problem at hand with finer meshes,
  such a solver can be designed that uses the matrix-vector products from the
  more efficient ChunkSparseMatrix on the finest level, as long as we are not
  working in parallel with MPI. For MPI-parallelized computations, a standard
  TrilinosWrappers::SparseMatrix can be used.</li>
  <li> Speed up assembly by pre-assembling parts that do not change from one
  cell to another (those that neither contain variable coefficients nor
  mapping-dependent terms).</li>
</ol>
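
As a sketch of the first item (assuming deal.II is configured with Trilinos;
the function and variable names are illustrative, not taken from the
tutorial), the trace system could be preconditioned with algebraic multigrid
like this:

@code
// Hedged sketch: BiCGStab on the trace system with a Trilinos ML
// algebraic multigrid preconditioner instead of no preconditioner.
#include <deal.II/lac/solver_bicgstab.h>
#include <deal.II/lac/solver_control.h>
#include <deal.II/lac/trilinos_precondition.h>
#include <deal.II/lac/trilinos_sparse_matrix.h>
#include <deal.II/lac/vector.h>

void
solve_with_amg(const dealii::TrilinosWrappers::SparseMatrix &system_matrix,
               dealii::Vector<double>                       &solution,
               const dealii::Vector<double>                 &system_rhs)
{
  using namespace dealii;

  // Default AMG parameters; tuning them is a further possibility.
  TrilinosWrappers::PreconditionAMG preconditioner;
  preconditioner.initialize(system_matrix);

  SolverControl solver_control(system_matrix.m(),
                               1e-11 * system_rhs.l2_norm());
  SolverBicgstab<Vector<double>> solver(solver_control);
  solver.solve(system_matrix, solution, system_rhs, preconditioner);
}
@endcode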
<a name="step_51-PlainProg"></a>
<h1> The plain program</h1>
@include "step-51.cc"