16#ifndef dealii_matrix_free_evaluation_kernels_h
17#define dealii_matrix_free_evaluation_kernels_h
37 template <MatrixFreeFunctions::ElementType element,
bool is_
long>
41 template <
bool is_
long>
59 template <
bool is_
long>
78 template <
bool is_
long>
130 const Number *values_dofs_actual,
136 Number *values_dofs_actual,
138 const bool add_into_values_array);
143 *univariate_shape_data)
166 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
174 evaluate(
const unsigned int n_components,
176 const Number *values_dofs_actual,
180 integrate(
const unsigned int n_components,
182 Number *values_dofs_actual,
184 const bool add_into_values_array);
196 const unsigned int n_components,
198 const Number *values_dofs_actual,
204 std::array<const MatrixFreeFunctions::UnivariateShapeData<Number2> *, 3>
205 univariate_shape_data;
209 univariate_shape_data.fill(&shape_data.front());
211 if (shape_data.size() == dim)
212 for (
int i = 1; i < dim; ++i)
213 univariate_shape_data[i] = &shape_data[i];
219 const unsigned int temp_size =
230 shape_data.front().fe_degree + 1),
232 shape_data.front().n_q_points_1d));
236 temp2 = temp1 + temp_size;
239 const std::size_t n_q_points = temp_size == 0 ?
242 const std::size_t dofs_per_comp =
246 const Number *values_dofs =
256 const_cast<Number *
>(values_dofs),
266 for (
unsigned int c = 0; c < n_components; ++c)
269 eval0.template values<0, true, false>(values_dofs, values_quad);
271 eval0.template gradients<0, true, false>(values_dofs,
275 values_dofs += dofs_per_comp;
276 values_quad += n_q_points;
277 gradients_quad += n_q_points;
282 for (
unsigned int c = 0; c < n_components; ++c)
287 eval0.template gradients<0, true, false>(values_dofs, temp1);
288 eval1.template values<1, true, false, 2>(temp1,
293 eval0.template values<0, true, false>(values_dofs, temp1);
295 eval1.template gradients<1, true, false, 2>(temp1,
300 eval1.template values<1, true, false>(temp1, values_quad);
303 values_dofs += dofs_per_comp;
304 values_quad += n_q_points;
305 gradients_quad += 2 * n_q_points;
310 for (
unsigned int c = 0; c < n_components; ++c)
315 eval0.template gradients<0, true, false>(values_dofs, temp1);
316 eval1.template values<1, true, false>(temp1, temp2);
317 eval2.template values<2, true, false, 3>(temp2,
322 eval0.template values<0, true, false>(values_dofs, temp1);
325 eval1.template gradients<1, true, false>(temp1, temp2);
326 eval2.template values<2, true, false, 3>(temp2,
332 eval1.template values<1, true, false>(temp1, temp2);
334 eval2.template gradients<2, true, false, 3>(temp2,
340 eval2.template values<2, true, false>(temp2, values_quad);
343 values_dofs += dofs_per_comp;
344 values_quad += n_q_points;
345 gradients_quad += 3 * n_q_points;
358 values_quad -= n_components * n_q_points;
359 values_dofs -= n_components * dofs_per_comp;
360 for (std::size_t c = 0; c < n_components; ++c)
361 for (std::size_t q = 0; q < n_q_points; ++q)
362 values_quad[c * n_q_points + q] +=
363 values_dofs[(c + 1) * dofs_per_comp - 1];
376 const unsigned int n_components,
378 Number *values_dofs_actual,
380 const bool add_into_values_array)
382 std::array<const MatrixFreeFunctions::UnivariateShapeData<Number2> *, 3>
383 univariate_shape_data;
386 univariate_shape_data.fill(&shape_data.front());
388 if (shape_data.size() == dim)
389 for (
int i = 1; i < dim; ++i)
390 univariate_shape_data[i] = &shape_data[i];
396 const unsigned int temp_size =
407 shape_data.front().fe_degree + 1),
409 shape_data.front().n_q_points_1d));
413 temp2 = temp1 + temp_size;
416 const std::size_t n_q_points = temp_size == 0 ?
419 const unsigned int dofs_per_comp =
425 Number *values_dofs =
438 for (
unsigned int c = 0; c < n_components; ++c)
442 if (add_into_values_array ==
false)
443 eval0.template values<0, false, false>(values_quad,
446 eval0.template values<0, false, true>(values_quad,
452 add_into_values_array ==
true)
453 eval0.template gradients<0, false, true>(gradients_quad,
456 eval0.template gradients<0, false, false>(gradients_quad,
461 values_dofs += dofs_per_comp;
462 values_quad += n_q_points;
463 gradients_quad += n_q_points;
468 for (
unsigned int c = 0; c < n_components; ++c)
473 eval1.template values<1, false, false>(values_quad, temp1);
474 if (add_into_values_array ==
false)
475 eval0.template values<0, false, false>(temp1, values_dofs);
477 eval0.template values<0, false, true>(temp1, values_dofs);
481 eval1.template gradients<1, false, false, 2>(gradients_quad +
485 eval1.template values<1, false, true>(values_quad, temp1);
486 if (add_into_values_array ==
false)
487 eval0.template values<0, false, false>(temp1, values_dofs);
489 eval0.template values<0, false, true>(temp1, values_dofs);
490 eval1.template values<1, false, false, 2>(gradients_quad,
492 eval0.template gradients<0, false, true>(temp1, values_dofs);
496 values_dofs += dofs_per_comp;
497 values_quad += n_q_points;
498 gradients_quad += 2 * n_q_points;
503 for (
unsigned int c = 0; c < n_components; ++c)
508 eval2.template values<2, false, false>(values_quad, temp1);
509 eval1.template values<1, false, false>(temp1, temp2);
510 if (add_into_values_array ==
false)
511 eval0.template values<0, false, false>(temp2, values_dofs);
513 eval0.template values<0, false, true>(temp2, values_dofs);
517 eval2.template gradients<2, false, false, 3>(gradients_quad +
521 eval2.template values<2, false, true>(values_quad, temp1);
522 eval1.template values<1, false, false>(temp1, temp2);
523 eval2.template values<2, false, false, 3>(gradients_quad + 1,
525 eval1.template gradients<1, false, true>(temp1, temp2);
526 if (add_into_values_array ==
false)
527 eval0.template values<0, false, false>(temp2, values_dofs);
529 eval0.template values<0, false, true>(temp2, values_dofs);
530 eval2.template values<2, false, false, 3>(gradients_quad,
532 eval1.template values<1, false, false>(temp1, temp2);
533 eval0.template gradients<0, false, true>(temp2, values_dofs);
537 values_dofs += dofs_per_comp;
538 values_quad += n_q_points;
539 gradients_quad += 3 * n_q_points;
550 values_dofs -= n_components * dofs_per_comp - dofs_per_comp + 1;
551 values_quad -= n_components * n_q_points;
553 for (
unsigned int c = 0; c < n_components; ++c)
555 values_dofs[0] = values_quad[0];
556 for (
unsigned int q = 1; q < n_q_points; ++q)
557 values_dofs[0] += values_quad[q];
558 values_dofs += dofs_per_comp;
559 values_quad += n_q_points;
563 for (
unsigned int c = 0; c < n_components; ++c)
564 values_dofs[c * dofs_per_comp] = Number();
565 values_dofs += n_components * dofs_per_comp;
573 values_dofs - dofs_per_comp * n_components,
579 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
588 const Number *values_dofs_actual,
593 const std::size_t n_dofs =
595 const std::size_t n_q_points = fe_eval.
get_shape_info().n_q_points;
604 const auto *
const shape_values = shape_data.front().shape_values.data();
606 const auto *in = values_dofs_actual;
608 for (
unsigned int c = 0; c < n_components; c += 3)
610 if (c + 1 == n_components)
619 shape_values, in, out, n_dofs, n_q_points, 1, 1);
620 else if (c + 2 == n_components)
629 shape_values, in, out, n_dofs, n_q_points, 1, 1);
639 shape_values, in, out, n_dofs, n_q_points, 1, 1);
641 out += 3 * n_q_points;
648 const auto *
const shape_gradients =
649 shape_data.front().shape_gradients.data();
651 const auto *in = values_dofs_actual;
653 for (
unsigned int c = 0; c < n_components; c += 3)
655 if (c + 1 == n_components)
664 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
665 else if (c + 2 == n_components)
674 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
684 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
686 out += 3 * n_q_points * dim;
694 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
703 Number *values_dofs_actual,
705 const bool add_into_values_array)
710 const std::size_t n_dofs =
712 const std::size_t n_q_points = fe_eval.
get_shape_info().n_q_points;
721 const auto *
const shape_values = shape_data.front().shape_values.data();
723 auto *out = values_dofs_actual;
725 for (
unsigned int c = 0; c < n_components; c += 3)
727 if (add_into_values_array ==
false)
729 if (c + 1 == n_components)
738 shape_values, in, out, n_dofs, n_q_points, 1, 1);
739 else if (c + 2 == n_components)
748 shape_values, in, out, n_dofs, n_q_points, 1, 1);
758 shape_values, in, out, n_dofs, n_q_points, 1, 1);
762 if (c + 1 == n_components)
771 shape_values, in, out, n_dofs, n_q_points, 1, 1);
772 else if (c + 2 == n_components)
781 shape_values, in, out, n_dofs, n_q_points, 1, 1);
791 shape_values, in, out, n_dofs, n_q_points, 1, 1);
794 in += 3 * n_q_points;
800 const auto *
const shape_gradients =
801 shape_data.front().shape_gradients.data();
803 auto *out = values_dofs_actual;
805 for (
unsigned int c = 0; c < n_components; c += 3)
807 if (add_into_values_array ==
false &&
810 if (c + 1 == n_components)
819 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
820 else if (c + 2 == n_components)
829 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
839 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
843 if (c + 1 == n_components)
852 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
853 else if (c + 2 == n_components)
862 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
872 shape_gradients, in, out, n_dofs, n_q_points * dim, 1, 1);
875 in += 3 * n_q_points * dim;
898 static_assert(basis_size_1 == 0 || basis_size_1 <= basis_size_2,
899 "The second dimension must not be smaller than the first");
923 template <
typename Number,
typename Number2>
930 const Number *values_in,
932 const unsigned int basis_size_1_variable =
934 const unsigned int basis_size_2_variable =
938 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
939 ExcMessage(
"The second dimension must not be smaller than the first"));
947 constexpr int next_dim = (dim == 1 || (dim == 2 && basis_size_1 > 0 &&
948 basis_size_1 == basis_size_2)) ?
955 (basis_size_1 == 0 ? 0 : basis_size_2),
958 eval_val(transformation_matrix,
961 basis_size_1_variable,
962 basis_size_2_variable);
963 const unsigned int np_1 =
964 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
965 const unsigned int np_2 =
966 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
968 ExcMessage(
"Cannot transform with 0-point basis"));
970 ExcMessage(
"Cannot transform with 0-point basis"));
977 for (
unsigned int c = n_components; c != 0; --c)
982 for (
unsigned int q = np_1; q != 0; --q)
989 transformation_matrix,
994 basis_size_1_variable,
995 basis_size_2_variable);
1000 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
1002 eval_val.template values<0, true, false>(values_in, values_out);
1003 eval_val.template values<1, true, false>(values_out, values_out);
1006 eval_val.template values<dim - 1,
true,
false>(values_in,
1009 eval_val.template values<dim - 1,
true,
false>(values_out,
1044 template <
typename Number,
typename Number2>
1051 const bool add_into_result,
1054 const unsigned int basis_size_1_variable =
1056 const unsigned int basis_size_2_variable =
1060 basis_size_1 != 0 || basis_size_1_variable <= basis_size_2_variable,
1061 ExcMessage(
"The second dimension must not be smaller than the first"));
1062 Assert(add_into_result ==
false || values_in != values_out,
1064 "Input and output cannot alias with each other when "
1065 "adding the result of the basis change to existing data"));
1071 constexpr int next_dim =
1073 ((basis_size_1 == 0 || basis_size_2 > basis_size_1) && dim > 1)) ?
1079 (basis_size_1 == 0 ? 0 : basis_size_2),
1082 eval_val(transformation_matrix,
1083 transformation_matrix,
1084 transformation_matrix,
1085 basis_size_1_variable,
1086 basis_size_2_variable);
1087 const unsigned int np_1 =
1088 basis_size_1 > 0 ? basis_size_1 : basis_size_1_variable;
1089 const unsigned int np_2 =
1090 basis_size_1 > 0 ? basis_size_2 : basis_size_2_variable;
1092 ExcMessage(
"Cannot transform with 0-point basis"));
1094 ExcMessage(
"Cannot transform with 0-point basis"));
1096 for (
unsigned int c = 0; c < n_components; ++c)
1098 if (basis_size_1 > 0 && basis_size_2 == basis_size_1 && dim == 2)
1101 eval_val.template values<1, false, false>(values_in, values_in);
1103 eval_val.template hessians<1, false, false>(values_in,
1106 if (add_into_result)
1109 eval_val.template values<0, false, true>(values_in,
1112 eval_val.template hessians<0, false, true>(values_in,
1118 eval_val.template values<0, false, false>(values_in,
1121 eval_val.template hessians<0, false, false>(values_in,
1127 if (dim == 1 && add_into_result)
1130 eval_val.template values<0, false, true>(values_in,
1133 eval_val.template hessians<0, false, true>(values_in,
1139 eval_val.template values<0, false, false>(values_in,
1142 eval_val.template hessians<0, false, false>(values_in,
1148 eval_val.template values<dim - 1,
false,
false>(values_in,
1151 eval_val.template hessians<dim - 1,
false,
false>(
1152 values_in, values_in);
1156 for (
unsigned int q = 0; q < np_1; ++q)
1163 transformation_matrix,
1169 basis_size_1_variable,
1170 basis_size_2_variable);
1197 template <
typename Number,
typename Number2>
1202 const Number *values_in,
1203 Number *scratch_data,
1206 constexpr int next_dim = dim > 1 ? dim - 1 : dim;
1207 Number *my_scratch =
1208 basis_size_1 != basis_size_2 ? scratch_data : values_out;
1210 const unsigned int size_per_component =
Utilities::pow(basis_size_2, dim);
1211 Assert(coefficients.
size() == size_per_component ||
1212 coefficients.
size() == n_components * size_per_component,
1214 const unsigned int stride =
1215 coefficients.
size() == size_per_component ? 0 : 1;
1217 for (
unsigned int q = basis_size_1; q != 0; --q)
1224 transformation_matrix,
1237 eval_val(transformation_matrix);
1238 const unsigned int n_inner_blocks =
1239 (dim > 1 && basis_size_2 < 10) ? basis_size_2 : 1;
1240 const unsigned int n_blocks =
Utilities::pow(basis_size_2, dim - 1);
1241 for (
unsigned int ii = 0; ii < n_blocks; ii += n_inner_blocks)
1242 for (
unsigned int c = 0; c < n_components; ++c)
1244 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1245 eval_val.template values_one_line<dim - 1, true, false>(
1246 my_scratch + i, my_scratch + i);
1247 for (
unsigned int q = 0; q < basis_size_2; ++q)
1248 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1249 my_scratch[i + q * n_blocks + c * size_per_component] *=
1250 coefficients[i + q * n_blocks +
1251 c * stride * size_per_component];
1252 for (
unsigned int i = ii; i < ii + n_inner_blocks; ++i)
1253 eval_val.template values_one_line<dim - 1, false, false>(
1254 my_scratch + i, my_scratch + i);
1256 for (
unsigned int q = 0; q < basis_size_1; ++q)
1263 transformation_matrix,
1279 template <
int n_po
ints_1d,
int dim,
typename Number,
typename Number2>
1283 const Number *values,
1287 (n_points_1d + 1) / 2 * n_points_1d);
1305 eval.template gradients<0, true, false>(values, gradients);
1309 eval.template gradients<2, true, false, dim>(values, gradients + 2);
1310 constexpr unsigned int loop_bound = (dim > 2 ? n_points_1d : 1);
1311 constexpr unsigned int n_points_2d = n_points_1d * n_points_1d;
1312 const Number *in = values + (loop_bound - 1) * n_points_2d;
1313 Number *out = gradients + (loop_bound - 1) * dim * n_points_2d;
1314 for (
unsigned int l = 0; l < loop_bound; ++l)
1316 eval_2d.template gradients<0, true, false, dim>(in, out);
1317 eval_2d.template gradients<1, true, false, dim>(in, out + 1);
1319 out -= dim * n_points_2d;
1333 template <
int n_po
ints_1d,
int dim,
typename Number,
typename Number2>
1338 const Number *gradients,
1339 const bool add_into_values_array)
1342 (n_points_1d + 1) / 2 * n_points_1d);
1361 if (add_into_values_array)
1362 eval.template gradients<0, false, true>(gradients, values);
1364 eval.template gradients<0, false, false>(gradients, values);
1368 constexpr unsigned int loop_bound = (dim > 2 ? n_points_1d : 1);
1369 constexpr unsigned int n_points_2d = n_points_1d * n_points_1d;
1371 const Number *in = gradients + (loop_bound - 1) * dim * n_points_2d;
1372 Number *out = values + (loop_bound - 1) * n_points_2d;
1373 for (
unsigned int l = 0; l < loop_bound; ++l)
1375 if (add_into_values_array)
1376 eval_2d.template gradients<0, false, true, dim>(in, out);
1378 eval_2d.template gradients<0, false, false, dim>(in, out);
1379 eval_2d.template gradients<1, false, true, dim>(in + 1, out);
1380 in -= dim * n_points_2d;
1385 eval.template gradients<2, false, true, dim>(gradients + 2, values);
1396 template <
int n_po
ints_1d,
int dim,
typename Number>
1426 const std::size_t n_points = fe_eval.
get_shape_info().n_q_points;
1427 for (
unsigned int comp = 0; comp < n_components; ++comp)
1430 eval.template hessians<0, true, false>(values, hessians);
1436 eval.template gradients<0, true, false>(values, scratch);
1437 eval.template gradients<1, true, false>(scratch,
1438 hessians + dim * n_points);
1440 eval.template hessians<1, true, false>(values, hessians + n_points);
1445 eval.template gradients<2, true, false>(scratch,
1446 hessians + 4 * n_points);
1448 eval.template gradients<1, true, false>(values, scratch);
1449 eval.template gradients<2, true, false>(scratch,
1450 hessians + 5 * n_points);
1452 eval.template hessians<2, true, false>(values,
1453 hessians + 2 * n_points);
1457 hessians += (dim * (dim + 1)) / 2 * n_points;
1469 template <
int n_q_po
ints_1d,
int dim,
typename Number>
1473 const bool add_into_values_array)
1496 const std::size_t n_points = fe_eval.
get_shape_info().n_q_points;
1498 for (
unsigned int comp = 0; comp < n_components; ++comp)
1501 if (add_into_values_array ==
true)
1502 eval.template hessians<0, false, true>(hessians, values);
1504 eval.template hessians<0, false, false>(hessians, values);
1508 eval.template hessians<1, false, true>(hessians + n_points, values);
1512 eval.template hessians<2, false, true>(hessians + 2 * n_points,
1515 eval.template gradients<2, false, false>(hessians + 5 * n_points,
1517 eval.template gradients<1, false, true>(scratch, values);
1520 eval.template gradients<2, false, false>(hessians + 4 * n_points,
1527 eval.template gradients<1,
false, (dim > 2)>(hessians +
1530 eval.template gradients<0, false, true>(scratch, values);
1534 hessians += (dim * (dim + 1)) / 2 * n_points;
1546 template <
int dim,
typename Number>
1549 const Number *values_dofs,
1552 const auto &univariate_shape_data = fe_eval.
get_shape_info().data;
1555 using Eval =
typename Impl::Eval;
1557 Impl::create_evaluator_tensor_product(&univariate_shape_data[0]);
1558 Eval eval1 = Impl::create_evaluator_tensor_product(
1560 univariate_shape_data.size() - 1)]);
1561 Eval eval2 = Impl::create_evaluator_tensor_product(
1563 univariate_shape_data.size() - 1)]);
1565 const unsigned int n_points = fe_eval.
get_shape_info().n_q_points;
1569 univariate_shape_data.front().fe_degree + 1),
1571 univariate_shape_data.front().n_q_points_1d));
1574 for (
unsigned int comp = 0; comp < n_components;
1576 hessians += n_points * dim * (dim + 1) / 2,
1582 eval0.template hessians<0, true, false>(values_dofs, hessians);
1586 eval0.template hessians<0, true, false>(values_dofs, tmp1);
1587 eval1.template values<1, true, false>(tmp1, hessians);
1589 eval0.template gradients<0, true, false>(values_dofs, tmp1);
1590 eval1.template gradients<1, true, false>(tmp1,
1591 hessians + 2 * n_points);
1593 eval0.template values<0, true, false>(values_dofs, tmp1);
1594 eval1.template hessians<1, true, false>(tmp1, hessians + n_points);
1598 eval0.template hessians<0, true, false>(values_dofs, tmp1);
1599 eval1.template values<1, true, false>(tmp1, tmp2);
1600 eval2.template values<2, true, false>(tmp2, hessians);
1602 eval0.template gradients<0, true, false>(values_dofs, tmp1);
1603 eval1.template gradients<1, true, false>(tmp1, tmp2);
1604 eval2.template values<2, true, false>(tmp2,
1605 hessians + 3 * n_points);
1607 eval1.template values<1, true, false>(tmp1, tmp2);
1608 eval2.template gradients<2, true, false>(tmp2,
1609 hessians + 4 * n_points);
1611 eval0.template values<0, true, false>(values_dofs, tmp1);
1612 eval1.template hessians<1, true, false>(tmp1, tmp2);
1613 eval2.template values<2, true, false>(tmp2, hessians + n_points);
1615 eval1.template gradients<1, true, false>(tmp1, tmp2);
1616 eval2.template gradients<2, true, false>(tmp2,
1617 hessians + 5 * n_points);
1619 eval1.template values<1, true, false>(tmp1, tmp2);
1620 eval2.template hessians<2, true, false>(tmp2,
1621 hessians + 2 * n_points);
1627 "Only 1d, 2d and 3d implemented for Hessian"));
1640 template <
int dim,
typename Number>
1644 Number *values_dofs,
1645 const bool add_into_values_array)
1647 const auto &univariate_shape_data = fe_eval.
get_shape_info().data;
1650 using Eval =
typename Impl::Eval;
1652 Impl::create_evaluator_tensor_product(&univariate_shape_data[0]);
1653 Eval eval1 = Impl::create_evaluator_tensor_product(
1655 univariate_shape_data.size() - 1)]);
1656 Eval eval2 = Impl::create_evaluator_tensor_product(
1658 univariate_shape_data.size() - 1)]);
1660 const unsigned int n_points = fe_eval.
get_shape_info().n_q_points;
1664 univariate_shape_data.front().fe_degree + 1),
1666 univariate_shape_data.front().n_q_points_1d));
1669 for (
unsigned int comp = 0; comp < n_components;
1671 hessians += n_points * dim * (dim + 1) / 2,
1677 if (add_into_values_array)
1678 eval0.template hessians<0, false, true>(hessians, values_dofs);
1680 eval0.template hessians<0, false, false>(hessians, values_dofs);
1684 eval1.template values<1, false, false>(hessians, tmp1);
1685 if (add_into_values_array)
1686 eval0.template hessians<0, false, true>(tmp1, values_dofs);
1688 eval0.template hessians<0, false, false>(tmp1, values_dofs);
1691 eval1.template gradients<1, false, false>(hessians + 2 * n_points,
1693 eval0.template gradients<0, false, true>(tmp1, values_dofs);
1695 eval1.template hessians<1, false, false>(hessians + n_points, tmp1);
1696 eval0.template values<0, false, true>(tmp1, values_dofs);
1700 eval2.template values<2, false, false>(hessians, tmp1);
1701 eval1.template values<1, false, false>(tmp1, tmp2);
1703 if (add_into_values_array)
1704 eval0.template hessians<0, false, true>(tmp2, values_dofs);
1706 eval0.template hessians<0, false, false>(tmp2, values_dofs);
1709 eval2.template values<2, false, false>(hessians + 3 * n_points,
1711 eval1.template gradients<1, false, false>(tmp1, tmp2);
1713 eval2.template gradients<2, false, false>(hessians + 4 * n_points,
1715 eval1.template values<1, false, true>(tmp1, tmp2);
1716 eval1.template values<0, false, true>(tmp2, values_dofs);
1719 eval2.template values<2, false, false>(hessians + n_points, tmp1);
1720 eval1.template hessians<1, false, false>(tmp1, tmp2);
1723 eval2.template gradients<2, false, false>(hessians + 5 * n_points,
1725 eval1.template gradients<1, false, true>(tmp1, tmp2);
1728 eval2.template hessians<2, false, false>(hessians + 2 * n_points,
1730 eval1.template values<1, false, true>(tmp1, tmp2);
1731 eval0.template values<0, false, true>(tmp2, values_dofs);
1737 "Only 1d, 2d and 3d implemented for Hessian"));
1755 template <
int dim,
int fe_degree,
typename Number>
1770 const Number *values_dofs,
1773 constexpr std::size_t n_points =
Utilities::pow(fe_degree + 1, dim);
1775 for (
unsigned int c = 0; c < n_components; ++c)
1778 for (
unsigned int i = 0; i < n_points; ++i)
1780 values_dofs[n_points * c + i];
1785 values_dofs + c * n_points,
1793 Number *values_dofs,
1795 const bool add_into_values_array)
1797 constexpr std::size_t n_points =
Utilities::pow(fe_degree + 1, dim);
1799 for (
unsigned int c = 0; c < n_components; ++c)
1803 if (add_into_values_array)
1804 for (
unsigned int i = 0; i < n_points; ++i)
1805 values_dofs[n_points * c + i] +=
1808 for (
unsigned int i = 0; i < n_points; ++i)
1809 values_dofs[n_points * c + i] =
1816 values_dofs + c * n_points,
1818 add_into_values_array ||
1836 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1842 const Number *values_dofs,
1847 Assert(n_q_points_1d > fe_degree,
1848 ExcMessage(
"You lose information when going to a collocation "
1849 "space of lower degree, so the evaluation results "
1850 "would be wrong. Thus, this class does not permit "
1851 "the chosen operation."));
1852 constexpr std::size_t n_dofs =
Utilities::pow(fe_degree + 1, dim);
1853 constexpr std::size_t n_q_points =
Utilities::pow(n_q_points_1d, dim);
1855 for (
unsigned int c = 0; c < n_components; ++c)
1861 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1862 n_q_points_1d>::do_forward(1,
1863 shape_data.shape_values_eo,
1864 values_dofs + c * n_dofs,
1879 Number *values_dofs,
1881 const bool add_into_values_array)
1885 Assert(n_q_points_1d > fe_degree,
1886 ExcMessage(
"You lose information when going to a collocation "
1887 "space of lower degree, so the evaluation results "
1888 "would be wrong. Thus, this class does not permit "
1889 "the chosen operation."));
1890 constexpr std::size_t n_q_points =
Utilities::pow(n_q_points_1d, dim);
1892 for (
unsigned int c = 0; c < n_components; ++c)
1908 (fe_degree >= n_q_points_1d ? n_q_points_1d : fe_degree + 1),
1909 n_q_points_1d>::do_backward(1,
1910 shape_data.shape_values_eo,
1911 add_into_values_array,
1926 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1936 template <
bool integrate>
1940 Number *values_dofs_actual,
1942 const bool add_into_values_array =
false);
1947 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
1948 template <
bool integrate>
1955 evaluate_or_integrate(
1957 Number *values_dofs,
1961 Assert(dim == 2 || dim == 3,
1962 ExcMessage(
"Only dim = 2,3 implemented for Raviart-Thomas "
1963 "evaluation/integration"));
1977 const unsigned int dofs_per_component =
1979 const unsigned int n_points =
Utilities::pow(n_q_points_1d, dim);
1994 if constexpr (dim > 2)
1995 eval.template tangential<2, 0>(shape_data[1], values, values);
1996 eval.template tangential<1, 0>(shape_data[1], values, values);
1997 eval.template normal<0>(shape_data[0], values, values_dofs, add);
2000 gradients += n_points * dim;
2001 values_dofs += dofs_per_component;
2008 if constexpr (dim > 2)
2009 eval.template tangential<2, 1>(shape_data[1], values, values);
2010 eval.template tangential<0, 1>(shape_data[1], values, values);
2011 eval.template normal<1>(shape_data[0], values, values_dofs, add);
2013 if constexpr (dim > 2)
2016 gradients += n_points * dim;
2017 values_dofs += dofs_per_component;
2024 eval.template tangential<1, 2>(shape_data[1], values, values);
2025 eval.template tangential<0, 2>(shape_data[1], values, values);
2026 eval.template normal<2>(shape_data[0], values, values_dofs, add);
2033 eval.template normal<0>(shape_data[0], values_dofs, values);
2034 eval.template tangential<1, 0>(shape_data[1], values, values);
2035 if constexpr (dim > 2)
2036 eval.template tangential<2, 0>(shape_data[1], values, values);
2043 gradients += n_points * dim;
2044 values_dofs += dofs_per_component;
2046 eval.template normal<1>(shape_data[0], values_dofs, values);
2047 eval.template tangential<0, 1>(shape_data[1], values, values);
2048 if constexpr (dim > 2)
2049 eval.template tangential<2, 1>(shape_data[1], values, values);
2055 if constexpr (dim > 2)
2058 gradients += n_points * dim;
2059 values_dofs += dofs_per_component;
2061 eval.template normal<2>(shape_data[0], values_dofs, values);
2062 eval.template tangential<0, 2>(shape_data[1], values, values);
2063 eval.template tangential<1, 2>(shape_data[1], values, values);
2089 template <
int dim,
typename Number,
bool do_
integrate>
2092 template <
int fe_degree,
int n_q_po
ints_1d,
typename OtherNumber>
2094 run(
const unsigned int n_components,
2096 OtherNumber *values_dofs,
2098 const bool sum_into_values_array_in =
false)
2102 static_assert(std::is_same_v<Number, std::remove_const_t<OtherNumber>>,
2103 "Type of Number and of OtherNumber do not match.");
2110 element_type == ElementType::tensor_general) ||
2111 element_type == ElementType::tensor_raviart_thomas,
2115 bool sum_into_values_array = sum_into_values_array_in;
2121 if constexpr (do_integrate)
2134 sum_into_values_array);
2135 sum_into_values_array =
true;
2140 if (fe_degree >= 0 && fe_degree + 1 == n_q_points_1d &&
2141 element_type == ElementType::tensor_symmetric_collocation)
2149 sum_into_values_array);
2153 else if (fe_degree >= 0 &&
2155 element_type <= ElementType::tensor_symmetric)
2166 sum_into_values_array);
2168 else if (fe_degree >= 0 &&
2169 element_type <= ElementType::tensor_symmetric_no_collocation)
2180 sum_into_values_array);
2182 else if (element_type == ElementType::tensor_none)
2190 sum_into_values_array);
2192 else if (element_type == ElementType::tensor_symmetric_plus_dg0)
2199 Number>>(n_components,
2203 sum_into_values_array);
2205 else if (element_type == ElementType::truncated_tensor)
2216 sum_into_values_array);
2218 else if (element_type == ElementType::tensor_raviart_thomas)
2220 if constexpr (fe_degree > 0 && n_q_points_1d > 0 && dim > 1)
2229 const_cast<Number *
>(values_dofs),
2231 sum_into_values_array);
2237 "in 2d/3d and with templated degree"));
2251 sum_into_values_array);
2269 template <
typename T>
2272 const unsigned int n_components,
2274 const Number *values_dofs,
2276 const bool sum_into_values_array,
2277 std::bool_constant<false>)
2279 (void)sum_into_values_array;
2281 T::evaluate(n_components, evaluation_flag, values_dofs, fe_eval);
2284 template <
typename T>
2287 const unsigned int n_components,
2289 Number *values_dofs,
2291 const bool sum_into_values_array,
2292 std::bool_constant<true>)
2294 T::integrate(n_components,
2298 sum_into_values_array);
2301 template <
typename T,
typename OtherNumber>
2304 const unsigned int n_components,
2306 OtherNumber *values_dofs,
2308 const bool sum_into_values_array)
2314 sum_into_values_array,
2315 std::bool_constant<do_integrate>());
2325 template <
int dim,
typename Number>
2331 template <
int fe_degree,
int = 0>
2333 run(
const unsigned int n_components,
2335 const Number *in_array,
2338 const unsigned int given_degree =
2339 (fe_degree > -1) ? fe_degree :
2342 const unsigned int dofs_per_component =
2362 for (
unsigned int d = 0; d < n_components; ++d)
2364 const Number *in = in_array + d * dofs_per_component;
2365 Number *out = out_array + d * dofs_per_component;
2368 evaluator.template hessians<0, true, false>(in, out);
2370 evaluator.template hessians<1, true, false>(out, out);
2372 evaluator.template hessians<2, true, false>(out, out);
2374 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2376 const Number inverse_JxW_q = Number(1.) / fe_eval.
JxW(q);
2377 for (
unsigned int d = 0; d < n_components; ++d)
2378 out_array[q + d * dofs_per_component] *= inverse_JxW_q;
2380 for (
unsigned int d = 0; d < n_components; ++d)
2382 Number *out = out_array + d * dofs_per_component;
2384 evaluator.template hessians<2, false, false>(out, out);
2386 evaluator.template hessians<1, false, false>(out, out);
2387 evaluator.template hessians<0, false, false>(out, out);
// Template header parameterised on `dim` and `Number`; presumably that of the
// enclosing class template, whose struct/class line is not part of this
// listing.
2401 template <
int dim,
typename Number>
// run(): applies per-point inverse coefficients between two sweeps of the
// evaluator's `hessians` kernels, reading from in_array and writing to
// out_array.
//
// Parameters visible in this listing:
//   n_desired_components  number of components to process
//   dyadic_coefficients   if true, the coefficients are treated as one
//                         n_desired_components x n_desired_components block
//                         per point (passed to vmult); if false, as one
//                         scalar per point
//   in_array              input values, components stored one after another
//                         with stride dofs_per_component
// The remaining parameters (fe_eval, inverse_coefficients, out_array) are
// used below, but their declarations are not part of this listing.
//
// NOTE(review): the construction of `evaluator`, the return statement and
// several branch keywords are missing from this listing; comments below that
// depend on them are marked as assumptions.
2407 template <
int fe_degree,
int = 0>
2409 run(
const unsigned int n_desired_components,
2412 const bool dyadic_coefficients,
2413 const Number *in_array,
// Use the compile-time degree when fe_degree > -1; the run-time fallback
// expression is not visible here.
2416 const unsigned int given_degree =
2417 (fe_degree > -1) ? fe_degree :
2420 const unsigned int dofs_per_component =
// Sanity check: the coefficient array length must be a multiple of
// dofs_per_component.
2424 inverse_coefficients.
size() % dofs_per_component == 0,
2426 "Expected diagonal to be a multiple of scalar dof per cells"));
// Size checks on the coefficient array, split by coefficient layout. In the
// non-dyadic case a check is only made when the array does not hold exactly
// dofs_per_component entries. The expected sizes are not visible here.
2428 if (!dyadic_coefficients)
2430 if (inverse_coefficients.
size() != dofs_per_component)
2432 inverse_coefficients.
size());
2438 inverse_coefficients.
size());
// Running pointers that are advanced once per outer iteration.
2458 const Number *in = in_array;
2459 Number *out = out_array;
2461 const Number *inv_coefficient = inverse_coefficients.
data();
// Advance the coefficient pointer by dofs_per_component per outer iteration
// only if more than dofs_per_component coefficients were supplied; the
// alternative value is not visible here (presumably 0, i.e. the same
// coefficients are reused for every component).
2463 const unsigned int shift_coefficient =
2464 inverse_coefficients.
size() > dofs_per_component ? dofs_per_component :
// Dyadic: a single outer pass that handles all components together, because
// vmult couples them. Otherwise: one outer pass per component, each with a
// single inner component.
2467 const auto n_comp_outer = dyadic_coefficients ? 1 : n_desired_components;
2468 const auto n_comp_inner = dyadic_coefficients ? n_desired_components : 1;
2470 for (
unsigned int d = 0; d < n_comp_outer; ++d)
// First sweep: hessians<0>, <1>, <2> with second template argument `true`.
// The first call goes from in_ to out_, the later ones work in place on out_.
// NOTE(review): the first template argument presumably selects the tensor
// direction, and the <1>/<2> calls are presumably guarded by the dimension;
// the guards are not visible here.
2472 for (
unsigned int di = 0; di < n_comp_inner; ++di)
2474 const Number *in_ = in + di * dofs_per_component;
2475 Number *out_ = out + di * dofs_per_component;
2476 evaluator.template hessians<0, true, false>(in_, out_);
2478 evaluator.template hessians<1, true, false>(out_, out_);
2480 evaluator.template hessians<2, true, false>(out_, out_);
// Coefficient application. In the dyadic case each point q owns a block of
// n_desired_components^2 coefficients starting at q * n_coeff_components.
2482 if (dyadic_coefficients)
2484 const auto n_coeff_components =
2485 n_desired_components * n_desired_components;
// When the component count equals dim, call vmult with the size as a
// compile-time template argument; otherwise pass -1 and give the size at run
// time as the last argument. The source/destination arguments of both calls
// are not visible here.
2486 if (n_desired_components == dim)
2488 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2489 vmult<dim>(&inv_coefficient[q * n_coeff_components],
2492 dofs_per_component);
2496 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2497 vmult<-1>(&inv_coefficient[q * n_coeff_components],
2501 n_desired_components);
// Scalar coefficients: scale each entry by its own coefficient (presumably
// the else-branch of the dyadic test; the keyword is not visible here).
2505 for (
unsigned int q = 0; q < dofs_per_component; ++q)
2506 out[q] *= inv_coefficient[q];
// Second sweep: the same kernels with second template argument `false`,
// applied in place on out_ in the reverse order <2>, <1>, <0>.
2508 for (
unsigned int di = 0; di < n_comp_inner; ++di)
2510 Number *out_ = out + di * dofs_per_component;
2512 evaluator.template hessians<2, false, false>(out_, out_);
2514 evaluator.template hessians<1, false, false>(out_, out_);
2515 evaluator.template hessians<0, false, false>(out_, out_);
// Move on to the next component's data and, if applicable, coefficients.
2518 in += dofs_per_component;
2519 out += dofs_per_component;
2520 inv_coefficient += shift_coefficient;
// vmult(): small dense matrix-vector product dst = A * src.
//
// A is read from inverse_coefficients as n x n entries with row d1 starting
// at index d1 * n. The entries of src and dst are strided: entry d lives at
// offset d * dofs_per_component.
//
// Template parameter n_components: the size n when it is > -1; otherwise n is
// taken from the run-time argument n_given_components.
//
// NOTE(review): the src and dst parameter declarations are not part of this
// listing, although both pointers are used below.
2527 template <
int n_components>
2529 vmult(
const Number *inverse_coefficients,
2532 const unsigned int dofs_per_component,
2533 const unsigned int n_given_components = 0)
2535 const unsigned int n_desired_components =
2536 (n_components > -1) ? n_components : n_given_components;
// Fixed-capacity scratch copy of the source vector; the assertion below
// guards its capacity of dim + 2 entries.
2538 std::array<Number, dim + 2> tmp = {};
2539 Assert(n_desired_components <= dim + 2,
2541 "Number of components larger than dim+2 not supported."));
// Gather all strided source entries before any destination entry is written,
// so the result stays correct even if dst aliases src.
2543 for (
unsigned int d = 0; d < n_desired_components; ++d)
2544 tmp[d] = src[d * dofs_per_component];
// One dot product per matrix row, seeded with the first term and stored at
// the strided destination slot.
2546 for (
unsigned int d1 = 0; d1 < n_desired_components; ++d1)
2548 const Number *inv_coeff_row =
2549 &inverse_coefficients[d1 * n_desired_components];
2550 Number sum = inv_coeff_row[0] * tmp[0];
2551 for (
unsigned int d2 = 1; d2 < n_desired_components; ++d2)
2552 sum += inv_coeff_row[d2] * tmp[d2];
2553 dst[d1 * dofs_per_component] = sum;
// Template header parameterised on `dim` and `Number`; presumably that of the
// enclosing class template, whose struct/class line is not part of this
// listing.
2566 template <
int dim,
typename Number>
// run(): for each component, passes data laid out with stride n_q_points
// (in_array) through the evaluator's `hessians` kernels into data laid out
// with stride dofs_per_component (out_array).
//
// NOTE(review): the out_array parameter, the definitions of `temp_1` and
// `evaluator`, the branch conditions around the three call groups and the end
// of the function are missing from this listing.
2569 template <
int fe_degree,
int n_q_po
ints_1d>
2571 run(
const unsigned int n_desired_components,
2573 const Number *in_array,
// In-place mode requires a compile-time degree (fe_degree > -1) and as many
// 1d quadrature points as 1d basis functions (fe_degree + 1 == n_q_points_1d).
2576 static const bool do_inplace =
2577 fe_degree > -1 && (fe_degree + 1 == n_q_points_1d);
2583 const auto &inverse_shape =
2588 const std::size_t dofs_per_component =
2591 const std::size_t n_q_points = do_inplace ?
2609 for (
unsigned int d = 0; d < n_desired_components; ++d)
// Component d: input offset uses the quadrature-point stride, output offset
// uses the dof stride.
2611 const Number *in = in_array + d * n_q_points;
2612 Number *out = out_array + d * dofs_per_component;
// Second scratch pointer. When not in place it lies past temp_1 by the larger
// of the two per-component sizes; the in-place alternative is not visible
// here.
2615 auto *temp_2 = do_inplace ?
2617 (temp_1 +
std::max(n_q_points, dofs_per_component));
// Three-stage chain: in -> temp_1 -> temp_2 -> out using <2>, <1>, <0>.
// NOTE(review): presumably the dim == 3 branch; the condition is not visible.
2621 evaluator.template hessians<2, false, false>(in, temp_1);
2622 evaluator.template hessians<1, false, false>(temp_1, temp_2);
2623 evaluator.template hessians<0, false, false>(temp_2, out);
// Two-stage chain: in -> temp_1 -> out using <1>, <0> (presumably dim == 2).
2627 evaluator.template hessians<1, false, false>(in, temp_1);
2628 evaluator.template hessians<0, false, false>(temp_1, out);
// Single stage: in -> out using <0> (presumably dim == 1).
2631 evaluator.template hessians<0, false, false>(in, out);
value_type * data() const noexcept
ScalarNumber shape_info_number_type
const ShapeInfoType & get_shape_info() const
Number JxW(const unsigned int q_point) const
const Number * begin_gradients() const
ArrayView< Number > get_scratch_data() const
const Number * begin_values() const
const Number * begin_hessians() const
#define DEAL_II_ALWAYS_INLINE
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
static ::ExceptionBase & ExcNotImplemented()
#define Assert(cond, exc)
#define AssertDimension(dim1, dim2)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
@ tensor_symmetric_no_collocation
@ tensor_symmetric_plus_dg0
EvaluationFlags
The EvaluationFlags enum.
constexpr T fixed_power(const T t)
constexpr T pow(const T base, const int iexp)
void evaluate_hessians_collocation(const unsigned int n_components, FEEvaluationData< dim, Number, false > &fe_eval)
void embed_truncated_into_full_tensor_product(const unsigned int n_components, Number *values_dofs, const Number *values_dofs_actual, FEEvaluationData< dim, Number, is_face > &fe_eval)
constexpr bool use_collocation_evaluation(const unsigned int fe_degree, const unsigned int n_q_points_1d)
void integrate_gradients_collocation(const MatrixFreeFunctions::UnivariateShapeData< Number2 > &shape, Number *values, const Number *gradients, const bool add_into_values_array)
void evaluate_hessians_slow(const unsigned int n_components, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval)
std::enable_if_t<(variant==evaluate_general), void > apply_matrix_vector_product(const Number2 *matrix, const Number *in, Number *out)
void integrate_hessians_collocation(const unsigned int n_components, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
void truncate_tensor_product_to_complete_degrees(const unsigned int n_components, Number *values_dofs_actual, const Number *values_dofs, FEEvaluationData< dim, Number, is_face > &fe_eval)
void evaluate_gradients_collocation(const MatrixFreeFunctions::UnivariateShapeData< Number2 > &shape, const Number *values, Number *gradients)
void integrate_hessians_slow(const unsigned int n_components, const FEEvaluationData< dim, Number, false > &fe_eval, Number *values_dofs, const bool add_into_values_array)
constexpr unsigned int invalid_unsigned_int
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static bool run(const unsigned int n_components, const FEEvaluationData< dim, Number, false > &fe_eval, const Number *in_array, Number *out_array)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static bool run(const unsigned int n_desired_components, const FEEvaluationData< dim, Number, false > &fe_eval, const ArrayView< const Number > &inverse_coefficients, const bool dyadic_coefficients, const Number *in_array, Number *out_array)
static void vmult(const Number *inverse_coefficients, const Number *src, Number *dst, const unsigned int dofs_per_component, const unsigned int n_given_components=0)
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static const EvaluatorVariant variant
static constexpr unsigned int n_rows_of_product
static constexpr unsigned int n_columns_of_product
static void do_backward(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const bool add_into_result, Number *values_in, Number *values_out, const unsigned int basis_size_1_variable=numbers::invalid_unsigned_int, const unsigned int basis_size_2_variable=numbers::invalid_unsigned_int)
static void do_forward(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const Number *values_in, Number *values_out, const unsigned int basis_size_1_variable=numbers::invalid_unsigned_int, const unsigned int basis_size_2_variable=numbers::invalid_unsigned_int)
static void do_mass(const unsigned int n_components, const AlignedVector< Number2 > &transformation_matrix, const AlignedVector< Number > &coefficients, const Number *values_in, Number *scratch_data, Number *values_out)
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval)
EvaluatorTensorProduct< evaluate_evenodd, dim, fe_degree+1, fe_degree+1, Number, Number2 > Eval
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, OtherNumber *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array)
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array, std::bool_constant< true >)
static bool run(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, OtherNumber *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array_in=false)
static void evaluate_or_integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs, FEEvaluationData< dim, Number, false > &fe_eval, const bool sum_into_values_array, std::bool_constant< false >)
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval)
static void evaluate_or_integrate(const EvaluationFlags::EvaluationFlags evaluation_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array=false)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static const EvaluatorVariant variant
static void integrate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags integration_flag, Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval, const bool add_into_values_array)
static Eval create_evaluator_tensor_product(const MatrixFreeFunctions::UnivariateShapeData< Number2 > *univariate_shape_data)
typename FEEvaluationData< dim, Number, false >::shape_info_number_type Number2
static void evaluate(const unsigned int n_components, const EvaluationFlags::EvaluationFlags evaluation_flag, const Number *values_dofs_actual, FEEvaluationData< dim, Number, false > &fe_eval)
EvaluatorTensorProduct< variant, dim, fe_degree+1, n_q_points_1d, Number, Number2 > Eval
AlignedVector< Number > shape_values
AlignedVector< Number > shape_hessians_collocation
AlignedVector< Number > shape_values_eo
AlignedVector< Number > shape_hessians_eo
AlignedVector< Number > shape_gradients_collocation_eo
unsigned int n_q_points_1d
AlignedVector< Number > shape_gradients_eo
AlignedVector< Number > shape_hessians
AlignedVector< Number > shape_gradients
AlignedVector< Number > shape_gradients_collocation