deal.II version 9.7.0
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
Loading...
Searching...
No Matches
scalapack.cc
Go to the documentation of this file.
1// ------------------------------------------------------------------------
2//
3// SPDX-License-Identifier: LGPL-2.1-or-later
4// Copyright (C) 2017 - 2025 by the deal.II authors
5//
6// This file is part of the deal.II library.
7//
8// Part of the source code is dual licensed under Apache-2.0 WITH
9// LLVM-exception OR LGPL-2.1-or-later. Detailed license information
10// governing the source code and code contributions can be found in
11// LICENSE.md and CONTRIBUTING.md at the top level directory of deal.II.
12//
13// ------------------------------------------------------------------------
14
15
17
18#ifdef DEAL_II_WITH_SCALAPACK
19
21# include <deal.II/base/mpi.h>
22# include <deal.II/base/mpi.templates.h>
23
24# include <deal.II/lac/scalapack.templates.h>
25
26# ifdef DEAL_II_WITH_HDF5
27# include <hdf5.h>
28# endif
29
30# include <limits>
31# include <memory>
32
34
35# ifdef DEAL_II_WITH_HDF5
36
37template <typename number>
38inline hid_t
39hdf5_type_id(const number *)
40{
42 // don't know what to put here; it does not matter
43 return -1;
44}
45
46inline hid_t
47hdf5_type_id(const double *)
48{
49 return H5T_NATIVE_DOUBLE;
50}
51
52inline hid_t
53hdf5_type_id(const float *)
54{
55 return H5T_NATIVE_FLOAT;
56}
57
58inline hid_t
59hdf5_type_id(const int *)
60{
61 return H5T_NATIVE_INT;
62}
63
64inline hid_t
65hdf5_type_id(const unsigned int *)
66{
67 return H5T_NATIVE_UINT;
68}
69
70inline hid_t
71hdf5_type_id(const char *)
72{
73 return H5T_NATIVE_CHAR;
74}
75# endif // DEAL_II_WITH_HDF5
76
77
78
79template <typename NumberType>
81 const size_type n_rows_,
82 const size_type n_columns_,
83 const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
84 const size_type row_block_size_,
85 const size_type column_block_size_,
86 const LAPACKSupport::Property property_)
87 : uplo('L')
88 , // for non-symmetric matrices this is not needed
91 , submatrix_row(1)
93{
94 reinit(n_rows_,
95 n_columns_,
96 process_grid,
97 row_block_size_,
98 column_block_size_,
99 property_);
100}
101
102
103
104template <typename NumberType>
106 const size_type size,
107 const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
108 const size_type block_size,
110 : ScaLAPACKMatrix<NumberType>(size,
111 size,
112 process_grid,
113 block_size,
114 block_size,
115 property)
116{}
117
118
119
120template <typename NumberType>
122 const std::string &filename,
123 const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
126 : uplo('L')
127 , // for non-symmetric matrices this is not needed
130 , submatrix_row(1)
132{
133# ifndef DEAL_II_WITH_HDF5
134 (void)filename;
135 (void)process_grid;
136 (void)row_block_size;
137 (void)column_block_size;
138 Assert(
139 false,
141 "This function is only available when deal.II is configured with HDF5"));
142# else
143
144 const unsigned int this_mpi_process(
145 Utilities::MPI::this_mpi_process(process_grid->mpi_communicator));
146
147 // Before reading the content from disk the root process determines the
148 // dimensions of the matrix. Subsequently, memory is allocated by a call to
149 // reinit() and the matrix is loaded by a call to load().
150 if (this_mpi_process == 0)
151 {
152 herr_t status = 0;
153
154 // open file in read-only mode
155 hid_t file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
156 AssertThrow(file >= 0, ExcIO());
157
158 // get data set in file
159 hid_t dataset = H5Dopen2(file, "/matrix", H5P_DEFAULT);
160 AssertThrow(dataset >= 0, ExcIO());
161
162 // determine file space
163 hid_t filespace = H5Dget_space(dataset);
164
165 // get number of dimensions in data set
166 int rank = H5Sget_simple_extent_ndims(filespace);
167 AssertThrow(rank == 2, ExcIO());
168 hsize_t dims[2];
169 status = H5Sget_simple_extent_dims(filespace, dims, nullptr);
170 AssertThrow(status >= 0, ExcIO());
171
172 // due to ScaLAPACK's column-major memory layout the dimensions are
173 // swapped
174 n_rows = dims[1];
175 n_columns = dims[0];
176
177 // close/release resources
178 status = H5Sclose(filespace);
179 AssertThrow(status >= 0, ExcIO());
180 status = H5Dclose(dataset);
181 AssertThrow(status >= 0, ExcIO());
182 status = H5Fclose(file);
183 AssertThrow(status >= 0, ExcIO());
184 }
185 int ierr = MPI_Bcast(&n_rows,
186 1,
188 0 /*from root*/,
189 process_grid->mpi_communicator);
190 AssertThrowMPI(ierr);
191
192 ierr = MPI_Bcast(&n_columns,
193 1,
195 0 /*from root*/,
196 process_grid->mpi_communicator);
197 AssertThrowMPI(ierr);
198
199 // the property will be overwritten by the subsequent call to load()
201 n_columns,
202 process_grid,
206
207 load(filename.c_str());
208
209# endif // DEAL_II_WITH_HDF5
210}
211
212
213
214template <typename NumberType>
215void
217 const size_type n_rows_,
218 const size_type n_columns_,
219 const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
220 const size_type row_block_size_,
221 const size_type column_block_size_,
222 const LAPACKSupport::Property property_)
223{
224 Assert(row_block_size_ > 0, ExcMessage("Row block size has to be positive."));
225 Assert(column_block_size_ > 0,
226 ExcMessage("Column block size has to be positive."));
227 Assert(
228 row_block_size_ <= n_rows_,
230 "Row block size can not be greater than the number of rows of the matrix"));
231 Assert(
232 column_block_size_ <= n_columns_,
234 "Column block size can not be greater than the number of columns of the matrix"));
235
237 property = property_;
238 grid = process_grid;
239 n_rows = n_rows_;
240 n_columns = n_columns_;
241 row_block_size = row_block_size_;
242 column_block_size = column_block_size_;
243
244 if (grid->mpi_process_is_active)
245 {
246 // Get local sizes:
247 n_local_rows = numroc_(&n_rows,
249 &(grid->this_process_row),
251 &(grid->n_process_rows));
252 n_local_columns = numroc_(&n_columns,
254 &(grid->this_process_column),
256 &(grid->n_process_columns));
257
258 // LLD_A = MAX(1,NUMROC(M_A, MB_A, MYROW, RSRC_A, NPROW)), different
259 // between processes
260 int lda = std::max(1, n_local_rows);
261
262 int info = 0;
263 descinit_(descriptor,
264 &n_rows,
265 &n_columns,
270 &(grid->blacs_context),
271 &lda,
272 &info);
273 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("descinit_", info));
274
276 }
277 else
278 {
279 // set process-local variables to something telling:
280 n_local_rows = -1;
281 n_local_columns = -1;
282 std::fill(std::begin(descriptor), std::end(descriptor), -1);
283 }
284}
285
286
287
288template <typename NumberType>
289void
291 const size_type size,
292 const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
293 const size_type block_size,
295{
296 reinit(size, size, process_grid, block_size, block_size, property);
297}
298
299
300
301template <typename NumberType>
302void
304 const LAPACKSupport::Property property_)
305{
306 property = property_;
307}
308
309
310
311template <typename NumberType>
317
318
319
320template <typename NumberType>
323{
324 return state;
325}
326
327
328
329template <typename NumberType>
332{
333 // FIXME: another way to copy is to use pdgeadd_ PBLAS routine.
334 // This routine computes the sum of two matrices B := a*A + b*B.
335 // Matrices can have different distribution,in particular matrix A can
336 // be owned by only one process, so we can set a=1 and b=0 to copy
337 // non-distributed matrix A into distributed matrix B.
338 Assert(n_rows == int(matrix.m()), ExcDimensionMismatch(n_rows, matrix.m()));
339 Assert(n_columns == int(matrix.n()),
340 ExcDimensionMismatch(n_columns, matrix.n()));
341
342 if (grid->mpi_process_is_active)
343 {
344 for (int i = 0; i < n_local_rows; ++i)
345 {
346 const int glob_i = global_row(i);
347 for (int j = 0; j < n_local_columns; ++j)
348 {
349 const int glob_j = global_column(j);
350 local_el(i, j) = matrix(glob_i, glob_j);
351 }
352 }
353 }
355 return *this;
356}
357
358
359
360template <typename NumberType>
361void
363 const unsigned int rank)
364{
365 if (n_rows * n_columns == 0)
366 return;
367
368 const unsigned int this_mpi_process(
369 Utilities::MPI::this_mpi_process(this->grid->mpi_communicator));
370
371 if constexpr (running_in_debug_mode())
372 {
373 Assert(Utilities::MPI::max(rank, this->grid->mpi_communicator) == rank,
375 "All processes have to call routine with identical rank"));
376 Assert(Utilities::MPI::min(rank, this->grid->mpi_communicator) == rank,
378 "All processes have to call routine with identical rank"));
379 }
380
381 // root process has to be active in the grid of A
382 if (this_mpi_process == rank)
383 {
384 Assert(grid->mpi_process_is_active, ExcInternalError());
385 Assert(n_rows == int(B.m()), ExcDimensionMismatch(n_rows, B.m()));
386 Assert(n_columns == int(B.n()), ExcDimensionMismatch(n_columns, B.n()));
387 }
388 // Create 1x1 grid for matrix B.
389 // The underlying grid for matrix B only contains the process #rank.
390 // This grid will be used to copy the serial matrix B to the distributed
391 // matrix using the ScaLAPACK routine pgemr2d.
392 MPI_Group group_A;
393 MPI_Comm_group(this->grid->mpi_communicator, &group_A);
394 const int n = 1;
395 const std::vector<int> ranks(n, rank);
396 MPI_Group group_B;
397 MPI_Group_incl(group_A, n, ranks.data(), &group_B);
398 MPI_Comm communicator_B;
399
401 const int ierr = MPI_Comm_create_group(this->grid->mpi_communicator,
402 group_B,
403 mpi_tag,
404 &communicator_B);
405 AssertThrowMPI(ierr);
406 int n_proc_rows_B = 1, n_proc_cols_B = 1;
407 int this_process_row_B = -1, this_process_column_B = -1;
408 int blacs_context_B = -1;
409 if (MPI_COMM_NULL != communicator_B)
410 {
411 // Initialize Cblas context from the provided communicator
412 blacs_context_B = Csys2blacs_handle(communicator_B);
413 const char *order = "Col";
414 Cblacs_gridinit(&blacs_context_B, order, n_proc_rows_B, n_proc_cols_B);
415 Cblacs_gridinfo(blacs_context_B,
416 &n_proc_rows_B,
417 &n_proc_cols_B,
418 &this_process_row_B,
419 &this_process_column_B);
420 Assert(n_proc_rows_B * n_proc_cols_B == 1, ExcInternalError());
421 // the active process of grid B has to be process #rank of the
422 // communicator attached to A
423 Assert(this_mpi_process == rank, ExcInternalError());
424 }
425 const bool mpi_process_is_active_B =
426 (this_process_row_B >= 0 && this_process_column_B >= 0);
427
428 // create descriptor for matrix B
429 std::vector<int> descriptor_B(9, -1);
430 const int first_process_row_B = 0, first_process_col_B = 0;
431
432 if (mpi_process_is_active_B)
433 {
434 // Get local sizes:
435 int n_local_rows_B = numroc_(&n_rows,
436 &n_rows,
437 &this_process_row_B,
438 &first_process_row_B,
439 &n_proc_rows_B);
440 int n_local_cols_B = numroc_(&n_columns,
441 &n_columns,
442 &this_process_column_B,
443 &first_process_col_B,
444 &n_proc_cols_B);
445 Assert(n_local_rows_B == n_rows, ExcInternalError());
446 Assert(n_local_cols_B == n_columns, ExcInternalError());
447
448 int lda = std::max(1, n_local_rows_B);
449 int info = 0;
450 descinit_(descriptor_B.data(),
451 &n_rows,
452 &n_columns,
453 &n_rows,
454 &n_columns,
455 &first_process_row_B,
456 &first_process_col_B,
457 &blacs_context_B,
458 &lda,
459 &info);
460 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("descinit_", info));
461 }
462 if (this->grid->mpi_process_is_active)
463 {
464 const int ii = 1;
465 NumberType *loc_vals_A =
466 this->values.size() > 0 ? this->values.data() : nullptr;
467 const NumberType *loc_vals_B =
468 mpi_process_is_active_B ? &(B(0, 0)) : nullptr;
469
470 // pgemr2d has to be called only for processes active on grid attached to
471 // matrix A
472 pgemr2d(&n_rows,
473 &n_columns,
474 loc_vals_B,
475 &ii,
476 &ii,
477 descriptor_B.data(),
478 loc_vals_A,
479 &ii,
480 &ii,
481 this->descriptor,
482 &(this->grid->blacs_context));
483 }
484 if (mpi_process_is_active_B)
485 Cblacs_gridexit(blacs_context_B);
486
487 MPI_Group_free(&group_A);
488 MPI_Group_free(&group_B);
489 if (MPI_COMM_NULL != communicator_B)
490 Utilities::MPI::free_communicator(communicator_B);
491
493}
494
495
496
497template <typename NumberType>
498unsigned int
499ScaLAPACKMatrix<NumberType>::global_row(const unsigned int loc_row) const
500{
501 Assert(n_local_rows >= 0 && loc_row < static_cast<unsigned int>(n_local_rows),
502 ExcIndexRange(loc_row, 0, n_local_rows));
503 const int i = loc_row + 1;
504 return indxl2g_(&i,
506 &(grid->this_process_row),
508 &(grid->n_process_rows)) -
509 1;
510}
511
512
513
514template <typename NumberType>
515unsigned int
516ScaLAPACKMatrix<NumberType>::global_column(const unsigned int loc_column) const
517{
518 Assert(n_local_columns >= 0 &&
519 loc_column < static_cast<unsigned int>(n_local_columns),
520 ExcIndexRange(loc_column, 0, n_local_columns));
521 const int j = loc_column + 1;
522 return indxl2g_(&j,
524 &(grid->this_process_column),
526 &(grid->n_process_columns)) -
527 1;
528}
529
530
531
532template <typename NumberType>
533void
535 const unsigned int rank) const
536{
537 if (n_rows * n_columns == 0)
538 return;
539
540 const unsigned int this_mpi_process(
541 Utilities::MPI::this_mpi_process(this->grid->mpi_communicator));
542
543 if constexpr (running_in_debug_mode())
544 {
545 Assert(Utilities::MPI::max(rank, this->grid->mpi_communicator) == rank,
547 "All processes have to call routine with identical rank"));
548 Assert(Utilities::MPI::min(rank, this->grid->mpi_communicator) == rank,
550 "All processes have to call routine with identical rank"));
551 }
552
553 if (this_mpi_process == rank)
554 {
555 // the process which gets the serial copy has to be in the process grid
556 Assert(this->grid->is_process_active(), ExcInternalError());
557 Assert(n_rows == int(B.m()), ExcDimensionMismatch(n_rows, B.m()));
558 Assert(n_columns == int(B.n()), ExcDimensionMismatch(n_columns, B.n()));
559 }
560
561 // Create 1x1 grid for matrix B.
562 // The underlying grid for matrix B only contains the process #rank.
563 // This grid will be used to copy to the distributed matrix to the serial
564 // matrix B using the ScaLAPACK routine pgemr2d.
565 MPI_Group group_A;
566 MPI_Comm_group(this->grid->mpi_communicator, &group_A);
567 const int n = 1;
568 const std::vector<int> ranks(n, rank);
569 MPI_Group group_B;
570 MPI_Group_incl(group_A, n, ranks.data(), &group_B);
571 MPI_Comm communicator_B;
572
574 const int ierr = MPI_Comm_create_group(this->grid->mpi_communicator,
575 group_B,
576 mpi_tag,
577 &communicator_B);
578 AssertThrowMPI(ierr);
579 int n_proc_rows_B = 1, n_proc_cols_B = 1;
580 int this_process_row_B = -1, this_process_column_B = -1;
581 int blacs_context_B = -1;
582 if (MPI_COMM_NULL != communicator_B)
583 {
584 // Initialize Cblas context from the provided communicator
585 blacs_context_B = Csys2blacs_handle(communicator_B);
586 const char *order = "Col";
587 Cblacs_gridinit(&blacs_context_B, order, n_proc_rows_B, n_proc_cols_B);
588 Cblacs_gridinfo(blacs_context_B,
589 &n_proc_rows_B,
590 &n_proc_cols_B,
591 &this_process_row_B,
592 &this_process_column_B);
593 Assert(n_proc_rows_B * n_proc_cols_B == 1, ExcInternalError());
594 // the active process of grid B has to be process #rank of the
595 // communicator attached to A
596 Assert(this_mpi_process == rank, ExcInternalError());
597 }
598 const bool mpi_process_is_active_B =
599 (this_process_row_B >= 0 && this_process_column_B >= 0);
600
601 // create descriptor for matrix B
602 std::vector<int> descriptor_B(9, -1);
603 const int first_process_row_B = 0, first_process_col_B = 0;
604
605 if (mpi_process_is_active_B)
606 {
607 // Get local sizes:
608 int n_local_rows_B = numroc_(&n_rows,
609 &n_rows,
610 &this_process_row_B,
611 &first_process_row_B,
612 &n_proc_rows_B);
613 int n_local_cols_B = numroc_(&n_columns,
614 &n_columns,
615 &this_process_column_B,
616 &first_process_col_B,
617 &n_proc_cols_B);
618 Assert(n_local_rows_B == n_rows, ExcInternalError());
619 Assert(n_local_cols_B == n_columns, ExcInternalError());
620
621 int lda = std::max(1, n_local_rows_B);
622 int info = 0;
623 // fill descriptor for matrix B
624 descinit_(descriptor_B.data(),
625 &n_rows,
626 &n_columns,
627 &n_rows,
628 &n_columns,
629 &first_process_row_B,
630 &first_process_col_B,
631 &blacs_context_B,
632 &lda,
633 &info);
634 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("descinit_", info));
635 }
636 // pgemr2d has to be called only for processes active on grid attached to
637 // matrix A
638 if (this->grid->mpi_process_is_active)
639 {
640 const int ii = 1;
641 const NumberType *loc_vals_A =
642 this->values.size() > 0 ? this->values.data() : nullptr;
643 NumberType *loc_vals_B = mpi_process_is_active_B ? &(B(0, 0)) : nullptr;
644
645 pgemr2d(&n_rows,
646 &n_columns,
647 loc_vals_A,
648 &ii,
649 &ii,
650 this->descriptor,
651 loc_vals_B,
652 &ii,
653 &ii,
654 descriptor_B.data(),
655 &(this->grid->blacs_context));
656 }
657 if (mpi_process_is_active_B)
658 Cblacs_gridexit(blacs_context_B);
659
660 MPI_Group_free(&group_A);
661 MPI_Group_free(&group_B);
662 if (MPI_COMM_NULL != communicator_B)
663 Utilities::MPI::free_communicator(communicator_B);
664}
665
666
667
668template <typename NumberType>
669void
671{
672 // FIXME: use PDGEMR2d for copying?
673 // PDGEMR2d copies a submatrix of A on a submatrix of B.
674 // A and B can have different distributions
675 // see http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=50
676 Assert(n_rows == int(matrix.m()), ExcDimensionMismatch(n_rows, matrix.m()));
677 Assert(n_columns == int(matrix.n()),
678 ExcDimensionMismatch(n_columns, matrix.n()));
679
680 matrix = 0.;
681 if (grid->mpi_process_is_active)
682 {
683 for (int i = 0; i < n_local_rows; ++i)
684 {
685 const int glob_i = global_row(i);
686 for (int j = 0; j < n_local_columns; ++j)
687 {
688 const int glob_j = global_column(j);
689 matrix(glob_i, glob_j) = local_el(i, j);
690 }
691 }
692 }
693 Utilities::MPI::sum(matrix, grid->mpi_communicator, matrix);
694
695 // we could move the following lines under the main loop above,
696 // but they would be dependent on glob_i and glob_j, which
697 // won't make it much prettier
699 {
701 for (unsigned int i = 0; i < matrix.n(); ++i)
702 for (unsigned int j = i + 1; j < matrix.m(); ++j)
703 matrix(i, j) = 0.;
705 for (unsigned int i = 0; i < matrix.n(); ++i)
706 for (unsigned int j = 0; j < i; ++j)
707 matrix(i, j) = 0.;
708 }
711 {
712 if (uplo == 'L')
713 for (unsigned int i = 0; i < matrix.n(); ++i)
714 for (unsigned int j = i + 1; j < matrix.m(); ++j)
715 matrix(i, j) = matrix(j, i);
716 else if (uplo == 'U')
717 for (unsigned int i = 0; i < matrix.n(); ++i)
718 for (unsigned int j = 0; j < i; ++j)
719 matrix(i, j) = matrix(j, i);
720 }
721}
722
723
724
725template <typename NumberType>
726void
729 const std::pair<unsigned int, unsigned int> &offset_A,
730 const std::pair<unsigned int, unsigned int> &offset_B,
731 const std::pair<unsigned int, unsigned int> &submatrix_size) const
732{
733 // submatrix is empty
734 if (submatrix_size.first == 0 || submatrix_size.second == 0)
735 return;
736
737 // range checking for matrix A
738 AssertIndexRange(offset_A.first, n_rows - submatrix_size.first + 1);
739 AssertIndexRange(offset_A.second, n_columns - submatrix_size.second + 1);
740
741 // range checking for matrix B
742 AssertIndexRange(offset_B.first, B.n_rows - submatrix_size.first + 1);
743 AssertIndexRange(offset_B.second, B.n_columns - submatrix_size.second + 1);
744
745 // Currently, copying of matrices will only be supported if A and B share the
746 // same MPI communicator
747 int ierr, comparison;
748 ierr = MPI_Comm_compare(grid->mpi_communicator,
750 &comparison);
751 AssertThrowMPI(ierr);
752 Assert(comparison == MPI_IDENT,
753 ExcMessage("Matrix A and B must have a common MPI Communicator"));
754
755 /*
756 * The routine pgemr2d requires a BLACS context resembling at least the union
757 * of process grids described by the BLACS contexts held by the ProcessGrids
758 * of matrix A and B. As A and B share the same MPI communicator, there is no
759 * need to create a union MPI communicator to initialize the BLACS context
760 */
761 int union_blacs_context = Csys2blacs_handle(this->grid->mpi_communicator);
762 const char *order = "Col";
763 int union_n_process_rows =
764 Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator);
765 int union_n_process_columns = 1;
766 Cblacs_gridinit(&union_blacs_context,
767 order,
768 union_n_process_rows,
769 union_n_process_columns);
770
771 int n_grid_rows_A, n_grid_columns_A, my_row_A, my_column_A;
772 Cblacs_gridinfo(this->grid->blacs_context,
773 &n_grid_rows_A,
774 &n_grid_columns_A,
775 &my_row_A,
776 &my_column_A);
777
778 // check whether process is in the BLACS context of matrix A
779 const bool in_context_A =
780 (my_row_A >= 0 && my_row_A < n_grid_rows_A) &&
781 (my_column_A >= 0 && my_column_A < n_grid_columns_A);
782
783 int n_grid_rows_B, n_grid_columns_B, my_row_B, my_column_B;
784 Cblacs_gridinfo(B.grid->blacs_context,
785 &n_grid_rows_B,
786 &n_grid_columns_B,
787 &my_row_B,
788 &my_column_B);
789
790 // check whether process is in the BLACS context of matrix B
791 const bool in_context_B =
792 (my_row_B >= 0 && my_row_B < n_grid_rows_B) &&
793 (my_column_B >= 0 && my_column_B < n_grid_columns_B);
794
795 const int n_rows_submatrix = submatrix_size.first;
796 const int n_columns_submatrix = submatrix_size.second;
797
798 // due to Fortran indexing one has to be added
799 int ia = offset_A.first + 1, ja = offset_A.second + 1;
800 int ib = offset_B.first + 1, jb = offset_B.second + 1;
801
802 std::array<int, 9> desc_A, desc_B;
803
804 const NumberType *loc_vals_A = nullptr;
805 NumberType *loc_vals_B = nullptr;
806
807 // Note: the function pgemr2d has to be called for all processes in the union
808 // BLACS context If the calling process is not part of the BLACS context of A,
809 // desc_A[1] has to be -1 and all other parameters do not have to be set If
810 // the calling process is not part of the BLACS context of B, desc_B[1] has to
811 // be -1 and all other parameters do not have to be set
812 if (in_context_A)
813 {
814 if (this->values.size() != 0)
815 loc_vals_A = this->values.data();
816
817 for (unsigned int i = 0; i < desc_A.size(); ++i)
818 desc_A[i] = this->descriptor[i];
819 }
820 else
821 desc_A[1] = -1;
822
823 if (in_context_B)
824 {
825 if (B.values.size() != 0)
826 loc_vals_B = B.values.data();
827
828 for (unsigned int i = 0; i < desc_B.size(); ++i)
829 desc_B[i] = B.descriptor[i];
830 }
831 else
832 desc_B[1] = -1;
833
834 pgemr2d(&n_rows_submatrix,
835 &n_columns_submatrix,
836 loc_vals_A,
837 &ia,
838 &ja,
839 desc_A.data(),
840 loc_vals_B,
841 &ib,
842 &jb,
843 desc_B.data(),
844 &union_blacs_context);
845
847
848 // releasing the union BLACS context
849 Cblacs_gridexit(union_blacs_context);
850}
851
852
853
854template <typename NumberType>
855void
857{
859 Assert(n_columns == dest.n_columns,
861
862 if (this->grid->mpi_process_is_active)
864 this->descriptor[0] == 1,
866 "Copying of ScaLAPACK matrices only implemented for dense matrices"));
867 if (dest.grid->mpi_process_is_active)
869 dest.descriptor[0] == 1,
871 "Copying of ScaLAPACK matrices only implemented for dense matrices"));
872
873 /*
874 * just in case of different process grids or block-cyclic distributions
875 * inter-process communication is necessary
876 * if distributed matrices have the same process grid and block sizes, local
877 * copying is enough
878 */
879 if ((this->grid != dest.grid) || (row_block_size != dest.row_block_size) ||
881 {
882 /*
883 * get the MPI communicator, which is the union of the source and
884 * destination MPI communicator
885 */
886 int ierr = 0;
887 MPI_Group group_source, group_dest, group_union;
888 ierr = MPI_Comm_group(this->grid->mpi_communicator, &group_source);
889 AssertThrowMPI(ierr);
890 ierr = MPI_Comm_group(dest.grid->mpi_communicator, &group_dest);
891 AssertThrowMPI(ierr);
892 ierr = MPI_Group_union(group_source, group_dest, &group_union);
893 AssertThrowMPI(ierr);
894 MPI_Comm mpi_communicator_union;
895
896 // to create a communicator representing the union of the source
897 // and destination MPI communicator we need a communicator that
898 // is guaranteed to contain all desired processes -- i.e.,
899 // MPI_COMM_WORLD. on the other hand, as documented in the MPI
900 // standard, MPI_Comm_create_group is not collective on all
901 // processes in the first argument, but instead is collective on
902 // only those processes listed in the group. in other words,
903 // there is really no harm in passing MPI_COMM_WORLD as the
904 // first argument, even if the program we are currently running
905 // and that is calling this function only works on a subset of
906 // processes. the same holds for the wrapper/fallback we are using here.
907
909 ierr = MPI_Comm_create_group(MPI_COMM_WORLD,
910 group_union,
911 mpi_tag,
912 &mpi_communicator_union);
913 AssertThrowMPI(ierr);
914
915 /*
916 * The routine pgemr2d requires a BLACS context resembling at least the
917 * union of process grids described by the BLACS contexts of matrix A and
918 * B
919 */
920 int union_blacs_context = Csys2blacs_handle(mpi_communicator_union);
921 const char *order = "Col";
922 int union_n_process_rows =
923 Utilities::MPI::n_mpi_processes(mpi_communicator_union);
924 int union_n_process_columns = 1;
925 Cblacs_gridinit(&union_blacs_context,
926 order,
927 union_n_process_rows,
928 union_n_process_columns);
929
930 const NumberType *loc_vals_source = nullptr;
931 NumberType *loc_vals_dest = nullptr;
932
933 if (this->grid->mpi_process_is_active && (this->values.size() > 0))
934 {
935 AssertThrow(this->values.size() > 0,
937 "source: process is active but local matrix empty"));
938 loc_vals_source = this->values.data();
939 }
940 if (dest.grid->mpi_process_is_active && (dest.values.size() > 0))
941 {
943 dest.values.size() > 0,
945 "destination: process is active but local matrix empty"));
946 loc_vals_dest = dest.values.data();
947 }
948 pgemr2d(&n_rows,
949 &n_columns,
950 loc_vals_source,
954 loc_vals_dest,
955 &dest.submatrix_row,
956 &dest.submatrix_column,
957 dest.descriptor,
958 &union_blacs_context);
959
960 Cblacs_gridexit(union_blacs_context);
961
962 if (mpi_communicator_union != MPI_COMM_NULL)
963 Utilities::MPI::free_communicator(mpi_communicator_union);
964 ierr = MPI_Group_free(&group_source);
965 AssertThrowMPI(ierr);
966 ierr = MPI_Group_free(&group_dest);
967 AssertThrowMPI(ierr);
968 ierr = MPI_Group_free(&group_union);
969 AssertThrowMPI(ierr);
970 }
971 else
972 // process is active in the process grid
973 if (this->grid->mpi_process_is_active)
974 dest.values = this->values;
975
976 dest.state = state;
977 dest.property = property;
978}
979
980
981
982template <typename NumberType>
983void
989
990
991
992template <typename NumberType>
993void
995 const NumberType alpha,
996 const NumberType beta,
997 const bool transpose_B)
998{
999 if (transpose_B)
1000 {
1007 }
1008 else
1009 {
1017 }
1018 Assert(this->grid == B.grid,
1019 ExcMessage("The matrices A and B need to have the same process grid"));
1020
1021 if (this->grid->mpi_process_is_active)
1022 {
1023 char trans_b = transpose_B ? 'T' : 'N';
1024 NumberType *A_loc =
1025 (this->values.size() > 0) ? this->values.data() : nullptr;
1026 const NumberType *B_loc =
1027 (B.values.size() > 0) ? B.values.data() : nullptr;
1028
1029 pgeadd(&trans_b,
1030 &n_rows,
1031 &n_columns,
1032 &beta,
1033 B_loc,
1034 &B.submatrix_row,
1036 B.descriptor,
1037 &alpha,
1038 A_loc,
1041 descriptor);
1042 }
1044}
1045
1046
1047
1048template <typename NumberType>
1049void
1052{
1053 add(B, 1, a, false);
1054}
1055
1056
1057
1058template <typename NumberType>
1059void
1062{
1063 add(B, 1, a, true);
1064}
1065
1066
1067
1068template <typename NumberType>
1069void
1072 const NumberType c,
1074 const bool transpose_A,
1075 const bool transpose_B) const
1076{
1077 Assert(this->grid == B.grid,
1078 ExcMessage("The matrices A and B need to have the same process grid"));
1079 Assert(C.grid == B.grid,
1080 ExcMessage("The matrices B and C need to have the same process grid"));
1081
1082 // see for further info:
1083 // https://www.ibm.com/support/knowledgecenter/SSNR5K_4.2.0/com.ibm.cluster.pessl.v4r2.pssl100.doc/am6gr_lgemm.htm
1084 if (!transpose_A && !transpose_B)
1085 {
1086 Assert(this->n_columns == B.n_rows,
1088 Assert(this->n_rows == C.n_rows,
1089 ExcDimensionMismatch(this->n_rows, C.n_rows));
1090 Assert(B.n_columns == C.n_columns,
1091 ExcDimensionMismatch(B.n_columns, C.n_columns));
1092 Assert(this->row_block_size == C.row_block_size,
1093 ExcDimensionMismatch(this->row_block_size, C.row_block_size));
1096 Assert(B.column_block_size == C.column_block_size,
1097 ExcDimensionMismatch(B.column_block_size, C.column_block_size));
1098 }
1099 else if (transpose_A && !transpose_B)
1100 {
1101 Assert(this->n_rows == B.n_rows,
1103 Assert(this->n_columns == C.n_rows,
1104 ExcDimensionMismatch(this->n_columns, C.n_rows));
1105 Assert(B.n_columns == C.n_columns,
1106 ExcDimensionMismatch(B.n_columns, C.n_columns));
1107 Assert(this->column_block_size == C.row_block_size,
1108 ExcDimensionMismatch(this->column_block_size, C.row_block_size));
1111 Assert(B.column_block_size == C.column_block_size,
1112 ExcDimensionMismatch(B.column_block_size, C.column_block_size));
1113 }
1114 else if (!transpose_A && transpose_B)
1115 {
1116 Assert(this->n_columns == B.n_columns,
1118 Assert(this->n_rows == C.n_rows,
1119 ExcDimensionMismatch(this->n_rows, C.n_rows));
1120 Assert(B.n_rows == C.n_columns,
1121 ExcDimensionMismatch(B.n_rows, C.n_columns));
1122 Assert(this->row_block_size == C.row_block_size,
1123 ExcDimensionMismatch(this->row_block_size, C.row_block_size));
1127 Assert(B.row_block_size == C.column_block_size,
1128 ExcDimensionMismatch(B.row_block_size, C.column_block_size));
1129 }
1130 else // if (transpose_A && transpose_B)
1131 {
1132 Assert(this->n_rows == B.n_columns,
1134 Assert(this->n_columns == C.n_rows,
1135 ExcDimensionMismatch(this->n_columns, C.n_rows));
1136 Assert(B.n_rows == C.n_columns,
1137 ExcDimensionMismatch(B.n_rows, C.n_columns));
1138 Assert(this->column_block_size == C.row_block_size,
1139 ExcDimensionMismatch(this->row_block_size, C.row_block_size));
1142 Assert(B.row_block_size == C.column_block_size,
1143 ExcDimensionMismatch(B.column_block_size, C.column_block_size));
1144 }
1145
1146 if (this->grid->mpi_process_is_active)
1147 {
1148 char trans_a = transpose_A ? 'T' : 'N';
1149 char trans_b = transpose_B ? 'T' : 'N';
1150
1151 const NumberType *A_loc =
1152 (this->values.size() > 0) ? this->values.data() : nullptr;
1153 const NumberType *B_loc =
1154 (B.values.size() > 0) ? B.values.data() : nullptr;
1155 NumberType *C_loc = (C.values.size() > 0) ? C.values.data() : nullptr;
1156 int m = C.n_rows;
1157 int n = C.n_columns;
1158 int k = transpose_A ? this->n_rows : this->n_columns;
1159
1160 pgemm(&trans_a,
1161 &trans_b,
1162 &m,
1163 &n,
1164 &k,
1165 &b,
1166 A_loc,
1167 &(this->submatrix_row),
1168 &(this->submatrix_column),
1169 this->descriptor,
1170 B_loc,
1171 &B.submatrix_row,
1173 B.descriptor,
1174 &c,
1175 C_loc,
1176 &C.submatrix_row,
1177 &C.submatrix_column,
1178 C.descriptor);
1179 }
1180 C.state = LAPACKSupport::matrix;
1181}
1182
1183
1184
1185template <typename NumberType>
1186void
1189 const bool adding) const
1190{
1191 if (adding)
1192 mult(1., B, 1., C, false, false);
1193 else
1194 mult(1., B, 0, C, false, false);
1195}
1196
1197
1198
1199template <typename NumberType>
1200void
1203 const bool adding) const
1204{
1205 if (adding)
1206 mult(1., B, 1., C, true, false);
1207 else
1208 mult(1., B, 0, C, true, false);
1209}
1210
1211
1212
1213template <typename NumberType>
1214void
// Forwarding wrapper that delegates to mult() with transpose_A == false and
// transpose_B == true (i.e. C = A * B^T, optionally accumulated into C
// when adding == true).
// NOTE(review): the scrape is missing source lines 1215-1216 (function name
// and the C/B parameters) -- confirm against the upstream scalapack.cc.
1217 const bool adding) const
1218{
1219 if (adding)
1220 mult(1., B, 1., C, false, true);
1221 else
1222 mult(1., B, 0, C, false, true);
1223}
1224
1225
1226
1227template <typename NumberType>
1228void
// Forwarding wrapper that delegates to mult() with both transpose flags
// true (i.e. C = A^T * B^T, optionally accumulated into C when
// adding == true).
// NOTE(review): the scrape is missing source lines 1229-1230 (function name
// and the C/B parameters) -- confirm against the upstream scalapack.cc.
1231 const bool adding) const
1232{
1233 if (adding)
1234 mult(1., B, 1., C, true, true);
1235 else
1236 mult(1., B, 0, C, true, true);
1237}
1238
1239
1240
1241template <typename NumberType>
1242void
// In-place Cholesky factorization of a symmetric matrix via the ScaLAPACK
// driver ppotrf (p?potrf). Only processes on the BLACS grid participate;
// on success the matrix property is set to lower/upper triangular
// depending on the stored 'uplo' flag.
// NOTE(review): the scrape is missing several source lines in this block
// (the function name at 1243, Assert arguments at 1246/1249, the
// submatrix_row/submatrix_column arguments at 1261-1262, the state
// assignment at 1267 and the upper-triangular alternative at 1269) --
// confirm against the upstream scalapack.cc before editing.
1244{
1245 Assert(
1247 ExcMessage(
1248 "Cholesky factorization can be applied to symmetric matrices only."));
1250 ExcMessage(
1251 "Matrix has to be in Matrix state before calling this function."));
1252
1253 if (grid->mpi_process_is_active)
1254 {
1255 int info = 0;
1256 NumberType *A_loc = this->values.data();
1257 // pdpotrf_(&uplo,&n_columns,A_loc,&submatrix_row,&submatrix_column,descriptor,&info);
1258 ppotrf(&uplo,
1259 &n_columns,
1260 A_loc,
1263 descriptor,
1264 &info);
         // info != 0 signals a ScaLAPACK failure (e.g. matrix not positive
         // definite); surface it as an exception rather than continuing.
1265 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("ppotrf", info));
1266 }
1268 property = (uplo == 'L' ? LAPACKSupport::lower_triangular :
1270}
1271
1272
1273
1274template <typename NumberType>
1275void
// In-place LU factorization with partial pivoting via the ScaLAPACK driver
// pgetrf (p?getrf). The pivot array ipiv is sized according to the local
// row count returned by numroc_ plus one block (the layout p?getrf
// requires), and is kept as a member so a later pgetri/invert() can reuse it.
// NOTE(review): the scrape is missing several source lines in this block
// (the function name at 1276, the Assert first argument at 1278, block-size
// arguments to indxg2p_/numroc_ at 1288/1290/1293, the
// submatrix_row/submatrix_column arguments at 1302-1303, and the
// state/property assignments at 1309-1310) -- confirm against the upstream
// scalapack.cc before editing.
1277{
1279 ExcMessage(
1280 "Matrix has to be in Matrix state before calling this function."));
1281
1282 if (grid->mpi_process_is_active)
1283 {
1284 int info = 0;
1285 NumberType *A_loc = this->values.data();
1286
         // Process row owning the first row of the distributed submatrix.
1287 const int iarow = indxg2p_(&submatrix_row,
1289 &(grid->this_process_row),
1291 &(grid->n_process_rows));
         // Number of locally owned rows on this process.
1292 const int mp = numroc_(&n_rows,
1294 &(grid->this_process_row),
1295 &iarow,
1296 &(grid->n_process_rows));
1297 ipiv.resize(mp + row_block_size);
1298
1299 pgetrf(&n_rows,
1300 &n_columns,
1301 A_loc,
1304 descriptor,
1305 ipiv.data(),
1306 &info);
1307 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgetrf", info));
1308 }
1311}
1312
1313
1314
1315template <typename NumberType>
1316void
// In-place inversion. Dispatches on the matrix property/state:
//  - triangular & unfactorized: ptrtri (triangular inverse),
//  - symmetric: Cholesky factorization (if not already done) + ppotri,
//  - general: LU factorization (if not already done) + pgetri with the
//    usual two-pass lwork/liwork workspace query.
// NOTE(review): the scrape is missing several source lines in this block
// (the function name at 1317, the second operand of the is_symmetric /
// is_triangular conditions at 1323 and 1328-1329, the factorization calls
// at 1358-1364, ppotri/pgetri submatrix arguments at 1376-1377, 1392-1393,
// 1411-1412, the property update at 1382, and the final state assignment
// at 1426) -- confirm against the upstream scalapack.cc before editing.
1318{
1319 // Check whether matrix is symmetric and save flag.
1320 // If a Cholesky factorization has been applied previously,
1321 // the original matrix was symmetric.
1322 const bool is_symmetric = (property == LAPACKSupport::symmetric ||
1324
1325 // Check whether matrix is triangular and is in an unfactorized state.
1326 const bool is_triangular = (property == LAPACKSupport::upper_triangular ||
1327 property == LAPACKSupport::lower_triangular) &&
1330
1331 if (is_triangular)
1332 {
1333 if (grid->mpi_process_is_active)
1334 {
1335 const char uploTriangular =
1336 property == LAPACKSupport::upper_triangular ? 'U' : 'L';
1337 const char diag = 'N';
1338 int info = 0;
1339 NumberType *A_loc = this->values.data();
1340 ptrtri(&uploTriangular,
1341 &diag,
1342 &n_columns,
1343 A_loc,
1346 descriptor,
1347 &info);
1348 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("ptrtri", info));
1349 // The inversion is stored in the same part as the triangular matrix,
1350 // so we don't need to re-set the property here.
1351 }
1352 }
1353 else
1354 {
1355 // Matrix is neither in Cholesky nor LU state.
1356 // Compute the required factorizations based on the property of the
1357 // matrix.
1360 {
1361 if (is_symmetric)
1363 else
1365 }
1366 if (grid->mpi_process_is_active)
1367 {
1368 int info = 0;
1369 NumberType *A_loc = this->values.data();
1370
1371 if (is_symmetric)
1372 {
             // Inverse from an existing Cholesky factor.
1373 ppotri(&uplo,
1374 &n_columns,
1375 A_loc,
1378 descriptor,
1379 &info);
1380 AssertThrow(info == 0,
1381 LAPACKSupport::ExcErrorCode("ppotri", info));
1383 }
1384 else
1385 {
             // General inverse from the LU factors: first a workspace
             // query (lwork == liwork == -1), then the actual run with
             // the optimal sizes returned in work[0]/iwork[0].
1386 int lwork = -1, liwork = -1;
1387 work.resize(1);
1388 iwork.resize(1);
1389
1390 pgetri(&n_columns,
1391 A_loc,
1394 descriptor,
1395 ipiv.data(),
1396 work.data(),
1397 &lwork,
1398 iwork.data(),
1399 &liwork,
1400 &info);
1401
1402 AssertThrow(info == 0,
1403 LAPACKSupport::ExcErrorCode("pgetri", info));
1404 lwork = static_cast<int>(work[0]);
1405 liwork = iwork[0];
1406 work.resize(lwork);
1407 iwork.resize(liwork);
1408
1409 pgetri(&n_columns,
1410 A_loc,
1413 descriptor,
1414 ipiv.data(),
1415 work.data(),
1416 &lwork,
1417 iwork.data(),
1418 &liwork,
1419 &info);
1420
1421 AssertThrow(info == 0,
1422 LAPACKSupport::ExcErrorCode("pgetri", info));
1423 }
1424 }
1425 }
1427}
1428
1429
1430
1431template <typename NumberType>
1432std::vector<NumberType>
// Convenience front end: normalizes the (possibly unordered) index pair and
// forwards to eigenpairs_symmetric(); when the full index range
// [0, n_rows-1] is requested, the overload computing all eigenpairs is
// used instead of the windowed one.
// NOTE(review): the scrape is missing source line 1433 (the qualified
// function name) -- confirm against the upstream scalapack.cc.
1434 const std::pair<unsigned int, unsigned int> &index_limits,
1435 const bool compute_eigenvectors)
1436{
1437 // check validity of index limits
1438 AssertIndexRange(index_limits.first, n_rows);
1439 AssertIndexRange(index_limits.second, n_rows);
1440
1441 std::pair<unsigned int, unsigned int> idx =
1442 std::make_pair(std::min(index_limits.first, index_limits.second),
1443 std::max(index_limits.first, index_limits.second));
1444
1445 // compute all eigenvalues/eigenvectors
1446 if (idx.first == 0 && idx.second == static_cast<unsigned int>(n_rows - 1))
1447 return eigenpairs_symmetric(compute_eigenvectors);
1448 else
1449 return eigenpairs_symmetric(compute_eigenvectors, idx);
1450}
1451
1452
1453
1454template <typename NumberType>
1455std::vector<NumberType>
// Convenience front end for a value-interval eigensolve: rejects NaN
// limits, passes invalid indices (so the workhorse selects the
// value-range code path), and forwards to eigenpairs_symmetric().
// NOTE(review): the scrape is missing source lines 1456 (function name)
// and 1467 (the second invalid_unsigned_int of the index pair) -- confirm
// against the upstream scalapack.cc.
1457 const std::pair<NumberType, NumberType> &value_limits,
1458 const bool compute_eigenvectors)
1459{
1460 Assert(!std::isnan(value_limits.first),
1461 ExcMessage("value_limits.first is NaN"));
1462 Assert(!std::isnan(value_limits.second),
1463 ExcMessage("value_limits.second is NaN"));
1464
1465 std::pair<unsigned int, unsigned int> indices =
1466 std::make_pair(numbers::invalid_unsigned_int,
1468
1469 return eigenpairs_symmetric(compute_eigenvectors, indices, value_limits);
1470}
1471
1472
1473
1474template <typename NumberType>
1475std::vector<NumberType>
// Workhorse symmetric eigensolver. Selects between the ScaLAPACK drivers
// psyev (all eigenpairs) and psyevx (index- or value-windowed), using the
// standard two-pass workspace query. Eigenvalues are returned (in
// ascending order, per the drivers' contract) to ALL processes, including
// those outside the BLACS grid, via send_to_inactive(); if
// compute_eigenvectors is set, the eigenvectors replace the matrix content.
// Exactly one of the index pair / value pair may be meaningful at a time.
// NOTE(review): the scrape is missing many source lines in this block
// (function name at 1476, Assert first arguments at 1481/1484, eigenvector
// matrix constructor arguments at 1510, psyev/psyevx submatrix arguments
// at 1617-1618, 1644-1645, 1679-1680, 1704-1705, and the final
// state/property updates at 1763-1764 and 1767) -- confirm against the
// upstream scalapack.cc before editing.
1477 const bool compute_eigenvectors,
1478 const std::pair<unsigned int, unsigned int> &eigenvalue_idx,
1479 const std::pair<NumberType, NumberType> &eigenvalue_limits)
1480{
1482 ExcMessage(
1483 "Matrix has to be in Matrix state before calling this function."));
1485 ExcMessage("Matrix has to be symmetric for this operation."));
1486
     // work/iwork are shared members; serialize against concurrent callers.
1487 std::lock_guard<std::mutex> lock(mutex);
1488
1489 const bool use_values = (std::isnan(eigenvalue_limits.first) ||
1490 std::isnan(eigenvalue_limits.second)) ?
1491 false :
1492 true;
1493 const bool use_indices =
1494 ((eigenvalue_idx.first == numbers::invalid_unsigned_int) ||
1495 (eigenvalue_idx.second == numbers::invalid_unsigned_int)) ?
1496 false :
1497 true;
1498
1499 Assert(
1500 !(use_values && use_indices),
1501 ExcMessage(
1502 "Prescribing both the index and value range for the eigenvalues is ambiguous"));
1503
1504 // if computation of eigenvectors is not required use a sufficiently small
1505 // distributed matrix
1506 std::unique_ptr<ScaLAPACKMatrix<NumberType>> eigenvectors =
1507 compute_eigenvectors ?
1508 std::make_unique<ScaLAPACKMatrix<NumberType>>(n_rows,
1509 grid,
1511 std::make_unique<ScaLAPACKMatrix<NumberType>>(
1512 grid->n_process_rows, grid->n_process_columns, grid, 1, 1);
1513
1514 eigenvectors->property = property;
1515 // number of eigenvalues to be returned from psyevx; upon successful exit ev
1516 // contains the m selected eigenvalues in ascending order; set to all
1517 // eigenvalues in case we will be using psyev.
1518 int m = n_rows;
1519 std::vector<NumberType> ev(n_rows);
1520
1521 if (grid->mpi_process_is_active)
1522 {
1523 int info = 0;
1524 /*
1525 * for jobz==N only eigenvalues are computed, for jobz='V' also the
1526 * eigenvectors of the matrix are computed
1527 */
1528 char jobz = compute_eigenvectors ? 'V' : 'N';
1529 char range = 'A';
1530 // default value is to compute all eigenvalues and optionally eigenvectors
1531 bool all_eigenpairs = true;
1532 NumberType vl = NumberType(), vu = NumberType();
1533 int il = 1, iu = 1;
1534 // number of eigenvectors to be returned;
1535 // upon successful exit the first m=nz columns contain the selected
1536 // eigenvectors (only if jobz=='V')
1537 int nz = 0;
1538 NumberType abstol = NumberType();
1539
1540 // orfac decides which eigenvectors should be reorthogonalized
1541 // see
1542 // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1543 // for explanation; to keep it simple no reorthogonalization will be done by
1544 // setting orfac to 0
1545 NumberType orfac = 0;
1546 // contains the indices of eigenvectors that failed to converge
1547 std::vector<int> ifail;
1548 // This array contains indices of eigenvectors corresponding to
1549 // a cluster of eigenvalues that could not be reorthogonalized
1550 // due to insufficient workspace
1551 // see
1552 // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1553 // for explanation
1554 std::vector<int> iclustr;
1555 // This array contains the gap between eigenvalues whose
1556 // eigenvectors could not be reorthogonalized.
1557 // see
1558 // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1559 // for explanation
1560 std::vector<NumberType> gap(n_local_rows * n_local_columns);
1561
1562 // index range for eigenvalues is not specified
1563 if (!use_indices)
1564 {
1565 // interval for eigenvalues is not specified and consequently all
1566 // eigenvalues/eigenpairs will be computed
1567 if (!use_values)
1568 {
1569 range = 'A';
1570 all_eigenpairs = true;
1571 }
1572 else
1573 {
1574 range = 'V';
1575 all_eigenpairs = false;
1576 vl = std::min(eigenvalue_limits.first, eigenvalue_limits.second);
1577 vu = std::max(eigenvalue_limits.first, eigenvalue_limits.second);
1578 }
1579 }
1580 else
1581 {
1582 range = 'I';
1583 all_eigenpairs = false;
1584 // as Fortran starts counting/indexing from 1 unlike C/C++, where it
1585 // starts from 0
1586 il = std::min(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1587 iu = std::max(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1588 }
1589 NumberType *A_loc = this->values.data();
1590 /*
1591 * by setting lwork to -1 a workspace query for optimal length of work is
1592 * performed
1593 */
1594 int lwork = -1;
1595 int liwork = -1;
1596 NumberType *eigenvectors_loc =
1597 (compute_eigenvectors ? eigenvectors->values.data() : nullptr);
1598 /*
1599 * According to the official "documentation" found on the internet
1600 * (aka source file ppsyevx.f [1]) the work array has to have a
1601 * minimal size of max(3, lwork). Because we query for optimal size
1602 * (lwork == -1) we have to guarantee at least three doubles. The
1603 * necessary size of iwork is not specified, so let's use three as
1604 * well.
1605 * [1]
1606 * https://netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1607 */
1608 work.resize(3);
1609 iwork.resize(3);
1610
     // First pass: workspace query only (lwork == -1).
1611 if (all_eigenpairs)
1612 {
1613 psyev(&jobz,
1614 &uplo,
1615 &n_rows,
1616 A_loc,
1619 descriptor,
1620 ev.data(),
1621 eigenvectors_loc,
1622 &eigenvectors->submatrix_row,
1623 &eigenvectors->submatrix_column,
1624 eigenvectors->descriptor,
1625 work.data(),
1626 &lwork,
1627 &info);
1628 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyev", info));
1629 }
1630 else
1631 {
         // abstol: 2*underflow ('U') when eigenvectors are wanted (most
         // orthogonal eigenvectors), 2*eps ('S') otherwise, as computed by
         // plamch on this BLACS context.
1632 char cmach = compute_eigenvectors ? 'U' : 'S';
1633 plamch(&(this->grid->blacs_context), &cmach, abstol);
1634 abstol *= 2;
1635 ifail.resize(n_rows);
1636 iclustr.resize(2 * grid->n_process_rows * grid->n_process_columns);
1637 gap.resize(grid->n_process_rows * grid->n_process_columns);
1638
1639 psyevx(&jobz,
1640 &range,
1641 &uplo,
1642 &n_rows,
1643 A_loc,
1646 descriptor,
1647 &vl,
1648 &vu,
1649 &il,
1650 &iu,
1651 &abstol,
1652 &m,
1653 &nz,
1654 ev.data(),
1655 &orfac,
1656 eigenvectors_loc,
1657 &eigenvectors->submatrix_row,
1658 &eigenvectors->submatrix_column,
1659 eigenvectors->descriptor,
1660 work.data(),
1661 &lwork,
1662 iwork.data(),
1663 &liwork,
1664 ifail.data(),
1665 iclustr.data(),
1666 gap.data(),
1667 &info);
1668 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevx", info));
1669 }
     // Second pass: resize workspace to the optimal sizes and solve.
1670 lwork = static_cast<int>(work[0]);
1671 work.resize(lwork);
1672
1673 if (all_eigenpairs)
1674 {
1675 psyev(&jobz,
1676 &uplo,
1677 &n_rows,
1678 A_loc,
1681 descriptor,
1682 ev.data(),
1683 eigenvectors_loc,
1684 &eigenvectors->submatrix_row,
1685 &eigenvectors->submatrix_column,
1686 eigenvectors->descriptor,
1687 work.data(),
1688 &lwork,
1689 &info);
1690
1691 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyev", info));
1692 }
1693 else
1694 {
1695 liwork = iwork[0];
1696 AssertThrow(liwork > 0, ExcInternalError());
1697 iwork.resize(liwork);
1698
1699 psyevx(&jobz,
1700 &range,
1701 &uplo,
1702 &n_rows,
1703 A_loc,
1706 descriptor,
1707 &vl,
1708 &vu,
1709 &il,
1710 &iu,
1711 &abstol,
1712 &m,
1713 &nz,
1714 ev.data(),
1715 &orfac,
1716 eigenvectors_loc,
1717 &eigenvectors->submatrix_row,
1718 &eigenvectors->submatrix_column,
1719 eigenvectors->descriptor,
1720 work.data(),
1721 &lwork,
1722 iwork.data(),
1723 &liwork,
1724 ifail.data(),
1725 iclustr.data(),
1726 gap.data(),
1727 &info);
1728
1729 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevx", info));
1730 }
1731 // if eigenvectors are queried copy eigenvectors to original matrix
1732 // as the temporary matrix eigenvectors has identical dimensions and
1733 // block-cyclic distribution we simply swap the local array
1734 if (compute_eigenvectors)
1735 this->values.swap(eigenvectors->values);
1736
1737 // adapt the size of ev to fit m upon return
1738 while (ev.size() > static_cast<size_type>(m))
1739 ev.pop_back();
1740 }
1741 /*
1742 * send number of computed eigenvalues to inactive processes
1743 */
1744 grid->send_to_inactive(&m, 1);
1745
1746 /*
1747 * inactive processes have to resize array of eigenvalues
1748 */
1749 if (!grid->mpi_process_is_active)
1750 ev.resize(m);
1751 /*
1752 * send the eigenvalues to processors not being part of the process grid
1753 */
1754 grid->send_to_inactive(ev.data(), ev.size());
1755
1756 /*
1757 * if only eigenvalues are queried the content of the matrix will be destroyed
1758 * if the eigenpairs are queried matrix A on exit stores the eigenvectors in
1759 * the columns
1760 */
1761 if (compute_eigenvectors)
1762 {
1765 }
1766 else
1768
1769 return ev;
1770}
1771
1772
1773
1774template <typename NumberType>
1775std::vector<NumberType>
// MRRR variant of the index-windowed front end: normalizes the index pair
// and forwards to eigenpairs_symmetric_MRRR(), choosing the all-eigenpairs
// overload when the full range [0, n_rows-1] is requested.
// NOTE(review): the scrape is missing source line 1776 (the qualified
// function name) -- confirm against the upstream scalapack.cc.
1777 const std::pair<unsigned int, unsigned int> &index_limits,
1778 const bool compute_eigenvectors)
1779{
1780 // Check validity of index limits.
1781 AssertIndexRange(index_limits.first, static_cast<unsigned int>(n_rows));
1782 AssertIndexRange(index_limits.second, static_cast<unsigned int>(n_rows));
1783
1784 const std::pair<unsigned int, unsigned int> idx =
1785 std::make_pair(std::min(index_limits.first, index_limits.second),
1786 std::max(index_limits.first, index_limits.second));
1787
1788 // Compute all eigenvalues/eigenvectors.
1789 if (idx.first == 0 && idx.second == static_cast<unsigned int>(n_rows - 1))
1790 return eigenpairs_symmetric_MRRR(compute_eigenvectors);
1791 else
1792 return eigenpairs_symmetric_MRRR(compute_eigenvectors, idx);
1793}
1794
1795
1796
1797template <typename NumberType>
1798std::vector<NumberType>
// MRRR variant of the value-interval front end: requires finite limits,
// passes invalid indices (so the workhorse selects the value-range code
// path), and forwards to eigenpairs_symmetric_MRRR().
// NOTE(review): the scrape is missing source lines 1799 (function name)
// and 1808 (the second invalid_unsigned_int of the index pair) -- confirm
// against the upstream scalapack.cc.
1800 const std::pair<NumberType, NumberType> &value_limits,
1801 const bool compute_eigenvectors)
1802{
1803 AssertIsFinite(value_limits.first);
1804 AssertIsFinite(value_limits.second);
1805
1806 const std::pair<unsigned int, unsigned int> indices =
1807 std::make_pair(numbers::invalid_unsigned_int,
1809
1810 return eigenpairs_symmetric_MRRR(compute_eigenvectors, indices, value_limits);
1811}
1812
1813
1814
1815template <typename NumberType>
1816std::vector<NumberType>
// Workhorse symmetric eigensolver based on the MRRR algorithm (ScaLAPACK
// driver psyevr), with the standard two-pass lwork/liwork workspace query.
// Selection is by range 'A' (all), 'V' (value interval) or 'I' (index
// interval); results are broadcast to processes outside the BLACS grid via
// send_to_inactive(), and eigenvectors (if requested) replace the matrix
// content by swapping the local storage.
// NOTE(review): the scrape is missing several source lines in this block
// (function name at 1817, Assert first arguments at 1822/1825, eigenvector
// matrix constructor arguments at 1851, psyevr submatrix arguments at
// 1922-1923 and 1954-1955, the AssertThrow first token at 1977, and the
// final state/property updates at 2014-2015 and 2018) -- confirm against
// the upstream scalapack.cc before editing.
1818 const bool compute_eigenvectors,
1819 const std::pair<unsigned int, unsigned int> &eigenvalue_idx,
1820 const std::pair<NumberType, NumberType> &eigenvalue_limits)
1821{
1823 ExcMessage(
1824 "Matrix has to be in Matrix state before calling this function."));
1826 ExcMessage("Matrix has to be symmetric for this operation."));
1827
     // work/iwork are shared members; serialize against concurrent callers.
1828 std::lock_guard<std::mutex> lock(mutex);
1829
1830 const bool use_values = (std::isnan(eigenvalue_limits.first) ||
1831 std::isnan(eigenvalue_limits.second)) ?
1832 false :
1833 true;
1834 const bool use_indices =
1835 ((eigenvalue_idx.first == numbers::invalid_unsigned_int) ||
1836 (eigenvalue_idx.second == numbers::invalid_unsigned_int)) ?
1837 false :
1838 true;
1839
1840 Assert(
1841 !(use_values && use_indices),
1842 ExcMessage(
1843 "Prescribing both the index and value range for the eigenvalues is ambiguous"));
1844
1845 // If computation of eigenvectors is not required, use a sufficiently small
1846 // distributed matrix.
1847 std::unique_ptr<ScaLAPACKMatrix<NumberType>> eigenvectors =
1848 compute_eigenvectors ?
1849 std::make_unique<ScaLAPACKMatrix<NumberType>>(n_rows,
1850 grid,
1852 std::make_unique<ScaLAPACKMatrix<NumberType>>(
1853 grid->n_process_rows, grid->n_process_columns, grid, 1, 1);
1854
1855 eigenvectors->property = property;
1856 // Number of eigenvalues to be returned from psyevr; upon successful exit ev
1857 // contains the m selected eigenvalues in ascending order.
1858 int m = n_rows;
1859 std::vector<NumberType> ev(n_rows);
1860
1861 // Number of eigenvectors to be returned;
1862 // Upon successful exit the first m=nz columns contain the selected
1863 // eigenvectors (only if jobz=='V').
1864 int nz = 0;
1865
1866 if (grid->mpi_process_is_active)
1867 {
1868 int info = 0;
1869 /*
1870 * For jobz==N only eigenvalues are computed, for jobz='V' also the
1871 * eigenvectors of the matrix are computed.
1872 */
1873 char jobz = compute_eigenvectors ? 'V' : 'N';
1874 // Default value is to compute all eigenvalues and optionally
1875 // eigenvectors.
1876 char range = 'A';
1877 NumberType vl = NumberType(), vu = NumberType();
1878 int il = 1, iu = 1;
1879
1880 // Index range for eigenvalues is not specified.
1881 if (!use_indices)
1882 {
1883 // Interval for eigenvalues is not specified and consequently all
1884 // eigenvalues/eigenpairs will be computed.
1885 if (!use_values)
1886 {
1887 range = 'A';
1888 }
1889 else
1890 {
1891 range = 'V';
1892 vl = std::min(eigenvalue_limits.first, eigenvalue_limits.second);
1893 vu = std::max(eigenvalue_limits.first, eigenvalue_limits.second);
1894 }
1895 }
1896 else
1897 {
1898 range = 'I';
1899 // As Fortran starts counting/indexing from 1 unlike C/C++, where it
1900 // starts from 0.
1901 il = std::min(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1902 iu = std::max(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1903 }
1904 NumberType *A_loc = this->values.data();
1905
1906 /*
1907 * By setting lwork to -1 a workspace query for optimal length of work is
1908 * performed.
1909 */
1910 int lwork = -1;
1911 int liwork = -1;
1912 NumberType *eigenvectors_loc =
1913 (compute_eigenvectors ? eigenvectors->values.data() : nullptr);
1914 work.resize(1);
1915 iwork.resize(1);
1916
     // First pass: workspace query only.
1917 psyevr(&jobz,
1918 &range,
1919 &uplo,
1920 &n_rows,
1921 A_loc,
1924 descriptor,
1925 &vl,
1926 &vu,
1927 &il,
1928 &iu,
1929 &m,
1930 &nz,
1931 ev.data(),
1932 eigenvectors_loc,
1933 &eigenvectors->submatrix_row,
1934 &eigenvectors->submatrix_column,
1935 eigenvectors->descriptor,
1936 work.data(),
1937 &lwork,
1938 iwork.data(),
1939 &liwork,
1940 &info);
1941
1942 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevr", info));
1943
     // Second pass: solve with the optimal workspace sizes.
1944 lwork = static_cast<int>(work[0]);
1945 work.resize(lwork);
1946 liwork = iwork[0];
1947 iwork.resize(liwork);
1948
1949 psyevr(&jobz,
1950 &range,
1951 &uplo,
1952 &n_rows,
1953 A_loc,
1956 descriptor,
1957 &vl,
1958 &vu,
1959 &il,
1960 &iu,
1961 &m,
1962 &nz,
1963 ev.data(),
1964 eigenvectors_loc,
1965 &eigenvectors->submatrix_row,
1966 &eigenvectors->submatrix_column,
1967 eigenvectors->descriptor,
1968 work.data(),
1969 &lwork,
1970 iwork.data(),
1971 &liwork,
1972 &info);
1973
1974 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevr", info));
1975
     // m (eigenvalues found) must match nz (eigenvectors computed).
1976 if (compute_eigenvectors)
1978 m == nz,
1979 ExcMessage(
1980 "psyevr failed to compute all eigenvectors for the selected eigenvalues"));
1981
1982 // If eigenvectors are queried, copy eigenvectors to original matrix.
1983 // As the temporary matrix eigenvectors has identical dimensions and
1984 // block-cyclic distribution we simply swap the local array.
1985 if (compute_eigenvectors)
1986 this->values.swap(eigenvectors->values);
1987
1988 // Adapt the size of ev to fit m upon return.
1989 while (ev.size() > static_cast<size_type>(m))
1990 ev.pop_back();
1991 }
1992 /*
1993 * Send number of computed eigenvalues to inactive processes.
1994 */
1995 grid->send_to_inactive(&m, 1);
1996
1997 /*
1998 * Inactive processes have to resize array of eigenvalues.
1999 */
2000 if (!grid->mpi_process_is_active)
2001 ev.resize(m);
2002 /*
2003 * Send the eigenvalues to processors not being part of the process grid.
2004 */
2005 grid->send_to_inactive(ev.data(), ev.size());
2006
2007 /*
2008 * If only eigenvalues are queried, the content of the matrix will be
2009 * destroyed. If the eigenpairs are queried, matrix A on exit stores the
2010 * eigenvectors in the columns.
2011 */
2012 if (compute_eigenvectors)
2013 {
2016 }
2017 else
2019
2020 return ev;
2021}
2022
2023
2024
2025template <typename NumberType>
2026std::vector<NumberType>
// Singular value decomposition via the ScaLAPACK driver pgesvd (p?gesvd),
// with the standard two-pass workspace query. U / VT are optional output
// matrices for the left / right singular vectors (nullptr = not computed);
// when given, they must be square, match this matrix's block sizes and
// live on the same BLACS context. The min(n_rows, n_columns) singular
// values are returned on every process via send_to_inactive().
// NOTE(review): the scrape is missing several source lines in this block
// (function name and U/VT parameters at 2027-2028, an Assert first
// argument at 2030 and a property assert around 2033-2034, VT dimension /
// block-size assert arguments at 2054 and 2056-2060, pgesvd submatrix
// arguments at 2089-2090 and 2114-2115, and the state/property updates at
// 2137-2138) -- confirm against the upstream scalapack.cc before editing.
2029{
2031 ExcMessage(
2032 "Matrix has to be in Matrix state before calling this function."));
2035
2036 const bool left_singular_vectors = (U != nullptr) ? true : false;
2037 const bool right_singular_vectors = (VT != nullptr) ? true : false;
2038
2039 if (left_singular_vectors)
2040 {
2041 Assert(n_rows == U->n_rows, ExcDimensionMismatch(n_rows, U->n_rows));
2042 Assert(U->n_rows == U->n_columns,
2043 ExcDimensionMismatch(U->n_rows, U->n_columns));
2044 Assert(row_block_size == U->row_block_size,
2045 ExcDimensionMismatch(row_block_size, U->row_block_size));
2046 Assert(column_block_size == U->column_block_size,
2047 ExcDimensionMismatch(column_block_size, U->column_block_size));
2048 Assert(grid->blacs_context == U->grid->blacs_context,
2049 ExcDimensionMismatch(grid->blacs_context, U->grid->blacs_context));
2050 }
2051 if (right_singular_vectors)
2052 {
2053 Assert(n_columns == VT->n_rows,
2055 Assert(VT->n_rows == VT->n_columns,
2061 Assert(grid->blacs_context == VT->grid->blacs_context,
2062 ExcDimensionMismatch(grid->blacs_context,
2063 VT->grid->blacs_context));
2064 }
     // work is a shared member; serialize against concurrent callers.
2065 std::lock_guard<std::mutex> lock(mutex);
2066
2067 std::vector<NumberType> sv(std::min(n_rows, n_columns));
2068
2069 if (grid->mpi_process_is_active)
2070 {
2071 char jobu = left_singular_vectors ? 'V' : 'N';
2072 char jobvt = right_singular_vectors ? 'V' : 'N';
2073 NumberType *A_loc = this->values.data();
2074 NumberType *U_loc = left_singular_vectors ? U->values.data() : nullptr;
2075 NumberType *VT_loc = right_singular_vectors ? VT->values.data() : nullptr;
2076 int info = 0;
2077 /*
2078 * by setting lwork to -1 a workspace query for optimal length of work is
2079 * performed
2080 */
2081 int lwork = -1;
2082 work.resize(1);
2083
2084 pgesvd(&jobu,
2085 &jobvt,
2086 &n_rows,
2087 &n_columns,
2088 A_loc,
2091 descriptor,
2092 &*sv.begin(),
2093 U_loc,
2094 &U->submatrix_row,
2095 &U->submatrix_column,
2096 U->descriptor,
2097 VT_loc,
2098 &VT->submatrix_row,
2099 &VT->submatrix_column,
2100 VT->descriptor,
2101 work.data(),
2102 &lwork,
2103 &info);
2104 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgesvd", info));
2105
     // Second pass: the actual decomposition with the optimal workspace.
2106 lwork = static_cast<int>(work[0]);
2107 work.resize(lwork);
2108
2109 pgesvd(&jobu,
2110 &jobvt,
2111 &n_rows,
2112 &n_columns,
2113 A_loc,
2116 descriptor,
2117 &*sv.begin(),
2118 U_loc,
2119 &U->submatrix_row,
2120 &U->submatrix_column,
2121 U->descriptor,
2122 VT_loc,
2123 &VT->submatrix_row,
2124 &VT->submatrix_column,
2125 VT->descriptor,
2126 work.data(),
2127 &lwork,
2128 &info);
2129 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgesvd", info));
2130 }
2131
2132 /*
2133 * send the singular values to processors not being part of the process grid
2134 */
2135 grid->send_to_inactive(sv.data(), sv.size());
2136
2139
2140 return sv;
2141}
2142
2143
2144
2145template <typename NumberType>
2146void
// Solves the over/underdetermined linear system min ||B - A*X|| (or with
// A^T when transpose == true) via the ScaLAPACK driver pgels (p?gels),
// using the standard two-pass workspace query. The solution overwrites B.
// Both matrices must be in 'matrix' state, share the process grid, and --
// as pgels requires -- use identical row/column block sizes.
// NOTE(review): the scrape is missing several source lines in this block
// (the function name / B parameter at 2147, Assert first arguments at
// 2152/2155, the dimension asserts inside the transpose branches at
// 2161/2165, the block-size assert arguments at 2170/2173/2176, pgels
// submatrix arguments at 2200-2201, 2220-2221, B.submatrix_column at
// 2205/2225, and the final state assignment at 2232) -- confirm against
// the upstream scalapack.cc before editing.
2148 const bool transpose)
2149{
2150 Assert(grid == B.grid,
2151 ExcMessage("The matrices A and B need to have the same process grid"));
2153 ExcMessage(
2154 "Matrix has to be in Matrix state before calling this function."));
2156 ExcMessage(
2157 "Matrix B has to be in Matrix state before calling this function."));
2158
2159 if (transpose)
2160 {
2162 }
2163 else
2164 {
2166 }
2167
2168 // see
2169 // https://www.ibm.com/support/knowledgecenter/en/SSNR5K_4.2.0/com.ibm.cluster.pessl.v4r2.pssl100.doc/am6gr_lgels.htm
2171 ExcMessage(
2172 "Use identical block sizes for rows and columns of matrix A"));
2174 ExcMessage(
2175 "Use identical block sizes for rows and columns of matrix B"));
2177 ExcMessage(
2178 "Use identical block-cyclic distribution for matrices A and B"));
2179
     // work is a shared member; serialize against concurrent callers.
2180 std::lock_guard<std::mutex> lock(mutex);
2181
2182 if (grid->mpi_process_is_active)
2183 {
2184 char trans = transpose ? 'T' : 'N';
2185 NumberType *A_loc = this->values.data();
2186 NumberType *B_loc = B.values.data();
2187 int info = 0;
2188 /*
2189 * by setting lwork to -1 a workspace query for optimal length of work is
2190 * performed
2191 */
2192 int lwork = -1;
2193 work.resize(1);
2194
2195 pgels(&trans,
2196 &n_rows,
2197 &n_columns,
2198 &B.n_columns,
2199 A_loc,
2202 descriptor,
2203 B_loc,
2204 &B.submatrix_row,
2206 B.descriptor,
2207 work.data(),
2208 &lwork,
2209 &info);
2210 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgels", info));
2211
     // Second pass: the actual solve with the optimal workspace.
2212 lwork = static_cast<int>(work[0]);
2213 work.resize(lwork);
2214
2215 pgels(&trans,
2216 &n_rows,
2217 &n_columns,
2218 &B.n_columns,
2219 A_loc,
2222 descriptor,
2223 B_loc,
2224 &B.submatrix_row,
2226 B.descriptor,
2227 work.data(),
2228 &lwork,
2229 &info);
2230 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgels", info));
2231 }
2233}
2234
2235
2236
2237template <typename NumberType>
2238unsigned int
// Replaces this matrix by its Moore-Penrose pseudoinverse, computed from
// the truncated SVD: singular values sv[i] <= sv[0]*ratio are discarded.
// Only the first n_sv columns of U and rows of VT are used in the final
// product VT_R^T * diag(1/sv) * U_R^T (saving work for low-rank matrices).
// Returns the number n_sv of singular values kept, i.e. the numerical rank.
// NOTE(review): the scrape is missing several source lines in this block
// (the function name / ratio parameter at 2239, Assert first arguments at
// 2241/2244, the U/VT matrix constructor arguments at 2252/2255-2258,
// 2261-2263, the U_R/VT_R constructor arguments at 2289/2292-2295,
// 2298-2300, the reinit() arguments at 2314-2316, and the state assignment
// at 2318) -- confirm against the upstream scalapack.cc before editing.
2240{
2242 ExcMessage(
2243 "Matrix has to be in Matrix state before calling this function."))
2245 ExcMessage(
2246 "Use identical block sizes for rows and columns of matrix A"));
2247 Assert(
2248 ratio > 0. && ratio < 1.,
2249 ExcMessage(
2250 "input parameter ratio has to be larger than zero and smaller than 1"));
2251
2253 n_rows,
2254 grid,
2259 n_columns,
2260 grid,
2264 std::vector<NumberType> sv = this->compute_SVD(&U, &VT);
     // sv[0] is the largest singular value; zero means the matrix is rank 0
     // and no pseudoinverse exists.
2265 AssertThrow(sv[0] > std::numeric_limits<NumberType>::min(),
2266 ExcMessage("Matrix has rank 0"));
2267
2268 // Get number of singular values fulfilling the following: sv[i] > sv[0] *
2269 // ratio Obviously, 0-th element already satisfies sv[0] > sv[0] * ratio The
2270 // singular values in sv are ordered by descending value so we break out of
2271 // the loop if a singular value is smaller than sv[0] * ratio.
2272 unsigned int n_sv = 1;
2273 std::vector<NumberType> inv_sigma;
2274 inv_sigma.push_back(1 / sv[0]);
2275
2276 for (unsigned int i = 1; i < sv.size(); ++i)
2277 if (sv[i] > sv[0] * ratio)
2278 {
2279 ++n_sv;
2280 inv_sigma.push_back(1 / sv[i]);
2281 }
2282 else
2283 break;
2284
2285 // For the matrix multiplication we use only the columns of U and rows of VT
2286 // which are associated with singular values larger than the limit. That saves
2287 // computational time for matrices with rank significantly smaller than
2288 // min(n_rows,n_columns)
2290 n_sv,
2291 grid,
2296 n_columns,
2297 grid,
2301 U.copy_to(U_R,
2302 std::make_pair(0, 0),
2303 std::make_pair(0, 0),
2304 std::make_pair(n_rows, n_sv));
2305 VT.copy_to(VT_R,
2306 std::make_pair(0, 0),
2307 std::make_pair(0, 0),
2308 std::make_pair(n_sv, n_columns));
2309
     // Fold diag(1/sv) into VT_R, then form the (transposed) product into
     // *this, which is re-initialized to the transposed shape
     // n_columns x n_rows.
2310 VT_R.scale_rows(inv_sigma);
2311 this->reinit(n_columns,
2312 n_rows,
2313 this->grid,
2317 VT_R.mult(1, U_R, 0, *this, true, true);
2319 return n_sv;
2320}
2321
2322
2323
2324template <typename NumberType>
2325NumberType
// Estimates the reciprocal condition number 1/(||A||*||A^-1||) of a
// symmetric positive definite matrix from its Cholesky factorization via
// the ScaLAPACK driver ppocon (p?pocon), using the usual two-pass
// workspace query. a_norm must be the norm of the ORIGINAL (unfactorized)
// matrix; the result is broadcast to processes outside the BLACS grid.
// NOTE(review): the scrape is missing source lines 2326 (function name /
// a_norm parameter), 2329 (Assert first argument), and the
// submatrix_row/submatrix_column arguments at 2350-2351 and 2368-2369 --
// confirm against the upstream scalapack.cc. Also note the error-code
// label "pdpocon" in the AssertThrow does not match the generic wrapper
// name ppotrf/ppocon convention used elsewhere; runtime string left as-is.
2327 const NumberType a_norm) const
2328{
2330 ExcMessage(
2331 "Matrix has to be in Cholesky state before calling this function."));
     // work/iwork are shared members; serialize against concurrent callers.
2332 std::lock_guard<std::mutex> lock(mutex);
2333 NumberType rcond = 0.;
2334
2335 if (grid->mpi_process_is_active)
2336 {
2337 int liwork = n_local_rows;
2338 iwork.resize(liwork);
2339
2340 int info = 0;
2341 const NumberType *A_loc = this->values.data();
2342
2343 // by setting lwork to -1 a workspace query for optimal length of work is
2344 // performed
2345 int lwork = -1;
2346 work.resize(1);
2347 ppocon(&uplo,
2348 &n_columns,
2349 A_loc,
2352 descriptor,
2353 &a_norm,
2354 &rcond,
2355 work.data(),
2356 &lwork,
2357 iwork.data(),
2358 &liwork,
2359 &info);
2360 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pdpocon", info));
2361 lwork = static_cast<int>(std::ceil(work[0]));
2362 work.resize(lwork);
2363
2364 // now the actual run:
2365 ppocon(&uplo,
2366 &n_columns,
2367 A_loc,
2370 descriptor,
2371 &a_norm,
2372 &rcond,
2373 work.data(),
2374 &lwork,
2375 iwork.data(),
2376 &liwork,
2377 &info);
2378 AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pdpocon", info));
2379 }
2380 grid->send_to_inactive(&rcond);
2381 return rcond;
2382}
2383
2384
2385
2386template <typename NumberType>
2387NumberType
// One-norm (maximum absolute column sum), norm type 'O'. Dispatches to
// the symmetric (plansy-based) or general (plange-based) implementation.
// NOTE(review): the scrape is missing source lines 2388 (function name)
// and 2392 (the symmetry condition of the if) -- confirm against the
// upstream scalapack.cc.
2390{
2391 const char type('O');
2392
2393 return norm_symmetric(type);
2394 else
2395 return norm_general(type);
2396}
2397
2398
2399
2400template <typename NumberType>
2401NumberType
// Infinity norm (maximum absolute row sum), norm type 'I'. Dispatches to
// the symmetric or general implementation.
// NOTE(review): the scrape is missing source lines 2402 (function name)
// and 2406 (the symmetry condition of the if) -- confirm against the
// upstream scalapack.cc.
2404{
2405 const char type('I');
2406
2407 return norm_symmetric(type);
2408 else
2409 return norm_general(type);
2410}
2411
2412
2413
2414template <typename NumberType>
2415NumberType
// Frobenius norm (square root of the sum of squares of all entries), norm
// type 'F'. Dispatches to the symmetric or general implementation.
// NOTE(review): the scrape is missing source lines 2416 (function name)
// and 2420 (the symmetry condition of the if) -- confirm against the
// upstream scalapack.cc.
2418{
2419 const char type('F');
2420
2421 return norm_symmetric(type);
2422 else
2423 return norm_general(type);
2424}
2425
2426
2427
2428template <typename NumberType>
2429NumberType
// Computes a norm of a general (non-symmetric) distributed matrix via the
// ScaLAPACK routine plange (p?lange). The workspace size depends on the
// norm type; the result is broadcast to processes outside the BLACS grid.
// NOTE(review): the scrape is missing source lines 2430 (function name /
// type parameter), 2432-2433 (Assert first argument), the block-size
// arguments to indxg2p_/numroc_ at 2441/2443/2446/2448/2451/2456, and the
// plange submatrix arguments at 2476-2477 -- confirm against the upstream
// scalapack.cc before editing.
2431{
2434 ExcMessage("norms can be called in matrix state only."));
     // work is a shared member; serialize against concurrent callers.
2435 std::lock_guard<std::mutex> lock(mutex);
2436 NumberType res = 0.;
2437
2438 if (grid->mpi_process_is_active)
2439 {
         // Owning process row/column of the first submatrix entry, and the
         // local row/column counts, as required to size plange's workspace.
2440 const int iarow = indxg2p_(&submatrix_row,
2442 &(grid->this_process_row),
2444 &(grid->n_process_rows));
2445 const int iacol = indxg2p_(&submatrix_column,
2447 &(grid->this_process_column),
2449 &(grid->n_process_columns));
2450 const int mp0 = numroc_(&n_rows,
2452 &(grid->this_process_row),
2453 &iarow,
2454 &(grid->n_process_rows));
2455 const int nq0 = numroc_(&n_columns,
2457 &(grid->this_process_column),
2458 &iacol,
2459 &(grid->n_process_columns));
2460
2461 // type='M': compute largest absolute value
2462 // type='F' || type='E': compute Frobenius norm
2463 // type='O' || type='1': compute one norm; type='I': infinity norm
2464 int lwork = 0; // for type == 'M' || type == 'F' || type == 'E'
2465 if (type == 'O' || type == '1')
2466 lwork = nq0;
2467 else if (type == 'I')
2468 lwork = mp0;
2469
2470 work.resize(lwork);
2471 const NumberType *A_loc = this->values.begin();
2472 res = plange(&type,
2473 &n_rows,
2474 &n_columns,
2475 A_loc,
2478 descriptor,
2479 work.data());
2480 }
2481 grid->send_to_inactive(&res);
2482 return res;
2483}
2484
2485
2486
2487template <typename NumberType>
2488NumberType
// Computes a norm of a symmetric distributed matrix via the ScaLAPACK
// routine plansy (p?lansy). The workspace size formula (2*Nq0 + Np0 + ldw
// for the one/infinity norms, 0 for 'M'/'F'/'E') follows the p?lansy
// documentation; the result is broadcast to processes outside the grid.
// NOTE(review): the scrape is missing source lines 2489 (function name /
// type parameter), 2491-2492 and 2494 (Assert first arguments), the
// block-size arguments to indxg2p_/numroc_ at 2508/2510/2513/2515/2518/
// 2523, and the plansy submatrix arguments at 2541-2542 -- confirm against
// the upstream scalapack.cc before editing.
2490{
2493 ExcMessage("norms can be called in matrix state only."));
2495 ExcMessage("Matrix has to be symmetric for this operation."));
     // work is a shared member; serialize against concurrent callers.
2496 std::lock_guard<std::mutex> lock(mutex);
2497 NumberType res = 0.;
2498
2499 if (grid->mpi_process_is_active)
2500 {
2501 // int IROFFA = MOD( IA-1, MB_A )
2502 // int ICOFFA = MOD( JA-1, NB_A )
2503 const int lcm =
2504 ilcm_(&(grid->n_process_rows), &(grid->n_process_columns));
2505 const int v2 = lcm / (grid->n_process_rows);
2506
2507 const int IAROW = indxg2p_(&submatrix_row,
2509 &(grid->this_process_row),
2511 &(grid->n_process_rows));
2512 const int IACOL = indxg2p_(&submatrix_column,
2514 &(grid->this_process_column),
2516 &(grid->n_process_columns));
2517 const int Np0 = numroc_(&n_columns /*+IROFFA*/,
2519 &(grid->this_process_row),
2520 &IAROW,
2521 &(grid->n_process_rows));
2522 const int Nq0 = numroc_(&n_columns /*+ICOFFA*/,
2524 &(grid->this_process_column),
2525 &IACOL,
2526 &(grid->n_process_columns));
2527
2528 const int v1 = iceil_(&Np0, &row_block_size);
2529 const int ldw = (n_local_rows == n_local_columns) ?
2530 0 :
2531 row_block_size * iceil_(&v1, &v2);
2532
2533 const int lwork =
2534 (type == 'M' || type == 'F' || type == 'E') ? 0 : 2 * Nq0 + Np0 + ldw;
2535 work.resize(lwork);
2536 const NumberType *A_loc = this->values.begin();
2537 res = plansy(&type,
2538 &uplo,
2539 &n_columns,
2540 A_loc,
2543 descriptor,
2544 work.data());
2545 }
2546 grid->send_to_inactive(&res);
2547 return res;
2548}
2549
2550
2551
2552# ifdef DEAL_II_WITH_HDF5
2553namespace internal
2554{
2555 namespace
2556 {
// Build an HDF5 enumeration datatype mirroring LAPACKSupport::State so the
// matrix state can be stored in / read from an HDF5 file by name.
// The caller owns the returned type id and must release it with H5Tclose().
// NOTE(review): the rendered listing drops the lines declaring and
// re-assigning the enum value variable "val" (orig 2561, 2563, 2566, ...),
// e.g. "LAPACKSupport::State val = LAPACKSupport::State::cholesky;" —
// reconstruct from the repository before compiling.
2557 void
2558 create_HDF5_state_enum_id(hid_t &state_enum_id)
2559 {
2560 // create HDF5 enum type for LAPACKSupport::State
2562 state_enum_id = H5Tcreate(H5T_ENUM, sizeof(LAPACKSupport::State));
2564 herr_t status = H5Tenum_insert(state_enum_id, "cholesky", &val);
2565 AssertThrow(status >= 0, ExcInternalError());
2567 status = H5Tenum_insert(state_enum_id, "eigenvalues", &val);
2568 AssertThrow(status >= 0, ExcInternalError());
2570 status = H5Tenum_insert(state_enum_id, "inverse_matrix", &val);
2571 AssertThrow(status >= 0, ExcInternalError());
2573 status = H5Tenum_insert(state_enum_id, "inverse_svd", &val);
2574 AssertThrow(status >= 0, ExcInternalError());
2576 status = H5Tenum_insert(state_enum_id, "lu", &val);
2577 AssertThrow(status >= 0, ExcInternalError());
2579 status = H5Tenum_insert(state_enum_id, "matrix", &val);
2580 AssertThrow(status >= 0, ExcInternalError());
2582 status = H5Tenum_insert(state_enum_id, "svd", &val);
2583 AssertThrow(status >= 0, ExcInternalError());
2585 status = H5Tenum_insert(state_enum_id, "unusable", &val);
2586 AssertThrow(status >= 0, ExcInternalError());
2587 }
2588
// Same as above, but for LAPACKSupport::Property (matrix structure tags).
// NOTE(review): the "prop" declaration/assignment lines (orig 2594, 2597,
// 2600, ...) were likewise dropped by the extraction.
2589 void
2590 create_HDF5_property_enum_id(hid_t &property_enum_id)
2591 {
2592 // create HDF5 enum type for LAPACKSupport::Property
2593 property_enum_id = H5Tcreate(H5T_ENUM, sizeof(LAPACKSupport::Property));
2595 herr_t status = H5Tenum_insert(property_enum_id, "diagonal", &prop);
2596 AssertThrow(status >= 0, ExcInternalError());
2598 status = H5Tenum_insert(property_enum_id, "general", &prop);
2599 AssertThrow(status >= 0, ExcInternalError());
2601 status = H5Tenum_insert(property_enum_id, "hessenberg", &prop);
2602 AssertThrow(status >= 0, ExcInternalError());
2604 status = H5Tenum_insert(property_enum_id, "lower_triangular", &prop);
2605 AssertThrow(status >= 0, ExcInternalError());
2607 status = H5Tenum_insert(property_enum_id, "symmetric", &prop);
2608 AssertThrow(status >= 0, ExcInternalError());
2610 status = H5Tenum_insert(property_enum_id, "upper_triangular", &prop);
2611 AssertThrow(status >= 0, ExcInternalError());
2612 }
2613 } // namespace
2614} // namespace internal
2615# endif
2616
2617
2618
2619template <typename NumberType>
2620void
// Save the distributed matrix to an HDF5 file. If chunk_size is left at
// its (invalid_unsigned_int, invalid_unsigned_int) default, the matrix is
// stored column-by-column (chunks of n_rows x 1). Dispatches to the
// parallel or serial writer depending on H5_HAVE_PARALLEL; without HDF5
// support the call aborts with ExcNeedsHDF5().
// NOTE(review): the extraction dropped original line 2621, presumably the
// signature "ScaLAPACKMatrix<NumberType>::save(" — verify against the
// repository.
2622 const std::string &filename,
2623 const std::pair<unsigned int, unsigned int> &chunk_size) const
2624{
2625# ifndef DEAL_II_WITH_HDF5
2626 (void)filename;
2627 (void)chunk_size;
2628 AssertThrow(false, ExcNeedsHDF5());
2629# else
2630
2631 std::pair<unsigned int, unsigned int> chunks_size_ = chunk_size;
2632
2633 if (chunks_size_.first == numbers::invalid_unsigned_int ||
2634 chunks_size_.second == numbers::invalid_unsigned_int)
2635 {
2636 // default: store the matrix in chunks of columns
2637 chunks_size_.first = n_rows;
2638 chunks_size_.second = 1;
2639 }
// a chunk must be non-empty and no larger than the matrix itself
2640 Assert(chunks_size_.first > 0,
2641 ExcMessage("The row chunk size must be larger than 0."));
2642 AssertIndexRange(chunks_size_.first, n_rows + 1);
2643 Assert(chunks_size_.second > 0,
2644 ExcMessage("The column chunk size must be larger than 0."));
2645 AssertIndexRange(chunks_size_.second, n_columns + 1);
2646
2647# ifdef H5_HAVE_PARALLEL
2648 // implementation for configurations equipped with a parallel file system
2649 save_parallel(filename, chunks_size_);
2650
2651# else
2652 // implementation for configurations with no parallel file system
2653 save_serial(filename, chunks_size_);
2654
2655# endif
2656# endif
2657}
2658
2659
2660
2661template <typename NumberType>
2662void
// Serial HDF5 writer: gather the whole matrix onto one process via a 1x1
// process grid, then let that process write the matrix plus its state and
// property enums to the file.
// NOTE(review): the extraction dropped original line 2663 (presumably the
// signature "ScaLAPACKMatrix<NumberType>::save_serial(") and line 2670
// (presumably "AssertThrow(false, ExcNeedsHDF5());") — verify against the
// repository.
2664 const std::string &filename,
2665 const std::pair<unsigned int, unsigned int> &chunk_size) const
2666{
2667# ifndef DEAL_II_WITH_HDF5
2668 (void)filename;
2669 (void)chunk_size;
2671# else
2672
2673 /*
2674 * The content of the distributed matrix is copied to a matrix using a 1x1
2675 * process grid. Therefore, one process has all the data and can write it to a
2676 * file.
2677 *
2678 * Create a 1x1 column grid which will be used to initialize
2679 * an effectively serial ScaLAPACK matrix to gather the contents from the
2680 * current object
2681 */
2682 const auto column_grid =
2683 std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2684 1,
2685 1);
2686
// block sizes equal to the matrix extents: everything lands on one process
2687 const int MB = n_rows, NB = n_columns;
2688 ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
2689 copy_to(tmp);
2690
2691 // the 1x1 grid has only one process and this one writes
2692 // the content of the matrix to the HDF5 file
2693 if (tmp.grid->mpi_process_is_active)
2694 {
2695 herr_t status;
2696
2697 // create a new file using default properties
2698 hid_t file_id =
2699 H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
2700
2701 // modify dataset creation properties, i.e. enable chunking
2702 hsize_t chunk_dims[2];
2703 // revert order of rows and columns as ScaLAPACK uses column-major
2704 // ordering
2705 chunk_dims[0] = chunk_size.second;
2706 chunk_dims[1] = chunk_size.first;
2707 hid_t data_property = H5Pcreate(H5P_DATASET_CREATE);
2708 status = H5Pset_chunk(data_property, 2, chunk_dims);
2709 AssertThrow(status >= 0, ExcIO());
2710
2711 // create the data space for the dataset
2712 hsize_t dims[2];
2713 // change order of rows and columns as ScaLAPACKMatrix uses column major
2714 // ordering
2715 dims[0] = n_columns;
2716 dims[1] = n_rows;
2717 hid_t dataspace_id = H5Screate_simple(2, dims, nullptr);
2718
2719 // create the dataset within the file using chunk creation properties
2720 hid_t type_id = hdf5_type_id(tmp.values.data());
2721 hid_t dataset_id = H5Dcreate2(file_id,
2722 "/matrix",
2723 type_id,
2724 dataspace_id,
2725 H5P_DEFAULT,
2726 data_property,
2727 H5P_DEFAULT);
2728
2729 // write the dataset
2730 status = H5Dwrite(
2731 dataset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, tmp.values.data());
2732 AssertThrow(status >= 0, ExcIO());
2733
2734 // create HDF5 enum type for LAPACKSupport::State and
2735 // LAPACKSupport::Property
2736 hid_t state_enum_id, property_enum_id;
2737 internal::create_HDF5_state_enum_id(state_enum_id);
2738 internal::create_HDF5_property_enum_id(property_enum_id);
2739
2740 // create the data space for the state enum
2741 hsize_t dims_state[1];
2742 dims_state[0] = 1;
2743 hid_t state_enum_dataspace = H5Screate_simple(1, dims_state, nullptr);
2744 // create the dataset for the state enum
2745 hid_t state_enum_dataset = H5Dcreate2(file_id,
2746 "/state",
2747 state_enum_id,
2748 state_enum_dataspace,
2749 H5P_DEFAULT,
2750 H5P_DEFAULT,
2751 H5P_DEFAULT);
2752 // write the dataset for the state enum
2753 status = H5Dwrite(state_enum_dataset,
2754 state_enum_id,
2755 H5S_ALL,
2756 H5S_ALL,
2757 H5P_DEFAULT,
2758 &state);
2759 AssertThrow(status >= 0, ExcIO());
2760
2761 // create the data space for the property enum
2762 hsize_t dims_property[1];
2763 dims_property[0] = 1;
2764 hid_t property_enum_dataspace =
2765 H5Screate_simple(1, dims_property, nullptr);
2766 // create the dataset for the property enum
2767 hid_t property_enum_dataset = H5Dcreate2(file_id,
2768 "/property",
2769 property_enum_id,
2770 property_enum_dataspace,
2771 H5P_DEFAULT,
2772 H5P_DEFAULT,
2773 H5P_DEFAULT);
2774 // write the dataset for the property enum
2775 status = H5Dwrite(property_enum_dataset,
2776 property_enum_id,
2777 H5S_ALL,
2778 H5S_ALL,
2779 H5P_DEFAULT,
2780 &property);
2781 AssertThrow(status >= 0, ExcIO());
2782
2783 // end access to the datasets and release resources used by them
2784 status = H5Dclose(dataset_id);
2785 AssertThrow(status >= 0, ExcIO());
2786 status = H5Dclose(state_enum_dataset);
2787 AssertThrow(status >= 0, ExcIO());
2788 status = H5Dclose(property_enum_dataset);
2789 AssertThrow(status >= 0, ExcIO());
2790
2791 // terminate access to the data spaces
2792 status = H5Sclose(dataspace_id);
2793 AssertThrow(status >= 0, ExcIO());
2794 status = H5Sclose(state_enum_dataspace);
2795 AssertThrow(status >= 0, ExcIO());
2796 status = H5Sclose(property_enum_dataspace);
2797 AssertThrow(status >= 0, ExcIO());
2798
2799 // release enum data types
2800 status = H5Tclose(state_enum_id);
2801 AssertThrow(status >= 0, ExcIO());
2802 status = H5Tclose(property_enum_id);
2803 AssertThrow(status >= 0, ExcIO());
2804
2805 // release the creation property
2806 status = H5Pclose(data_property);
2807 AssertThrow(status >= 0, ExcIO());
2808
2809 // close the file.
2810 status = H5Fclose(file_id);
2811 AssertThrow(status >= 0, ExcIO());
2812 }
2813# endif
2814}
2815
2816
2817
2818template <typename NumberType>
2819void
// Parallel HDF5 writer: redistribute the matrix onto a 1 x n_processes
// column grid so each process owns a contiguous block of columns, then
// write those blocks independently via hyperslab selections; the root
// process appends the state/property enums afterwards.
// NOTE(review): the extraction dropped original line 2820 (presumably the
// signature "ScaLAPACKMatrix<NumberType>::save_parallel("), line 2827
// (presumably "AssertThrow(false, ExcNeedsHDF5());"), line 2860 (the
// minimum block-size constant in the std::max below), and line 2929 (the
// initializer of my_rank) — verify against the repository.
2821 const std::string &filename,
2822 const std::pair<unsigned int, unsigned int> &chunk_size) const
2823{
2824# ifndef DEAL_II_WITH_HDF5
2825 (void)filename;
2826 (void)chunk_size;
2828# else
2829
2830 const unsigned int n_mpi_processes(
2831 Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator));
2832 MPI_Info info = MPI_INFO_NULL;
2833 /*
2834 * The content of the distributed matrix is copied to a matrix using a
2835 * 1xn_processes process grid. Therefore, the processes hold contiguous chunks
2836 * of the matrix, which they can write to the file
2837 *
2838 * Create a 1xn_processes column grid
2839 */
2840 const auto column_grid =
2841 std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2842 1,
2843 n_mpi_processes);
2844
2845 const int MB = n_rows;
2846 /*
2847 * If the ratio n_columns/n_mpi_processes is smaller than the column block
2848 * size of the original matrix, the redistribution and saving of the matrix
2849 * requires a significant amount of MPI communication. Therefore, it is better
2850 * to set a minimum value for the block size NB, causing only
2851 * ceil(n_columns/NB) processes being actively involved in saving the matrix.
2852 * Example: A 2*10^9 x 400 matrix is distributed on a 80 x 5 process grid
2853 * using block size 32. Instead of distributing the matrix on a 1 x 400
2854 * process grid with a row block size of 2*10^9 and a column block size of 1,
2855 * the minimum value for NB yields that only ceil(400/32)=13 processes will be
2856 * writing the matrix to disk.
2857 */
2858 const int NB = std::max(static_cast<int>(std::ceil(
2859 static_cast<double>(n_columns) / n_mpi_processes)),
2861
2862 ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
2863 copy_to(tmp);
2864
2865 // get pointer to data held by the process
2866 NumberType *data = (tmp.values.size() > 0) ? tmp.values.data() : nullptr;
2867
2868 herr_t status;
2869 // dataset dimensions
2870 hsize_t dims[2];
2871
2872 // set up file access property list with parallel I/O access
2873 hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
2874 status = H5Pset_fapl_mpio(plist_id, tmp.grid->mpi_communicator, info);
2875 AssertThrow(status >= 0, ExcIO());
2876
2877 // create a new file collectively and release property list identifier
2878 hid_t file_id =
2879 H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
2880 status = H5Pclose(plist_id);
2881 AssertThrow(status >= 0, ExcIO());
2882
2883 // As ScaLAPACK, and therefore the class ScaLAPACKMatrix, uses column-major
2884 // ordering but HDF5 row-major ordering, we have to reverse entries related to
2885 // columns and rows in the following. create the dataspace for the dataset
2886 dims[0] = tmp.n_columns;
2887 dims[1] = tmp.n_rows;
2888
2889 hid_t filespace = H5Screate_simple(2, dims, nullptr);
2890
2891 // create the chunked dataset with default properties and close filespace
2892 hsize_t chunk_dims[2];
2893 // revert order of rows and columns as ScaLAPACK uses column-major ordering
2894 chunk_dims[0] = chunk_size.second;
2895 chunk_dims[1] = chunk_size.first;
2896 plist_id = H5Pcreate(H5P_DATASET_CREATE);
2897 H5Pset_chunk(plist_id, 2, chunk_dims);
2898 hid_t type_id = hdf5_type_id(data);
2899 hid_t dset_id = H5Dcreate2(
2900 file_id, "/matrix", type_id, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
2901
2902 status = H5Sclose(filespace);
2903 AssertThrow(status >= 0, ExcIO());
2904
2905 status = H5Pclose(plist_id);
2906 AssertThrow(status >= 0, ExcIO());
2907
2908 // gather the number of local rows and columns from all processes
2909 std::vector<int> proc_n_local_rows(n_mpi_processes),
2910 proc_n_local_columns(n_mpi_processes);
2911 int ierr = MPI_Allgather(&tmp.n_local_rows,
2912 1,
2913 MPI_INT,
2914 proc_n_local_rows.data(),
2915 1,
2916 MPI_INT,
2917 tmp.grid->mpi_communicator);
2918 AssertThrowMPI(ierr);
2919 ierr = MPI_Allgather(&tmp.n_local_columns,
2920 1,
2921 MPI_INT,
2922 proc_n_local_columns.data(),
2923 1,
2924 MPI_INT,
2925 tmp.grid->mpi_communicator);
2926 AssertThrowMPI(ierr);
2927
2928 const unsigned int my_rank(
2930
2931 // hyperslab selection parameters
2932 // each process defines dataset in memory and writes it to the hyperslab in
2933 // the file
2934 hsize_t count[2];
2935 count[0] = tmp.n_local_columns;
2936 count[1] = tmp.n_rows;
2937 hid_t memspace = H5Screate_simple(2, count, nullptr);
2938
// column offset of this process's block = sum of the column counts of all
// lower-ranked processes
2939 hsize_t offset[2] = {0};
2940 for (unsigned int i = 0; i < my_rank; ++i)
2941 offset[0] += proc_n_local_columns[i];
2942
2943 // select hyperslab in the file.
2944 filespace = H5Dget_space(dset_id);
2945 status = H5Sselect_hyperslab(
2946 filespace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
2947 AssertThrow(status >= 0, ExcIO());
2948
2949 // create property list for independent dataset write
2950 plist_id = H5Pcreate(H5P_DATASET_XFER);
2951 status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_INDEPENDENT);
2952 AssertThrow(status >= 0, ExcIO());
2953
2954 // process with no data will not participate in writing to the file
2955 if (tmp.values.size() > 0)
2956 {
2957 status = H5Dwrite(dset_id, type_id, memspace, filespace, plist_id, data);
2958 AssertThrow(status >= 0, ExcIO());
2959 }
2960 // close/release sources
2961 status = H5Dclose(dset_id);
2962 AssertThrow(status >= 0, ExcIO());
2963 status = H5Sclose(filespace);
2964 AssertThrow(status >= 0, ExcIO());
2965 status = H5Sclose(memspace);
2966 AssertThrow(status >= 0, ExcIO());
2967 status = H5Pclose(plist_id);
2968 AssertThrow(status >= 0, ExcIO());
2969 status = H5Fclose(file_id);
2970 AssertThrow(status >= 0, ExcIO());
2971
2972 // before writing the state and property to file wait for
2973 // all processes to finish writing the matrix content to the file
2974 ierr = MPI_Barrier(tmp.grid->mpi_communicator);
2975 AssertThrowMPI(ierr);
2976
2977 // only root process will write state and property to the file
2978 if (tmp.grid->this_mpi_process == 0)
2979 {
2980 // open file using default properties
2981 hid_t file_id_reopen =
2982 H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
2983
2984 // create HDF5 enum type for LAPACKSupport::State and
2985 // LAPACKSupport::Property
2986 hid_t state_enum_id, property_enum_id;
2987 internal::create_HDF5_state_enum_id(state_enum_id);
2988 internal::create_HDF5_property_enum_id(property_enum_id);
2989
2990 // create the data space for the state enum
2991 hsize_t dims_state[1];
2992 dims_state[0] = 1;
2993 hid_t state_enum_dataspace = H5Screate_simple(1, dims_state, nullptr);
2994 // create the dataset for the state enum
2995 hid_t state_enum_dataset = H5Dcreate2(file_id_reopen,
2996 "/state",
2997 state_enum_id,
2998 state_enum_dataspace,
2999 H5P_DEFAULT,
3000 H5P_DEFAULT,
3001 H5P_DEFAULT);
3002 // write the dataset for the state enum
3003 status = H5Dwrite(state_enum_dataset,
3004 state_enum_id,
3005 H5S_ALL,
3006 H5S_ALL,
3007 H5P_DEFAULT,
3008 &state);
3009 AssertThrow(status >= 0, ExcIO());
3010
3011 // create the data space for the property enum
3012 hsize_t dims_property[1];
3013 dims_property[0] = 1;
3014 hid_t property_enum_dataspace =
3015 H5Screate_simple(1, dims_property, nullptr);
3016 // create the dataset for the property enum
3017 hid_t property_enum_dataset = H5Dcreate2(file_id_reopen,
3018 "/property",
3019 property_enum_id,
3020 property_enum_dataspace,
3021 H5P_DEFAULT,
3022 H5P_DEFAULT,
3023 H5P_DEFAULT);
3024 // write the dataset for the property enum
3025 status = H5Dwrite(property_enum_dataset,
3026 property_enum_id,
3027 H5S_ALL,
3028 H5S_ALL,
3029 H5P_DEFAULT,
3030 &property);
3031 AssertThrow(status >= 0, ExcIO());
3032
3033 status = H5Dclose(state_enum_dataset);
3034 AssertThrow(status >= 0, ExcIO());
3035 status = H5Dclose(property_enum_dataset);
3036 AssertThrow(status >= 0, ExcIO());
3037 status = H5Sclose(state_enum_dataspace);
3038 AssertThrow(status >= 0, ExcIO());
3039 status = H5Sclose(property_enum_dataspace);
3040 AssertThrow(status >= 0, ExcIO());
3041 status = H5Tclose(state_enum_id);
3042 AssertThrow(status >= 0, ExcIO());
3043 status = H5Tclose(property_enum_id);
3044 AssertThrow(status >= 0, ExcIO());
3045 status = H5Fclose(file_id_reopen);
3046 AssertThrow(status >= 0, ExcIO());
3047 }
3048
3049# endif
3050}
3051
3052
3053
3054template <typename NumberType>
3055void
3056ScaLAPACKMatrix<NumberType>::load(const std::string &filename)
3057{
3058# ifndef DEAL_II_WITH_HDF5
3059 (void)filename;
3060 AssertThrow(false, ExcNeedsHDF5());
3061# else
3062# ifdef H5_HAVE_PARALLEL
3063 // implementation for configurations equipped with a parallel file system
3064 load_parallel(filename);
3065
3066# else
3067 // implementation for configurations with no parallel file system
3068 load_serial(filename);
3069# endif
3070# endif
3071}
3072
3073
3074
3075template <typename NumberType>
3076void
// Serial HDF5 reader: one process (the single member of a 1x1 grid) reads
// the full matrix plus the state/property enums from the file, then the
// result is redistributed to this matrix's process grid. State and
// property are broadcast to inactive processes as plain integers.
// NOTE(review): the extraction dropped original line 3077 (presumably the
// signature "ScaLAPACKMatrix<NumberType>::load_serial(const std::string
// &filename)"), line 3081 (presumably "AssertThrow(false,
// ExcNeedsHDF5());"), and the "AssertThrow(" openers on 3120, 3133 and
// 3137 — verify against the repository.
3078{
3079# ifndef DEAL_II_WITH_HDF5
3080 (void)filename;
3082# else
3083
3084 /*
3085 * The content of the distributed matrix is copied to a matrix using a 1x1
3086 * process grid. Therefore, one process has all the data and can write it to a
3087 * file
3088 */
3089 // create a 1xP column grid with P being the number of MPI processes
3090 const auto one_grid =
3091 std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
3092 1,
3093 1);
3094
3095 const int MB = n_rows, NB = n_columns;
3096 ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, one_grid, MB, NB);
3097
// sentinels; overwritten on the active process, then broadcast below
3098 int state_int = -1;
3099 int property_int = -1;
3100
3101 // the 1x1 grid has only one process and this one reads
3102 // the content of the matrix from the HDF5 file
3103 if (tmp.grid->mpi_process_is_active)
3104 {
3105 herr_t status;
3106
3107 // open the file in read-only mode
3108 hid_t file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
3109
3110 // open the dataset in the file
3111 hid_t dataset_id = H5Dopen2(file_id, "/matrix", H5P_DEFAULT);
3112
3113 // check the datatype of the data in the file
3114 // datatype of source and destination must have the same class
3115 // see HDF User's Guide: 6.10. Data Transfer: Datatype Conversion and
3116 // Selection
3117 hid_t datatype = H5Dget_type(dataset_id);
3118 H5T_class_t t_class_in = H5Tget_class(datatype);
3119 H5T_class_t t_class = H5Tget_class(hdf5_type_id(tmp.values.data()));
3121 t_class_in == t_class,
3122 ExcMessage(
3123 "The data type of the matrix to be read does not match the archive"));
3124
3125 // get dataspace handle
3126 hid_t dataspace_id = H5Dget_space(dataset_id);
3127 // get number of dimensions
3128 const int ndims = H5Sget_simple_extent_ndims(dataspace_id);
3129 AssertThrow(ndims == 2, ExcIO());
3130 // get every dimension
3131 hsize_t dims[2];
3132 H5Sget_simple_extent_dims(dataspace_id, dims, nullptr);
3134 static_cast<int>(dims[0]) == n_columns,
3135 ExcMessage(
3136 "The number of columns of the matrix does not match the content of the archive"));
3138 static_cast<int>(dims[1]) == n_rows,
3139 ExcMessage(
3140 "The number of rows of the matrix does not match the content of the archive"));
3141
3142 // read data
3143 status = H5Dread(dataset_id,
3144 hdf5_type_id(tmp.values.data()),
3145 H5S_ALL,
3146 H5S_ALL,
3147 H5P_DEFAULT,
3148 tmp.values.data());
3149 AssertThrow(status >= 0, ExcIO());
3150
3151 // create HDF5 enum type for LAPACKSupport::State and
3152 // LAPACKSupport::Property
3153 hid_t state_enum_id, property_enum_id;
3154 internal::create_HDF5_state_enum_id(state_enum_id);
3155 internal::create_HDF5_property_enum_id(property_enum_id);
3156
3157 // open the datasets for the state and property enum in the file
3158 hid_t dataset_state_id = H5Dopen2(file_id, "/state", H5P_DEFAULT);
3159 hid_t datatype_state = H5Dget_type(dataset_state_id);
3160 H5T_class_t t_class_state = H5Tget_class(datatype_state);
3161 AssertThrow(t_class_state == H5T_ENUM, ExcIO());
3162
3163 hid_t dataset_property_id = H5Dopen2(file_id, "/property", H5P_DEFAULT);
3164 hid_t datatype_property = H5Dget_type(dataset_property_id);
3165 H5T_class_t t_class_property = H5Tget_class(datatype_property);
3166 AssertThrow(t_class_property == H5T_ENUM, ExcIO());
3167
3168 // get dataspace handles
3169 hid_t dataspace_state = H5Dget_space(dataset_state_id);
3170 hid_t dataspace_property = H5Dget_space(dataset_property_id);
3171 // get number of dimensions
3172 const int ndims_state = H5Sget_simple_extent_ndims(dataspace_state);
3173 AssertThrow(ndims_state == 1, ExcIO());
3174 const int ndims_property = H5Sget_simple_extent_ndims(dataspace_property);
3175 AssertThrow(ndims_property == 1, ExcIO());
3176 // get every dimension
3177 hsize_t dims_state[1];
3178 H5Sget_simple_extent_dims(dataspace_state, dims_state, nullptr);
3179 AssertThrow(static_cast<int>(dims_state[0]) == 1, ExcIO());
3180 hsize_t dims_property[1];
3181 H5Sget_simple_extent_dims(dataspace_property, dims_property, nullptr);
3182 AssertThrow(static_cast<int>(dims_property[0]) == 1, ExcIO());
3183
3184 // read data
3185 status = H5Dread(dataset_state_id,
3186 state_enum_id,
3187 H5S_ALL,
3188 H5S_ALL,
3189 H5P_DEFAULT,
3190 &tmp.state);
3191 AssertThrow(status >= 0, ExcIO());
3192 // To send the state from the root process to the other processes
3193 // the state enum is casted to an integer, that will be broadcasted and
3194 // subsequently casted back to the enum type
3195 state_int = static_cast<int>(tmp.state);
3196
3197 status = H5Dread(dataset_property_id,
3198 property_enum_id,
3199 H5S_ALL,
3200 H5S_ALL,
3201 H5P_DEFAULT,
3202 &tmp.property);
3203 AssertThrow(status >= 0, ExcIO());
3204 // To send the property from the root process to the other processes
3205 // the state enum is casted to an integer, that will be broadcasted and
3206 // subsequently casted back to the enum type
3207 property_int = static_cast<int>(tmp.property);
3208
3209 // terminate access to the data spaces
3210 status = H5Sclose(dataspace_id);
3211 AssertThrow(status >= 0, ExcIO());
3212 status = H5Sclose(dataspace_state);
3213 AssertThrow(status >= 0, ExcIO());
3214 status = H5Sclose(dataspace_property);
3215 AssertThrow(status >= 0, ExcIO());
3216
3217 // release data type handles
3218 status = H5Tclose(datatype);
3219 AssertThrow(status >= 0, ExcIO());
3220 status = H5Tclose(state_enum_id);
3221 AssertThrow(status >= 0, ExcIO());
3222 status = H5Tclose(property_enum_id);
3223 AssertThrow(status >= 0, ExcIO());
3224
3225 // end access to the data sets and release resources used by them
3226 status = H5Dclose(dataset_state_id);
3227 AssertThrow(status >= 0, ExcIO());
3228 status = H5Dclose(dataset_id);
3229 AssertThrow(status >= 0, ExcIO());
3230 status = H5Dclose(dataset_property_id);
3231 AssertThrow(status >= 0, ExcIO());
3232
3233 // close the file.
3234 status = H5Fclose(file_id);
3235 AssertThrow(status >= 0, ExcIO());
3236 }
3237 // so far only the root process has the correct state integer --> broadcasting
3238 tmp.grid->send_to_inactive(&state_int, 1);
3239 // so far only the root process has the correct property integer -->
3240 // broadcasting
3241 tmp.grid->send_to_inactive(&property_int, 1);
3242
3243 tmp.state = static_cast<LAPACKSupport::State>(state_int);
3244 tmp.property = static_cast<LAPACKSupport::Property>(property_int);
3245
// redistribute from the serial helper matrix back onto this matrix's grid
3246 tmp.copy_to(*this);
3247
3248# endif // DEAL_II_WITH_HDF5
3249}
3250
3251
3252
3253template <typename NumberType>
3254void
// Parallel HDF5 reader (mirror image of save_parallel): redistribute onto
// a 1 x n_processes column grid, let each process read its contiguous
// column block via an independent hyperslab read, read the state/property
// enums, and copy the result back onto this matrix's process grid.
// NOTE(review): the extraction dropped original line 3255 (presumably the
// signature "ScaLAPACKMatrix<NumberType>::load_parallel("), lines
// 3259/3263 (presumably "AssertThrow(false, ...)" for missing HDF5 /
// parallel HDF5), line 3284 (the minimum block-size constant), line 3330
// and 3334 ("AssertThrow(" openers), and line 3360 (the my_rank
// initializer) — verify against the repository.
3256{
3257# ifndef DEAL_II_WITH_HDF5
3258 (void)filename;
3260# else
3261# ifndef H5_HAVE_PARALLEL
3262 (void)filename;
3264# else
3265
3266 const unsigned int n_mpi_processes(
3267 Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator));
3268 MPI_Info info = MPI_INFO_NULL;
3269 /*
3270 * The content of the distributed matrix is copied to a matrix using a
3271 * 1xn_processes process grid. Therefore, the processes hold contiguous chunks
3272 * of the matrix, which they can write to the file
3273 */
3274 // create a 1xP column grid with P being the number of MPI processes
3275 const auto column_grid =
3276 std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
3277 1,
3278 n_mpi_processes);
3279
3280 const int MB = n_rows;
3281 // for the choice of NB see explanation in save_parallel()
3282 const int NB = std::max(static_cast<int>(std::ceil(
3283 static_cast<double>(n_columns) / n_mpi_processes)),
3285
3286 ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
3287
3288 // get pointer to data held by the process
3289 NumberType *data = (tmp.values.size() > 0) ? tmp.values.data() : nullptr;
3290
3291 herr_t status;
3292
3293 // set up file access property list with parallel I/O access
3294 hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
3295 status = H5Pset_fapl_mpio(plist_id, tmp.grid->mpi_communicator, info);
3296 AssertThrow(status >= 0, ExcIO());
3297
3298 // open file collectively in read-only mode and release property list
3299 // identifier
3300 hid_t file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, plist_id);
3301 status = H5Pclose(plist_id);
3302 AssertThrow(status >= 0, ExcIO());
3303
3304 // open the dataset in the file collectively
3305 hid_t dataset_id = H5Dopen2(file_id, "/matrix", H5P_DEFAULT);
3306
3307 // check the datatype of the dataset in the file
3308 // if the classes of type of the dataset and the matrix do not match abort
3309 // see HDF User's Guide: 6.10. Data Transfer: Datatype Conversion and
3310 // Selection
3311 hid_t datatype = hdf5_type_id(data);
3312 hid_t datatype_inp = H5Dget_type(dataset_id);
3313 H5T_class_t t_class_inp = H5Tget_class(datatype_inp);
3314 H5T_class_t t_class = H5Tget_class(datatype);
3316 t_class_inp == t_class,
3317 ExcMessage(
3318 "The data type of the matrix to be read does not match the archive"));
3319
3320 // get the dimensions of the matrix stored in the file
3321 // get dataspace handle
3322 hid_t dataspace_id = H5Dget_space(dataset_id);
3323 // get number of dimensions
3324 const int ndims = H5Sget_simple_extent_ndims(dataspace_id);
3325 AssertThrow(ndims == 2, ExcIO());
3326 // get every dimension
3327 hsize_t dims[2];
3328 status = H5Sget_simple_extent_dims(dataspace_id, dims, nullptr);
3329 AssertThrow(status >= 0, ExcIO());
3331 static_cast<int>(dims[0]) == n_columns,
3332 ExcMessage(
3333 "The number of columns of the matrix does not match the content of the archive"));
3335 static_cast<int>(dims[1]) == n_rows,
3336 ExcMessage(
3337 "The number of rows of the matrix does not match the content of the archive"));
3338
3339 // gather the number of local rows and columns from all processes
3340 std::vector<int> proc_n_local_rows(n_mpi_processes),
3341 proc_n_local_columns(n_mpi_processes);
3342 int ierr = MPI_Allgather(&tmp.n_local_rows,
3343 1,
3344 MPI_INT,
3345 proc_n_local_rows.data(),
3346 1,
3347 MPI_INT,
3348 tmp.grid->mpi_communicator);
3349 AssertThrowMPI(ierr);
3350 ierr = MPI_Allgather(&tmp.n_local_columns,
3351 1,
3352 MPI_INT,
3353 proc_n_local_columns.data(),
3354 1,
3355 MPI_INT,
3356 tmp.grid->mpi_communicator);
3357 AssertThrowMPI(ierr);
3358
3359 const unsigned int my_rank(
3361
3362 // hyperslab selection parameters
3363 // each process defines dataset in memory and writes it to the hyperslab in
3364 // the file
3365 hsize_t count[2];
3366 count[0] = tmp.n_local_columns;
3367 count[1] = tmp.n_local_rows;
3368
// column offset of this process's block = sum of the column counts of all
// lower-ranked processes
3369 hsize_t offset[2] = {0};
3370 for (unsigned int i = 0; i < my_rank; ++i)
3371 offset[0] += proc_n_local_columns[i];
3372
3373 // select hyperslab in the file
3374 status = H5Sselect_hyperslab(
3375 dataspace_id, H5S_SELECT_SET, offset, nullptr, count, nullptr);
3376 AssertThrow(status >= 0, ExcIO());
3377
3378 // create a memory dataspace independently
3379 hid_t memspace = H5Screate_simple(2, count, nullptr);
3380
3381 // read data independently
3382 status =
3383 H5Dread(dataset_id, datatype, memspace, dataspace_id, H5P_DEFAULT, data);
3384 AssertThrow(status >= 0, ExcIO());
3385
3386 // create HDF5 enum type for LAPACKSupport::State and LAPACKSupport::Property
3387 hid_t state_enum_id, property_enum_id;
3388 internal::create_HDF5_state_enum_id(state_enum_id);
3389 internal::create_HDF5_property_enum_id(property_enum_id);
3390
3391 // open the datasets for the state and property enum in the file
3392 hid_t dataset_state_id = H5Dopen2(file_id, "/state", H5P_DEFAULT);
3393 hid_t datatype_state = H5Dget_type(dataset_state_id);
3394 H5T_class_t t_class_state = H5Tget_class(datatype_state);
3395 AssertThrow(t_class_state == H5T_ENUM, ExcIO());
3396
3397 hid_t dataset_property_id = H5Dopen2(file_id, "/property", H5P_DEFAULT);
3398 hid_t datatype_property = H5Dget_type(dataset_property_id);
3399 H5T_class_t t_class_property = H5Tget_class(datatype_property);
3400 AssertThrow(t_class_property == H5T_ENUM, ExcIO());
3401
3402 // get dataspace handles
3403 hid_t dataspace_state = H5Dget_space(dataset_state_id);
3404 hid_t dataspace_property = H5Dget_space(dataset_property_id);
3405 // get number of dimensions
3406 const int ndims_state = H5Sget_simple_extent_ndims(dataspace_state);
3407 AssertThrow(ndims_state == 1, ExcIO());
3408 const int ndims_property = H5Sget_simple_extent_ndims(dataspace_property);
3409 AssertThrow(ndims_property == 1, ExcIO());
3410 // get every dimension
3411 hsize_t dims_state[1];
3412 H5Sget_simple_extent_dims(dataspace_state, dims_state, nullptr);
3413 AssertThrow(static_cast<int>(dims_state[0]) == 1, ExcIO());
3414 hsize_t dims_property[1];
3415 H5Sget_simple_extent_dims(dataspace_property, dims_property, nullptr);
3416 AssertThrow(static_cast<int>(dims_property[0]) == 1, ExcIO());
3417
3418 // read data
3419 status = H5Dread(
3420 dataset_state_id, state_enum_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tmp.state);
3421 AssertThrow(status >= 0, ExcIO());
3422
3423 status = H5Dread(dataset_property_id,
3424 property_enum_id,
3425 H5S_ALL,
3426 H5S_ALL,
3427 H5P_DEFAULT,
3428 &tmp.property);
3429 AssertThrow(status >= 0, ExcIO());
3430
3431 // close/release sources
3432 status = H5Sclose(memspace);
3433 AssertThrow(status >= 0, ExcIO());
3434 status = H5Dclose(dataset_id);
3435 AssertThrow(status >= 0, ExcIO());
3436 status = H5Dclose(dataset_state_id);
3437 AssertThrow(status >= 0, ExcIO());
3438 status = H5Dclose(dataset_property_id);
3439 AssertThrow(status >= 0, ExcIO());
3440 status = H5Sclose(dataspace_id);
3441 AssertThrow(status >= 0, ExcIO());
3442 status = H5Sclose(dataspace_state);
3443 AssertThrow(status >= 0, ExcIO());
3444 status = H5Sclose(dataspace_property);
3445 AssertThrow(status >= 0, ExcIO());
// "datatype" comes from hdf5_type_id() (a predefined native type), so it
// is intentionally not closed here
3446 // status = H5Tclose(datatype);
3447 // AssertThrow(status >= 0, ExcIO());
3448 status = H5Tclose(state_enum_id);
3449 AssertThrow(status >= 0, ExcIO());
3450 status = H5Tclose(property_enum_id);
3451 AssertThrow(status >= 0, ExcIO());
3452 status = H5Fclose(file_id);
3453 AssertThrow(status >= 0, ExcIO());
3454
3455 // copying the distributed matrices
3456 tmp.copy_to(*this);
3457
3458# endif // H5_HAVE_PARALLEL
3459# endif // DEAL_II_WITH_HDF5
3460}
3461
3462
3463
3464namespace internal
3465{
3466 namespace
3467 {
3468 template <typename NumberType>
3469 void
3470 scale_columns(ScaLAPACKMatrix<NumberType> &matrix,
3471 const ArrayView<const NumberType> &factors)
3472 {
3473 Assert(matrix.n() == factors.size(),
3474 ExcDimensionMismatch(matrix.n(), factors.size()));
3475
3476 for (unsigned int i = 0; i < matrix.local_n(); ++i)
3477 {
3478 const NumberType s = factors[matrix.global_column(i)];
3479
3480 for (unsigned int j = 0; j < matrix.local_m(); ++j)
3481 matrix.local_el(j, i) *= s;
3482 }
3483 }
3484
3485 template <typename NumberType>
3486 void
3487 scale_rows(ScaLAPACKMatrix<NumberType> &matrix,
3488 const ArrayView<const NumberType> &factors)
3489 {
3490 Assert(matrix.m() == factors.size(),
3491 ExcDimensionMismatch(matrix.m(), factors.size()));
3492
3493 for (unsigned int i = 0; i < matrix.local_m(); ++i)
3494 {
3495 const NumberType s = factors[matrix.global_row(i)];
3496
3497 for (unsigned int j = 0; j < matrix.local_n(); ++j)
3498 matrix.local_el(i, j) *= s;
3499 }
3500 }
3501
3502 } // namespace
3503} // namespace internal
3504
3505
3506
3507template <typename NumberType>
3508template <class InputVector>
// Public wrapper: scale the matrix columns by the entries of the given
// vector (column i is multiplied by factors[i]); inactive processes are a
// no-op. Delegates to internal::scale_columns().
// NOTE(review): the extraction dropped original line 3510, presumably the
// signature "ScaLAPACKMatrix<NumberType>::scale_columns(const InputVector
// &factors)" — verify against the repository.
3509void
3511{
3512 if (this->grid->mpi_process_is_active)
3513 internal::scale_columns(*this, make_array_view(factors));
3514}
3515
3516
3517
3518template <typename NumberType>
3519template <class InputVector>
// Public wrapper: scale the matrix rows by the entries of the given vector
// (row i is multiplied by factors[i]); inactive processes are a no-op.
// Delegates to internal::scale_rows().
// NOTE(review): the extraction dropped original line 3521, presumably the
// signature "ScaLAPACKMatrix<NumberType>::scale_rows(const InputVector
// &factors)" — verify against the repository.
3520void
3522{
3523 if (this->grid->mpi_process_is_active)
3524 internal::scale_rows(*this, make_array_view(factors));
3525}
3526
3527
3528
3529// instantiations
3530# include "lac/scalapack.inst"
3531
3532
3534
3535#endif // DEAL_II_WITH_SCALAPACK
ArrayView< std::remove_reference_t< typename std::iterator_traits< Iterator >::reference >, MemorySpaceType > make_array_view(const Iterator begin, const Iterator end)
Definition array_view.h:954
pointer data()
size_type size() const
std::size_t size() const
Definition array_view.h:689
size_type m() const
size_type n() const
int descriptor[9]
Definition scalapack.h:934
std::vector< NumberType > compute_SVD(ScaLAPACKMatrix< NumberType > *U=nullptr, ScaLAPACKMatrix< NumberType > *VT=nullptr)
std::vector< NumberType > eigenpairs_symmetric_by_value(const std::pair< NumberType, NumberType > &value_limits, const bool compute_eigenvectors)
NumberType frobenius_norm() const
unsigned int pseudoinverse(const NumberType ratio)
std::vector< NumberType > eigenpairs_symmetric_by_value_MRRR(const std::pair< NumberType, NumberType > &value_limits, const bool compute_eigenvectors)
const int first_process_column
Definition scalapack.h:968
void copy_from(const LAPACKFullMatrix< NumberType > &matrix, const unsigned int rank)
Definition scalapack.cc:362
void save_parallel(const std::string &filename, const std::pair< unsigned int, unsigned int > &chunk_size) const
void least_squares(ScaLAPACKMatrix< NumberType > &B, const bool transpose=false)
NumberType local_el(const unsigned int loc_row, const unsigned int loc_column) const
ScaLAPACKMatrix< NumberType > & operator=(const FullMatrix< NumberType > &)
Definition scalapack.cc:331
void Tadd(const NumberType b, const ScaLAPACKMatrix< NumberType > &B)
void mTmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
std::vector< int > iwork
Definition scalapack.h:944
unsigned int size_type
Definition scalapack.h:81
void add(const ScaLAPACKMatrix< NumberType > &B, const NumberType a=0., const NumberType b=1., const bool transpose_B=false)
Definition scalapack.cc:994
size_type n() const
LAPACKSupport::State get_state() const
Definition scalapack.cc:322
LAPACKSupport::Property get_property() const
Definition scalapack.cc:313
void Tmmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
std::vector< NumberType > eigenpairs_symmetric_MRRR(const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int), const std::pair< NumberType, NumberType > &value_limits=std::make_pair(std::numeric_limits< NumberType >::quiet_NaN(), std::numeric_limits< NumberType >::quiet_NaN()))
void scale_rows(const InputVector &factors)
const int first_process_row
Definition scalapack.h:962
std::shared_ptr< const Utilities::MPI::ProcessGrid > grid
Definition scalapack.h:899
ScaLAPACKMatrix(const size_type n_rows, const size_type n_columns, const std::shared_ptr< const Utilities::MPI::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::general)
Definition scalapack.cc:80
void load(const std::string &filename)
const int submatrix_column
Definition scalapack.h:980
const int submatrix_row
Definition scalapack.h:974
void mmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
std::vector< NumberType > eigenpairs_symmetric(const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int), const std::pair< NumberType, NumberType > &value_limits=std::make_pair(std::numeric_limits< NumberType >::quiet_NaN(), std::numeric_limits< NumberType >::quiet_NaN()))
void save_serial(const std::string &filename, const std::pair< unsigned int, unsigned int > &chunk_size) const
NumberType norm_general(const char type) const
std::vector< NumberType > work
Definition scalapack.h:939
void save(const std::string &filename, const std::pair< unsigned int, unsigned int > &chunk_size=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int)) const
void load_parallel(const std::string &filename)
NumberType l1_norm() const
void compute_lu_factorization()
NumberType norm_symmetric(const char type) const
const char uplo
Definition scalapack.h:956
void mult(const NumberType b, const ScaLAPACKMatrix< NumberType > &B, const NumberType c, ScaLAPACKMatrix< NumberType > &C, const bool transpose_A=false, const bool transpose_B=false) const
LAPACKSupport::Property property
Definition scalapack.h:892
void set_property(const LAPACKSupport::Property property)
Definition scalapack.cc:303
void reinit(const size_type n_rows, const size_type n_columns, const std::shared_ptr< const Utilities::MPI::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::general)
Definition scalapack.cc:216
void load_serial(const std::string &filename)
size_type m() const
std::vector< NumberType > eigenpairs_symmetric_by_index_MRRR(const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors)
std::vector< int > ipiv
Definition scalapack.h:950
NumberType reciprocal_condition_number(const NumberType a_norm) const
LAPACKSupport::State state
Definition scalapack.h:886
void TmTmult(ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const
Threads::Mutex mutex
Definition scalapack.h:985
std::vector< NumberType > eigenpairs_symmetric_by_index(const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors)
unsigned int global_column(const unsigned int loc_column) const
Definition scalapack.cc:516
void copy_to(FullMatrix< NumberType > &matrix) const
Definition scalapack.cc:670
unsigned int global_row(const unsigned int loc_row) const
Definition scalapack.cc:499
void compute_cholesky_factorization()
void copy_transposed(const ScaLAPACKMatrix< NumberType > &B)
Definition scalapack.cc:984
NumberType linfty_norm() const
void scale_columns(const InputVector &factors)
AlignedVector< NumberType > values
Definition table.h:795
size_type size(const unsigned int i) const
void reinit(const size_type size1, const size_type size2, const bool omit_default_initialization=false)
void send_to_inactive(NumberType *value, const int count=1) const
const unsigned int this_mpi_process
#define DEAL_II_NAMESPACE_OPEN
Definition config.h:40
constexpr bool running_in_debug_mode()
Definition config.h:78
#define DEAL_II_NAMESPACE_CLOSE
Definition config.h:41
DerivativeForm< 1, spacedim, dim, Number > transpose(const DerivativeForm< 1, dim, spacedim, Number > &DF)
#define DEAL_II_ASSERT_UNREACHABLE()
#define DEAL_II_NOT_IMPLEMENTED()
static ::ExceptionBase & ExcIO()
static ::ExceptionBase & ExcErrorCode(std::string arg1, types::blas_int arg2)
#define Assert(cond, exc)
#define AssertIsFinite(number)
#define AssertThrowMPI(error_code)
#define AssertIndexRange(index, range)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNeedsHDF5()
static ::ExceptionBase & ExcIndexRange(std::size_t arg1, std::size_t arg2, std::size_t arg3)
static ::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)
static ::ExceptionBase & ExcMessage(std::string arg1)
#define AssertThrow(cond, exc)
@ cholesky
Contents is a Cholesky decomposition.
@ lu
Contents is an LU decomposition.
@ matrix
Contents is actually a matrix.
@ unusable
Contents is something useless.
@ inverse_matrix
Contents is the inverse of a matrix.
@ svd
Matrix contains singular value decomposition,.
@ inverse_svd
Matrix is the inverse of a singular value decomposition.
@ eigenvalues
Eigenvalue vector is filled.
@ symmetric
Matrix is symmetric.
@ hessenberg
Matrix is in upper Hessenberg form.
@ diagonal
Matrix is diagonal.
@ upper_triangular
Matrix is upper triangular.
@ lower_triangular
Matrix is lower triangular.
@ general
No special properties.
@ scalapack_copy_from
ScaLAPACKMatrix<NumberType>::copy_from.
Definition mpi_tags.h:121
@ scalapack_copy_to2
ScaLAPACKMatrix<NumberType>::copy_to.
Definition mpi_tags.h:119
@ scalapack_copy_to
ScaLAPACKMatrix<NumberType>::copy_to.
Definition mpi_tags.h:117
T sum(const T &t, const MPI_Comm mpi_communicator)
unsigned int n_mpi_processes(const MPI_Comm mpi_communicator)
Definition mpi.cc:105
T max(const T &t, const MPI_Comm mpi_communicator)
T min(const T &t, const MPI_Comm mpi_communicator)
unsigned int this_mpi_process(const MPI_Comm mpi_communicator)
Definition mpi.cc:120
const MPI_Datatype mpi_type_id_for_type
Definition mpi.h:1662
void free_communicator(MPI_Comm mpi_communicator)
Definition mpi.cc:167
constexpr unsigned int invalid_unsigned_int
Definition types.h:238
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
hid_t hdf5_type_id(const number *)
Definition scalapack.cc:39
std::array< std::pair< Number, Tensor< 1, dim, Number > >, dim > eigenvectors(const SymmetricTensor< 2, dim, Number > &T, const SymmetricTensorEigenvectorMethod method=SymmetricTensorEigenvectorMethod::ql_implicit_shifts)