TABLE OF CONTENTS
- ABINIT/m_slk
- m_slk/basemat_t
- m_slk/block_dist_1d
- m_slk/build_processor_scalapack
- m_slk/compute_eigen1
- m_slk/compute_eigen2
- m_slk/compute_eigen_problem
- m_slk/compute_generalized_eigen_problem
- m_slk/descript_scalapack
- m_slk/glob_loc__
- m_slk/grid_init
- m_slk/grid_scalapack
- m_slk/idx_loc
- m_slk/init_matrix_scalapack
- m_slk/loc_glob__
- m_slk/locmem_mb
- m_slk/matrix_from_complexmatrix
- m_slk/matrix_from_global
- m_slk/matrix_from_global_sym
- m_slk/matrix_from_realmatrix
- m_slk/matrix_get_local_cplx
- m_slk/matrix_get_local_real
- m_slk/matrix_scalapack
- m_slk/matrix_scalapack_copy
- m_slk/matrix_scalapack_free
- m_slk/matrix_set_local_cplx
- m_slk/matrix_set_local_real
- m_slk/matrix_to_complexmatrix
- m_slk/matrix_to_global
- m_slk/matrix_to_realmatrix
- m_slk/matrix_to_reference
- m_slk/my_locc
- m_slk/my_locr
- m_slk/processor_free
- m_slk/processor_init
- m_slk/processor_scalapack
- m_slk/slk_array1_free
- m_slk/slk_array2_free
- m_slk/slk_array3_free
- m_slk/slk_array4_free
- m_slk/slk_array_locmem_mb
- m_slk/slk_array_set
- m_slk/slk_bsize_and_type
- m_slk/slk_change_size_blocs
- m_slk/slk_collect_cplx
- m_slk/slk_cut
- m_slk/slk_get_head_and_wings
- m_slk/slk_get_trace
- m_slk/slk_glob2loc
- m_slk/slk_has_elpa
- m_slk/slk_heev
- m_slk/slk_hpd_invert
- m_slk/slk_invert
- m_slk/slk_matrix_from_global_dpc_1Dp
- m_slk/slk_matrix_from_global_dpc_2D
- m_slk/slk_matrix_loc2gcol
- m_slk/slk_matrix_loc2glob
- m_slk/slk_matrix_loc2grow
- m_slk/slk_matrix_to_global_dpc_2D
- m_slk/slk_pgemm_dp
- m_slk/slk_pgemm_sp
- m_slk/slk_ptrans
- m_slk/slk_pzheevx
- m_slk/slk_pzhegvx
- m_slk/slk_read
- m_slk/slk_set_head_and_wings
- m_slk/slk_set_imag_diago_to_zero
- m_slk/slk_single_fview_read
- m_slk/slk_single_fview_read_mask
- m_slk/slk_single_fview_write
- m_slk/slk_symmetrize
- m_slk/slk_take_from
- m_slk/slk_write
- m_slk/slkmat_check_shape
- m_slk/slkmat_print
- m_slk/slkmat_sp_collect_cplx
- m_slk/slkmat_sp_copy
- m_slk/slkmat_sp_heev
- m_slk/slkmat_sp_hpd_invert
- m_slk/slkmat_sp_ptrans
- m_slk/slkmat_sp_set_head_and_wings
- m_slk/slkmat_sp_t
- m_slk/slkmat_sp_take_from
- m_slk/solve_gevp_complex
ABINIT/m_slk [ Modules ]
NAME
m_slk
FUNCTION
High-level objects and wrappers around the ScaLAPACK and ELPA API.
COPYRIGHT
Copyright (C) 2004-2024 ABINIT group (CS,GZ,FB,MG,MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_slk 23 24 use defs_basis 25 use m_xmpi 26 use m_errors 27 use m_abicore 28 #ifdef HAVE_LINALG_ELPA 29 use m_elpa 30 #endif 31 #ifdef HAVE_MPI2 32 use mpi 33 #endif 34 35 use m_fstrings, only : firstchar, toupper, itoa, sjoin, ltoa 36 use m_time, only : cwtime, cwtime_report 37 use m_numeric_tools, only : blocked_loop !, print_arr 38 39 implicit none 40 41 #ifdef HAVE_MPI1 42 include 'mpif.h' 43 #endif 44 45 private 46 47 ! scaLAPACK array descriptor. 48 integer,private,parameter :: DLEN_ = 9 ! length 49 integer,private,parameter :: Dtype_ = 1 ! type 50 integer,private,parameter :: CTXT_ = 2 ! BLACS context 51 integer,private,parameter :: M_ = 3 ! nb global lines 52 integer,private,parameter :: N_ = 4 ! nb global columns 53 integer,private,parameter :: MB_ = 5 ! nb lines of a block 54 integer,private,parameter :: NB_ = 6 ! nb columns of a block 55 integer,private,parameter :: RSRC_ = 7 ! line of processors at the beginning 56 integer,private,parameter :: CSRC_ = 8 ! column of processors at the beginning 57 integer,private,parameter :: LLD_ = 9 ! local number of lines
m_slk/basemat_t [ Types ]
NAME
basemat_t
FUNCTION
Base class for scalapack matrices
SOURCE
150 type,private :: basemat_t 151 152 integer :: sizeb_local(2) = -1 153 ! dimensions of the local buffer 154 155 integer :: sizeb_global(2) = -1 156 ! dimensions of the global matrix 157 158 integer :: sizeb_blocs(2) = -1 159 ! size of the block of consecutive data 160 161 integer :: istwf_k = -1 162 163 type(processor_scalapack),pointer :: processor => null() 164 165 type(descript_scalapack) :: descript 166 167 contains 168 169 procedure :: init => init_matrix_scalapack 170 ! Constructor 171 172 procedure :: glob2loc => slk_glob2loc 173 ! Determine the local indices of an element from its global indices and return haveit bool flag. 174 175 procedure :: loc2glob => slk_matrix_loc2glob 176 ! Return global indices of a matrix element from the local indices. 177 178 procedure :: loc2grow => slk_matrix_loc2grow 179 ! Determine the global row index from the local index 180 181 procedure :: loc2gcol => slk_matrix_loc2gcol 182 ! Determine the global column index from the local index 183 184 procedure :: locmem_mb => locmem_mb 185 ! Return memory allocated for the local buffer in Mb. 186 187 procedure :: print => slkmat_print 188 ! Print info on the object. 189 190 procedure :: check_local_shape => slkmat_check_local_shape 191 ! Debugging tool to test the local shape `lshape` of the local buffer. 192 193 procedure :: free => matrix_scalapack_free 194 ! Free memory 195 196 procedure :: change_size_blocs => slk_change_size_blocs 197 ! Change the block sizes, return new object. 198 199 procedure :: get_trace => slk_get_trace 200 ! Compute the trace of an N-by-N distributed matrix. 201 202 procedure :: set_imag_diago_to_zero => slk_set_imag_diago_to_zero 203 ! Set the imaginary part of the diagonal to zero. 204 205 procedure :: invert => slk_invert 206 ! Inverse of a complex matrix. 207 208 end type basemat_t
m_slk/block_dist_1d [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
block_dist_1d
FUNCTION
Return block size for one-dimensional block column (row) distribution. Mainly used to assign blocks of contiguous columns (rows) of a matrix to successive processes when a 1d grid is employed. It is usually interfaced with CPP macros, e.g: ABI_CHECK(block_dist_1d(mat_size, nproc, block_size, msg), msg)
INPUTS
mat_size=Size of the matrix (either number of rows or number of colums) nproc=Number of processoes in the 1D scalapack grid
OUTPUT
ok= Boolean flag with exit status (idle processes are not allowed). block_size=Size of the block along this axis needed for one-dimensional block distribution msg=Error message (if not ok)
SOURCE
1327 logical function block_dist_1d(mat_size, nproc, block_size, msg) result (ok) 1328 1329 !Arguments ------------------------------------ 1330 integer, intent(in) :: mat_size, nproc 1331 integer,intent(out) :: block_size 1332 character(len=*),intent(out) :: msg 1333 1334 ! ********************************************************************* 1335 1336 ok = .True.; msg = "" 1337 !block_size = 1; return 1338 1339 block_size = mat_size / nproc 1340 if (block_size == 0) then 1341 ok = .False. 1342 write(msg, "(2(a,i0), 2a)") & 1343 "The number of MPI processors: ", nproc, " exceeeds the number of rows (columms) of the matrix: ", mat_size, ch10, & 1344 "Decrease the number of MPI processes for the scalapack level." 1345 return 1346 end if 1347 1348 if (mod(mat_size, nproc) /= 0) block_size = block_size + 1 1349 1350 end function block_dist_1d
m_slk/build_processor_scalapack [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
build_processor_scalapack
FUNCTION
Builds a ScaLAPACK processor descriptor. Build of the data related to one processor in a grid
INPUTS
grid= array representing the grid of processors. myproc= selected processor comm= MPI communicator
OUTPUT
processor= descriptor of a processor
SOURCE
470 subroutine build_processor_scalapack(processor, grid, myproc, comm) 471 472 !Arguments ------------------------------------ 473 class(processor_scalapack),intent(inout) :: processor 474 integer,intent(in) :: myproc,comm 475 class(grid_scalapack),intent(in) :: grid 476 477 ! ********************************************************************* 478 479 processor%grid = grid 480 processor%myproc = myproc 481 processor%comm = comm 482 483 #ifdef HAVE_LINALG_SCALAPACK 484 call BLACS_GRIDINFO(grid%ictxt, processor%grid%dims(1), processor%grid%dims(2), & 485 processor%coords(1), processor%coords(2)) 486 #endif 487 488 ! These values are the same as those computed by BLACS_GRIDINFO 489 ! except in the case where the myproc argument is not the local proc 490 processor%coords(1) = INT((myproc) / grid%dims(2)) 491 processor%coords(2) = MOD((myproc), grid%dims(2)) 492 493 end subroutine build_processor_scalapack
m_slk/compute_eigen1 [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
compute_eigen1
FUNCTION
Calculation of eigenvalues and eigenvectors. complex and real cases.
INPUTS
comm= MPI communicator cplex=1 if matrix is real, 2 if complex nbli_global number of lines nbco_global number of columns matrix= the matrix to process vector= eigenvalues of the matrix istwf_k= 2 if we have a real matrix else complex. [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)
OUTPUT
vector
SIDE EFFECTS
results= ScaLAPACK matrix coming out of the operation eigen= eigenvalues of the matrix
SOURCE
3252 subroutine compute_eigen1(comm,processor,cplex,nbli_global,nbco_global,matrix,vector,istwf_k,& 3253 & use_gpu_elpa) ! Optional argument 3254 3255 !Arguments ------------------------------------ 3256 !scalaras 3257 integer,intent(in) :: comm 3258 integer,intent(in) :: cplex,nbli_global,nbco_global 3259 integer,intent(in) :: istwf_k 3260 class(processor_scalapack),intent(in) :: processor 3261 integer,intent(in),optional :: use_gpu_elpa 3262 !arrays 3263 real(dp),intent(inout) :: matrix(cplex*nbli_global,nbco_global) 3264 real(dp),intent(inout) :: vector(:) 3265 3266 !Local variables------------------------------- 3267 #ifdef HAVE_LINALG_ELPA 3268 integer :: i,j 3269 #endif 3270 integer :: ierr,use_gpu_elpa_ 3271 type(matrix_scalapack) :: sca_matrix1 3272 type(matrix_scalapack) :: sca_matrix2 3273 real(dp),allocatable :: r_tmp_evec(:,:) 3274 complex(dpc),allocatable :: z_tmp_evec(:,:) 3275 3276 ! ************************************************************************* 3277 3278 use_gpu_elpa_=0 3279 #ifdef HAVE_LINALG_ELPA 3280 if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa 3281 #endif 3282 3283 ! ================================ 3284 ! INITIALISATION SCALAPACK MATRIX 3285 ! ================================ 3286 call sca_matrix1%init(nbli_global,nbco_global,processor,istwf_k) 3287 call sca_matrix2%init(nbli_global,nbco_global,processor,istwf_k) 3288 3289 ! ============================== 3290 ! FILLING SCALAPACK MATRIX 3291 ! ============================== 3292 if ( istwf_k /= 2 ) then 3293 ABI_CHECK_IEQ(cplex, 2, "cplex != 2") 3294 ABI_MALLOC(z_tmp_evec,(nbli_global,nbco_global)) 3295 z_tmp_evec=cmplx(0._DP,0._DP) 3296 #ifdef HAVE_LINALG_ELPA 3297 ! The full matrix must be set (not only one half like in scalapack). 3298 do j=1,nbco_global 3299 do i=j+1,nbli_global 3300 matrix(2*(i-1)+1,j) = matrix(2*(j-1)+1,i) 3301 matrix(2*(i-1)+2,j) = -matrix(2*(j-1)+2,i) 3302 end do 3303 end do 3304 #endif 3305 call matrix_from_complexmatrix(sca_matrix1,matrix,istwf_k) 3306 else 3307 ABI_CHECK_IEQ(cplex, 1, "cplex != 2") 3308 ABI_MALLOC(r_tmp_evec,(nbli_global,nbco_global)) 3309 r_tmp_evec(:,:)=0._DP 3310 #ifdef HAVE_LINALG_ELPA 3311 ! The full matrix must be set (not only one half like in scalapack). 3312 do j=1,nbco_global 3313 do i=j+1,nbli_global 3314 matrix(i,j) = matrix(j,i) 3315 end do 3316 end do 3317 #endif 3318 call matrix_from_realmatrix(sca_matrix1,matrix,istwf_k) 3319 endif 3320 3321 ! ================================ 3322 ! COMPUTE EIGEN VALUES AND VECTORS : A * X = lambda * X 3323 ! ================================ 3324 call compute_eigen_problem(processor,sca_matrix1, sca_matrix2,vector, comm,istwf_k, & 3325 & use_gpu_elpa=use_gpu_elpa_) 3326 3327 ! ============================== 3328 ! CONCATENATE EIGEN VECTORS 3329 ! ============================== 3330 #ifdef HAVE_MPI 3331 if (istwf_k /= 2) then 3332 call matrix_to_complexmatrix(sca_matrix2,z_tmp_evec,istwf_k) 3333 call MPI_ALLREDUCE(z_tmp_evec, matrix, nbli_global*nbco_global, MPI_DOUBLE_complex, MPI_SUM,comm,ierr) 3334 else 3335 call matrix_to_realmatrix(sca_matrix2,r_tmp_evec,istwf_k) 3336 call MPI_ALLREDUCE(r_tmp_evec, matrix, nbli_global*nbco_global, MPI_DOUBLE_PRECISION, MPI_SUM,comm,ierr) 3337 endif 3338 #endif 3339 3340 ! ==================================== 3341 ! DESTRUCTION SCALAPACK AND TMP MATRICES 3342 ! ==================================== 3343 call sca_matrix1%free() 3344 call sca_matrix2%free() 3345 3346 ABI_SFREE(z_tmp_evec) 3347 ABI_SFREE(r_tmp_evec) 3348 3349 #ifndef HAVE_LINALG_ELPA 3350 ABI_UNUSED(use_gpu_elpa) 3351 #endif 3352 3353 end subroutine compute_eigen1
m_slk/compute_eigen2 [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
compute_eigen2
FUNCTION
Calculation of eigenvalues and eigenvectors: A * X = lambda * B * X complex and real cases.
INPUTS
comm= MPI communicator cplex=1 if matrix is real, 2 if complex nbli_global number of lines nbco_global number of columns matrix1= first ScaLAPACK matrix (matrix A) matrix2= second ScaLAPACK matrix (matrix B) vector= istwf_k= 2 if we have a real matrix else complex. [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)
SIDE EFFECTS
results= ScaLAPACK matrix coming out of the operation eigen= eigenvalues of the matrix
SOURCE
3383 subroutine compute_eigen2(comm,processor,cplex,nbli_global,nbco_global,matrix1,matrix2,vector,istwf_k, & 3384 & use_gpu_elpa) ! Optional argument 3385 3386 !Arguments ------------------------------------ 3387 !scalars 3388 integer,intent(in) :: cplex,nbli_global,nbco_global 3389 integer,intent(in) :: comm 3390 integer,intent(in) :: istwf_k 3391 class(processor_scalapack),intent(in) :: processor 3392 integer,optional,intent(in) :: use_gpu_elpa 3393 !arrays 3394 real(dp),intent(inout) :: matrix1(cplex*nbli_global,nbco_global) 3395 real(dp),intent(inout) :: matrix2(cplex*nbli_global,nbco_global) 3396 real(dp),intent(inout) :: vector(:) 3397 3398 !Local variables------------------------------- 3399 #ifdef HAVE_LINALG_ELPA 3400 integer :: i,j 3401 #endif 3402 integer :: ierr,use_gpu_elpa_ 3403 type(matrix_scalapack) :: sca_matrix1, sca_matrix2, sca_matrix3 3404 real(dp),allocatable :: r_tmp_evec(:,:) 3405 complex(dpc),allocatable :: z_tmp_evec(:,:) 3406 3407 ! ************************************************************************* 3408 3409 use_gpu_elpa_=0 3410 #if defined HAVE_LINALG_ELPA 3411 if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa 3412 #endif 3413 3414 ! ================================ 3415 ! INITIALISATION SCALAPACK MATRIX 3416 ! ================================ 3417 call sca_matrix1%init(nbli_global,nbco_global,processor,istwf_k) 3418 call sca_matrix2%init(nbli_global,nbco_global,processor,istwf_k) 3419 call sca_matrix3%init(nbli_global,nbco_global,processor,istwf_k) 3420 3421 ! ============================== 3422 ! FILLING SCALAPACK MATRIX 3423 ! ============================== 3424 if ( istwf_k /= 2 ) then 3425 ABI_CHECK_IEQ(cplex, 2, "cplex != 2") 3426 ABI_MALLOC(z_tmp_evec,(nbli_global,nbco_global)) 3427 z_tmp_evec=cmplx(0._DP,0._DP) 3428 #ifdef HAVE_LINALG_ELPA 3429 ! The full matrix must be set (not only one half like in scalapack). 3430 do j=1,nbco_global 3431 do i=j+1,nbli_global 3432 matrix1(2*(i-1)+1,j) = matrix1(2*(j-1)+1,i) 3433 matrix1(2*(i-1)+2,j) = -matrix1(2*(j-1)+2,i) 3434 matrix2(2*(i-1)+1,j) = matrix2(2*(j-1)+1,i) 3435 matrix2(2*(i-1)+2,j) = -matrix2(2*(j-1)+2,i) 3436 end do 3437 end do 3438 #endif 3439 call matrix_from_complexmatrix(sca_matrix1,matrix1,istwf_k) 3440 call matrix_from_complexmatrix(sca_matrix2,matrix2,istwf_k) 3441 else 3442 ABI_CHECK_IEQ(cplex, 1, "cplex != 1") 3443 ABI_MALLOC(r_tmp_evec,(nbli_global,nbco_global)) 3444 r_tmp_evec(:,:)=0._DP 3445 #ifdef HAVE_LINALG_ELPA 3446 ! The full matrix must be set (not only one half like in scalapack). 3447 do j=1,nbco_global 3448 do i=j+1,nbli_global 3449 matrix1(i,j) = matrix1(j,i) 3450 matrix2(i,j) = matrix2(j,i) 3451 end do 3452 end do 3453 #endif 3454 call matrix_from_realmatrix(sca_matrix1,matrix1,istwf_k) 3455 call matrix_from_realmatrix(sca_matrix2,matrix2,istwf_k) 3456 endif 3457 3458 ! ================================ 3459 ! COMPUTE EIGEN VALUES AND VECTORS : A * X = lambda * B * X 3460 ! ================================ 3461 call compute_generalized_eigen_problem(processor,sca_matrix1,sca_matrix2,& 3462 & sca_matrix3,vector,comm,istwf_k,use_gpu_elpa=use_gpu_elpa_) 3463 3464 ! ============================== 3465 ! CONCATENATE EIGEN VECTORS 3466 ! ============================== 3467 #ifdef HAVE_MPI 3468 if ( istwf_k /= 2 ) then 3469 call matrix_to_complexmatrix(sca_matrix3,z_tmp_evec,istwf_k) 3470 call MPI_ALLREDUCE(z_tmp_evec, matrix1, nbli_global*nbco_global, MPI_DOUBLE_complex,& 3471 & MPI_SUM,comm,ierr) 3472 else 3473 call matrix_to_realmatrix(sca_matrix3,r_tmp_evec,istwf_k) 3474 call MPI_ALLREDUCE(r_tmp_evec, matrix1, nbli_global*nbco_global, MPI_DOUBLE_PRECISION,& 3475 & MPI_SUM,comm,ierr) 3476 endif 3477 #endif 3478 3479 ! ==================================== 3480 ! DESTRUCTION SCALAPACK AND TMP MATRICES 3481 ! ==================================== 3482 call sca_matrix1%free() 3483 call sca_matrix2%free() 3484 call sca_matrix3%free() 3485 3486 3487 #ifndef HAVE_LINALG_ELPA 3488 ABI_UNUSED(use_gpu_elpa) 3489 #endif 3490 3491 end subroutine compute_eigen2
m_slk/compute_eigen_problem [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
compute_eigen_problem
FUNCTION
Calculation of eigenvalues and eigenvectors: A * X = lambda * X, complex and real cases.
INPUTS
processor= descriptor of a processor matrix= the matrix to process comm= MPI communicator istwf_k= 2 if we have a real matrix else complex. [nev]= Number of eigenvalues needed. Default: full set [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)
OUTPUT
results= ScaLAPACK matrix coming out of the operation (global dimensions must be equal to matrix even if only a part of the eigenvectors is needed. eigen= eigenvalues of the matrix dimensioned as the global size of the square matrix even if only a part of the eigenvalues is needed.
SOURCE
2660 subroutine compute_eigen_problem(processor, matrix, results, eigen, comm, istwf_k, & 2661 & nev, use_gpu_elpa) ! Optional arguments 2662 2663 #ifdef HAVE_LINALG_ELPA 2664 !Arguments ------------------------------------ 2665 class(processor_scalapack),intent(in) :: processor 2666 class(matrix_scalapack),intent(inout) :: matrix 2667 class(matrix_scalapack),intent(inout) :: results 2668 DOUBLE PRECISION,intent(inout) :: eigen(:) 2669 integer,intent(in) :: comm,istwf_k 2670 integer,optional,intent(in) :: nev 2671 integer,optional,intent(in) :: use_gpu_elpa 2672 2673 !Local variables ------------------------------ 2674 type(elpa_hdl_t) :: elpa_hdl 2675 integer :: nev__,use_gpu_elpa_ 2676 2677 !************************************************************************ 2678 2679 nev__ = matrix%sizeb_global(1); if (present(nev)) nev__ = nev 2680 use_gpu_elpa_=0 2681 #ifdef HAVE_LINALG_ELPA 2682 if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa 2683 #endif 2684 2685 call elpa_func_allocate(elpa_hdl,gpu=use_gpu_elpa_) 2686 call elpa_func_set_matrix(elpa_hdl,matrix%sizeb_global(1),matrix%sizeb_blocs(1),nev__,& 2687 & matrix%sizeb_local(1),matrix%sizeb_local(2)) 2688 call elpa_func_get_communicators(elpa_hdl,processor%comm,processor%coords(1),processor%coords(2)) 2689 2690 if (istwf_k/=2) then 2691 call elpa_func_solve_evp_1stage(elpa_hdl,matrix%buffer_cplx,results%buffer_cplx,eigen,nev__) 2692 else 2693 call elpa_func_solve_evp_1stage(elpa_hdl,matrix%buffer_real,results%buffer_real,eigen,nev__) 2694 end if 2695 2696 call elpa_func_deallocate(elpa_hdl) 2697 2698 #else 2699 !Arguments ------------------------------------ 2700 class(processor_scalapack),intent(in) :: processor 2701 class(matrix_scalapack),intent(in) :: matrix 2702 class(matrix_scalapack),intent(inout) :: results 2703 DOUBLE PRECISION,intent(inout) :: eigen(:) 2704 integer,intent(in) :: comm,istwf_k 2705 integer,optional,intent(in) :: nev 2706 integer,optional,intent(in) :: use_gpu_elpa 2707 2708 #ifdef HAVE_LINALG_SCALAPACK 2709 !Local variables------------------------------- 2710 integer :: LRWORK,LIWORK,LCWORK,INFO 2711 character(len=500) :: msg 2712 2713 integer , dimension(1) :: IWORK_tmp 2714 DOUBLE PRECISION, dimension(1) :: RWORK_tmp 2715 complex(dpc) , dimension(1) :: CWORK_tmp 2716 2717 integer , allocatable :: IWORK(:) 2718 DOUBLE PRECISION, allocatable :: RWORK(:) 2719 complex(dpc) , allocatable :: CWORK(:) 2720 2721 integer, allocatable :: ICLUSTR(:) 2722 integer, allocatable :: IFAIL(:) 2723 DOUBLE PRECISION, allocatable :: GAP(:) 2724 2725 DOUBLE PRECISION :: ABSTOL,ORFAC 2726 integer, parameter :: IZERO=0 2727 2728 integer :: M,NZ,ierr,TWORK_tmp(3),TWORK(3) ! IA,JA,IZ,JZ, 2729 integer :: nev__, il, iu 2730 character(len=1) :: range 2731 2732 DOUBLE PRECISION, external :: PDLAMCH 2733 2734 ! ************************************************************************* 2735 2736 ABI_UNUSED(use_gpu_elpa) ! No GPU implementation is using scaLAPACK 2737 nev__ = matrix%sizeb_global(1); range = "A"; il = 0; iu = 0 2738 if (present(nev)) then 2739 nev__ = nev; range = "I"; il = 1; iu = nev 2740 end if 2741 2742 ! Initialisation 2743 INFO = 0 2744 ABSTOL = zero 2745 ORFAC = -1.D+0 2746 2747 ! Allocation of the variables for the results of the calculations 2748 ABI_MALLOC(IFAIL,(matrix%sizeb_global(2))) 2749 ABI_MALLOC(ICLUSTR,(2*processor%grid%dims(1)*processor%grid%dims(2))) 2750 ABI_MALLOC(GAP,(processor%grid%dims(1)*processor%grid%dims(2))) 2751 2752 ! Get the size of the work arrays 2753 if (istwf_k/=2) then 2754 call PZHEEVX('V', range, 'U',& 2755 & matrix%sizeb_global(2),& 2756 & matrix%buffer_cplx,1,1,matrix%descript%tab, & 2757 & ZERO,ZERO,il,iu,ABSTOL,& 2758 & m,nz,eigen,ORFAC, & 2759 & results%buffer_cplx,1,1,results%descript%tab, & 2760 & CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,& 2761 & IFAIL,ICLUSTR,GAP,INFO) 2762 else 2763 call PDSYEVX('V', range, 'U',& 2764 & matrix%sizeb_global(2),& 2765 & matrix%buffer_real,1,1,matrix%descript%tab, & 2766 & ZERO,ZERO,il,iu,ABSTOL,& 2767 & m,nz,eigen,ORFAC, & 2768 & results%buffer_real,1,1,results%descript%tab, & 2769 & RWORK_tmp,-1,IWORK_tmp,-1,& 2770 & IFAIL,ICLUSTR,GAP,INFO) 2771 end if 2772 2773 if (INFO/=0) then 2774 write(msg,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO 2775 ABI_ERROR(msg) 2776 endif 2777 2778 TWORK_tmp(1) = IWORK_tmp(1) 2779 TWORK_tmp(2) = INT(RWORK_tmp(1)) 2780 TWORK_tmp(3) = INT(real(CWORK_tmp(1))) 2781 2782 ! Get the maximum of the size of the work arrays processor%comm 2783 call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_integer,MPI_MAX,comm,ierr) 2784 2785 LIWORK = TWORK(1) 2786 LRWORK = TWORK(2) + matrix%sizeb_global(2) *(matrix%sizeb_global(2)-1) 2787 LCWORK = TWORK(3) 2788 2789 ! Allocation of the work arrays 2790 if (LIWORK>0) then 2791 ABI_MALLOC(IWORK,(LIWORK)) 2792 IWORK(:) = 0 2793 else 2794 ABI_MALLOC(IWORK,(1)) 2795 end if 2796 if (LRWORK>0) then 2797 ABI_MALLOC(RWORK,(LRWORK)) 2798 RWORK(:) = 0._dp 2799 else 2800 ABI_MALLOC(RWORK,(1)) 2801 end if 2802 if (LCWORK>0) then 2803 ABI_MALLOC(CWORK,(LCWORK)) 2804 CWORK(:) = (0._dp,0._dp) 2805 else 2806 ABI_MALLOC(CWORK,(1)) 2807 end if 2808 2809 ! prototype 2810 !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, 2811 ! orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info) 2812 2813 ! Call the calculation routine 2814 if (istwf_k/=2) then 2815 ! write(std_out,*) 'I am using PZHEEVX' 2816 call PZHEEVX('V', range, 'U',& 2817 matrix%sizeb_global(2),& 2818 matrix%buffer_cplx,1,1,matrix%descript%tab, & 2819 ZERO,ZERO,il,iu,ABSTOL,& 2820 m,nz,eigen,ORFAC, & 2821 results%buffer_cplx,1,1,results%descript%tab, & 2822 CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,& 2823 IFAIL,ICLUSTR,GAP,INFO) 2824 else 2825 ! write(std_out,*) ' I am using PDSYEVX' 2826 call PDSYEVX('V', range, 'U',& 2827 matrix%sizeb_global(2),& 2828 matrix%buffer_real,1,1,matrix%descript%tab, & 2829 ZERO,ZERO,il,iu,ABSTOL,& 2830 m,nz,eigen,ORFAC, & 2831 results%buffer_real,1,1,results%descript%tab, & 2832 RWORK,LRWORK,IWORK,LIWORK,& 2833 IFAIL,ICLUSTR,GAP,INFO) 2834 endif 2835 2836 ! MG: TODO: Recheck the computation of the workspace as I got INFO 2 with a 5x5x5 si supercell. 2837 if (INFO/=0) then 2838 write(msg,'(A,I0)') "Problem to compute eigenvalues and eigenvectors with ScaLAPACK, INFO=",INFO 2839 ABI_ERROR(msg) 2840 endif 2841 2842 ABI_FREE(IFAIl) 2843 ABI_FREE(ICLUSTR) 2844 ABI_FREE(GAP) 2845 2846 ABI_SFREE(IWORK) 2847 ABI_SFREE(RWORK) 2848 ABI_SFREE(CWORK) 2849 #endif 2850 #endif 2851 return 2852 2853 end subroutine compute_eigen_problem
m_slk/compute_generalized_eigen_problem [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
compute_generalized_eigen_problem
FUNCTION
Calculation of eigenvalues and eigenvectors of the generalized eigenvalue problem: A * X = lambda * B X complex and real cases.
INPUTS
processor= descriptor of a processor matrix1= A matrix matrix2= B matrix comm= MPI communicator istwf_k= 2 if we have a real matrix else complex. [nev]= Number of eigenvalues needed. Default: full set [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)
OUTPUT
results= ScaLAPACK matrix coming out of the operation (global dimensions must be equal to matrix even if only a part of the eigenvectors is needed. eigen= eigenvalues of the matrix dimensioned as the global size of the square matrix even if only a part of the eigenvalues is needed.
SOURCE
3027 subroutine compute_generalized_eigen_problem(processor,matrix1,matrix2,results,eigen,comm,istwf_k,& 3028 & nev,use_gpu_elpa) ! Optional arguments 3029 3030 #ifdef HAVE_LINALG_ELPA 3031 !Arguments ------------------------------------ 3032 class(processor_scalapack),intent(in) :: processor 3033 class(matrix_scalapack),intent(in) :: matrix1,matrix2 3034 class(matrix_scalapack),intent(inout) :: results 3035 DOUBLE PRECISION,intent(inout) :: eigen(:) 3036 integer,intent(in) :: comm,istwf_k 3037 integer,optional,intent(in) :: nev 3038 integer,optional,intent(in) :: use_gpu_elpa 3039 !Local 3040 type(matrix_scalapack) :: tmp1, tmp2 3041 integer :: i,n_col, n_row, nev__,use_gpu_elpa__ 3042 integer,external :: indxl2g,numroc 3043 3044 nev__ = matrix1%sizeb_global(2); if (present(nev)) nev__ = nev 3045 use_gpu_elpa__ = 0 3046 #ifdef HAVE_LINALG_ELPA 3047 if (present(use_gpu_elpa)) use_gpu_elpa__ = use_gpu_elpa 3048 #endif 3049 3050 call tmp1%init(matrix1%sizeb_global(1),matrix1%sizeb_global(2),processor,istwf_k) 3051 call tmp2%init(matrix1%sizeb_global(1),matrix1%sizeb_global(2),processor,istwf_k) 3052 3053 if (istwf_k/=2) then 3054 call solve_gevp_complex(matrix1%sizeb_global(1), nev__, & 3055 & matrix1%sizeb_local(1),matrix1%sizeb_local(2),matrix1%sizeb_blocs(1), & 3056 & matrix1%buffer_cplx,matrix2%buffer_cplx,eigen,results%buffer_cplx, & 3057 & tmp1%buffer_cplx,tmp2%buffer_cplx, & 3058 & processor%coords(1),processor%coords(2), & 3059 & processor%grid%dims(1),processor%grid%dims(2), & 3060 & matrix1%descript%tab,processor%comm,use_gpu_elpa=use_gpu_elpa__) 3061 else 3062 call solve_gevp_real(matrix1%sizeb_global(1), nev__, & 3063 & matrix1%sizeb_local(1),matrix1%sizeb_local(2),matrix1%sizeb_blocs(1), & 3064 & matrix1%buffer_real,matrix2%buffer_real,eigen,results%buffer_real, & 3065 & tmp1%buffer_real,tmp2%buffer_real, & 3066 & processor%coords(1),processor%coords(2), & 3067 & processor%grid%dims(1),processor%grid%dims(2), & 3068 & matrix1%descript%tab,processor%comm,use_gpu_elpa=use_gpu_elpa__) 3069 end if 3070 call tmp1%free() 3071 call tmp2%free() 3072 3073 #else 3074 !Arguments ------------------------------------ 3075 class(processor_scalapack),intent(in) :: processor 3076 class(matrix_scalapack),intent(in) :: matrix1,matrix2 3077 class(matrix_scalapack),intent(inout) :: results 3078 DOUBLE PRECISION,intent(inout) :: eigen(:) 3079 integer,intent(in) :: comm,istwf_k 3080 integer,optional,intent(in) :: nev 3081 integer,optional,intent(in) :: use_gpu_elpa 3082 3083 #ifdef HAVE_LINALG_SCALAPACK 3084 !Local variables------------------------------- 3085 integer :: LRWORK,LIWORK,LCWORK,INFO 3086 character(len=500) :: msg 3087 integer , dimension(1) :: IWORK_tmp 3088 DOUBLE PRECISION, dimension(1) :: RWORK_tmp 3089 complex(dpc) , dimension(1) :: CWORK_tmp 3090 3091 integer , allocatable :: IWORK(:) 3092 DOUBLE PRECISION, allocatable :: RWORK(:) 3093 complex(dpc) , allocatable :: CWORK(:) 3094 integer, allocatable :: ICLUSTR(:) 3095 integer, allocatable :: IFAIL(:) 3096 DOUBLE PRECISION, allocatable :: GAP(:) 3097 DOUBLE PRECISION :: ABSTOL,ORFAC 3098 integer , parameter :: IZERO=0 3099 integer :: M,NZ,ierr,TWORK_tmp(3),TWORK(3) ! IA,JA,IZ,JZ, 3100 character(len=1) :: range 3101 integer :: nev__, il, iu 3102 DOUBLE PRECISION, external :: PDLAMCH 3103 3104 ! ************************************************************************* 3105 3106 ABI_UNUSED(use_gpu_elpa) ! No GPU implementation is using scaLAPACK 3107 nev__ = matrix1%sizeb_global(2); range = "A"; il = 0; iu = 0 3108 if (present(nev)) then 3109 nev__ = nev; range = "I"; il = 1; iu = nev 3110 end if 3111 3112 ! Initialisation 3113 INFO = 0 3114 ABSTOL = zero 3115 ORFAC = -1.D+0 3116 3117 ! Allocate the arrays for the results of the calculation 3118 ABI_MALLOC(IFAIL ,(matrix1%sizeb_global(2))) 3119 ABI_MALLOC(ICLUSTR,(2*processor%grid%dims(1)*processor%grid%dims(2))) 3120 ABI_MALLOC(GAP ,( processor%grid%dims(1)*processor%grid%dims(2))) 3121 3122 ! Get the size of the work arrays 3123 if (istwf_k /= 2) then 3124 call PZHEGVX(1, 'V', range, 'U',& 3125 & matrix1%sizeb_global(2),& 3126 & matrix1%buffer_cplx,1,1,matrix1%descript%tab, & 3127 & matrix2%buffer_cplx,1,1,matrix2%descript%tab, & 3128 & ZERO,ZERO,il,iu,ABSTOL,& 3129 & m,nz,eigen,ORFAC, & 3130 & results%buffer_cplx,1,1,results%descript%tab, & 3131 & CWORK_tmp,-1,RWORK_tmp,-1,IWORK_tmp,-1,& 3132 & IFAIL,ICLUSTR,GAP,INFO) 3133 else 3134 call PDSYGVX(1,'V',range,'U',& 3135 & matrix1%sizeb_global(2),& 3136 & matrix1%buffer_real,1,1,matrix1%descript%tab, & 3137 & matrix2%buffer_real,1,1,matrix2%descript%tab, & 3138 & ZERO,ZERO,il,iu,ABSTOL,& 3139 & m,nz,eigen,ORFAC, & 3140 & results%buffer_real,1,1,results%descript%tab, & 3141 & RWORK_tmp,-1,IWORK_tmp,-1,& 3142 & IFAIL,ICLUSTR,GAP,INFO) 3143 endif 3144 3145 if (INFO/=0) then 3146 write(msg,'(A,I6)') "Problem to compute workspace to use ScaLAPACK, INFO=",INFO 3147 ABI_ERROR(msg) 3148 endif 3149 3150 TWORK_tmp(1) = IWORK_tmp(1) 3151 TWORK_tmp(2) = INT(RWORK_tmp(1)) + matrix1%sizeb_global(2) *(matrix1%sizeb_global(2)-1) 3152 TWORK_tmp(3) = INT(real(CWORK_tmp(1))) 3153 3154 ! Get the maximum of sizes of the work arrays processor%comm 3155 call MPI_ALLREDUCE(TWORK_tmp,TWORK,3,MPI_integer,MPI_MAX,comm,ierr) 3156 3157 LIWORK = TWORK(1) 3158 LRWORK = TWORK(2) 3159 LCWORK = TWORK(3) 3160 3161 ! Allocate the work arrays 3162 if (LIWORK>0) then 3163 ABI_MALLOC(IWORK,(LIWORK)) 3164 IWORK(:) = 0 3165 else 3166 ABI_MALLOC(IWORK,(1)) 3167 end if 3168 if (LRWORK>0) then 3169 ABI_MALLOC(RWORK,(LRWORK)) 3170 RWORK(:) = 0._dp 3171 else 3172 ABI_MALLOC(RWORK,(1)) 3173 end if 3174 if (LCWORK>0) then 3175 ABI_MALLOC(CWORK,(LCWORK)) 3176 CWORK(:) = (0._dp,0._dp) 3177 else 3178 ABI_MALLOC(CWORK,(1)) 3179 end if 3180 3181 ! Call the calculation routine 3182 if (istwf_k/=2) then 3183 ! write(std_out,*) 'I am using PZHEGVX' 3184 call PZHEGVX(1,'V',range,'U',& 3185 & matrix1%sizeb_global(2),& 3186 & matrix1%buffer_cplx,1,1,matrix1%descript%tab, & 3187 & matrix2%buffer_cplx,1,1,matrix2%descript%tab, & 3188 & ZERO,ZERO,il,iu,ABSTOL,& 3189 & m,nz,eigen,ORFAC, & 3190 & results%buffer_cplx,1,1,results%descript%tab, & 3191 & CWORK,LCWORK,RWORK,LRWORK,IWORK,LIWORK,& 3192 & IFAIL,ICLUSTR,GAP,INFO) 3193 else 3194 ! write(std_out,*) 'I am using PDSYGVX' 3195 call PDSYGVX(1,'V',range,'U',& 3196 & matrix1%sizeb_global(2),& 3197 & matrix1%buffer_real,1,1,matrix1%descript%tab, & 3198 & matrix2%buffer_real,1,1,matrix2%descript%tab, & 3199 & ZERO,ZERO,il,iu,ABSTOL,& 3200 & m,nz,eigen,ORFAC, & 3201 & results%buffer_real,1,1,results%descript%tab, & 3202 & RWORK,LRWORK,IWORK,LIWORK,& 3203 & IFAIL,ICLUSTR,GAP,INFO) 3204 endif 3205 3206 if (INFO/=0) then 3207 write(msg,'(A,I6)') "Problem to compute eigen problem with ScaLAPACK, INFO=",INFO 3208 ABI_ERROR(msg) 3209 endif 3210 3211 ABI_FREE(IFAIl) 3212 ABI_FREE(ICLUSTR) 3213 ABI_FREE(GAP) 3214 ABI_SFREE(IWORK) 3215 ABI_SFREE(RWORK) 3216 ABI_SFREE(CWORK) 3217 #endif 3218 #endif 3219 return 3220 3221 end subroutine compute_generalized_eigen_problem
m_slk/descript_scalapack [ Types ]
NAME
descript_scalapack
FUNCTION
ScaLAPACK matrix descriptor.
SOURCE
134 type,public :: descript_scalapack 135 integer :: tab(DLEN_) 136 end type descript_scalapack
m_slk/glob_loc__ [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
glob_loc__
FUNCTION
Returns the global location of a matrix coefficient.
INPUTS
matrix= the matrix to process idx= number of rows in the distributed matrix lico= block size index
SOURCE
1558 integer function glob_loc__(matrix, idx, lico) 1559 1560 !Arguments ------------------------------------ 1561 class(basemat_t),intent(in) :: matrix 1562 integer, intent(in) :: idx, lico 1563 1564 #ifdef HAVE_LINALG_SCALAPACK 1565 !Local variables------------------------------- 1566 integer,external :: NUMROC 1567 1568 ! ********************************************************************* 1569 1570 glob_loc__ = NUMROC(idx, matrix%sizeb_blocs(lico), & 1571 matrix%processor%coords(lico), 0, matrix%processor%grid%dims(lico)) 1572 #endif 1573 1574 end function glob_loc__
m_slk/grid_init [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
grid_init
FUNCTION
Set up the ScaLAPACKgrid given the total number of processors.
INPUTS
nbprocs= total number of processors comm= MPI communicator [grid_dims]=Number of procs for each dimension.
OUTPUT
grid= the grid of processors used by Scalapack
SOURCE
407 subroutine grid_init(grid, nbprocs, comm, use_gpu, grid_dims) 408 409 !Arguments ------------------------------------ 410 class(grid_scalapack),intent(out) :: grid 411 integer,intent(in) :: nbprocs,comm 412 logical,intent(in) :: use_gpu 413 integer,optional,intent(in) :: grid_dims(2) 414 415 !Local variables------------------------------- 416 integer :: i 417 418 ! ********************************************************************* 419 420 grid%nbprocs = nbprocs 421 422 if (.not. present(grid_dims)) then 423 ! Search for a rectangular grid of processors 424 i=INT(SQRT(float(nbprocs))) 425 do while (MOD(nbprocs,i) /= 0) 426 i = i-1 427 end do 428 i=max(i,1) 429 430 grid%dims(1) = i 431 grid%dims(2) = INT(nbprocs/i) 432 433 else 434 grid%dims = grid_dims 435 end if 436 437 ABI_CHECK(product(grid%dims) == nbprocs, sjoin("grid%dims:", ltoa(grid%dims), "does not agree with nprocs:", itoa(nbprocs))) 438 439 grid%ictxt = comm 440 grid%use_gpu = use_gpu 441 442 #ifdef HAVE_LINALG_SCALAPACK 443 ! 'R': Use row-major natural ordering 444 call BLACS_GRIDINIT(grid%ictxt, 'R', grid%dims(1), grid%dims(2)) 445 #endif 446 447 end subroutine grid_init
m_slk/grid_scalapack [ Types ]
NAME
grid_scalapack
FUNCTION
Grid of ScaLAPACK processors.
SOURCE
71 type,public :: grid_scalapack 72 73 integer :: nbprocs = -1 74 ! Total number of processors 75 76 integer :: dims(2) = -1 77 ! Number of procs for rows/columns 78 79 integer :: ictxt = xmpi_comm_null 80 ! BLACS context i.e. MPI communicator. 81 82 logical :: use_gpu = .false. 83 ! Wether GPU is used, relevant for determining matrix block size. 84 85 contains 86 procedure :: init => grid_init ! Set up the processor grid for ScaLAPACK. 87 end type grid_scalapack
m_slk/idx_loc [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
idx_loc
FUNCTION
Return local indices from global indices, **independently** of the processor.
INPUTS
matrix= the matrix to process i= row in the matrix j= column in the matrix
OUTPUT
iloc= local row of the coefficient jloc= local column of the coefficient
SOURCE
1527 subroutine idx_loc(matrix, i, j, iloc, jloc) 1528 1529 !Arguments ------------------------------------ 1530 class(basemat_t),intent(in) :: matrix 1531 integer, intent(in) :: i,j 1532 integer, intent(out) :: iloc,jloc 1533 1534 ! ********************************************************************* 1535 1536 iloc = glob_loc__(matrix, i, 1) 1537 jloc = glob_loc__(matrix, j, 2) 1538 1539 end subroutine idx_loc
m_slk/init_matrix_scalapack [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
init_matrix_scalapack
FUNCTION
Initialisation of a SCALAPACK matrix (each proc initializes its own part of the matrix)
INPUTS
processor= descriptor of a processor nbli_global= total number of lines nbco_global= total number of columns istwf_k= 2 if we have a real matrix else complex. [size_blocs]= custom block sizes. Use -1 to use global size along that direction. Useful to distribute only rows or columns. Obviously, [-1, -1] is not allowed.
OUTPUT
matrix= the matrix to process
SOURCE
591 subroutine init_matrix_scalapack(matrix, nbli_global, nbco_global, processor, istwf_k, size_blocs) 592 593 !Arguments ------------------------------------ 594 class(basemat_t),intent(inout) :: matrix 595 integer,intent(in) :: nbli_global, nbco_global, istwf_k 596 type(processor_scalapack),target,intent(in) :: processor 597 integer,intent(in),optional :: size_blocs(2) 598 599 #ifdef HAVE_LINALG_SCALAPACK 600 !Local variables------------------------------- 601 #ifdef HAVE_LINALG_ELPA 602 integer, parameter :: DEFAULT_SIZE_BLOCS = 1 603 #else 604 ! As recommended by Intel MKL, a more sensible default than the previous value of 40 605 integer, parameter :: DEFAULT_SIZE_BLOCS = 24 606 #endif 607 ! As recommanded in ELPA, which advises distributions as squared as possible using powers of 2 608 integer, parameter :: DEFAULT_SIZE_BLOCS_GPU = 16 609 integer :: info,sizeb 610 integer,external :: NUMROC 611 !character(len=500) :: msg 612 613 ! ********************************************************************* 614 615 !call matrix%free() 616 617 sizeb = DEFAULT_SIZE_BLOCS 618 if(processor%grid%use_gpu) sizeb = DEFAULT_SIZE_BLOCS_GPU 619 620 !Records of the matrix type 621 matrix%processor => processor 622 matrix%sizeb_blocs(1) = MIN(sizeb, nbli_global) 623 matrix%sizeb_blocs(2) = MIN(sizeb, nbco_global) 624 625 #ifdef HAVE_LINALG_ELPA 626 if(matrix%sizeb_blocs(1) .ne. matrix%sizeb_blocs(2)) then 627 matrix%sizeb_blocs(1) = MIN(matrix%sizeb_blocs(1), matrix%sizeb_blocs(2)) 628 matrix%sizeb_blocs(2) = matrix%sizeb_blocs(1) 629 end if 630 #endif 631 632 ! Use custom block sizes. 633 if (present(size_blocs)) then 634 ABI_CHECK(.not. all(size_blocs == -1), "size_blocs [-1, -1] is not allowed") 635 if (size_blocs(1) == -1) then 636 matrix%sizeb_blocs(1) = nbli_global 637 else 638 matrix%sizeb_blocs(1) = MIN(size_blocs(1), nbli_global) 639 end if 640 if (size_blocs(2) == -1) then 641 matrix%sizeb_blocs(2) = nbco_global 642 else 643 matrix%sizeb_blocs(2) = MIN(size_blocs(2), nbco_global) 644 end if 645 end if 646 647 matrix%sizeb_global(1) = nbli_global 648 matrix%sizeb_global(2) = nbco_global 649 650 ! Size of the local buffer 651 ! NUMROC computes the NUMber of Rows Or Columns of a distributed matrix owned by the process indicated by IPROC. 652 ! NUMROC (n, nb, iproc, isrcproc, nprocs) 653 matrix%sizeb_local(1) = NUMROC(nbli_global, matrix%sizeb_blocs(1), & 654 processor%coords(1), 0, processor%grid%dims(1)) 655 656 matrix%sizeb_local(2) = NUMROC(nbco_global,matrix%sizeb_blocs(2), & 657 processor%coords(2), 0, processor%grid%dims(2)) 658 659 call idx_loc(matrix, matrix%sizeb_global(1), matrix%sizeb_global(2), & 660 matrix%sizeb_local(1), matrix%sizeb_local(2)) 661 662 ! Initialisation of the SCALAPACK description of the matrix 663 ! (desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info) 664 call DESCINIT(matrix%descript%tab, nbli_global, nbco_global, & 665 matrix%sizeb_blocs(1), matrix%sizeb_blocs(2), 0, 0, & 666 processor%grid%ictxt, MAX(1, matrix%sizeb_local(1)), info) 667 668 if (info /= 0) then 669 ABI_ERROR(sjoin("Error while initializing scalapack matrix. info:", itoa(info))) 670 end if 671 672 ! Allocate local buffer. 673 matrix%istwf_k = istwf_k 674 select type (matrix) 675 class is (matrix_scalapack) 676 if (istwf_k /= 2) then 677 ABI_MALLOC(matrix%buffer_cplx, (matrix%sizeb_local(1), matrix%sizeb_local(2))) 678 matrix%buffer_cplx = czero 679 else 680 ABI_MALLOC(matrix%buffer_real, (matrix%sizeb_local(1), matrix%sizeb_local(2))) 681 matrix%buffer_real = zero 682 end if 683 class is (slkmat_sp_t) 684 if (istwf_k /= 2) then 685 ABI_MALLOC(matrix%buffer_cplx, (matrix%sizeb_local(1), matrix%sizeb_local(2))) 686 matrix%buffer_cplx = czero_sp 687 else 688 ABI_MALLOC(matrix%buffer_real, (matrix%sizeb_local(1), matrix%sizeb_local(2))) 689 matrix%buffer_real = zero_sp 690 end if 691 class default 692 ABI_ERROR("Wrong class") 693 end select 694 #endif 695 696 end subroutine init_matrix_scalapack
m_slk/loc_glob__ [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
loc_glob__
FUNCTION
Determine the global index from a local index (row or column) as a function of a given processor
INPUTS
matrix= the matrix to process proc= descriptor of a processor idx= number of rows in the distributed matrix lico= block size index. 1 for rows. 2 for columns
SOURCE
1726 integer pure function loc_glob__(matrix, proc, idx, lico) 1727 1728 !Arguments ------------------------------------ 1729 class(basemat_t),intent(in) :: matrix 1730 class(processor_scalapack),intent(in) :: proc 1731 integer, intent(in) :: idx,lico 1732 1733 !Local variables------------------------------- 1734 integer :: nbcyc, rest, nblocs 1735 1736 ! ********************************************************************* 1737 1738 nbcyc = INT((idx-1) / matrix%sizeb_blocs(lico)) 1739 rest = MOD(idx-1, matrix%sizeb_blocs(lico)) 1740 nblocs = nbcyc * proc%grid%dims(lico) + proc%coords(lico) 1741 1742 loc_glob__ = nblocs * matrix%sizeb_blocs(lico) + rest + 1 1743 1744 end function loc_glob__
m_slk/locmem_mb [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
locmem_mb
FUNCTION
Returns memory allocated for the local buffer in Mb.
SOURCE
710 pure real(dp) function locmem_mb(mat) 711 712 !Arguments ------------------------------------ 713 class(basemat_t),intent(in) :: mat 714 715 ! ********************************************************************* 716 717 locmem_mb = zero 718 select type (mat) 719 class is (matrix_scalapack) 720 if (allocated(mat%buffer_real)) locmem_mb = product(int(shape(mat%buffer_real))) * dp 721 if (allocated(mat%buffer_cplx)) locmem_mb = product(int(shape(mat%buffer_cplx))) * two * dp 722 class is (slkmat_sp_t) 723 if (allocated(mat%buffer_real)) locmem_mb = product(int(shape(mat%buffer_real))) * sp 724 if (allocated(mat%buffer_cplx)) locmem_mb = product(int(shape(mat%buffer_cplx))) * two * sp 725 end select 726 locmem_mb = locmem_mb * b2Mb 727 728 end function locmem_mb
m_slk/matrix_from_complexmatrix [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_from_complexmatrix
FUNCTION
Routine to fill a SCALAPACK matrix from a global matrix (FULL STORAGE MODE)
INPUTS
istwf_k= 2 if we have a real matrix else complex. reference= a complex matrix
SIDE EFFECTS
matrix= the matrix to process
SOURCE
1919 subroutine matrix_from_complexmatrix(matrix, reference, istwf_k) 1920 1921 !Arguments ------------------------------------ 1922 class(matrix_scalapack),intent(inout) :: matrix 1923 integer,intent(in) :: istwf_k 1924 !arrays 1925 real(dp),intent(in) :: reference(:,:) 1926 1927 !Local variables------------------------------- 1928 integer :: i,j,iglob,jglob 1929 complex(dpc) :: val 1930 1931 ! ********************************************************************* 1932 1933 ABI_UNUSED(istwf_k) 1934 1935 do i=1,matrix%sizeb_local(1) 1936 do j=1,matrix%sizeb_local(2) 1937 call matrix%loc2glob(i, j, iglob, jglob) 1938 val = dcmplx(reference(2*iglob-1, jglob),reference(2*iglob, jglob)) 1939 call matrix_set_local_cplx(matrix,i,j,val) 1940 end do 1941 end do 1942 1943 end subroutine matrix_from_complexmatrix
m_slk/matrix_from_global [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_from_global
FUNCTION
Routine to fill a SCALAPACK matrix from a global PACKED matrix.
INPUTS
istwf_k= 2 if we have a real matrix else complex. reference= one-dimensional array with packed matrix.
SIDE EFFECTS
matrix= the matrix to process
SOURCE
1765 subroutine matrix_from_global(matrix, reference, istwf_k) 1766 1767 !Arguments ------------------------------------ 1768 class(matrix_scalapack),intent(inout) :: matrix 1769 integer,intent(in) :: istwf_k 1770 real(dp),intent(in) :: reference(*) 1771 1772 !Local variables------------------------------- 1773 integer :: i,j,iglob,jglob,ind 1774 real(dp) :: val_real 1775 complex(dp) :: val_cplx 1776 1777 ! ********************************************************************* 1778 1779 do i=1,matrix%sizeb_local(1) 1780 do j=1,matrix%sizeb_local(2) 1781 call matrix%loc2glob(i, j, iglob, jglob) 1782 1783 if (istwf_k/=2) then 1784 ind = jglob*(jglob-1)+2*iglob-1 1785 val_cplx = dcmplx(reference(ind),reference(ind+1)) 1786 call matrix_set_local_cplx(matrix,i,j,val_cplx) 1787 else 1788 ind = (jglob*(jglob-1))/2 + iglob 1789 val_real = reference(ind) 1790 call matrix_set_local_real(matrix,i,j,val_real) 1791 end if 1792 1793 end do 1794 end do 1795 1796 end subroutine matrix_from_global
m_slk/matrix_from_global_sym [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_from_global_sym
FUNCTION
INPUTS
istwf_k= 2 if we have a real matrix else complex. reference= one-dimensional array
SIDE EFFECTS
matrix= the matrix to process
SOURCE
1816 subroutine matrix_from_global_sym(matrix, reference, istwf_k) 1817 1818 !Arguments ------------------------------------ 1819 class(matrix_scalapack),intent(inout) :: matrix 1820 real(dp),intent(in) :: reference(:) 1821 integer,intent(in) :: istwf_k 1822 1823 !Local variables------------------------------- 1824 integer :: i,j,iglob,jglob,ind 1825 complex(dp):: val_cplx 1826 real(dp) ::val_real 1827 1828 ! ********************************************************************* 1829 1830 do i=1,matrix%sizeb_local(1) 1831 do j=1,matrix%sizeb_local(2) 1832 call matrix%loc2glob(i,j,iglob,jglob) 1833 if(jglob < iglob) then 1834 ind = iglob*(iglob-1)+2*jglob-1 1835 else 1836 ind = jglob*(jglob-1)+2*iglob-1 1837 end if 1838 if (istwf_k/=2) then 1839 val_cplx = dcmplx(reference(ind),reference(ind+1)) 1840 if(jglob < iglob) then 1841 call matrix_set_local_cplx(matrix,i,j,conjg(val_cplx)) 1842 else 1843 call matrix_set_local_cplx(matrix,i,j,val_cplx) 1844 end if 1845 else 1846 ind = (ind + 1) / 2 1847 val_real = reference(ind) 1848 call matrix_set_local_real(matrix,i,j,val_real) 1849 end if 1850 end do 1851 end do 1852 1853 end subroutine matrix_from_global_sym
m_slk/matrix_from_realmatrix [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_from_realmatrix
FUNCTION
Routine to fill a SCALAPACK matrix from a real global matrix (FULL STORAGE MODE)
INPUTS
istwf_k= 2 if we have a real matrix else complex. reference= a real matrix
SIDE EFFECTS
matrix= the matrix to process
SOURCE
1874 subroutine matrix_from_realmatrix(matrix, reference, istwf_k) 1875 1876 !Arguments ------------------------------------ 1877 class(matrix_scalapack),intent(inout) :: matrix 1878 integer,intent(in) :: istwf_k 1879 !arrays 1880 real(dp),intent(in) :: reference(:,:) 1881 1882 !Local variables------------------------------- 1883 integer :: i,j,iglob,jglob 1884 real(dp) :: val 1885 1886 ! ********************************************************************* 1887 1888 ABI_UNUSED(istwf_k) 1889 1890 do i=1,matrix%sizeb_local(1) 1891 do j=1,matrix%sizeb_local(2) 1892 call matrix%loc2glob(i, j, iglob, jglob) 1893 val = reference(iglob, jglob) 1894 call matrix_set_local_real(matrix,i,j,val) 1895 end do 1896 end do 1897 1898 end subroutine matrix_from_realmatrix
m_slk/matrix_get_local_cplx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_get_local_cplx
FUNCTION
Returns a local matrix coefficient of complex type. Access to a component thanks to its local indices
INPUTS
matrix= the matrix to process i= row in the matrix j= column in the matrix
OUTPUT
The value of the local matrix.
SOURCE
1396 pure complex(dpc) function matrix_get_local_cplx(matrix, i, j) 1397 1398 !Arguments ------------------------------------ 1399 class(matrix_scalapack),intent(in) :: matrix 1400 integer, intent(in) :: i,j 1401 1402 ! ********************************************************************* 1403 1404 matrix_get_local_cplx = matrix%buffer_cplx(i,j) 1405 1406 end function matrix_get_local_cplx
m_slk/matrix_get_local_real [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_get_local_real
FUNCTION
Returns a local matrix coefficient of double precision type.
INPUTS
matrix= the matrix to process i= row in the matrix j= column in the matrix
SOURCE
1425 pure real(dp) function matrix_get_local_real(matrix,i,j) 1426 1427 !Arguments ------------------------------------ 1428 class(matrix_scalapack),intent(in) :: matrix 1429 integer, intent(in) :: i,j 1430 1431 ! ********************************************************************* 1432 1433 matrix_get_local_real = matrix%buffer_real(i,j) 1434 1435 end function matrix_get_local_real
m_slk/matrix_scalapack [ Types ]
NAME
matrix_scalapack
FUNCTION
high-level interface to ScaLAPACK matrix (double precision version)
SOURCE
222 type, public, extends(basemat_t) :: matrix_scalapack 223 224 real(dp),allocatable :: buffer_real(:,:) 225 ! local part of the (real) matrix. 226 ! The istwf_k option passed to the constructor defines whether we have a real or complex matrix 227 228 complex(dpc),allocatable :: buffer_cplx(:,:) 229 ! local part of the (complex) matrix 230 231 contains 232 233 procedure :: get_head_and_wings => slk_get_head_and_wings 234 ! Return global arrays with the head and the wings of the matrix. 235 236 procedure :: set_head_and_wings => slk_set_head_and_wings 237 ! Set head and the wings of the matrix starting from global arrays. 238 239 procedure :: copy => matrix_scalapack_copy 240 ! Copy object 241 242 procedure :: hpd_invert => slk_hpd_invert 243 ! Inverse of a Hermitian positive definite matrix. 244 245 procedure :: ptrans => slk_ptrans 246 ! Transpose matrix 247 248 procedure :: cut => slk_cut 249 ! Extract submatrix and create new matrix with `size_blocs` and `processor` 250 251 procedure :: take_from => slk_take_from 252 ! Take values from source 253 254 procedure :: collect_cplx => slk_collect_cplx 255 ! Return on all processors the submatrix of shape (mm, nn) starting at position ija. 256 257 procedure :: heev => slk_heev 258 ! Compute eigenvalues and, optionally, eigenvectors of an Hermitian matrix A. A * X = lambda * X 259 260 procedure :: pzheevx => slk_pzheevx 261 ! Compute Eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. ! A * X = lambda * X 262 263 procedure :: pzhegvx => slk_pzhegvx 264 ! Eigenvalues and, optionally, eigenvectors of a complex 265 ! generalized Hermitian-definite eigenproblem, of the form 266 ! sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, 267 ! or sub( B )*sub( A )*x=(lambda)*x. 268 269 procedure :: symmetrize => slk_symmetrize 270 ! Symmetrizes a square scaLAPACK matrix. 271 272 procedure :: bsize_and_type => slk_bsize_and_type 273 ! Returns the byte size and the MPI datatype 274 275 end type matrix_scalapack
m_slk/matrix_scalapack_copy [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_scalapack_copy
FUNCTION
Copy in_mat to out_mat. If empty is True, the values in the local buffer are not copied. Default: False
SOURCE
1056 subroutine matrix_scalapack_copy(in_mat, out_mat, empty) 1057 1058 !Arguments ------------------------------------ 1059 class(matrix_scalapack),intent(in) :: in_mat 1060 class(matrix_scalapack),intent(out) :: out_mat 1061 logical,optional,intent(in) :: empty 1062 1063 !Local variables------------------------------- 1064 logical :: empty__ 1065 1066 ! ********************************************************************* 1067 1068 call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), in_mat%processor, in_mat%istwf_k, & 1069 size_blocs=in_mat%sizeb_blocs) 1070 1071 empty__ = .False.; if (present(empty)) empty__ = empty 1072 if (.not. empty__) then 1073 if (in_mat%istwf_k == 1) then 1074 out_mat%buffer_cplx = in_mat%buffer_cplx 1075 else 1076 out_mat%buffer_real = in_mat%buffer_real 1077 end if 1078 end if 1079 1080 end subroutine matrix_scalapack_copy
m_slk/matrix_scalapack_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_scalapack_free
FUNCTION
Free dynamic memory
SOURCE
1132 subroutine matrix_scalapack_free(mat) 1133 1134 !Arguments ------------------------------------ 1135 class(basemat_t),intent(inout) :: mat 1136 1137 ! ********************************************************************* 1138 1139 ! Don't free the grid. Just nullify the pointer as there might be other objects keeping a ref to processor. 1140 mat%processor => null() 1141 1142 mat%sizeb_global = 0 1143 mat%sizeb_blocs = 0 1144 mat%sizeb_local = 0 1145 mat%descript%tab = 0 1146 1147 select type (mat) 1148 class is (matrix_scalapack) 1149 ABI_SFREE(mat%buffer_cplx) 1150 ABI_SFREE(mat%buffer_real) 1151 class is (slkmat_sp_t) 1152 ABI_SFREE(mat%buffer_cplx) 1153 ABI_SFREE(mat%buffer_real) 1154 class default 1155 ABI_ERROR("Wrong class") 1156 end select 1157 1158 end subroutine matrix_scalapack_free
m_slk/matrix_set_local_cplx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_set_local_cplx
FUNCTION
Sets a local matrix coefficient of complex type. ------------------------------------------------------- Positioning of a component of a matrix thanks to its local indices -------------------------------------------------------
INPUTS
i= row in the matrix j= column in the matrix value= the value to set
SIDE EFFECTS
matrix%buffer_cplx(i,j) filled with value
SOURCE
1460 pure subroutine matrix_set_local_cplx(matrix,i,j,value) 1461 1462 !Arguments ------------------------------------ 1463 class(matrix_scalapack),intent(inout) :: matrix 1464 integer, intent(in) :: i,j 1465 complex(dp), intent(in) :: value 1466 1467 ! ********************************************************************* 1468 1469 matrix%buffer_cplx(i,j) = value 1470 1471 end subroutine matrix_set_local_cplx
m_slk/matrix_set_local_real [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_set_local_real
FUNCTION
Sets a local matrix coefficient of double precision type.
INPUTS
i= row in the matrix j= column in the matrix value= the value to set
SIDE EFFECTS
matrix%buffer_real(i,j) set to value
SOURCE
1493 pure subroutine matrix_set_local_real(matrix, i, j, value) 1494 1495 !Arguments ------------------------------------ 1496 class(matrix_scalapack),intent(inout) :: matrix 1497 integer, intent(in) :: i,j 1498 real(dp), intent(in) :: value 1499 1500 ! ********************************************************************* 1501 1502 matrix%buffer_real(i,j) = value 1503 1504 end subroutine matrix_set_local_real
m_slk/matrix_to_complexmatrix [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_to_complexmatrix
FUNCTION
Inserts a ScaLAPACK matrix into a complex matrix in FULL STORAGE MODE.
INPUTS
matrix= the matrix to process istwf_k= 2 if we have a real matrix else complex.
SIDE EFFECTS
reference= the matrix to fill
SOURCE
2059 subroutine matrix_to_complexmatrix(matrix, reference, istwf_k) 2060 2061 !Arguments ------------------------------------ 2062 integer,intent(in) :: istwf_k 2063 class(matrix_scalapack),intent(in) :: matrix 2064 !arrays 2065 complex(dpc),intent(inout) :: reference(:,:) 2066 2067 !Local variables------------------------------- 2068 integer :: i,j,iglob,jglob 2069 2070 ! ********************************************************************* 2071 2072 ABI_UNUSED(istwf_k) 2073 2074 do i=1,matrix%sizeb_local(1) 2075 do j=1,matrix%sizeb_local(2) 2076 call matrix%loc2glob(i, j, iglob, jglob) 2077 reference(iglob,jglob) = matrix_get_local_cplx(matrix,i,j) 2078 end do 2079 end do 2080 2081 end subroutine matrix_to_complexmatrix
m_slk/matrix_to_global [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_to_global
FUNCTION
Inserts a ScaLAPACK matrix into a global one in PACKED storage mode.
INPUTS
matrix= the matrix to process istwf_k= 2 if we have a real matrix else complex. nband_k= number of bands at this k point for that spin polarization
SIDE EFFECTS
reference= one-dimensional array
SOURCE
1965 subroutine matrix_to_global(matrix, reference, istwf_k) 1966 1967 !Arguments ------------------------------------ 1968 class(matrix_scalapack),intent(in) :: matrix 1969 integer,intent(in) :: istwf_k !,nband_k 1970 real(dp),intent(inout) :: reference(*) !(nband_k*(nband_k+1)) 1971 1972 !Local variables------------------------------- 1973 integer :: i,j,iglob,jglob,ind 1974 1975 ! ********************************************************************* 1976 1977 do i=1,matrix%sizeb_local(1) 1978 do j=1,matrix%sizeb_local(2) 1979 call matrix%loc2glob(i, j, iglob, jglob) 1980 1981 ind = jglob*(jglob-1)+2*iglob-1 1982 if (ind <= matrix%sizeb_global(2)*(matrix%sizeb_global(2)+1)) then 1983 if (istwf_k/=2) then 1984 reference(ind) = real(matrix_get_local_cplx(matrix,i,j)) 1985 reference(ind+1) = aimag(matrix_get_local_cplx(matrix,i,j)) 1986 else 1987 ind=(ind+1)/2 !real packed storage 1988 reference(ind) = matrix_get_local_real(matrix,i,j) 1989 end if 1990 end if 1991 end do 1992 end do 1993 1994 end subroutine matrix_to_global
m_slk/matrix_to_realmatrix [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_to_realmatrix
FUNCTION
Inserts a ScaLAPACK matrix into a real matrix in FULL STORAGE MODE.
INPUTS
matrix= the matrix to process istwf_k= 2 if we have a real matrix else complex.
SIDE EFFECTS
reference= the matrix to fill
SOURCE
2015 subroutine matrix_to_realmatrix(matrix, reference, istwf_k) 2016 2017 !Arguments ------------------------------------ 2018 class(matrix_scalapack),intent(in) :: matrix 2019 integer,intent(in) :: istwf_k 2020 !arrays 2021 real(dp),intent(inout) :: reference(:,:) 2022 2023 !Local variables------------------------------- 2024 integer :: i,j,iglob,jglob 2025 !complex(dpc) :: zvar 2026 2027 ! ********************************************************************* 2028 2029 ABI_UNUSED(istwf_k) 2030 2031 do i=1,matrix%sizeb_local(1) 2032 do j=1,matrix%sizeb_local(2) 2033 call matrix%loc2glob(i, j, iglob, jglob) 2034 reference(iglob,jglob) = matrix_get_local_real(matrix,i,j) 2035 end do 2036 end do 2037 2038 end subroutine matrix_to_realmatrix
m_slk/matrix_to_reference [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
matrix_to_reference
FUNCTION
Routine to fill a full matrix with respect to a SCALAPACK matrix.
INPUTS
matrix= the matrix to process istwf_k= 2 if we have a real matrix else complex.
SIDE EFFECTS
reference= one-dimensional array
SOURCE
2102 subroutine matrix_to_reference(matrix, reference, istwf_k) 2103 2104 !Arguments ------------------------------------ 2105 class(matrix_scalapack),intent(in) :: matrix 2106 integer,intent(in) :: istwf_k 2107 !arrays 2108 real(dp),intent(inout) :: reference(:,:) 2109 2110 !Local variables------------------------------- 2111 integer :: i,j,iglob,jglob,ind 2112 2113 ! ********************************************************************* 2114 2115 do i=1,matrix%sizeb_local(1) 2116 do j=1,matrix%sizeb_local(2) 2117 call matrix%loc2glob(i, j, iglob, jglob) 2118 2119 if (istwf_k/=2) then 2120 ind=(iglob-1)*2+1 2121 reference(ind,jglob) = real(matrix_get_local_cplx(matrix,i,j)) 2122 reference(ind+1,jglob) = aimag(matrix_get_local_cplx(matrix,i,j)) 2123 else 2124 ind=iglob 2125 reference(ind,jglob) = matrix_get_local_real(matrix,i,j) 2126 !reference(ind+1,jglob) = 0._dp 2127 end if 2128 2129 end do 2130 end do 2131 2132 end subroutine matrix_to_reference
m_slk/my_locc [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
my_locc
FUNCTION
Method of matrix_scalapack wrapping the scaLAPACK tool LOCc.
OUTPUT
my_locc= For the meaning see NOTES below.
NOTES
Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q. LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p processes of its process column. Similarly, LOCc( K ) denotes the number of elements of K that a process would receive if K were distributed over the q processes of its process row. The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC: LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). An upper bound for these quantities may be computed by: LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
SOURCE
2461 integer function my_locc(mat) 2462 2463 !Arguments ------------------------------------ 2464 class(basemat_t),intent(in) :: mat 2465 2466 #ifdef HAVE_LINALG_SCALAPACK 2467 !Local variables------------------------------- 2468 integer :: N, NB_A, MYCOL, CSRC_A, NPCOL 2469 integer,external :: NUMROC 2470 2471 ! ************************************************************************* 2472 2473 N = mat%descript%tab(N_ ) ! The number of columns in the global matrix. 2474 NB_A = mat%descript%tab(NB_) ! The number of columns in a block. 2475 MYCOL = mat%processor%coords(2) ! The column index of my processor 2476 CSRC_A = mat%descript%tab(CSRC_) ! The column of the processors at the beginning. 2477 NPCOL = mat%processor%grid%dims(2) ! The number of processors per column in the Scalapack grid. 2478 2479 my_locc = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ) 2480 #endif 2481 2482 end function my_locc
m_slk/my_locr [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
my_locr
FUNCTION
Method of matrix_scalapack wrapping the scaLAPACK tool LOCr.
OUTPUT
my_locr= For the meaning see NOTES below.
NOTES
Let K be the number of rows or columns of a distributed matrix, and assume that its process grid has dimension p x q. LOCr( K ) denotes the number of elements of K that a process would receive if K were distributed over the p processes of its process column. Similarly, LOCc( K ) denotes the number of elements of K that a process would receive if K were distributed over the q processes of its process row. The values of LOCr() and LOCc() may be determined via a call to the ScaLAPACK tool function, NUMROC: LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). An upper bound for these quantities may be computed by: LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
SOURCE
2410 integer function my_locr(mat) 2411 2412 !Arguments ------------------------------------ 2413 class(basemat_t),intent(in) :: mat 2414 2415 !Local variables------------------------------- 2416 #ifdef HAVE_LINALG_SCALAPACK 2417 integer :: M, MB_A, MYROW, RSRC_A, NPROW 2418 integer,external :: NUMROC 2419 2420 ! ************************************************************************* 2421 2422 M = mat%descript%tab(M_ ) ! The number of rows in the global matrix. 2423 MB_A = mat%descript%tab(MB_) ! The number of rows in a block. 2424 MYROW = mat%processor%coords(1) ! The row index of my processor 2425 RSRC_A = mat%descript%tab(RSRC_) ! The row of the processors at the beginning. 2426 NPROW = mat%processor%grid%dims(1) ! The number of processors per row in the Scalapack grid. 2427 2428 my_locr = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ) 2429 #endif 2430 2431 end function my_locr
m_slk/processor_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
processor_free
FUNCTION
Removes a processor from the ScaLAPACK grid.
SOURCE
552 subroutine processor_free(processor) 553 554 !Arguments ------------------------------------ 555 class(processor_scalapack),intent(inout) :: processor 556 557 ! ********************************************************************* 558 559 #ifdef HAVE_LINALG_SCALAPACK 560 if (processor%grid%ictxt /= xmpi_comm_null) then 561 call BLACS_GRIDEXIT(processor%grid%ictxt) 562 !call BLACS_EXIT(0) 563 end if 564 #endif 565 566 end subroutine processor_free
m_slk/processor_init [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
processor_init
FUNCTION
Initializes an instance of processor ScaLAPACK from an MPI communicator.
INPUTS
comm= MPI communicator [grid_dims]=Number of procs for each dimension.
OUTPUT
processor= descriptor of a processor
SOURCE
514 subroutine processor_init(processor, comm, grid_dims) 515 516 !Arguments ------------------------------------ 517 class(processor_scalapack),intent(out) :: processor 518 integer, intent(in) :: comm 519 integer,optional,intent(in) :: grid_dims(2) 520 521 !Local variables------------------------------- 522 type(grid_scalapack) :: grid 523 integer :: nbproc,myproc 524 525 ! ********************************************************************* 526 527 nbproc = xmpi_comm_size(comm) 528 myproc = xmpi_comm_rank(comm) 529 530 if (present(grid_dims)) then 531 call grid%init(nbproc, comm, .false., grid_dims=grid_dims) 532 else 533 call grid%init(nbproc, comm, .false.) 534 end if 535 536 call build_processor_scalapack(processor, grid, myproc, comm) 537 538 end subroutine processor_init
m_slk/processor_scalapack [ Types ]
NAME
processor_scalapack
FUNCTION
One processor in the grid.
SOURCE
101 type,public :: processor_scalapack 102 103 integer :: myproc = -1 104 ! rank the processor 105 106 integer :: comm = xmpi_comm_null 107 ! MPI communicator underlying the BLACS grid. 108 109 integer :: coords(2) = -1 110 ! Coordinates of the processor in the grid. 111 112 type(grid_scalapack) :: grid 113 ! the grid to which the processor is associated to. 114 115 contains 116 procedure :: init => processor_init ! Initializes an instance of processor ScaLAPACK from an MPI communicator. 117 procedure :: free => processor_free ! Free the object 118 end type processor_scalapack 119 120 private :: build_processor_scalapack ! Build a ScaLAPACK processor descriptor
m_slk/slk_array1_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array1_free
FUNCTION
Deallocate 1d array of matrix_scalapack elements
SOURCE
1172 subroutine slk_array1_free(slk_arr1) 1173 class(basemat_t),intent(inout) :: slk_arr1(:) 1174 integer :: i1 1175 do i1=1,size(slk_arr1, dim=1) 1176 call slk_arr1(i1)%free() 1177 end do 1178 end subroutine slk_array1_free
m_slk/slk_array2_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array2_free
FUNCTION
Deallocate 2d array of matrix_scalapack elements
SOURCE
1192 subroutine slk_array2_free(slk_arr2) 1193 class(basemat_t),intent(inout) :: slk_arr2(:,:) 1194 integer :: i1, i2 1195 do i2=1,size(slk_arr2, dim=2) 1196 do i1=1,size(slk_arr2, dim=1) 1197 call slk_arr2(i1, i2)%free() 1198 end do 1199 end do 1200 end subroutine slk_array2_free
m_slk/slk_array3_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array3_free
FUNCTION
Deallocate 3d array of matrix_scalapack elements
SOURCE
1214 subroutine slk_array3_free(slk_arr3) 1215 class(basemat_t),intent(inout) :: slk_arr3(:,:,:) 1216 integer :: i1, i2, i3 1217 do i3=1,size(slk_arr3, dim=3) 1218 do i2=1,size(slk_arr3, dim=2) 1219 do i1=1,size(slk_arr3, dim=1) 1220 call slk_arr3(i1, i2, i3)%free() 1221 end do 1222 end do 1223 end do 1224 end subroutine slk_array3_free
m_slk/slk_array4_free [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array4_free
FUNCTION
Deallocate 4d array of matrix_scalapack elements
SOURCE
1238 subroutine slk_array4_free(slk_arr4) 1239 class(basemat_t),intent(inout) :: slk_arr4(:,:,:,:) 1240 integer :: i1, i2, i3, i4 1241 do i4=1,size(slk_arr4, dim=4) 1242 do i3=1,size(slk_arr4, dim=3) 1243 do i2=1,size(slk_arr4, dim=2) 1244 do i1=1,size(slk_arr4, dim=1) 1245 call slk_arr4(i1, i2, i3, i4)%free() 1246 end do 1247 end do 1248 end do 1249 end do 1250 end subroutine slk_array4_free
m_slk/slk_array_locmem_mb [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array_locmem_mb
FUNCTION
Elemental function to compute the memory allocated for an array of matrix_scalapack elements Usage: mem_mb = sum(mat_array)
SOURCE
1295 elemental real(dp) function slk_array_locmem_mb(mat) result(mem_mb) 1296 class(basemat_t),intent(in) :: mat 1297 mem_mb = mat%locmem_mb() 1298 end function slk_array_locmem_mb
m_slk/slk_array_set [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_array_set
FUNCTION
Elemental routine to set the value of the PBLAS buffer to a costant value `cvalue`. Usually used to zero all the buffers in an array of matrix_scalapack objects.
SOURCE
1265 elemental subroutine slk_array_set(mat, cvalue) 1266 1267 !Arguments ------------------------------------ 1268 class(basemat_t),intent(inout) :: mat 1269 complex(dp),intent(in) :: cvalue 1270 1271 select type (mat) 1272 class is (matrix_scalapack) 1273 if (allocated(mat%buffer_cplx)) mat%buffer_cplx = cvalue 1274 if (allocated(mat%buffer_real)) mat%buffer_real = real(cvalue, kind=dp) 1275 class is (slkmat_sp_t) 1276 if (allocated(mat%buffer_cplx)) mat%buffer_cplx = cmplx(cvalue, kind=sp) 1277 if (allocated(mat%buffer_real)) mat%buffer_real = real(cvalue, kind=sp) 1278 end select 1279 1280 end subroutine slk_array_set
m_slk/slk_bsize_and_type [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_bsize_and_type
FUNCTION
Returns the byte size and the MPI datatype associated to the matrix elements that are stored in the ScaLAPACK_matrix
INPUTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer
OUTPUT
bsize_elm=Byte size of the matrix element. mpi_type_elm=MPI datatype of the matrix element.
SOURCE
6497 subroutine slk_bsize_and_type(Slk_mat, bsize_elm, mpi_type_elm) 6498 6499 !Arguments ------------------------------------ 6500 !scalars 6501 class(matrix_scalapack),intent(in) :: Slk_mat 6502 integer,intent(out) :: bsize_elm,mpi_type_elm 6503 6504 !Local variables ------------------------------ 6505 !scalars 6506 integer :: ierr 6507 character(len=500) :: msg 6508 6509 ! ************************************************************************ 6510 6511 ! @matrix_scalapack 6512 ierr=0 6513 #ifdef HAVE_MPI 6514 if (allocated(Slk_mat%buffer_cplx)) then 6515 ierr = ierr + 1 6516 mpi_type_elm = MPI_DOUBLE_COMPLEX 6517 bsize_elm = xmpi_bsize_dpc 6518 end if 6519 6520 if (allocated(Slk_mat%buffer_real)) then 6521 ierr = ierr + 1 6522 mpi_type_elm = MPI_DOUBLE_PRECISION 6523 bsize_elm = xmpi_bsize_dp 6524 end if 6525 #endif 6526 6527 ! One and only one buffer should be allocated. 6528 if (ierr /= 1) then 6529 write(msg,'(a,i0)')" ScaLAPACK buffers are not allocated correctly, ierr= ",ierr 6530 ABI_ERROR(msg) 6531 end if 6532 6533 end subroutine slk_bsize_and_type
m_slk/slk_change_size_blocs [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_change_size_blocs
FUNCTION
Change the block sizes, return new matrix in out_mat
INPUTS
[free]: True if `in_mat` should be deallocated. Default: False
OUTPUT
SOURCE
4787 subroutine slk_change_size_blocs(in_mat, out_mat, & 4788 size_blocs, processor, free) ! Optional 4789 4790 !Arguments ------------------------------------ 4791 class(basemat_t),target,intent(inout) :: in_mat 4792 class(basemat_t),intent(out) :: out_mat 4793 integer,optional,intent(in) :: size_blocs(2) 4794 class(processor_scalapack), target, optional,intent(in) :: processor 4795 logical,optional,intent(in) :: free 4796 4797 !Local variables------------------------------- 4798 type(processor_scalapack), pointer :: processor__ 4799 4800 ! ************************************************************************* 4801 4802 processor__ => in_mat%processor; if (present(processor)) processor__ => processor 4803 4804 if (present(size_blocs)) then 4805 call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), processor__, in_mat%istwf_k, size_blocs=size_blocs) 4806 else 4807 call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), processor__, in_mat%istwf_k) 4808 end if 4809 !call out_mat%print(header="output matrix generated by slk_change_size_blocs") 4810 4811 ABI_CHECK(same_type_as(in_mat, out_mat), "in_mat and out_mat should have same type!") 4812 4813 ! p?gemr2d: Copies a submatrix from one general rectangular matrix to another. 4814 ! prototype 4815 !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt) 4816 4817 #ifdef HAVE_LINALG_SCALAPACK 4818 select type (in_mat) 4819 class is (matrix_scalapack) 4820 select type (out_mat) 4821 class is (matrix_scalapack) 4822 if (allocated(in_mat%buffer_cplx)) then 4823 call pzgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2), & 4824 in_mat%buffer_cplx, 1, 1, in_mat%descript%tab, & 4825 out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, & 4826 processor__%grid%ictxt) 4827 4828 else if (allocated(in_mat%buffer_real)) then 4829 call pdgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2), & 4830 in_mat%buffer_real, 1, 1, in_mat%descript%tab, & 4831 out_mat%buffer_real, 1, 1, out_mat%descript%tab, & 4832 processor__%grid%ictxt) 4833 else 4834 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 4835 end if 4836 end select 4837 4838 class is (slkmat_sp_t) 4839 select type (out_mat) 4840 class is (slkmat_sp_t) 4841 if (allocated(in_mat%buffer_cplx)) then 4842 call pcgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2), & 4843 in_mat%buffer_cplx, 1, 1, in_mat%descript%tab, & 4844 out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, & 4845 processor__%grid%ictxt) 4846 4847 else if (allocated(in_mat%buffer_real)) then 4848 call psgemr2d(in_mat%sizeb_global(1), in_mat%sizeb_global(2), & 4849 in_mat%buffer_real, 1, 1, in_mat%descript%tab, & 4850 out_mat%buffer_real, 1, 1, out_mat%descript%tab, & 4851 processor__%grid%ictxt) 4852 else 4853 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 4854 end if 4855 end select 4856 4857 class default 4858 ABI_ERROR("Wrong class") 4859 end select 4860 #endif 4861 4862 if (present(free)) then 4863 if (free) call in_mat%free() 4864 end if 4865 4866 end subroutine slk_change_size_blocs
m_slk/slk_collect_cplx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_collect_cplx
FUNCTION
Return on all processors the complex submatrix of shape (mm, nn) starting at position ija. NB: `out_carr` is allocated by the routine. If optional argument request is use, the routine uses non-blocking BCAST and client code is supposed to wait before accessing out_carr.
SOURCE
5128 subroutine slk_collect_cplx(in_mat, mm, nn, ija, out_carr, request) 5129 5130 !Arguments ------------------------------------ 5131 class(matrix_scalapack),intent(in) :: in_mat 5132 integer,intent(in) :: mm, nn, ija(2) 5133 complex(dp) ABI_ASYNC, allocatable,intent(out) :: out_carr(:,:) 5134 integer ABI_ASYNC, optional,intent(out) :: request 5135 5136 !Local variables------------------------------- 5137 integer,parameter :: master = 0 5138 integer :: ierr 5139 type(processor_scalapack) :: self_processor 5140 type(matrix_scalapack) :: out_mat 5141 5142 ! ************************************************************************* 5143 5144 ABI_CHECK(allocated(in_mat%buffer_cplx), "buffer_cplx is not allocated") 5145 5146 if (in_mat%processor%grid%nbprocs == 1) then 5147 ! Copy buffer and return 5148 ABI_MALLOC(out_carr, (mm, nn)) 5149 out_carr(:,:) = in_mat%buffer_cplx(ija(1):ija(1)+mm-1, ija(2):ija(2)+nn-1); return 5150 end if 5151 5152 ! Two-step algorithm: 5153 ! 1) Use pzgemr2d to collect submatrix on master. 5154 ! 2) Master brodacasts submatrix. 5155 5156 if (in_mat%processor%myproc == master) then 5157 call self_processor%init(xmpi_comm_self) 5158 call out_mat%init(mm, nn, self_processor, in_mat%istwf_k, size_blocs=[mm, nn]) 5159 else 5160 out_mat%descript%tab(CTXT_) = -1 5161 end if 5162 5163 #ifdef HAVE_LINALG_SCALAPACK 5164 call pzgemr2d(mm, nn, & 5165 in_mat%buffer_cplx, ija(1), ija(2), in_mat%descript%tab, & 5166 out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, & 5167 in_mat%processor%grid%ictxt) 5168 #endif 5169 5170 if (in_mat%processor%myproc == master) then 5171 ABI_MOVE_ALLOC(out_mat%buffer_cplx, out_carr) 5172 call out_mat%free() 5173 call self_processor%free() 5174 else 5175 ABI_MALLOC(out_carr, (mm, nn)) 5176 end if 5177 5178 if (present(request)) then 5179 call xmpi_ibcast(out_carr, master, in_mat%processor%comm, request, ierr) 5180 else 5181 call xmpi_bcast(out_carr, master, in_mat%processor%comm, ierr) 5182 end if 5183 5184 end subroutine slk_collect_cplx
m_slk/slk_cut [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_cut
FUNCTION
Extract submatrix of shape (glob_nrows, glob_ncols) starting at `ija` from `in_mat` and create new matrix with `size_blocs` and `processor`
INPUTS
[free]: True if `in_mat` should be deallocated. Default: False
OUTPUT
SOURCE
4886 subroutine slk_cut(in_mat, glob_nrows, glob_ncols, out_mat, & 4887 size_blocs, processor, ija, ijb, free) ! Optional 4888 4889 !Arguments ------------------------------------ 4890 class(matrix_scalapack),target,intent(inout) :: in_mat 4891 integer,intent(in) :: glob_nrows, glob_ncols 4892 class(matrix_scalapack),intent(out) :: out_mat 4893 integer,optional,intent(in) :: size_blocs(2) 4894 class(processor_scalapack), target, optional,intent(in) :: processor 4895 integer,optional,intent(in) :: ija(2), ijb(2) 4896 logical,optional,intent(in) :: free 4897 4898 !Local variables------------------------------- 4899 type(processor_scalapack), pointer :: processor__ 4900 integer :: ija__(2), ijb__(2) 4901 4902 ! ************************************************************************* 4903 4904 ija__ = [1, 1]; if (present(ija)) ija__ = ija 4905 ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb 4906 4907 processor__ => in_mat%processor; if (present(processor)) processor__ => processor 4908 4909 if (present(size_blocs)) then 4910 call out_mat%init(glob_nrows, glob_ncols, processor__, in_mat%istwf_k, size_blocs=size_blocs) 4911 else 4912 call out_mat%init(glob_nrows, glob_ncols, processor__, in_mat%istwf_k) 4913 end if 4914 !call out_mat%print(header="output matrix generated by slk_cut") 4915 4916 ! p?gemr2d: Copies a submatrix from one general rectangular matrix to another. 4917 ! prototype 4918 !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt) 4919 4920 if (allocated(in_mat%buffer_cplx)) then 4921 #ifdef HAVE_LINALG_SCALAPACK 4922 call pzgemr2d(glob_nrows, glob_ncols, & 4923 in_mat%buffer_cplx, ija__(1), ija__(2), in_mat%descript%tab, & 4924 out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, & 4925 processor__%grid%ictxt) 4926 4927 else if (allocated(in_mat%buffer_real)) then 4928 call pdgemr2d(glob_nrows, glob_ncols, & 4929 in_mat%buffer_real, ija__(1), ija__(2), in_mat%descript%tab, & 4930 out_mat%buffer_real, ijb__(1), ijb__(2), out_mat%descript%tab, & 4931 processor__%grid%ictxt) 4932 #endif 4933 else 4934 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 4935 end if 4936 4937 if (present(free)) then 4938 if (free) call in_mat%free() 4939 end if 4940 4941 end subroutine slk_cut
m_slk/slk_get_head_and_wings [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_get_head_and_wings
FUNCTION
Return global arrays with head and wings of the matrix. If call_mpi if False, global MPI sum is postponed. Useful to reduce the number of MPI calls if one has to operate on multiple matrices.
SOURCE
873 subroutine slk_get_head_and_wings(mat, head, low_wing, up_wing, call_mpi) 874 875 !Arguments ------------------------------------ 876 class(matrix_scalapack),intent(in) :: mat 877 complex(dp),intent(out) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2)) 878 logical,intent(in) :: call_mpi 879 880 !Local variables------------------------------- 881 integer :: ierr, il_g1, il_g2, iglob1, iglob2 882 logical :: is_cplx 883 884 ! ********************************************************************* 885 886 head = zero; low_wing = zero; up_wing = zero 887 888 is_cplx = allocated(mat%buffer_cplx) 889 890 do il_g2=1,mat%sizeb_local(2) 891 iglob2 = mat%loc2gcol(il_g2) 892 do il_g1=1,mat%sizeb_local(1) 893 iglob1 = mat%loc2grow(il_g1) 894 895 if (iglob1 == 1 .or. iglob2 == 1) then 896 if (iglob1 == 1 .and. iglob2 == 1) then 897 if (is_cplx) then 898 head = mat%buffer_cplx(il_g1, il_g2) 899 else 900 head = mat%buffer_real(il_g1, il_g2) 901 end if 902 else if (iglob1 == 1) then 903 if (is_cplx) then 904 up_wing(iglob2) = mat%buffer_cplx(il_g1, il_g2) 905 else 906 up_wing(iglob2) = mat%buffer_real(il_g1, il_g2) 907 end if 908 else if (iglob2 == 1) then 909 if (is_cplx) then 910 low_wing(iglob1) = mat%buffer_cplx(il_g1, il_g2) 911 else 912 low_wing(iglob1) = mat%buffer_real(il_g1, il_g2) 913 end if 914 end if 915 end if 916 917 end do 918 end do 919 920 if (call_mpi) then 921 call xmpi_sum(head, mat%processor%grid%ictxt, ierr) 922 call xmpi_sum(low_wing, mat%processor%grid%ictxt, ierr) 923 call xmpi_sum(up_wing, mat%processor%grid%ictxt, ierr) 924 end if 925 926 end subroutine slk_get_head_and_wings
m_slk/slk_get_trace [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_get_trace
FUNCTION
Compute the trace of an N-by-N distributed matrix.
INPUTS
OUTPUT
SOURCE
5275 complex(dp) function slk_get_trace(mat) result(ctrace) 5276 5277 !Arguments ------------------------------------ 5278 class(basemat_t), intent(in) :: mat 5279 5280 !Local variables------------------------------- 5281 #ifdef HAVE_LINALG_SCALAPACK 5282 real(dp) :: rtrace 5283 real(sp) :: rtrace_sp 5284 complex(sp) :: ctrace_sp 5285 real(dp),external :: PDLATRA 5286 real(sp),external :: PSLATRA 5287 complex(sp),external :: PCLATRA 5288 complex(dp),external :: PZLATRA 5289 #endif 5290 5291 ! ************************************************************************* 5292 5293 ABI_CHECK_IEQ(mat%sizeb_global(1), mat%sizeb_global(2), "get_trace assumes square matrix!") 5294 5295 ! prototype for complex version. 5296 ! COMPLEX*16 FUNCTION PZLATRA( N, A, IA, JA, DESCA ) 5297 #ifdef HAVE_LINALG_SCALAPACK 5298 select type (mat) 5299 class is (matrix_scalapack) 5300 if (allocated(mat%buffer_cplx)) then 5301 ctrace = PZLATRA(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab) 5302 else if (allocated(mat%buffer_real)) then 5303 rtrace = PDLATRA(mat%sizeb_global(1), mat%buffer_real, 1, 1, mat%descript%tab) 5304 ctrace = rtrace 5305 else 5306 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 5307 end if 5308 class is (slkmat_sp_t) 5309 if (allocated(mat%buffer_cplx)) then 5310 ctrace_sp = PCLATRA(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab) 5311 ctrace = ctrace_sp 5312 else if (allocated(mat%buffer_real)) then 5313 rtrace_sp = PSLATRA(mat%sizeb_global(1), mat%buffer_real, 1, 1, mat%descript%tab) 5314 ctrace = rtrace_sp 5315 else 5316 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 5317 end if 5318 class default 5319 ABI_ERROR("Wrong class") 5320 end select 5321 #endif 5322 5323 end function slk_get_trace
m_slk/slk_glob2loc [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_glob2loc
FUNCTION
Determine the local indices of an element from its global indices and return haveit bool flag.
INPUTS
iloc= local row index. jloc= local column index.
OUTPUT
iloc= row in the matrix jloc= column in the matrix haveit= True if (iglob, jglob) is stored on this proc
SOURCE
1597 subroutine slk_glob2loc(mat, iglob, jglob, iloc, jloc, haveit) 1598 1599 !Arguments ------------------------------------ 1600 class(basemat_t),intent(in) :: mat 1601 integer, intent(in) :: iglob, jglob 1602 integer, intent(out) :: iloc, jloc 1603 logical,intent(out) :: haveit 1604 1605 !Local variables------------------------------- 1606 integer :: row_src, col_src 1607 1608 ! ********************************************************************* 1609 1610 #ifdef HAVE_LINALG_SCALAPACK 1611 ! SUBROUTINE INFOG2L( GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC) 1612 1613 call INFOG2L(iglob, jglob, mat%descript%tab, mat%processor%grid%dims(1), mat%processor%grid%dims(2), & 1614 mat%processor%coords(1), mat%processor%coords(2), iloc, jloc, row_src, col_src) 1615 1616 haveit = all(mat%processor%coords == [row_src, col_src]) 1617 #endif 1618 1619 end subroutine slk_glob2loc
m_slk/slk_has_elpa [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_has_elpa
FUNCTION
Return True if ELPA support is activated
SOURCE
1364 pure logical function slk_has_elpa() result (ans) 1365 1366 ! ********************************************************************* 1367 1368 ans = .False. 1369 #ifdef HAVE_LINALG_ELPA 1370 ans = .True. 1371 #endif 1372 1373 end function slk_has_elpa
m_slk/slk_heev [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_heev
FUNCTION
slk_heev computes selected eigenvalues and, optionally, eigenvectors of an Hermitian matrix A. A * X = lambda * X
INPUTS
JOBZ (global input) CHARACTER*1 Specifies whether or not to compute the eigenvectors: = "N": Compute eigenvalues only. = "V": Compute eigenvalues and eigenvectors. UPLO (global input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = "U": Upper triangular = "L": Lower triangular mat=The object storing the local buffer in DOUBLE PRECISION, the array descriptor, the PBLAS context. vec=The distributed eigenvectors. Not referenced if JOBZ="N"
OUTPUT
W (global output) array, dimension (N) where N is the rank of the global matrix. On normal exit, the first M entries contain the selected eigenvalues in ascending order.
SIDE EFFECTS
If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors. On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed.
SOURCE
3527 subroutine slk_heev(mat, jobz, uplo, vec, w, & 3528 mat_size, ija, ijz) ! Optional 3529 3530 !Arguments ------------------------------------ 3531 !scalars 3532 class(matrix_scalapack),intent(inout) :: mat 3533 character(len=*),intent(in) :: jobz, uplo 3534 class(matrix_scalapack),intent(inout) :: vec 3535 !arrays 3536 real(dp),intent(out) :: w(:) 3537 integer,optional,intent(in) :: mat_size, ija(2), ijz(2) 3538 3539 #ifdef HAVE_LINALG_SCALAPACK 3540 !Local variables ------------------------------ 3541 !scalars 3542 integer :: lwork, lrwork, info, nn 3543 !character(len=500) :: msg 3544 !arrays 3545 integer :: ija__(2), ijz__(2) 3546 real(dp),allocatable :: rwork_dp(:) 3547 complex(dp),allocatable :: work_dp(:) 3548 3549 !************************************************************************ 3550 3551 ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated") 3552 3553 nn = mat%sizeb_global(2); if (present(mat_size)) nn = mat_size 3554 ija__ = [1, 1]; if (present(ija)) ija__ = ija 3555 ijz__ = [1, 1]; if (present(ijz)) ijz__ = ijz 3556 3557 ! Get optimal size of workspace. 3558 lwork = - 1; lrwork = -1 3559 ABI_MALLOC(work_dp, (1)) 3560 ABI_MALLOC(rwork_dp, (1)) 3561 3562 !call pzheev(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, info) 3563 3564 call PZHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, & 3565 w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_dp, lwork, rwork_dp, lrwork, info) 3566 ABI_CHECK(info == 0, sjoin("Error in the calculation of the workspace size, info:", itoa(info))) 3567 3568 lwork = NINT(real(work_dp(1))); lrwork= NINT(rwork_dp(1)) !*2 3569 ABI_FREE(work_dp) 3570 ABI_FREE(rwork_dp) 3571 3572 ! MG: Nov 23 2011. On my mac with the official scalapack package, rwork(1) is not large enough and causes a SIGFAULT. 3573 if (firstchar(jobz, ['V'])) then 3574 if (lrwork < 2*nn + 2*nn-2) lrwork = 2*nn + 2*nn-2 3575 else if (firstchar(jobz, ['N'])) then 3576 if (lrwork < 2*nn) lrwork = 2*nn 3577 end if 3578 !write(std_out,*)lwork,lrwork 3579 3580 ! Solve the problem. 3581 ABI_MALLOC(work_dp, (lwork)) 3582 ABI_MALLOC(rwork_dp, (lrwork)) 3583 3584 call PZHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, & 3585 w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_dp, lwork, rwork_dp, lrwork, info) 3586 ABI_CHECK(info == 0, sjoin("PZHEEV returned info:", itoa(info))) 3587 ABI_FREE(work_dp) 3588 ABI_FREE(rwork_dp) 3589 #endif 3590 3591 end subroutine slk_heev
m_slk/slk_hpd_invert [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_hpd_invert
FUNCTION
Compute the inverse of an Hermitian positive definite matrix.
INPUTS
uplo: global input = 'U': Upper triangle of sub( A ) is stored; = 'L': Lower triangle of sub( A ) is stored. [full]: If full PBLAS matrix is neeeded. Default: True
SIDE EFFECTS
mat= The object storing the local buffer, the array descriptor, the context, etc. On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( A ) to be factored. If UPLO = 'U', the leading N-by-N upper triangular part of sub( A ) contains the upper triangular part of the matrix, and its strictly lower triangular part is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of sub( A ) contains the lower triangular part of the distribu- ted matrix, and its strictly upper triangular part is not referenced. On exit, the local pieces of the upper or lower triangle of the (Hermitian) inverse of sub( A )
SOURCE
4387 subroutine slk_hpd_invert(mat, uplo, full) 4388 4389 !Arguments ------------------------------------ 4390 character(len=*),intent(in) :: uplo 4391 class(matrix_scalapack),intent(inout) :: mat 4392 logical,optional,intent(in) :: full 4393 4394 #ifdef HAVE_LINALG_SCALAPACK 4395 !Local variables ------------------------------ 4396 !scalars 4397 integer :: info, mm, il1, il2, iglob1, iglob2 4398 type(matrix_scalapack) :: work_mat 4399 logical :: full__ 4400 4401 !************************************************************************ 4402 4403 ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated") 4404 4405 ! ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite. 4406 ! A = U**H * U, if UPLO = 'U', or 4407 ! A = L * L**H, if UPLO = 'L', 4408 mm = mat%sizeb_global(1) 4409 call PZPOTRF(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info) 4410 ABI_CHECK(info == 0, sjoin("PZPOTRF returned info:", itoa(info))) 4411 4412 ! PZPOTRI computes the inverse of a complex Hermitian positive definite 4413 ! distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the 4414 ! Cholesky factorization sub( A ) = U**H*U or L*L**H computed by PZPOTRF. 4415 call PZPOTRI(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info) 4416 ABI_CHECK(info == 0, sjoin("PZPOTRI returned info:", itoa(info))) 4417 4418 full__ = .True.; if (present(full)) full__ = full 4419 if (full__) then 4420 ! Only the uplo part contains the inverse so we need to fill the other triangular part. 4421 ! 1) Fill the missing triangle with zeros and copy results to work_mat 4422 ! 2) Call pzgeadd to compute: sub(C) := beta*sub(C) + alpha*op(sub(A)) 4423 ! 3) Divide diagonal elements by two. 4424 4425 do il2=1,mat%sizeb_local(2) 4426 iglob2 = mat%loc2gcol(il2) 4427 do il1=1,mat%sizeb_local(1) 4428 iglob1 = mat%loc2grow(il1) 4429 if (uplo == "L" .and. iglob2 > iglob1) mat%buffer_cplx(il1, il2) = zero 4430 if (uplo == "U" .and. iglob2 < iglob1) mat%buffer_cplx(il1, il2) = zero 4431 end do 4432 end do 4433 4434 call mat%copy(work_mat, empty=.False.) 4435 4436 ! call pzgeadd(trans, m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc) 4437 ! sub(C) := beta*sub(C) + alpha*op(sub(A)) 4438 call pzgeadd("C", mm, mm, cone, work_mat%buffer_cplx, 1, 1, work_mat%descript%tab, & 4439 cone, mat%buffer_cplx, 1, 1, mat%descript%tab) 4440 call work_mat%free() 4441 4442 do il2=1,mat%sizeb_local(2) 4443 iglob2 = mat%loc2gcol(il2) 4444 do il1=1,mat%sizeb_local(1) 4445 iglob1 = mat%loc2grow(il1) 4446 if (iglob2 == iglob1) mat%buffer_cplx(il1, il2) = half * mat%buffer_cplx(il1, il2) 4447 end do 4448 end do 4449 end if ! full__ 4450 #endif 4451 4452 end subroutine slk_hpd_invert
m_slk/slk_invert [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_invert
FUNCTION
Compute the inverse of a complex matrix.
SIDE EFFECTS
mat In input, the matrix to invert. In output the matrix inverted and distributed among the nodes.
SOURCE
4265 subroutine slk_invert(mat) 4266 4267 !Arguments ------------------------------------ 4268 class(basemat_t),intent(inout) :: mat 4269 4270 #ifdef HAVE_LINALG_SCALAPACK 4271 !Local variables ------------------------------ 4272 !scalars 4273 integer :: lwork,info,ipiv_size,liwork 4274 !array 4275 integer,allocatable :: ipiv(:), iwork(:) 4276 complex(dp),allocatable :: work_dp(:) 4277 complex(sp),allocatable :: work_sp(:) 4278 4279 !************************************************************************ 4280 4281 ! IMPORTANT NOTE: PZGETRF requires square block decomposition i.e., MB_A = NB_A. 4282 if (mat%descript%tab(MB_) /= mat%descript%tab(NB_)) then 4283 ABI_ERROR(" PZGETRF requires square block decomposition i.e MB_A = NB_A.") 4284 end if 4285 4286 ipiv_size = my_locr(mat) + mat%descript%tab(MB_) 4287 ABI_MALLOC(ipiv, (ipiv_size)) 4288 4289 select type (mat) 4290 class is (matrix_scalapack) 4291 if (allocated(mat%buffer_cplx)) then 4292 ! P * L * U Factorization. 4293 call PZGETRF(mat%sizeb_global(1), mat%sizeb_global(2), mat%buffer_cplx, 1, 1, mat%descript%tab,ipiv, info) 4294 ABI_CHECK(info == 0, sjoin(" PZGETRF returned info:", itoa(info))) 4295 4296 ! Get optimal size of workspace for PZGETRI. 4297 lwork = -1; liwork = -1 4298 ABI_MALLOC(work_dp,(1)) 4299 ABI_MALLOC(iwork,(1)) 4300 4301 call PZGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_dp, lwork, iwork, liwork, info) 4302 ABI_CHECK(info == 0, "PZGETRI: Error while computing workspace size") 4303 4304 lwork = nint(real(work_dp(1))); liwork=iwork(1) 4305 ABI_FREE(work_dp) 4306 ABI_FREE(iwork) 4307 4308 ! Solve the problem. 4309 ABI_MALLOC(work_dp, (lwork)) 4310 ABI_MALLOC(iwork, (liwork)) 4311 4312 call PZGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_dp, lwork, iwork, liwork, info) 4313 ABI_CHECK(info == 0, sjoin("PZGETRI returned info:", itoa(info))) 4314 ABI_FREE(work_dp) 4315 4316 else if (allocated(mat%buffer_real)) then 4317 ABI_ERROR("Inversion for real matrices not coded!") 4318 end if 4319 4320 class is (slkmat_sp_t) 4321 if (allocated(mat%buffer_cplx)) then 4322 ! P * L * U Factorization. 4323 call PCGETRF(mat%sizeb_global(1), mat%sizeb_global(2), mat%buffer_cplx, 1, 1, mat%descript%tab,ipiv, info) 4324 ABI_CHECK(info == 0, sjoin(" PCGETRF returned info:", itoa(info))) 4325 4326 ! Get optimal size of workspace for PZGETRI. 4327 lwork = -1; liwork = -1 4328 ABI_MALLOC(work_sp,(1)) 4329 ABI_MALLOC(iwork,(1)) 4330 4331 call PCGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_sp, lwork, iwork, liwork, info) 4332 ABI_CHECK(info == 0, "PZGETRI: Error while computing workspace size") 4333 4334 lwork = nint(real(work_sp(1))); liwork=iwork(1) 4335 ABI_FREE(work_sp) 4336 ABI_FREE(iwork) 4337 4338 ! Solve the problem. 4339 ABI_MALLOC(work_sp, (lwork)) 4340 ABI_MALLOC(iwork, (liwork)) 4341 4342 call PCGETRI(mat%sizeb_global(1), mat%buffer_cplx, 1, 1, mat%descript%tab, ipiv, work_sp, lwork, iwork, liwork, info) 4343 ABI_CHECK(info == 0, sjoin("PZGETRI returned info:", itoa(info))) 4344 ABI_FREE(work_sp) 4345 4346 else if (allocated(mat%buffer_real)) then 4347 ABI_ERROR("Inversion for real matrices not coded!") 4348 end if 4349 4350 class default 4351 ABI_ERROR("Wrong class") 4352 end select 4353 4354 ABI_FREE(iwork) 4355 ABI_FREE(ipiv) 4356 #endif 4357 4358 end subroutine slk_invert
m_slk/slk_matrix_from_global_dpc_1Dp [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_from_global_dpc_1Dp
FUNCTION
Routine to fill a complex SCALAPACK matrix with respect to a global matrix. target: double precision complex matrix in packed form.
INPUTS
glob_pmat(n*(n+1)/2)=One-dimensional array containing the global matrix A packed columnwise in a linear array. The j-th column of A is stored in the array glob_pmat as follows: if uplo = "U", glob_pmat(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; if uplo = "L", glob_pmat(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. where n is the number of rows or columns in the global matrix. uplo=String specifying whether only the upper or lower triangular part of the global matrix is used: = "U": Upper triangular = "L": Lower triangular
SIDE EFFECTS
mat<matrix_scalapack>=The distributed matrix. %buffer_cplx=Local buffer containg the value this node is dealing with.
SOURCE
2244 subroutine slk_matrix_from_global_dpc_1Dp(mat,uplo,glob_pmat) 2245 2246 !Arguments ------------------------------------ 2247 !scalars 2248 class(matrix_scalapack),intent(inout) :: mat 2249 character(len=*),intent(in) :: uplo 2250 !array 2251 complex(dpc),intent(in) :: glob_pmat(:) 2252 2253 !Local variables------------------------------- 2254 integer :: ii,jj,iglob,jglob,ind,n 2255 real(dp) :: szm 2256 2257 !************************************************************************ 2258 2259 ABI_CHECK(allocated(mat%buffer_cplx), "%buffer_cplx not allocated") 2260 2261 szm = SIZE(glob_pmat) 2262 n = NINT( (-1 + SQRT(one+8*szm) )*half ) 2263 if (n*(n+1)/2 /= SIZE(glob_pmat)) then 2264 ABI_ERROR("Buggy compiler") 2265 end if 2266 2267 select case (uplo(1:1)) 2268 2269 case ("U", "u") 2270 ! Only the upper triangle of the global matrix is used. 2271 do jj=1,mat%sizeb_local(2) 2272 do ii=1,mat%sizeb_local(1) 2273 call mat%loc2glob(ii, jj, iglob, jglob) 2274 2275 if (jglob>=iglob) then 2276 ind = iglob + jglob*(jglob-1)/2 2277 mat%buffer_cplx(ii,jj) = glob_pmat(ind) 2278 else 2279 ind = jglob + iglob*(iglob-1)/2 2280 mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) ) 2281 end if 2282 2283 end do 2284 end do 2285 2286 case ("L", "l") 2287 ! Only the lower triangle of the global matrix is used. 2288 do jj=1,mat%sizeb_local(2) 2289 do ii=1,mat%sizeb_local(1) 2290 call mat%loc2glob(ii, jj, iglob, jglob) 2291 2292 if (jglob<=iglob) then 2293 ind = iglob + (jglob-1)*(2*n-jglob)/2 2294 mat%buffer_cplx(ii,jj) = glob_pmat(ind) 2295 else 2296 ind = jglob + (iglob-1)*(2*n-iglob)/2 2297 mat%buffer_cplx(ii,jj) = DCONJG( glob_pmat(ind) ) 2298 end if 2299 2300 end do 2301 end do 2302 2303 case default 2304 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 2305 end select 2306 2307 end subroutine slk_matrix_from_global_dpc_1Dp
m_slk/slk_matrix_from_global_dpc_2D [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_from_global_dpc_2D
FUNCTION
Routine to fill a complex SCALAPACK matrix with respect to a global matrix. target: Two-dimensional double precision complex matrix
INPUTS
glob_mat=Two-dimensional array containing the global matrix. uplo=String specifying whether only the upper or lower triangular part of the global matrix is used: = "U": Upper triangular = "L": Lower triangular = "A": Full matrix (used for general complex matrices)
SIDE EFFECTS
mat<matrix_scalapack>=The distributed matrix. %buffer_cplx=Local buffer containg the value this node is dealing with.
SOURCE
2158 subroutine slk_matrix_from_global_dpc_2D(mat, uplo, glob_mat) 2159 2160 !Arguments ------------------------------------ 2161 !scalars 2162 class(matrix_scalapack),intent(inout) :: mat 2163 character(len=*),intent(in) :: uplo 2164 !array 2165 complex(dpc),intent(in) :: glob_mat(:,:) 2166 2167 !Local variables------------------------------- 2168 integer :: ii, jj, iglob, jglob 2169 2170 !************************************************************************ 2171 2172 ABI_CHECK(allocated(mat%buffer_cplx), "%buffer_cplx not allocated") 2173 2174 select case (uplo(1:1)) 2175 2176 case ("A", "a") 2177 ! Full global matrix is used. 2178 do jj=1,mat%sizeb_local(2) 2179 do ii=1,mat%sizeb_local(1) 2180 call mat%loc2glob(ii, jj, iglob, jglob) 2181 mat%buffer_cplx(ii,jj) = glob_mat(iglob,jglob) 2182 end do 2183 end do 2184 2185 case ("U", "u") 2186 ! Only the upper triangle of the global matrix is used. 2187 do jj=1,mat%sizeb_local(2) 2188 do ii=1,mat%sizeb_local(1) 2189 call mat%loc2glob(ii, jj, iglob, jglob) 2190 if (jglob>=iglob) then 2191 mat%buffer_cplx(ii,jj) = glob_mat(iglob,jglob) 2192 else 2193 mat%buffer_cplx(ii,jj) = DCONJG(glob_mat(jglob,iglob)) 2194 end if 2195 end do 2196 end do 2197 2198 case ("L", "l") 2199 ! Only the lower triangle of the global matrix is used. 2200 do jj=1,mat%sizeb_local(2) 2201 do ii=1,mat%sizeb_local(1) 2202 call mat%loc2glob(ii, jj, iglob, jglob) 2203 if (jglob<=iglob) then 2204 mat%buffer_cplx(ii,jj) = glob_mat(iglob,jglob) 2205 else 2206 mat%buffer_cplx(ii,jj) = DCONJG(glob_mat(jglob,iglob)) 2207 end if 2208 end do 2209 end do 2210 2211 case default 2212 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 2213 end select 2214 2215 end subroutine slk_matrix_from_global_dpc_2D
m_slk/slk_matrix_loc2gcol [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_loc2gcol
FUNCTION
Determine the global column index of an element from the local index
INPUTS
matrix= the matrix to process. jloc= local column index.
SOURCE
1696 integer pure function slk_matrix_loc2gcol(matrix, jloc) result(jglob) 1697 1698 !Arguments ------------------------------------ 1699 class(basemat_t),intent(in) :: matrix 1700 integer, intent(in) :: jloc 1701 1702 ! ********************************************************************* 1703 1704 jglob = loc_glob__(matrix, matrix%processor, jloc, 2) 1705 1706 end function slk_matrix_loc2gcol
m_slk/slk_matrix_loc2glob [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_loc2glob
FUNCTION
Determine the global indices of an element from its local indices.
INPUTS
matrix= the matrix to process. iloc= local row index. jloc= local column index.
OUTPUT
i= row in the matrix j= column in the matrix
SOURCE
1642 pure subroutine slk_matrix_loc2glob(matrix, iloc, jloc, i, j) 1643 1644 !Arguments ------------------------------------ 1645 class(basemat_t),intent(in) :: matrix 1646 integer, intent(in) :: iloc,jloc 1647 integer, intent(out) :: i,j 1648 1649 ! ********************************************************************* 1650 1651 i = loc_glob__(matrix, matrix%processor, iloc, 1) 1652 j = loc_glob__(matrix, matrix%processor, jloc, 2) 1653 1654 end subroutine slk_matrix_loc2glob
m_slk/slk_matrix_loc2grow [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_loc2grow
FUNCTION
Determine the global row index from the local index
INPUTS
matrix= the matrix to process. iloc= local row index.
SOURCE
1670 integer pure function slk_matrix_loc2grow(matrix, iloc) result(iglob) 1671 1672 !Arguments ------------------------------------ 1673 class(basemat_t),intent(in) :: matrix 1674 integer, intent(in) :: iloc 1675 1676 ! ********************************************************************* 1677 1678 iglob = loc_glob__(matrix, matrix%processor, iloc, 1) 1679 1680 end function slk_matrix_loc2grow
m_slk/slk_matrix_to_global_dpc_2D [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_matrix_to_global_dpc_2D
FUNCTION
Fill a global matrix with respect to a SCALAPACK matrix. target: Two-dimensional double precision complex matrix.
INPUTS
Slk_mat<matrix_scalapack>=The distributed matrix. uplo=String specifying whether the upper or lower triangular part of the global matrix has to be filled: = "U": Upper triangular = "L": Lower triangular = "A": Full matrix is filled (used for general complex matrices)
SIDE EFFECTS
glob_mat=The global matrix where the entries owned by this processors have been overwritten. Note that the remaing entries not treated by this node are not changed.
SOURCE
2333 subroutine slk_matrix_to_global_dpc_2D(mat, uplo, glob_mat) 2334 2335 !Arguments ------------------------------------ 2336 !scalaras 2337 class(matrix_scalapack),intent(in) :: mat 2338 character(len=*),intent(in) :: uplo 2339 !arrays 2340 complex(dpc),intent(inout) :: glob_mat(:,:) 2341 2342 !Local variables------------------------------- 2343 integer :: ii,jj,iglob,jglob 2344 2345 !************************************************************************ 2346 2347 select case (uplo(1:1)) 2348 2349 case ("A", "a") 2350 ! Full global matrix has to be filled. 2351 do jj=1,mat%sizeb_local(2) 2352 do ii=1,mat%sizeb_local(1) 2353 call mat%loc2glob(ii, jj, iglob, jglob) 2354 glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj) 2355 end do 2356 end do 2357 2358 case ("U", "u") 2359 ! Only the upper triangle of the global matrix is filled. 2360 do jj=1,mat%sizeb_local(2) 2361 do ii=1,mat%sizeb_local(1) 2362 call mat%loc2glob(ii, jj, iglob, jglob) 2363 if (jglob>=iglob) glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj) 2364 end do 2365 end do 2366 2367 case ("L", "l") 2368 ! Only the lower triangle of the global matrix is filled. 2369 do jj=1,mat%sizeb_local(2) 2370 do ii=1,mat%sizeb_local(1) 2371 call mat%loc2glob(ii, jj, iglob, jglob) 2372 if (jglob<=iglob) glob_mat(iglob,jglob) = mat%buffer_cplx(ii,jj) 2373 end do 2374 end do 2375 2376 case default 2377 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 2378 end select 2379 2380 end subroutine slk_matrix_to_global_dpc_2D
m_slk/slk_pgemm_dp [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_pgemm_dp
FUNCTION
Extended matrix * matrix product: C := alpha*A*B + beta*C For a simple matrix vector product, one can simply pass alpha = cone and beta = czero
INPUTS
matrix1= first ScaLAPACK matrix (matrix A) matrix2= second ScaLAPACK matrix (matrix B) alpha= scalar multiplicator for the A*B product beta= scalar multiplicator for the C matrix [ija, ijb, ijc]: (global). The row and column indices in the distributed matrix A/B/C indicating the first row and the first column of the submatrix, respectively. Default [1, 1]
OUTPUT
results= ScaLAPACK matrix coming out of the operation
NOTES
The ESLL manual says that "matrices matrix1 and matrix2 must have no common elements otherwise, results are unpredictable." However the official scaLAPACK documentation does not report this (severe) limitation.
SOURCE
2513 subroutine slk_pgemm_dp(transa, transb, matrix1, alpha, matrix2, beta, results, & 2514 ija, ijb, ijc) ! optional 2515 2516 !Arguments ------------------------------------ 2517 character(len=1),intent(in) :: transa, transb 2518 class(matrix_scalapack),intent(in) :: matrix1, matrix2 2519 class(matrix_scalapack),intent(inout) :: results 2520 complex(dpc),intent(in) :: alpha, beta 2521 integer,optional,intent(in) :: ija(2), ijb(2), ijc(2) 2522 2523 !Local variables------------------------------- 2524 integer :: mm, nn, kk, ija__(2), ijb__(2), ijc__(2) 2525 2526 !************************************************************************ 2527 2528 ija__ = [1, 1]; if (present(ija)) ija__ = ija 2529 ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb 2530 ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc 2531 2532 mm = matrix1%sizeb_global(1) 2533 nn = matrix2%sizeb_global(2) 2534 kk = matrix1%sizeb_global(2) 2535 2536 if (toupper(transa) /= 'N') then 2537 mm = matrix1%sizeb_global(2) 2538 kk = matrix1%sizeb_global(1) 2539 end if 2540 if (toupper(transb) /= 'N') nn = matrix2%sizeb_global(1) 2541 2542 #ifdef HAVE_LINALG_SCALAPACK 2543 ! pzgemm(transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, ic, jc, descc) 2544 if (matrix1%istwf_k /= 2) then 2545 call PZGEMM(transa, transb, mm, nn, kk, alpha, & 2546 matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, & 2547 matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, & 2548 beta, results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab) 2549 else 2550 call PDGEMM(transa, transb, mm, nn, kk, real(alpha, kind=dp), & 2551 matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, & 2552 matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, & 2553 real(beta, kind=dp), results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab) 2554 end if 2555 #endif 2556 2557 end subroutine slk_pgemm_dp
m_slk/slk_pgemm_sp [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_pgemm_sp
FUNCTION
Extended matrix * matrix product: C := alpha*A*B + beta*C For a simple matrix vector product, one can simply pass alpha = cone and beta = czero
INPUTS
matrix1= first ScaLAPACK matrix (matrix A) matrix2= second ScaLAPACK matrix (matrix B) alpha= scalar multiplicator for the A*B product beta= scalar multiplicator for the C matrix [ija, ijb, ijc]: (global). The row and column indices in the distributed matrix A/B/C indicating the first row and the first column of the submatrix, respectively. Default [1, 1]
OUTPUT
results= ScaLAPACK matrix coming out of the operation
NOTES
The ESLL manual says that "matrices matrix1 and matrix2 must have no common elements otherwise, results are unpredictable." However the official scaLAPACK documentation does not report this (severe) limitation.
SOURCE
2588 subroutine slk_pgemm_sp(transa, transb, matrix1, alpha, matrix2, beta, results, & 2589 ija, ijb, ijc) ! optional 2590 2591 !Arguments ------------------------------------ 2592 character(len=1),intent(in) :: transa, transb 2593 class(slkmat_sp_t),intent(in) :: matrix1, matrix2 2594 class(slkmat_sp_t),intent(inout) :: results 2595 complex(sp),intent(in) :: alpha, beta 2596 integer,optional,intent(in) :: ija(2), ijb(2), ijc(2) 2597 2598 !Local variables------------------------------- 2599 integer :: mm, nn, kk, ija__(2), ijb__(2), ijc__(2) 2600 2601 !************************************************************************ 2602 2603 ija__ = [1, 1]; if (present(ija)) ija__ = ija 2604 ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb 2605 ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc 2606 2607 mm = matrix1%sizeb_global(1) 2608 nn = matrix2%sizeb_global(2) 2609 kk = matrix1%sizeb_global(2) 2610 2611 if (toupper(transa) /= 'N') then 2612 mm = matrix1%sizeb_global(2) 2613 kk = matrix1%sizeb_global(1) 2614 end if 2615 if (toupper(transb) /= 'N') nn = matrix2%sizeb_global(1) 2616 2617 #ifdef HAVE_LINALG_SCALAPACK 2618 ! pzgemm(transa, transb, m, n, k, alpha, a, ia, ja, desca, b, ib, jb, descb, beta, c, ic, jc, descc) 2619 if (matrix1%istwf_k /= 2) then 2620 call PCGEMM(transa, transb, mm, nn, kk, alpha, & 2621 matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, & 2622 matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, & 2623 beta, results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab) 2624 else 2625 call PSGEMM(transa, transb, mm, nn, kk, real(alpha, kind=sp), & 2626 matrix1%buffer_cplx, ija__(1), ija__(2), matrix1%descript%tab, & 2627 matrix2%buffer_cplx, ijb__(1), ijb__(2), matrix2%descript%tab, & 2628 real(beta, kind=sp), results%buffer_cplx, ijc__(1), ijc__(2), results%descript%tab) 2629 end if 2630 #endif 2631 2632 end subroutine slk_pgemm_sp
m_slk/slk_ptrans [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_ptrans
FUNCTION
Transposes a matrix sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an n by m submatrix.
INPUTS
[ija(2)]: (global) The row and column indices in the distributed matrix in_mat indicating the first row and the first column of the submatrix sub(A), respectively. [ijc(2)]: (global) The row and column indices in the distributed matrix out_mat indicating the first row and the first column of the submatrix sub(C), respectively. [free]: True to deallocate in_mat. Default: False
SOURCE
4577 subroutine slk_ptrans(in_mat, trans, out_mat, & 4578 out_gshape, ija, ijc, size_blocs, alpha, beta, free) ! optional 4579 4580 !Arguments ------------------------------------ 4581 class(matrix_scalapack),intent(inout) :: in_mat 4582 character(len=1),intent(in) :: trans 4583 class(matrix_scalapack),intent(inout) :: out_mat 4584 integer,optional,intent(in) :: out_gshape(2), size_blocs(2), ija(2), ijc(2) 4585 complex(dp),optional,intent(in) :: alpha, beta 4586 logical,optional,intent(in) :: free 4587 4588 !Local variables------------------------------- 4589 integer :: sb, mm, nn, size_blocs__(2) 4590 real(dp) :: ralpha__, rbeta__ 4591 integer :: ija__(2), ijc__(2) 4592 complex(dp) :: calpha__, cbeta__ 4593 4594 ! ************************************************************************* 4595 4596 ija__ = [1, 1]; if (present(ija)) ija__ = ija 4597 ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc 4598 4599 ! transposed output (sub)matrix has shape (nn, mm) 4600 if (present(out_gshape)) then 4601 nn = out_gshape(1) 4602 mm = out_gshape(2) 4603 else 4604 nn = in_mat%sizeb_global(2) 4605 mm = in_mat%sizeb_global(1) 4606 end if 4607 4608 if (present(size_blocs)) then 4609 size_blocs__ = size_blocs 4610 else 4611 ! FIXME: This can cause problems if I start to use round-robin distribution in GWR!!!!! 4612 size_blocs__(1) = in_mat%sizeb_global(2) 4613 sb = in_mat%sizeb_global(1) / in_mat%processor%grid%dims(2) 4614 if (mod(in_mat%sizeb_global(1), in_mat%processor%grid%dims(2)) /= 0) sb = sb + 1 4615 size_blocs__(2) = sb 4616 !size_blocs__(2) = in_mat%sizeb_blocs(1); size_blocs__(1) = in_mat%sizeb_blocs(2) 4617 end if 4618 4619 call out_mat%init(nn, mm, in_mat%processor, in_mat%istwf_k, size_blocs=size_blocs__) 4620 4621 ! prototype: call pdtran(m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc) 4622 4623 if (allocated(in_mat%buffer_cplx)) then 4624 #ifdef HAVE_LINALG_SCALAPACK 4625 select case (trans) 4626 case ("N") 4627 ! sub(C) := beta*sub(C) + alpha*sub(A)', 4628 calpha__ = cone; if (present(alpha)) calpha__ = alpha 4629 cbeta__ = czero; if (present(beta)) cbeta__ = beta 4630 call pztranu(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), & 4631 in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab) 4632 4633 case ("C") 4634 ! sub(C) := beta * sub(C) + alpha * conjg(sub(A)') 4635 calpha__ = cone; if (present(alpha)) calpha__ = alpha 4636 cbeta__ = czero; if (present(beta)) cbeta__ = beta 4637 call pztranc(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), & 4638 in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab) 4639 4640 case default 4641 ABI_ERROR(sjoin("Invalid value for trans:", trans)) 4642 end select 4643 4644 else if (allocated(in_mat%buffer_real)) then 4645 ralpha__ = one; if (present(alpha)) ralpha__ = real(alpha) 4646 rbeta__ = zero; if (present(beta)) rbeta__ = real(beta) 4647 call pdtran(nn, mm, ralpha__, in_mat%buffer_real, ija__(1), ija__(2), & 4648 in_mat%descript%tab, rbeta__, out_mat%buffer_real, ijc__(1), ijc__(2), out_mat%descript%tab) 4649 #endif 4650 else 4651 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 4652 end if 4653 4654 if (present(free)) then 4655 if (free) call in_mat%free() 4656 end if 4657 4658 end subroutine slk_ptrans
m_slk/slk_pzheevx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_pzheevx
FUNCTION
slk_pzheevx computes selected eigenvalues and, optionally, eigenvectors of a complex Hermitian matrix A. A * X = lambda * X
INPUTS
mat=ScaLAPACK matrix (matrix A) vec=The distributed eigenvectors X. Not referenced if JOBZ="N" JOBZ (global input) CHARACTER*1 Specifies whether or not to compute the eigenvectors: = "N": Compute eigenvalues only. = "V": Compute eigenvalues and eigenvectors. RANGE (global input) CHARACTER*1 = "A": all eigenvalues will be found. = "V": all eigenvalues in the interval [VL,VU] will be found. = "I": the IL-th through IU-th eigenvalues will be found. UPLO (global input) CHARACTER*1 Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored: = "U": Upper triangular = "L": Lower triangular VL (global input) DOUBLE PRECISION If RANGE="V",the lower bound of the interval to be searched for eigenvalues. Not referenced if RANGE = "A" or "I" VU (global input) DOUBLE PRECISION If RANGE="V", the upper bound of the interval to be searched for eigenvalues. Not referenced if RANGE = "A" or "I". IL (global input) integer If RANGE="I", the index (from smallest to largest) of the smallest eigenvalue to be returned. IL >= 1. Not referenced if RANGE = "A" or "V". IU (global input) integer If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned. min(IL,N) <= IU <= N. Not referenced if RANGE = "A" or "V" ABSTOL (global input) DOUBLE PRECISION If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors. The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*norm(T) will be used in its place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*PDLAMCH("S") not zero. If this routine returns with ((MOD(INFO,2).NE.0) .OR. (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or eigenvectors did not converge, try setting ABSTOL to 2*PDLAMCH("S").
OUTPUT
mene_found= (global output) Total number of eigenvalues found. 0 <= mene_found <= N. eigen(N)= (global output) Eigenvalues of A where N is the dimension of M On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.
SIDE EFFECTS
If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors. Slk%mat%buffer_cplx is destroyed when the routine returns
SOURCE
3763 subroutine slk_pzheevx(mat, jobz, range, uplo, vl, vu, il, iu, abstol, vec, mene_found, eigen) 3764 3765 !Arguments ------------------------------------ 3766 class(matrix_scalapack),intent(inout) :: mat 3767 integer,intent(in) :: il, iu 3768 integer,intent(out) :: mene_found 3769 real(dp),intent(in) :: abstol,vl,vu 3770 character(len=*),intent(in) :: jobz,range,uplo 3771 class(matrix_scalapack),intent(inout) :: vec 3772 !arrays 3773 real(dp),intent(out) :: eigen(*) 3774 3775 #ifdef HAVE_LINALG_SCALAPACK 3776 !Local variables------------------------------- 3777 !scalars 3778 integer :: lwork,lrwork,liwork,info,nvec_calc !,ierr 3779 real(dp) :: orfac 3780 character(len=500) :: msg 3781 !arrays 3782 !integer :: ibuff(3),max_ibuff(3) 3783 integer,allocatable :: iwork(:),iclustr(:),ifail(:) 3784 real(dp),allocatable :: rwork(:),gap(:) 3785 complex(dpc),allocatable :: work(:) 3786 3787 !************************************************************************ 3788 3789 ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx is not allocated!") 3790 3791 ! abstol = PDLAMCH(vec%processor%grid%ictxt,'U') 3792 3793 orfac = -one ! Only for eigenvectors: use default value 10d-3. 3794 ! Vectors within orfac*norm(A) will be reorthogonalized. 3795 3796 ! Allocate the arrays for the results of the calculation 3797 ABI_MALLOC(gap, (mat%processor%grid%dims(1) * mat%processor%grid%dims(2))) 3798 3799 if (firstchar(jobz, ["V","v"])) then 3800 ABI_MALLOC(ifail, (mat%sizeb_global(2))) 3801 ABI_MALLOC(iclustr, (2*mat%processor%grid%dims(1) * mat%processor%grid%dims(2))) 3802 end if 3803 3804 ! Get the optimal size of the work arrays. 3805 lwork=-1; lrwork=-1; liwork=-1 3806 ABI_MALLOC(work, (1)) 3807 ABI_MALLOC(iwork, (1)) 3808 ABI_MALLOC(rwork, (3)) 3809 ! This is clearly seen in the source in which rwork(1:3) is accessed 3810 ! in the calculation of the workspace size. 3811 3812 ! prototype 3813 !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, 3814 ! orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info) 3815 3816 call PZHEEVX(jobz,range,uplo, mat%sizeb_global(2),mat%buffer_cplx,1,1,mat%descript%tab,& 3817 vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,& 3818 vec%buffer_cplx,1,1,vec%descript%tab,& 3819 work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info) 3820 3821 ABI_CHECK(info == 0, sjoin("Problem to compute workspace, info:", itoa(info))) 3822 3823 lwork = NINT(real(work(1)),kind=dp) 3824 lrwork = NINT(rwork(1)) 3825 liwork = iwork(1) 3826 3827 ABI_FREE(work) 3828 ABI_FREE(rwork) 3829 ABI_FREE(iwork) 3830 ! 3831 ! FROM THE SCALAPACK MAN PAGE: 3832 ! The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too 3833 ! small. If you want to guarantee orthogonality (at the cost of potentially poor performance) you should 3834 ! add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is the number of eigenvalues in the 3835 ! largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | 3836 ! W(J+1) <= W(J) + ORFAC*2*norm(A) }. 3837 3838 if (firstchar(jobz, ["V","v"])) then 3839 lrwork = INT( lrwork + mat%sizeb_global(2) *(mat%sizeb_global(2)-1) ) 3840 end if 3841 3842 ! ibuff(1) = lwork 3843 ! ibuff(2) = lrwork !INT(lrwork + mat%sizeb_global(2) *(mat%sizeb_global(2)-1) 3844 ! ibuff(3) = liwork 3845 3846 ! Get the maximum of sizes of the work arrays processor%comm 3847 ! call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_integer,MPI_MAX,comm,ierr) 3848 3849 ! lwork = max_ibuff(1) 3850 ! lrwork = max_ibuff(2) 3851 ! liwork = max_ibuff(3) 3852 3853 ABI_MALLOC(work , (lwork )) 3854 ABI_MALLOC(rwork, (lrwork)) 3855 ABI_MALLOC(iwork, (liwork)) 3856 3857 ! prototype 3858 !call pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, 3859 ! orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info) 3860 3861 ! Call the scaLAPACK routine. 3862 ! write(std_out,*) 'I am using PZHEEVX' 3863 call PZHEEVX(jobz,range,uplo, mat%sizeb_global(2),mat%buffer_cplx,1,1,mat%descript%tab,& 3864 vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,& 3865 vec%buffer_cplx,1,1,vec%descript%tab,& 3866 work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info) 3867 3868 ! TODO 3869 !call pxheevx_info_to_msg(info, jobz, nvec_calc, mene_found, msg) 3870 !ABI_CHECK(info == 0, msg) 3871 3872 ! Handle possible error. 3873 if (info < 0) then 3874 write(msg,'(a,i0,a)')" The ", -info, "-th argument of P?HEEVX had an illegal value." 3875 if (info == -25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed" 3876 ABI_ERROR(msg) 3877 end if 3878 3879 if (info > 0) then 3880 write(msg,'(a,i0)') " P?HEEVX returned info: ",info 3881 call wrtout(std_out, msg) 3882 if (MOD(info, 2) /= 0)then 3883 write(msg,'(3a)')& 3884 " One or more eigenvectors failed to converge. ",ch10,& 3885 " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')" 3886 call wrtout(std_out, msg) 3887 end if 3888 if (MOD(info / 2, 2) /= 0) then 3889 write(msg,'(5a)')& 3890 " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,& 3891 " could not be reorthogonalized because of insufficient workspace. ",ch10,& 3892 " The indices of the clusters are stored in the array ICLUSTR." 3893 call wrtout(std_out, msg) 3894 end if 3895 if (MOD(info / 4, 2) /= 0) then 3896 write(msg,'(3a)')" Space limit prevented PZHEEVX from computing all of the eigenvectors between VL and VU. ",ch10,& 3897 " The number of eigenvectors computed is returned in NZ." 3898 call wrtout(std_out, msg) 3899 end if 3900 if (MOD(info / 8, 2) /= 0) then 3901 call wrtout(std_out, "PZSTEBZ failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')") 3902 end if 3903 ABI_ERROR("Cannot continue") 3904 end if 3905 3906 ! Check the number of eigenvalues found wrt to the number of vectors calculated. 3907 if ( firstchar(jobz, ['V','v']) .and. mene_found /= nvec_calc) then 3908 write(msg,'(5a)')& 3909 " The user supplied insufficient space and PZHEEVX is not able to detect this before beginning computation. ",ch10,& 3910 " To get all the eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,& 3911 " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. " 3912 !ierr = huge(1) 3913 ABI_ERROR(msg) 3914 end if 3915 3916 ABI_FREE(work) 3917 ABI_FREE(rwork) 3918 ABI_FREE(iwork) 3919 ABI_FREE(gap) 3920 3921 ABI_SFREE(ifail) 3922 ABI_SFREE(iclustr) 3923 #endif 3924 3925 end subroutine slk_pzheevx
m_slk/slk_pzhegvx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_pzhegvx
FUNCTION
slk_pzhegvx provides an object-oriented interface to the ScaLAPACK routine PZHEGVX that computes selected eigenvalues and, optionally, eigenvectors of a complex generalized Hermitian-definite eigenproblem, of the form sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or sub( B )*sub( A )*x=(lambda)*x. Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed to be Hermitian positive definite.
INPUTS
Slk_matA<matrix_scalapack>=ScaLAPACK matrix (matrix A) Slk_matB<matrix_scalapack>=ScaLAPACK matrix (matrix B) Slk_vec<matrix_scalapack>=The distributed eigenvectors X. Not referenced if JOBZ="N" IBtype (global input) integer Specifies the problem type to be solved: = 1: sub( A )*x = (lambda)*sub( B )*x = 2: sub( A )*sub( B )*x = (lambda)*x = 3: sub( B )*sub( A )*x = (lambda)*x JOBZ (global input) CHARACTER*1 Specifies whether or not to compute the eigenvectors: = "N": Compute eigenvalues only. = "V": Compute eigenvalues and eigenvectors. RANGE (global input) CHARACTER*1 = "A": all eigenvalues will be found. = "V": all eigenvalues in the interval [VL,VU] will be found. = "I": the IL-th through IU-th eigenvalues will be found. UPLO (global input) CHARACTER*1 Specifies whether the upper or lower triangular part of the Hermitian matrix sub(A) and sub(B) is stored: = "U": Upper triangular = "L": Lower triangular VL (global input) DOUBLE PRECISION If RANGE="V",the lower bound of the interval to be searched for eigenvalues. Not referenced if RANGE = "A" or "I" VU (global input) DOUBLE PRECISION If RANGE="V", the upper bound of the interval to be searched for eigenvalues. Not referenced if RANGE = "A" or "I". IL (global input) integer If RANGE="I", the index (from smallest to largest) of the smallest eigenvalue to be returned. IL >= 1. Not referenced if RANGE = "A" or "V". IU (global input) integer If RANGE="I", the index (from smallest to largest) of the largest eigenvalue to be returned. min(IL,N) <= IU <= N. Not referenced if RANGE = "A" or "V" ABSTOL (global input) DOUBLE PRECISION If JOBZ="V", setting ABSTOL to PDLAMCH( CONTEXT, "U") yields the most orthogonal eigenvectors. The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to ABSTOL + EPS * max( |a|,|b| ) , where EPS is the machine precision. If ABSTOL is less than or equal to zero, then EPS*norm(T) will be used in its place, where norm(T) is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when ABSTOL is set to twice the underflow threshold 2*PDLAMCH("S") not zero. If this routine returns with ((MOD(INFO,2).NE.0) .OR. (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or eigenvectors did not converge, try setting ABSTOL to 2*PDLAMCH("S").
OUTPUT
mene_found= (global output) Total number of eigenvalues found. 0 <= mene_found <= N. eigen(N)= (global output) Eigenvalues of A where N is the dimension of M On normal exit, the first mene_found entries contain the selected eigenvalues in ascending order.
SIDE EFFECTS
Slk_vec<matrix_scalapack>: %buffer_cplx local output (global dimension (N,N) If JOBZ = 'V', then on normal exit the first M columns of Z contain the orthonormal eigenvectors of the matrix corresponding to the selected eigenvalues. If JOBZ = 'N', then Z is not referenced. Slk_matA<matrix_scalapack>: %buffer_cplx (local input/local output) complex(DPC) pointer into the local memory to an array of dimension (LLD_A, LOCc(JA+N-1)). On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U', the leading N-by-N upper triangular part of sub( A ) contains the upper triangular part of the matrix. If UPLO = 'L', the leading N-by-N lower triangular part of sub( A ) contains the lower triangular part of the matrix. On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains the distributed matrix Z of eigenvectors. The eigenvectors are normalized as follows: if IBtype = 1 or 2, Z**H*sub( B )*Z = I; if IBtype = 3, Z**H*inv( sub( B ) )*Z = I. If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') or the lower triangle (if UPLO='L') of sub( A ), including the diagonal, is destroyed. Slk_matB= %buffer_cplx (local input/local output) complex*(DPC) pointer into the local memory to an array of dimension (LLD_B, LOCc(JB+N-1)). On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U', the leading N-by-N upper triangular part of sub( B ) contains the upper triangular part of the matrix. If UPLO = 'L', the leading N-by-N lower triangular part of sub( B ) contains the lower triangular part of the matrix. On exit, if INFO <= N, the part of sub( B ) containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization sub( B ) = U**H*U or sub( B ) = L*L**H.
SOURCE
4050 subroutine slk_pzhegvx(Slk_matA, ibtype, jobz, range, uplo, Slk_matB, vl, vu, il, iu, abstol, Slk_vec, mene_found, eigen) 4051 4052 !Arguments ------------------------------------ 4053 class(matrix_scalapack),intent(inout) :: Slk_matA 4054 integer,intent(in) :: il,iu,ibtype 4055 integer,intent(out) :: mene_found 4056 real(dp),intent(in) :: abstol,vl,vu 4057 character(len=*),intent(in) :: jobz,range,uplo 4058 class(matrix_scalapack),intent(inout) :: Slk_matB 4059 class(matrix_scalapack),intent(inout) :: Slk_vec 4060 !arrays 4061 real(dp),intent(out) :: eigen(*) 4062 4063 #ifdef HAVE_LINALG_SCALAPACK 4064 !Local variables------------------------------- 4065 !scalars 4066 integer :: lwork,lrwork,liwork,info,nvec_calc !,ierr 4067 real(dp) :: orfac 4068 logical :: ltest 4069 character(len=500) :: msg 4070 !arrays 4071 !integer :: ibuff(3),max_ibuff(3) 4072 integer :: desca(DLEN_),descb(DLEN_),descz(DLEN_) 4073 integer,allocatable :: iwork(:),iclustr(:),ifail(:) 4074 real(dp),allocatable :: rwork(:),gap(:) 4075 complex(dpc),allocatable :: work(:) 4076 4077 !************************************************************************ 4078 4079 ABI_CHECK(allocated(Slk_matA%buffer_cplx), "buffer_cplx is not allocated!") 4080 4081 ! abstol = PDLAMCH(Slk_vecprocessor%grid%ictxt,'U') 4082 4083 orfac = -one ! Only for eigenvectors: use default value 10d-3. 4084 ! Vectors within orfac*norm(A) will be reorthogonalized. 4085 4086 ! ====================== 4087 ! Alignment requirements 4088 ! ====================== 4089 ! The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1), 4090 ! and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties, 4091 4092 desca = Slk_matA%descript%tab 4093 descb = Slk_matB%descript%tab 4094 if (firstchar(jobz, ["V", "v"])) then 4095 descz = Slk_vec%descript%tab 4096 else 4097 descz = Slk_matA%descript%tab 4098 end if 4099 4100 ltest = .TRUE. 4101 ltest = ltest .and. (DESCA(MB_) == DESCA(NB_)) 4102 !IA = IB = IZ 4103 !JA = IB = JZ 4104 ltest = ltest .and.ALL(DESCA(M_ ) == [DESCB(M_ ), DESCZ(M_ )]) 4105 ltest = ltest .and.ALL(DESCA(N_ ) == [DESCB(N_ ), DESCZ(N_ )]) 4106 ltest = ltest .and.ALL(DESCA(MB_ ) == [DESCB(MB_ ), DESCZ(MB_ )]) 4107 ltest = ltest .and.ALL(DESCA(NB_ ) == [DESCB(NB_ ), DESCZ(NB_ )]) 4108 ltest = ltest .and.ALL(DESCA(RSRC_) == [DESCB(RSRC_), DESCZ(RSRC_)]) 4109 ltest = ltest .and.ALL(DESCA(CSRC_) == [DESCB(CSRC_), DESCZ(CSRC_)]) 4110 !MOD( IA-1, DESCA( MB_ ) ) = 0 4111 !MOD( JA-1, DESCA( NB_ ) ) = 0 4112 !MOD( IB-1, DESCB( MB_ ) ) = 0 4113 !MOD( JB-1, DESCB( NB_ ) ) = 0 4114 4115 if (.not.ltest) then 4116 ABI_ERROR("Alignment requirements not satisfied, check the caller") 4117 end if 4118 4119 !Allocate the arrays for the results of the calculation 4120 ABI_MALLOC(gap, (Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2))) 4121 4122 if (firstchar(jobz, ["V","v"])) then 4123 ABI_MALLOC(ifail,(Slk_matA%sizeb_global(2))) 4124 ABI_MALLOC(iclustr,( 2*Slk_matA%processor%grid%dims(1) * Slk_matA%processor%grid%dims(2))) 4125 else 4126 ABI_MALLOC(ifail,(1)) 4127 end if 4128 4129 ! Get the optimal size of the work arrays. 4130 lwork=-1; lrwork=-1; liwork=-1 4131 ABI_MALLOC(work,(1)) 4132 ABI_MALLOC(iwork,(1)) 4133 ABI_MALLOC(rwork,(3)) 4134 ! This is clearly seen in the source in which rwork(1:3) is accessed 4135 ! in the calcuation of the workspace size. 4136 4137 call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,& 4138 Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,& 4139 vl,vu,il,iu,abstol,mene_found,nvec_calc,eigen,orfac,& 4140 Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,& 4141 work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info) 4142 4143 ABI_CHECK(info == 0, sjoin("Problem to compute workspace, info:", itoa(info))) 4144 4145 lwork = NINT(real(work(1)),kind=dp) 4146 lrwork = NINT(rwork(1)) 4147 liwork = iwork(1) 4148 4149 ABI_FREE(work) 4150 ABI_FREE(rwork) 4151 ABI_FREE(iwork) 4152 4153 !FROM THE SCALAPACK MAN PAGE: 4154 !The computed eigenvectors may not be orthogonal if the minimal workspace is supplied and ORFAC is too 4155 !small. If you want to guarantee orthogonality (at the cost of potentially poor performance) you should 4156 !add the following to LRWORK: (CLUSTERSIZE-1)*N where CLUSTERSIZE is the number of eigenvalues in the 4157 !largest cluster, where a cluster is defined as a set of close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) | 4158 !W(J+1) <= W(J) + ORFAC*2*norm(A) }. 4159 4160 if (firstchar(jobz, ["V","v"])) then 4161 lrwork = INT( lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1) ) 4162 end if 4163 4164 !ibuff(1) = lwork 4165 !ibuff(2) = lrwork !INT(lrwork + Slk_matA%sizeb_global(2) *(Slk_matA%sizeb_global(2)-1) 4166 !ibuff(3) = liwork 4167 4168 !Get the maximum of sizes of the work arrays processor%comm 4169 !call MPI_ALLREDUCE(ibuff,max_ibuff,3,MPI_integer,MPI_MAX,comm,ierr) 4170 4171 !lwork = max_ibuff(1) 4172 !lrwork = max_ibuff(2) 4173 !liwork = max_ibuff(3) 4174 4175 ABI_MALLOC(work , (lwork )) 4176 ABI_MALLOC(rwork, (lrwork)) 4177 ABI_MALLOC(iwork, (liwork)) 4178 4179 ! Call the scaLAPACK routine. 4180 ! write(std_out,*) 'I am using PZHEGVX' 4181 call pzhegvx(ibtype,jobz,range,uplo, Slk_matA%sizeb_global(2),Slk_matA%buffer_cplx,1,1,Slk_matA%descript%tab,& 4182 Slk_matB%buffer_cplx,1,1,Slk_matB%descript%tab,& 4183 vl,vu,il,iu,abstol,mene_found,nvec_calc, eigen,orfac,& 4184 Slk_vec%buffer_cplx,1,1,Slk_vec%descript%tab,& 4185 work,lwork,rwork,lrwork,iwork,liwork,ifail,iclustr,gap,info) 4186 4187 ! Handle the possible error. 4188 if (info < 0) then 4189 write(msg,'(a,i7,a)')" The ",-info,"-th argument of PZHEGVX had an illegal value." 4190 if (info==-25) msg = " LRWORK is too small to compute all the eigenvectors requested, no computation is performed" 4191 ABI_ERROR(msg) 4192 end if 4193 4194 if (info > 0) then 4195 write(msg,'(a,i0)') " PZHEGVX returned info: ",info 4196 call wrtout(std_out, msg) 4197 if (MOD(info,2)/=0)then 4198 write(msg,'(3a)')& 4199 " One or more eigenvectors failed to converge. ",ch10,& 4200 " Their indices are stored in IFAIL. Ensure ABSTOL=2.0*PDLAMCH('U')" 4201 call wrtout(std_out, msg) 4202 end if 4203 if (MOD(info / 2, 2) /= 0) then 4204 write(msg,'(5a)')& 4205 " Eigenvectors corresponding to one or more clusters of eigenvalues ",ch10,& 4206 " could not be reorthogonalized because of insufficient workspace. ",ch10,& 4207 " The indices of the clusters are stored in the array ICLUSTR." 4208 call wrtout(std_out, msg) 4209 end if 4210 if (MOD(info / 4, 2) /= 0) then 4211 write(msg,'(3a)')& 4212 " Space limit prevented PZHEGVX from computing all of the eigenvectors between VL and VU. ",ch10,& 4213 " The number of eigenvectors computed is returned in NZ." 4214 call wrtout(std_out, msg) 4215 end if 4216 if (MOD(info / 8, 2) /= 0) then 4217 msg = " PZSTEBZ failed to compute eigenvalues. Ensure ABSTOL=2.0*PDLAMCH('U')" 4218 call wrtout(std_out, msg) 4219 end if 4220 if (MOD(info / 16, 2) /= 0) then 4221 write(msg,'(3a)')& 4222 " B was not positive definite.",ch10,& 4223 " IFAIL(1) indicates the order of the smallest minor which is not positive definite." 4224 call wrtout(std_out, msg) 4225 end if 4226 ABI_ERROR("Cannot continue") 4227 end if 4228 4229 ! Check the number of eigenvalues found wrt to the number of vectors calculated. 4230 if ( firstchar(jobz, ['V','v']) .and. mene_found/=nvec_calc) then 4231 write(msg,'(5a)')& 4232 " The user supplied insufficient space and PZHEGVX is not able to detect this before beginning computation. ",ch10,& 4233 " To get all the eigenvectors requested, the user must supply both sufficient space to hold the ",ch10,& 4234 " eigenvectors in Z (M .LE. DESCZ(N_)) and sufficient workspace to compute them. " 4235 ABI_ERROR(msg) 4236 end if 4237 4238 ABI_FREE(work) 4239 ABI_FREE(rwork) 4240 ABI_FREE(iwork) 4241 ABI_FREE(gap) 4242 ABI_FREE(ifail) 4243 ABI_SFREE(iclustr) 4244 #endif 4245 4246 end subroutine slk_pzhegvx
m_slk/slk_read [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_read
FUNCTION
Routine to read a square scaLAPACK distributed matrix from an external file using MPI-IO.
INPUTS
uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk: = "U": Upper triangular is stored = "L": Lower triangular is stored = "A": Full matrix (used for general complex matrices) symtype=Symmetry type of the matrix stored on disk (used only if uplo = "L" or "A"). = "H" for Hermitian matrix = "S" for symmetric matrix. = "N" if matrix has no symmetry (not compatible with uplo="L" or uplo="U". is_fortran_file=.FALSE. is C stream is used. .TRUE. for writing Fortran binary files. [fname]= Mutually exclusive with mpi_fh. The name of the external file from which the matrix will be read. The file is open and closed inside the routine with MPI flags specified by flags. [mpi_fh]=File handler associated to the file (already open in the caller). Not compatible with fname. [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN. Default is MPI_MODE_RDONLY. Referenced only when fname is used.
SIDE EFFECTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer supposed to be allocated. %buffer_cplx=Local buffer containg the distributed matrix stored on the external file. If fname is present then the file is opened and closed inside the routine. Any exception is fatal. [offset]= input: Offset used to access the content of the file. Default is zero. output: New offset incremented with the byte size of the matrix that has been read (Fortran markers are included if is_fortran_file=.TRUE.)
TODO
- Generalize the implementation adding the reading of the real buffer. - This routine is not portable as this kind of access pattern is not supported by all MPI implementations E.g. with MPICH we have --- !ERROR src_file: m_slk.F90 src_line: 3780 mpi_rank: 1 message: | SET_VIEW Other I/O error , error stack: ADIO_Set_view(48): **iobadoverlap displacements of filetype must be in a monotonically nondecreasing order ... - This routine should be removed and replaced by hdf5 + mpi-io
SOURCE
5655 subroutine slk_read(Slk_mat,uplo,symtype,is_fortran_file,fname,mpi_fh,offset,flags) 5656 5657 !Arguments ------------------------------------ 5658 !scalars 5659 integer,optional,intent(in) :: flags,mpi_fh 5660 integer(XMPI_OFFSET_KIND),optional,intent(inout) :: offset 5661 character(len=*),optional,intent(in) :: fname 5662 character(len=*),intent(in) :: uplo,symtype 5663 logical,intent(in) :: is_fortran_file 5664 class(matrix_scalapack),intent(inout) :: Slk_mat 5665 5666 !Local variables ------------------------------ 5667 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI_IO 5668 !scalars 5669 integer :: nrows_glob,offset_err,slk_type,etype 5670 integer(XMPI_OFFSET_KIND) :: my_offset 5671 logical :: do_open 5672 integer :: comm,my_flags,my_fh,buffer_size,ierr,col_glob 5673 integer :: nfrec,bsize_elm,mpi_type_elm 5674 !complex(dpc) :: ctest 5675 logical,parameter :: check_frm=.TRUE. 5676 integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:) 5677 !arrays 5678 character(len=500) :: msg 5679 5680 !************************************************************************ 5681 5682 do_open = PRESENT(fname) 5683 if (do_open) then 5684 ABI_CHECK(.not.PRESENT(fname), "fname should not be present") 5685 else 5686 ABI_CHECK(PRESENT(mpi_fh), "mpi_fh should be present") 5687 end if 5688 5689 my_offset=0; if (PRESENT(offset)) my_offset=offset 5690 5691 ABI_CHECK(allocated(Slk_mat%buffer_cplx), "%buffer_cplx not allocated") 5692 if (firstchar(uplo, ["U","L"]) .and. Slk_mat%sizeb_global(1) /= Slk_mat%sizeb_global(2) ) then 5693 ABI_ERROR("rectangular matrices are not compatible with the specified uplo") 5694 end if 5695 5696 nrows_glob = Slk_mat%sizeb_global(1) 5697 5698 buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2)) 5699 5700 call wrtout(std_out, "slk_read: Using MPI-IO") 5701 5702 comm = Slk_mat%processor%comm 5703 5704 if (do_open) then ! Open the file. 5705 my_flags=MPI_MODE_RDONLY; if (PRESENT(flags)) my_flags = flags 5706 call MPI_FILE_OPEN(comm, fname, my_flags, MPI_INFO_NULL, my_fh, ierr) 5707 ABI_CHECK_MPI(ierr,"FILE_OPEN "//TRIM(fname)) 5708 else 5709 my_fh = mpi_fh 5710 end if 5711 5712 call slk_single_fview_read(Slk_mat,uplo,etype,slk_type,offset_err,is_fortran_file=is_fortran_file) 5713 5714 if (offset_err/=0) then 5715 write(msg,"(3a)")& 5716 "Global position index cannot be stored in standard Fortran integer ",ch10,& 5717 "scaLAPACK matrix cannot be read with a single MPI-IO call." 5718 ABI_ERROR(msg) 5719 end if 5720 5721 call MPI_FILE_SET_VIEW(my_fh, my_offset, etype, slk_type, 'native', MPI_INFO_NULL, ierr) 5722 ABI_CHECK_MPI(ierr,"SET_VIEW") 5723 5724 call MPI_FILE_READ_ALL(my_fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr) 5725 ABI_CHECK_MPI(ierr,"READ_ALL") 5726 5727 ! Symmetrize local buffer if uplo /= "All" 5728 call slk_symmetrize(Slk_mat, uplo, symtype) 5729 5730 !BEGINDEBUG 5731 !call MPI_FILE_READ_AT(mpi_fh,my_offset+xmpio_bsize_frm,ctest,1,MPI_DOUBLE_complex,MPI_STATUS_IGNORE,ierr) 5732 !write(std_out,*)"ctest",ctest 5733 !call MPI_FILE_READ_AT(mpi_fh,my_offset+2*xmpio_bsize_frm,ctest,1,MPI_DOUBLE_complex,MPI_STATUS_IGNORE,ierr) 5734 !write(std_out,*)"ctest",ctest 5735 !ENDDEBUG 5736 5737 !call print_arr(Slk_mat%buffer_cplx,max_r=10,max_c=10,unit=std_out) 5738 ! 5739 ! Close the file and release the MPI filetype. 5740 call MPI_type_FREE(slk_type,ierr) 5741 ABI_CHECK_MPI(ierr,"MPI_type_FREE") 5742 5743 call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm) 5744 5745 !It seems that personal call makes the code stuck 5746 !if (is_fortran_file .and. check_frm .and. Slk_mat%Processor%myproc==0) then ! Master checks the Fortran markers. 5747 if (is_fortran_file .and. check_frm) then ! Master checks the Fortran markers. 5748 call wrtout(std_out,"Checking Fortran record markers...", do_flush=.True.) 5749 nfrec = Slk_mat%sizeb_global(2) 5750 ABI_MALLOC(bsize_frecord,(nfrec)) 5751 if (firstchar(uplo, ["A"])) then 5752 bsize_frecord = Slk_mat%sizeb_global(1) * bsize_elm 5753 else if (firstchar(uplo, ["U"])) then 5754 bsize_frecord = (/(col_glob * bsize_elm, col_glob=1,nfrec)/) 5755 else if (firstchar(uplo, ["L"])) then 5756 bsize_frecord = (/(col_glob * bsize_elm, col_glob=nfrec,1,-1)/) 5757 else 5758 ABI_ERROR("Wrong uplo") 5759 end if 5760 call xmpio_check_frmarkers(my_fh,my_offset,xmpio_collective,nfrec,bsize_frecord,ierr) 5761 ABI_CHECK(ierr==0,"Wrong Fortran record markers") 5762 ABI_FREE(bsize_frecord) 5763 end if 5764 5765 if (do_open) then ! Close the file. 5766 call MPI_FILE_CLOSE(my_fh, ierr) 5767 ABI_CHECK_MPI(ierr,"FILE_CLOSE") 5768 end if 5769 5770 !Increment the offset 5771 if (PRESENT(offset)) then 5772 if (firstchar(uplo, ["A"])) then 5773 offset = offset + PRODUCT(Slk_mat%sizeb_global(1:2)) * bsize_elm 5774 if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm 5775 else if (firstchar(uplo, ["U","L"])) then 5776 offset = offset + ( (Slk_mat%sizeb_global(2) * (Slk_mat%sizeb_global(2))+1)/2 ) * bsize_elm 5777 if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm 5778 else 5779 ABI_ERROR("Wrong uplo") 5780 end if 5781 end if 5782 5783 call xmpi_barrier(comm) 5784 RETURN 5785 5786 #else 5787 ABI_ERROR("MPI-IO support not enabled") 5788 #endif 5789 5790 end subroutine slk_read
m_slk/slk_set_head_and_wings [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_set_head_and_wings
FUNCTION
Set head and the wings of the matrix starting from global arrays.
SOURCE
940 subroutine slk_set_head_and_wings(mat, head, low_wing, up_wing) 941 942 !Arguments ------------------------------------ 943 class(matrix_scalapack),intent(inout) :: mat 944 complex(dp),intent(in) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2)) 945 946 !Local variables------------------------------- 947 integer :: il_g1, il_g2, iglob1, iglob2 948 logical :: is_cplx 949 950 ! ********************************************************************* 951 952 is_cplx = allocated(mat%buffer_cplx) 953 954 do il_g2=1,mat%sizeb_local(2) 955 iglob2 = mat%loc2gcol(il_g2) 956 do il_g1=1,mat%sizeb_local(1) 957 iglob1 = mat%loc2grow(il_g1) 958 959 if (iglob1 == 1 .or. iglob2 == 1) then 960 if (iglob1 == 1 .and. iglob2 == 1) then 961 if (is_cplx) then 962 mat%buffer_cplx(il_g1, il_g2) = head 963 else 964 mat%buffer_real(il_g1, il_g2) = real(head) 965 end if 966 else if (iglob1 == 1) then 967 if (is_cplx) then 968 mat%buffer_cplx(il_g1, il_g2) = up_wing(iglob2) 969 else 970 mat%buffer_real(il_g1, il_g2) = real(up_wing(iglob2)) 971 end if 972 else if (iglob2 == 1) then 973 if (is_cplx) then 974 mat%buffer_cplx(il_g1, il_g2) = low_wing(iglob1) 975 else 976 mat%buffer_real(il_g1, il_g2) = real(low_wing(iglob1)) 977 end if 978 end if 979 end if 980 981 end do 982 end do 983 984 end subroutine slk_set_head_and_wings
m_slk/slk_set_imag_diago_to_zero [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_set_imag_diago_to_zero
FUNCTION
Set the imaginary part of the diagonal to zero. Return in local_max the max of the imaginar part in the local buffer. No MPI communication is performed inside the routine. Client code can easily reduce local_max within the PBLAS communicator if needed.
INPUTS
OUTPUT
SOURCE
5344 subroutine slk_set_imag_diago_to_zero(mat, local_max) 5345 5346 !Arguments ------------------------------------ 5347 class(basemat_t), intent(inout) :: mat 5348 real(dp),intent(out) :: local_max 5349 5350 !Local variables------------------------------- 5351 integer :: il1, iglob1, il2, iglob2 5352 5353 ! ************************************************************************* 5354 5355 local_max = -huge(one) 5356 5357 select type (mat) 5358 class is (matrix_scalapack) 5359 if (allocated(mat%buffer_real)) return 5360 do il2=1,mat%sizeb_local(2) 5361 iglob2 = mat%loc2gcol(il2) 5362 do il1=1,mat%sizeb_local(1) 5363 iglob1 = mat%loc2grow(il1) 5364 if (iglob1 == iglob2) then 5365 local_max = max(local_max, aimag(mat%buffer_cplx(il1, il2))) 5366 mat%buffer_cplx(il1, il2) = real(mat%buffer_cplx(il1, il2)) 5367 end if 5368 end do 5369 end do 5370 5371 class is (slkmat_sp_t) 5372 if (allocated(mat%buffer_real)) return 5373 do il2=1,mat%sizeb_local(2) 5374 iglob2 = mat%loc2gcol(il2) 5375 do il1=1,mat%sizeb_local(1) 5376 iglob1 = mat%loc2grow(il1) 5377 if (iglob1 == iglob2) then 5378 local_max = max(local_max, aimag(mat%buffer_cplx(il1, il2))) 5379 mat%buffer_cplx(il1, il2) = real(mat%buffer_cplx(il1, il2)) 5380 end if 5381 end do 5382 end do 5383 class default 5384 ABI_ERROR("Wrong class") 5385 end select 5386 5387 5388 end subroutine slk_set_imag_diago_to_zero
m_slk/slk_single_fview_read [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_single_fview_read
FUNCTION
Return an MPI datatype that can be used to read a scaLAPACK distributed matrix from a binary file using MPI-IO.
INPUTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer. uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk: = "U": Upper triangular is stored = "L": Lower triangular is stored = "A": Full matrix (used for general complex matrices) [is_fortran_file]=.FALSE. is C stream is used. .TRUE. for writing Fortran binary files with record markers. In this case etype is set xmpio_mpi_type_frm provided that the mpi_type of the matrix element is commensurate with xmpio_mpi_type_frm. Defaults to .TRUE.
OUTPUT
etype=Elementary data type (handle) defining the elementary unit used to access the file. slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file. Note that the view assumes that the file pointer points to the FIRST Fortran record marker. offset_err=Error code. A non-zero value signals that the global matrix is too large for a single MPI-IO access (see notes below).
NOTES
With (signed) Fortran integers, the maximum size of the file that that can be read in one-shot is around 2Gb when etype is set to byte. Using a larger etype might create portability problems (real data on machines using integer*16 for the marker) since etype must be a multiple of the Fortran record marker Due to the above reason, block_displ is given in bytes and must be stored in a integer of kind XMPI_ADDRESS_KIND. If the displacement is too large, the routine returns offset_err=1 so that the caller will know that several MPI-IO reads are needed to read the local buffer.
SOURCE
6114 subroutine slk_single_fview_read(Slk_mat,uplo,etype,slk_type,offset_err,is_fortran_file) 6115 6116 !Arguments ------------------------------------ 6117 !scalars 6118 integer,intent(out) :: offset_err,slk_type,etype 6119 character(len=*),intent(in) :: uplo 6120 logical,optional,intent(in) :: is_fortran_file 6121 class(matrix_scalapack),intent(in) :: Slk_mat 6122 6123 !Local variables ------------------------------ 6124 !scalars 6125 integer :: jloc,iloc,iglob,jglob,nrows_glob,ncols_glob,mpi_err,nel 6126 integer :: bsize_frm,mpi_type_elm,ij_loc,bsize_etype,bsize_elm 6127 integer(XMPI_OFFSET_KIND) :: ijp_glob,my_offset,cpad_frm 6128 !arrays 6129 character(len=500) :: msg 6130 integer,allocatable :: block_length(:),block_type(:) 6131 integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:) 6132 6133 !************************************************************************ 6134 6135 #ifdef HAVE_MPI_IO 6136 !@matrix_scalapack 6137 bsize_frm = xmpio_bsize_frm ! Byte size of the Fortran record marker. 6138 if (PRESENT(is_fortran_file)) then 6139 if (.not.is_fortran_file) bsize_frm = 0 6140 end if 6141 6142 call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm) 6143 6144 ! Global dimensions. 6145 nrows_glob=Slk_mat%sizeb_global(1) 6146 ncols_glob=Slk_mat%sizeb_global(2) 6147 6148 ! Number of matrix elements treated by this node. 6149 nel = PRODUCT(Slk_mat%sizeb_local(1:2)) 6150 6151 !Cannot use MPI_type_CREATE_INDEXED_BLOCK since it is not correctly implemented in several MPI libraries. 6152 !etype has to be set to MPI_BYTE, since the displacement in MPI structures is always in byte. 6153 ! ABI_WARNING("Using MPI_type_STRUCT for the MPI-IO file view") 6154 6155 etype = MPI_BYTE 6156 call MPI_type_SIZE(etype,bsize_etype,mpi_err) 6157 6158 ! Define the mapping between scaLAPACK buffer and the storage on file. 6159 ABI_MALLOC(block_length, (nel+2)) 6160 ABI_MALLOC(block_displ, (nel+2)) 6161 ABI_MALLOC(block_type, (nel+2)) 6162 block_length(1)=1 6163 block_displ (1)=0 6164 block_type (1)=MPI_LB 6165 6166 ! Note that the view assumes that the file pointer points to the first Fortran record marker. 6167 offset_err=0 6168 select case (uplo(1:1)) 6169 case ("A","a") ! The entire global matrix is stored on disk. TODO can use contigous vectors for better access. 6170 ij_loc=0 6171 do jloc=1,Slk_mat%sizeb_local(2) 6172 do iloc=1,Slk_mat%sizeb_local(1) 6173 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6174 ij_loc = ij_loc+1 6175 my_offset = 2*(jglob-1)*bsize_frm + bsize_frm + (jglob-1)*nrows_glob*bsize_elm + (iglob-1) * bsize_elm 6176 my_offset = my_offset / bsize_etype 6177 if (xmpio_max_address(my_offset)) offset_err=1 ! Test for possible wraparounds 6178 block_displ (ij_loc+1) = my_offset 6179 block_type (ij_loc+1) = mpi_type_elm 6180 block_length(ij_loc+1) = 1 6181 end do 6182 end do 6183 6184 case ("U","u") ! Only the upper triangle of the global matrix is stored on disk. 6185 ij_loc=0 6186 do jloc=1,Slk_mat%sizeb_local(2) 6187 do iloc=1,Slk_mat%sizeb_local(1) 6188 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6189 if (jglob>=iglob) then 6190 ijp_glob = iglob + jglob*(jglob-1)/2 ! Index for packed form 6191 cpad_frm = 2*(jglob-1)*bsize_frm 6192 else 6193 ijp_glob = jglob + iglob*(iglob-1)/2 ! Index for packed form 6194 cpad_frm = 2*(iglob-1)*bsize_frm 6195 end if 6196 ij_loc = ij_loc+1 6197 my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm 6198 my_offset = my_offset / bsize_etype 6199 if (xmpio_max_address(my_offset)) offset_err=1 ! Test for possible wraparounds 6200 block_displ (ij_loc+1) = my_offset 6201 block_type (ij_loc+1) = mpi_type_elm 6202 block_length(ij_loc+1) = 1 6203 end do 6204 end do 6205 6206 case ("L","l") ! Only the lower triangle of the global matrix is stored on disk. 6207 ij_loc=0 6208 do jloc=1,Slk_mat%sizeb_local(2) 6209 do iloc=1,Slk_mat%sizeb_local(1) 6210 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6211 if (jglob<=iglob) then 6212 ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form 6213 cpad_frm = 2*(jglob-1)*bsize_frm 6214 else 6215 ijp_glob = jglob + (iglob-1)*(2*nrows_glob-iglob)/2 ! Index for packed form 6216 cpad_frm = 2*(iglob-1)*bsize_frm 6217 end if 6218 ij_loc = ij_loc+1 6219 my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm 6220 my_offset = my_offset / bsize_etype 6221 if (xmpio_max_address(my_offset)) offset_err=1 ! block_displ is usually integer*4. Test for possible wraparounds 6222 block_displ (ij_loc+1) = my_offset 6223 block_type (ij_loc+1) = mpi_type_elm 6224 block_length(ij_loc+1) = 1 6225 end do 6226 end do 6227 6228 if (offset_err/=0) then ! just warn, let the caller handle the exception. 6229 write(msg,"(3a)")& 6230 " Global position index cannot be stored in standard Fortran integer ",ch10,& 6231 " scaLAPACK matrix cannot be read with a single MPI-IO call ." 6232 ABI_WARNING(msg) 6233 end if 6234 6235 case default 6236 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 6237 end select 6238 6239 block_length(nel+2)= 1 6240 block_displ (nel+2)= ncols_glob * (nrows_glob*bsize_elm + 2*bsize_frm) / bsize_etype 6241 block_type (nel+2)= MPI_UB 6242 6243 call xmpio_type_struct(nel+2,block_length,block_displ,block_type,slk_type,mpi_err) 6244 ABI_CHECK_MPI(mpi_err,"MPI_type_STRUCT") 6245 6246 ABI_FREE(block_length) 6247 ABI_FREE(block_displ) 6248 ABI_FREE(block_type) 6249 6250 call MPI_type_COMMIT(slk_type,mpi_err) 6251 ABI_CHECK_MPI(mpi_err,"MPI_type_COMMIT") 6252 6253 #else 6254 ABI_ERROR("MPI-IO is mandatatory in slk_single_fview_read") 6255 #endif 6256 6257 end subroutine slk_single_fview_read
m_slk/slk_single_fview_read_mask [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_single_fview_read_mask
FUNCTION
Return an MPI datatype that can be used to read a scaLAPACK distributed matrix from a binary file using MPI-IO. The view is created using the user-defined mask function mask_of_glob. The storage of the data on file is described via the user-defined function offset_of_glob.
INPUTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK matrix. mask_of_glob(row_glob,col_glob,size_glob) is an integer function that accepts in input the global indices of the matrix size_glob(1:2) are the global dimensions. Return 0 if (row_glob,col_glob) should not be read. offset_of_glob(row_glob,col_glob,size_glob,nsblocks,sub_block,bsize_elm,bsize_frm) nsblocks=Number of sub-blocks (will be passed to offset_of_glob) sub_block(2,2,nsblocks)=Global coordinates of the extremal points delimiting the sub-blocs e.g. sub_block(:,1,1) gives the coordinates of the left upper corner of the first block. sub_block(:,2,1) gives the coordinates of the right lower corner of the first block. [is_fortran_file]=.FALSE. is C stream is used. Set to .TRUE. for writing Fortran binary files with record marker.
OUTPUT
my_nel=Number of elements that will be read by this node. etype=Elementary data type (handle) defining the elementary unit used to access the file. This is the elementary type that must be used to creae the view (MPI_BYTE is used). slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file. Note that the view assumes that the file pointer points to the FIRST Fortran record marker. offset_err=Error code. A returned non-zero value signals that the global matrix is too large for a single MPI-IO access. See notes in other slk_single_fview_* routines.
SIDE EFFECTS
myel2loc(:,:) input: pointer to NULL output: myel2loc(2,my_nel): myel2loc(:,el) gives (iloc,jloc) for el=1,my_nel.
SOURCE
5833 subroutine slk_single_fview_read_mask(Slk_mat,mask_of_glob,offset_of_glob,nsblocks,sub_block,& 5834 my_nel,myel2loc,etype,slk_type,offset_err,is_fortran_file) 5835 5836 !Arguments ------------------------------------ 5837 !scalars 5838 integer,intent(in) :: nsblocks 5839 integer,intent(out) :: my_nel,offset_err,slk_type,etype 5840 logical,optional,intent(in) :: is_fortran_file 5841 class(matrix_scalapack),intent(in) :: Slk_mat 5842 !arrays 5843 integer,intent(in) :: sub_block(2,2,nsblocks) 5844 integer,pointer :: myel2loc(:,:) 5845 5846 interface 5847 function mask_of_glob(row_glob,col_glob,size_glob) 5848 use defs_basis 5849 integer :: mask_of_glob 5850 integer,intent(in) :: row_glob,col_glob 5851 integer,intent(in) :: size_glob(2) 5852 end function mask_of_glob 5853 end interface 5854 5855 interface 5856 function offset_of_glob(row_glob,col_glob,size_glob,nsblocks,sub_block,bsize_elm,bsize_frm) 5857 use defs_basis 5858 use m_xmpi 5859 integer(XMPI_OFFSET_KIND) :: offset_of_glob 5860 integer,intent(in) :: row_glob,col_glob,bsize_elm,bsize_frm,nsblocks 5861 integer,intent(in) :: size_glob(2),sub_block(2,2,nsblocks) 5862 end function offset_of_glob 5863 end interface 5864 5865 !Local variables ------------------------------ 5866 !scalars 5867 integer :: el,jloc,iloc,iglob,jglob,mpi_err,sweep 5868 integer :: bsize_frm,mpi_type_elm,bsize_elm 5869 integer(XMPI_OFFSET_KIND) :: tmp_off,max_displ 5870 !arrays 5871 character(len=500) :: msg 5872 integer,allocatable :: block_length(:),block_type(:) 5873 integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:) 5874 5875 !************************************************************************ 5876 5877 #ifdef HAVE_MPI_IO 5878 bsize_frm = xmpio_bsize_frm ! Byte size of the Fortran record marker. 5879 if (PRESENT(is_fortran_file)) then 5880 if (.not.is_fortran_file) bsize_frm = 0 5881 end if 5882 5883 ! Byte size of the matrix element. 5884 call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm) 5885 5886 ! Find the number of local matrix elements to be read, then create the table myel2loc. 5887 do sweep=1,2 5888 if (sweep==2) then 5889 ABI_MALLOC(myel2loc,(2,my_nel)) 5890 end if 5891 my_nel=0 5892 5893 do jloc=1,Slk_mat%sizeb_local(2) 5894 do iloc=1,Slk_mat%sizeb_local(1) 5895 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 5896 if ( mask_of_glob(iglob,jglob,Slk_mat%sizeb_global)/= 0) then ! Will fill this entry. 5897 my_nel = my_nel+1 5898 if (sweep==2) myel2loc(:,my_nel) = (/iloc,jloc/) 5899 end if 5900 end do 5901 end do 5902 end do 5903 5904 etype = MPI_BYTE 5905 5906 ! Define the mapping between scaLAPACK buffer and the storage on file. 5907 ! Note that the view assumes that the file pointer points to the first Fortran record marker. 5908 ABI_MALLOC(block_length,(my_nel+2)) 5909 ABI_MALLOC(block_displ,(my_nel+2)) 5910 ABI_MALLOC(block_type,(my_nel+2)) 5911 block_length(1)=1 5912 block_displ (1)=0 5913 block_type (1)=MPI_LB 5914 5915 offset_err=0; max_displ=0 5916 do el=1,my_nel 5917 iloc = myel2loc(1,el) 5918 jloc = myel2loc(2,el) 5919 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 5920 tmp_off = offset_of_glob(iglob,jglob,Slk_mat%sizeb_global,nsblocks,sub_block,bsize_elm,bsize_frm) 5921 if (xmpio_max_address(tmp_off)) offset_err=1 ! Test for possible wraparounds. 5922 max_displ = MAX(max_displ,tmp_off) 5923 block_displ (el+1) = tmp_off 5924 block_type (el+1) = mpi_type_elm 5925 block_length(el+1) = 1 5926 !write(std_out,*)" iglob, jglob, tmp_off ",iglob, jglob, tmp_off 5927 end do 5928 !write(std_out,*)" MAX displ is ",MAXVAL(block_displ) 5929 5930 if (offset_err/=0) then ! just warn, let the caller handle the exception. 5931 write(msg,"(3a)")& 5932 " Global position index cannot be stored in standard Fortran integer ",ch10,& 5933 " scaLAPACK matrix cannot be read with a single MPI-IO call ." 5934 ABI_WARNING(msg) 5935 end if 5936 5937 block_length(my_nel+2) = 1 5938 block_displ (my_nel+2) = max_displ 5939 block_type (my_nel+2) = MPI_UB 5940 5941 call xmpio_type_struct(my_nel+2,block_length,block_displ,block_type,slk_type,mpi_err) 5942 ABI_CHECK_MPI(mpi_err,"MPI_type_STRUCT") 5943 5944 ABI_FREE(block_length) 5945 ABI_FREE(block_displ) 5946 ABI_FREE(block_type) 5947 5948 call MPI_type_COMMIT(slk_type,mpi_err) 5949 ABI_CHECK_MPI(mpi_err,"MPI_type_COMMIT") 5950 5951 #else 5952 ABI_ERROR("MPI-IO is mandatatory in slk_single_fview_read_mask") 5953 #endif 5954 5955 end subroutine slk_single_fview_read_mask
m_slk/slk_single_fview_write [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_single_fview_write
FUNCTION
Returns an MPI datatype that can be used to write a scaLAPACK distributed matrix to a binary file using MPI-IO.
INPUTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer. uplo=String specifying whether only the upper or lower triangular part of the global matrix is stored on disk: = "U": Upper triangular is stored = "L": Lower triangular is stored = "A": Full matrix (used for general complex matrices) [is_fortran_file]=.FALSE. is C stream is used. .TRUE. for writing Fortran binary files with record marker. In this case etype is set xmpio_mpi_type_frm provided that the mpi_type of the matrix element is commensurate with xmpio_mpi_type_frm. Defaults to .TRUE. glob_subarray(2,2) = Used to select the subarray of the global matrix. Used only when uplo="All" glob_subarray(:,1)=starting global coordinates of the subarray in each dimension (array of nonnegative integers >=1, <=array_of_sizes) glob_subarray(:,2)=Number of elements in each dimension of the subarray (array of positive integers)
OUTPUT
nelw=Number of elements to be written. etype=Elementary data type (handle) defining the elementary unit used to access the file. slk_type=New MPI type that can be used to instantiate the MPI-IO view for the Fortran file. Note that the view assumes that the file pointer points to the FIRST Fortran record marker. offset_err=Error code. A non-zero value signals that the global matrix is too large for a single MPI-IO access (see notes below).
SIDE EFFECTS
elw2slk(:,:) = input: pointer to null(). output: elw2slk(2,nelw) contains the local coordinates of the matrix elements to be written. (useful only if the upper or lower triangle of the global matrix has to be written or when uplo="all" but a global subarray is written.
NOTES
With (signed) Fortran integers, the maximum size of the file that that can be read in one-shot is around 2Gb when etype is set to byte. Using a larger etype might create portability problems (real data on machines using integer*16 for the marker) since etype must be a multiple of the Fortran record marker Due to the above reason, block_displ is given in bytes and must be stored in a Fortran integer of kind XMPI_ADDRESS_KIND. If the displacement is too large, the routine returns offset_err=1 so that the caller will know that several MPI-IO reads are needed to write the local buffer.
SOURCE
6311 subroutine slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,is_fortran_file,glob_subarray) 6312 6313 !Arguments ------------------------------------ 6314 !scalars 6315 class(matrix_scalapack),intent(in) :: Slk_mat 6316 integer,intent(out) :: offset_err,slk_type,etype,nelw 6317 character(len=*),intent(in) :: uplo 6318 logical,optional,intent(in) :: is_fortran_file 6319 !arrays 6320 integer,pointer :: elw2slk(:,:) 6321 integer,optional,intent(in) :: glob_subarray(2,2) 6322 6323 !Local variables ------------------------------ 6324 !scalars 6325 integer :: jloc,iloc,iglob,jglob,nrows_glob,ncols_glob,mpi_err,nel_max 6326 integer :: grow_min,grow_max,gcol_min,gcol_max 6327 integer :: bsize_frm,mpi_type_elm,ij_loc,bsize_elm 6328 integer(XMPI_OFFSET_KIND) :: ijp_glob,my_offset,cpad_frm 6329 !arrays 6330 character(len=500) :: msg 6331 integer,allocatable :: block_length(:),block_type(:) 6332 integer(XMPI_ADDRESS_KIND),allocatable :: block_displ(:) 6333 6334 !************************************************************************ 6335 6336 #ifdef HAVE_MPI_IO 6337 !@matrix_scalapack 6338 bsize_frm = xmpio_bsize_frm ! Byte size of the Fortran record marker. 6339 if (PRESENT(is_fortran_file)) then 6340 if (.not.is_fortran_file) bsize_frm = 0 6341 end if 6342 6343 if (PRESENT(glob_subarray).and..not.firstchar(uplo, ["A"])) then 6344 ABI_ERROR("glob_subarray should not be used when uplo/=All") 6345 end if 6346 6347 call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm) 6348 6349 ! Global dimensions. 6350 nrows_glob=Slk_mat%sizeb_global(1) 6351 ncols_glob=Slk_mat%sizeb_global(2) 6352 6353 ! Number of matrix elements treated by this node. 6354 nel_max = PRODUCT(Slk_mat%sizeb_local(1:2)) 6355 6356 ABI_MALLOC(elw2slk,(2,nel_max)) 6357 elw2slk=0 6358 6359 ! Cannot use MPI_type_CREATE_INDEXED_BLOCK since it is not correctly implemented in several MPI libraries. 6360 ! etype has to be set to MPI_BYTE, since the displacement in MPI structures is always in byte. 6361 etype = MPI_BYTE 6362 6363 ! Define the mapping between scaLAPACK buffer and the storage on file. 6364 ABI_MALLOC(block_length, (nel_max+2)) 6365 ABI_MALLOC(block_displ, (nel_max+2)) 6366 ABI_MALLOC(block_type, (nel_max+2)) 6367 block_length(1)=1 6368 block_displ (1)=0 6369 block_type (1)=MPI_LB 6370 6371 ! Note that the view assumes that the file pointer points to the first Fortran record marker. 6372 offset_err=0 6373 6374 select case (uplo(1:1)) 6375 case ("A","a") 6376 ! The entire global matrix is written on disk. TODO can use contigous vectors for better access. 6377 grow_min=1; grow_max=nrows_glob 6378 gcol_min=1; gcol_max=ncols_glob 6379 if (PRESENT(glob_subarray)) then ! subarray access. 6380 grow_min = glob_subarray(1,1) 6381 gcol_min = glob_subarray(2,1) 6382 grow_max = grow_min + glob_subarray(1,2) -1 6383 gcol_max = gcol_min + glob_subarray(2,2) -1 6384 end if 6385 6386 ij_loc=0 6387 do jloc=1,Slk_mat%sizeb_local(2) 6388 do iloc=1,Slk_mat%sizeb_local(1) 6389 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6390 if (iglob>=grow_min.and.iglob<=grow_max .and. & ! glob_subarray element. 6391 jglob>=gcol_min.and.jglob<=gcol_max) then 6392 ij_loc = ij_loc+1 6393 my_offset = 2*(jglob-1)*bsize_frm + bsize_frm + (jglob-1)*nrows_glob*bsize_elm + (iglob-1) * bsize_elm 6394 if (xmpio_max_address(my_offset)) offset_err=1 ! Test for possible wraparounds 6395 block_displ (ij_loc+1) = my_offset 6396 block_type (ij_loc+1) = mpi_type_elm 6397 block_length(ij_loc+1) = 1 6398 elw2slk(:,ij_loc) = (/iloc,jloc/) ! useless when subarray are not used but oh well! 6399 end if 6400 end do 6401 end do 6402 6403 case ("U","u") 6404 ! Only the upper triangle of the global matrix is stored on disk. 6405 ij_loc=0 6406 do jloc=1,Slk_mat%sizeb_local(2) 6407 do iloc=1,Slk_mat%sizeb_local(1) 6408 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6409 if (jglob>=iglob) then 6410 ijp_glob = iglob + jglob*(jglob-1)/2 ! Index for packed form 6411 cpad_frm = 2*(jglob-1)*bsize_frm 6412 ij_loc = ij_loc+1 6413 my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm 6414 if (xmpio_max_address(my_offset)) offset_err=1 ! Test for possible wraparounds 6415 block_displ (ij_loc+1) = my_offset 6416 block_type (ij_loc+1) = mpi_type_elm 6417 block_length(ij_loc+1) = 1 6418 elw2slk(:,ij_loc) = (/iloc,jloc/) 6419 end if 6420 end do 6421 end do 6422 6423 case ("L","l") 6424 ! Only the lower triangle of the global matrix is stored on disk. 6425 ij_loc=0 6426 do jloc=1,Slk_mat%sizeb_local(2) 6427 do iloc=1,Slk_mat%sizeb_local(1) 6428 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6429 if (jglob<=iglob) then 6430 ijp_glob = iglob + (jglob-1)*(2*nrows_glob-jglob)/2 ! Index for packed form 6431 cpad_frm = 2*(jglob-1)*bsize_frm 6432 ij_loc = ij_loc+1 6433 my_offset = cpad_frm + bsize_frm + (ijp_glob-1) * bsize_elm 6434 if (xmpio_max_address(my_offset)) offset_err=1 ! block_displ is usually integer*4. Test for possible wraparounds 6435 block_displ (ij_loc+1) = my_offset 6436 block_type (ij_loc+1) = mpi_type_elm 6437 block_length(ij_loc+1) = 1 6438 elw2slk(:,ij_loc) = (/iloc,jloc/) 6439 end if 6440 end do 6441 end do 6442 6443 case default 6444 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 6445 end select 6446 6447 if (offset_err/=0) then ! just warn, let the caller handle the exception. 6448 write(msg,"(3a)")& 6449 "Global position index cannot be stored in standard Fortran integer ",ch10,& 6450 "scaLAPACK matrix cannot be read with a single MPI-IO call ." 6451 ABI_WARNING(msg) 6452 end if 6453 6454 ! Final number of matrix elements that will be written by this node. 6455 nelw = ij_loc 6456 6457 block_length(nelw+2)= 1 6458 block_displ (nelw+2)= ncols_glob * (nrows_glob*bsize_elm + 2*bsize_frm) 6459 block_type (nelw+2)= MPI_UB 6460 6461 call xmpio_type_struct(nelw+2,block_length,block_displ,block_type,slk_type,mpi_err) 6462 ABI_CHECK_MPI(mpi_err, "MPI_type_STRUCT") 6463 6464 ABI_FREE(block_length) 6465 ABI_FREE(block_displ) 6466 ABI_FREE(block_type) 6467 6468 call MPI_type_COMMIT(slk_type,mpi_err) 6469 ABI_CHECK_MPI(mpi_err, "MPI_type_COMMIT") 6470 6471 #else 6472 ABI_ERROR("MPI-IO is mandatatory in slk_single_fview_read") 6473 #endif 6474 6475 end subroutine slk_single_fview_write
m_slk/slk_symmetrize [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_symmetrize
FUNCTION
Symmetrize a square scaLAPACK matrix.
INPUTS
uplo=String specifying whether only the upper or lower triangular part of the global matrix has been read = "U": Upper triangular has been read. = "L": Lower triangular has been read. = "A": Full matrix (used for general complex matrices) symtype=Symmetry type of the matrix (used only if uplo = "L" or "A"). = "H" for Hermitian matrix = "S" for symmetric matrix. = "N" if matrix has no symmetry (not compatible with uplo="L" or uplo="U".
SIDE EFFECTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer supposed to be allocated. %buffer_cplx=Local buffer containg the distributed matrix stored on the external file.
SOURCE
5984 subroutine slk_symmetrize(Slk_mat, uplo, symtype) 5985 5986 !Arguments ------------------------------------ 5987 !scalars 5988 class(matrix_scalapack),intent(inout) :: Slk_mat 5989 character(len=*),intent(in) :: symtype 5990 character(len=*),intent(in) :: uplo 5991 5992 !Local variables ------------------------------ 5993 !scalars 5994 integer :: jloc,iloc,iglob,jglob,ij_loc 5995 logical :: is_hermitian,is_real,is_cplx,is_symmetric 5996 character(len=500) :: msg 5997 5998 !************************************************************************ 5999 6000 is_cplx = (allocated(Slk_mat%buffer_cplx)) 6001 is_real = (allocated(Slk_mat%buffer_real)) 6002 6003 ! One and only one buffer should be allocated. 6004 if (is_real .and. is_cplx) then 6005 write(msg,'(a,2l1)')" ScaLAPACK buffers are not allocated correctly, is_real=, is_cplx ",is_real,is_cplx 6006 ABI_ERROR(msg) 6007 end if 6008 6009 if (is_real) RETURN 6010 6011 is_hermitian=.FALSE.; is_symmetric=.FALSE. 6012 select case (symtype(1:1)) 6013 case ("H", "h") 6014 is_hermitian = .TRUE. 6015 case ("S","s") 6016 is_symmetric = .TRUE. 6017 case("N","n") 6018 if (ALL(uplo(1:1) /= ["A","a"])) then 6019 msg = " Found symtype= "//TRIM(symtype)//", but uplo= "//TRIM(uplo) 6020 ABI_ERROR(msg) 6021 end if 6022 RETURN ! Nothing to do. 6023 case default 6024 ABI_ERROR("Wrong symtype "//TRIM(symtype)) 6025 end select 6026 6027 !write(std_out,*)"is_cplx",is_cplx 6028 !write(std_out,*)"is_hermitian",is_hermitian 6029 6030 select case (uplo(1:1)) 6031 case ("A","a") 6032 ! Full global matrix has been read, nothing to do. 6033 return 6034 6035 case ("U", "u") 6036 ! Only the upper triangle of the global matrix was read. 6037 if (is_cplx .and. is_hermitian) then 6038 ij_loc=0 6039 do jloc=1,Slk_mat%sizeb_local(2) 6040 do iloc=1,Slk_mat%sizeb_local(1) 6041 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6042 ij_loc = ij_loc+1 6043 if (jglob < iglob) then 6044 ! Diagonal elements are not forced to be real. 6045 Slk_mat%buffer_cplx(iloc,jloc) = DCONJG(Slk_mat%buffer_cplx(iloc,jloc)) 6046 end if 6047 !if (iglob==jglob) Slk_mat%buffer_cplx(iloc,jloc) = real(Slk_mat%buffer_cplx(iloc,jloc)) 6048 end do 6049 end do 6050 end if 6051 6052 case ("L", "l") 6053 ! Only the lower triangle of the global matrix was read. 6054 if (is_cplx .and. is_hermitian) then 6055 ij_loc=0 6056 do jloc=1,Slk_mat%sizeb_local(2) 6057 do iloc=1,Slk_mat%sizeb_local(1) 6058 call slk_mat%loc2glob(iloc, jloc, iglob, jglob) 6059 ij_loc = ij_loc+1 6060 if (jglob>iglob) then ! diagonal elements are not forced to be real. 6061 Slk_mat%buffer_cplx(iloc,jloc) = DCONJG(Slk_mat%buffer_cplx(iloc,jloc)) 6062 end if 6063 !if (iglob==jglob) Slk_mat%buffer_cplx(iloc,jloc) = real(Slk_mat%buffer_cplx(iloc,jloc)) 6064 end do 6065 end do 6066 end if 6067 6068 case default 6069 ABI_BUG(" Wrong uplo: "//TRIM(uplo)) 6070 end select 6071 6072 end subroutine slk_symmetrize
m_slk/slk_take_from [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_take_from
FUNCTION
Take values from source NB: This routine should be called by all procs owning mat and source.
INPUTS
[free]: True if source should be deallocated. Default: False
OUTPUT
SOURCE
4961 subroutine slk_take_from(out_mat, source, & 4962 ija, ijb, free) ! optional 4963 4964 !Arguments ------------------------------------ 4965 class(matrix_scalapack),intent(inout) :: out_mat 4966 class(matrix_scalapack),intent(inout) :: source 4967 integer,optional,intent(in) :: ija(2), ijb(2) 4968 logical,optional,intent(in) :: free 4969 4970 !Local variables------------------------------- 4971 integer :: mm, nn 4972 character(len=500) :: msg 4973 integer :: ija__(2), ijb__(2) 4974 4975 ! ************************************************************************* 4976 4977 ! prototype 4978 !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt) 4979 4980 ! Take care when context A is disjoint from context B. The general rules for which parameters need to be set are: 4981 ! 4982 ! - All calling processes must have the correct m and n. 4983 ! - Processes in context A must correctly define all parameters describing A. 4984 ! - Processes in context B must correctly define all parameters describing B. 4985 ! - Processes which are not members of context A must pass ctxt_a = -1 and need not set other parameters describing A. 4986 ! - Processes which are not members of contextB must pass ctxt_b = -1 and need not set other parameters describing B. 4987 4988 mm = source%sizeb_global(1) 4989 nn = source%sizeb_global(2) 4990 4991 ija__ = [1, 1]; if (present(ija)) ija__ = ija 4992 ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb 4993 4994 if (all(out_mat%sizeb_global == -1)) then 4995 out_mat%descript%tab(CTXT_) = -1 4996 else 4997 ABI_CHECK_IEQ(out_mat%istwf_k, source%istwf_k, "istwfk_mat /= istwfk_source") 4998 if (any(out_mat%sizeb_global /= source%sizeb_global)) then 4999 msg = sjoin("Matrices should have same global shape but out_mat:", ltoa(out_mat%sizeb_global), & 5000 "source:", ltoa(source%sizeb_global)) 5001 ABI_ERROR(msg) 5002 end if 5003 end if 5004 5005 if (allocated(source%buffer_cplx)) then 5006 #ifdef HAVE_LINALG_SCALAPACK 5007 call pzgemr2d(mm, nn, & 5008 source%buffer_cplx, ija__(1), ija__(2), source%descript%tab, & 5009 out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, & 5010 source%processor%grid%ictxt) 5011 5012 else if (allocated(source%buffer_real)) then 5013 call pdgemr2d(mm, nn, & 5014 source%buffer_real, ija__(1), ija__(2), source%descript%tab, & 5015 out_mat%buffer_real,ijb__(1), ijb__(2), out_mat%descript%tab, & 5016 source%processor%grid%ictxt) 5017 #endif 5018 else 5019 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 5020 end if 5021 5022 if (present(free)) then 5023 if (free) call source%free() 5024 end if 5025 5026 end subroutine slk_take_from
m_slk/slk_write [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slk_write
FUNCTION
Routine to write a square scaLAPACK-distributed matrix to an external file using MPI-IO.
INPUTS
Slk_mat<matrix_scalapack>=Structured datatype defining the scaLAPACK distribution with the local buffer containing the distributed matrix. uplo=String specifying whether only the upper or lower triangular part of the global matrix is used: = "U": Upper triangular = "L": Lower triangular = "A": Full matrix (used for general complex matrices) is_fortran_file=.FALSE. is C stream is used. .TRUE. for writing Fortran binary files. [fname]= Mutually exclusive with mpi_fh. The name of the external file on which the matrix will be written. The file is open and closed inside the routine with MPI flags specified by flags. [mpi_fh]=File handler associated to the file (already open in the caller). Not compatible with fname. [flags]=MPI-IO flags used to open the file in MPI_FILE_OPEN. Default is MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_EXCL. [glob_subarray(2,2)] = Used to select the subarray of the global matrix. Used only when uplo="All" NOTE that each node should call the routine with the same value. glob_subarray(:,1)=starting global coordinates of the subarray in each dimension (array of nonnegative integers >=1, <=array_of_sizes) glob_subarray(:,2)=Number of elements in each dimension of the subarray (array of positive integers)
OUTPUT
Only writing. The global scaLAPACK matrix is written to file fname. If fname is present then the file is open and closed inside the routine. Any exception is fatal.
SIDE EFFECTS
[offset]= input: Offset used to access the content of the file. Default is zero. output: New offset incremented with the byte size of the matrix that has been read (Fortran markers are included if is_fortran_file=.TRUE.)
TODO
- Generalize the implementation adding the writing the real buffer. - This routine should be removed and replaced by hdf5 + mpi-io
SOURCE
5434 subroutine slk_write(Slk_mat, uplo, is_fortran_file, fname,mpi_fh, offset, flags, glob_subarray) 5435 5436 !Arguments ------------------------------------ 5437 !scalars 5438 integer,optional,intent(in) :: flags 5439 integer,optional,intent(inout) :: mpi_fh 5440 integer(XMPI_OFFSET_KIND),optional,intent(inout) :: offset 5441 logical,intent(in) :: is_fortran_file 5442 character(len=*),optional,intent(in) :: fname 5443 character(len=*),intent(in) :: uplo 5444 class(matrix_scalapack),intent(in) :: Slk_mat 5445 !array 5446 integer,optional,intent(in) :: glob_subarray(2,2) 5447 5448 !Local variables ------------------------------ 5449 !scalars 5450 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI_IO 5451 integer :: jloc,iloc,nrows_glob,ncols_glob,elw,nrows_w,ncols_w ! iglob,jglob, 5452 integer :: slk_type,offset_err,etype,nfrec,bsize_elm,mpi_type_elm 5453 integer(XMPI_OFFSET_KIND) :: my_offset 5454 logical :: do_open 5455 integer :: comm,my_flags,my_fh,buffer_size 5456 integer :: ierr,nelw,col_glob ! ij_loc, 5457 !arrays 5458 integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:) 5459 integer,pointer :: elw2slk(:,:) 5460 complex(dpc),allocatable :: buffer1_cplx(:) 5461 character(len=500) :: msg 5462 5463 !************************************************************************ 5464 5465 ABI_CHECK(allocated(Slk_mat%buffer_cplx), "buffer_cplx not allocated") 5466 5467 if (firstchar(uplo, ["U","L"]) .and. Slk_mat%sizeb_global(1) /= Slk_mat%sizeb_global(2) ) then 5468 ABI_ERROR("rectangular matrices are not compatible with the specified uplo") 5469 end if 5470 5471 if (PRESENT(glob_subarray).and. .not. firstchar(uplo, ["A"])) then 5472 ABI_ERROR("glob_subarray should not be used when uplo/=All") 5473 end if 5474 5475 do_open = PRESENT(fname) 5476 if (do_open) then 5477 ABI_CHECK(.not.PRESENT(fname),"fname should not be present") 5478 else 5479 ABI_CHECK(PRESENT(mpi_fh),"mpi_fh should be present") 5480 end if 5481 5482 my_offset=0; if (PRESENT(offset)) my_offset=offset 5483 5484 comm = Slk_mat%processor%comm 5485 5486 nrows_glob=Slk_mat%sizeb_global(1) 5487 ncols_glob=Slk_mat%sizeb_global(1) 5488 buffer_size= PRODUCT(Slk_mat%sizeb_local(1:2)) 5489 5490 call slk_mat%bsize_and_type(bsize_elm, mpi_type_elm) 5491 5492 if (do_open) then !Open the file. 5493 my_flags=MPI_MODE_CREATE + MPI_MODE_WRONLY + MPI_MODE_APPEND 5494 if (PRESENT(flags)) my_flags = flags 5495 5496 call MPI_FILE_OPEN(comm, fname, my_flags, MPI_INFO_NULL, my_fh, ierr) 5497 ABI_CHECK_MPI(ierr, "MPI_FILE_OPEN "//TRIM(fname)) 5498 else 5499 my_fh = mpi_fh 5500 end if 5501 5502 if (PRESENT(glob_subarray)) then 5503 call slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,& 5504 is_fortran_file=is_fortran_file,glob_subarray=glob_subarray) 5505 else 5506 call slk_single_fview_write(Slk_mat,uplo,nelw,elw2slk,etype,slk_type,offset_err,& 5507 is_fortran_file=is_fortran_file) 5508 end if 5509 5510 if (offset_err /= 0) then 5511 write(msg,"(3a)")& 5512 " Global position index cannot be stored in standard Fortran integer ",ch10,& 5513 " scaLAPACK matrix cannot be read with a single MPI-IO call." 5514 ABI_ERROR(msg) 5515 end if 5516 5517 call MPI_FILE_SET_VIEW(my_fh, my_offset, etype, slk_type, 'native', MPI_INFO_NULL, ierr) 5518 ABI_CHECK_MPI(ierr,"SET_VIEW") 5519 5520 call MPI_TYPE_FREE(slk_type,ierr) 5521 ABI_CHECK_MPI(ierr,"MPI_type_FREE") 5522 5523 if (nelw == buffer_size) then 5524 ! Dump Slk_mat% immediately. 5525 call MPI_FILE_WRITE_ALL(my_fh, Slk_mat%buffer_cplx, buffer_size, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr) 5526 ABI_CHECK_MPI(ierr,"WRITE_ALL") 5527 else 5528 ! Have to extract the data to be written. 5529 ABI_MALLOC(buffer1_cplx,(nelw)) 5530 do elw=1,nelw 5531 iloc = elw2slk(1,elw) 5532 jloc = elw2slk(2,elw) 5533 buffer1_cplx(elw) = Slk_mat%buffer_cplx(iloc,jloc) 5534 end do 5535 call MPI_FILE_WRITE_ALL(my_fh, buffer1_cplx, nelw, MPI_DOUBLE_complex, MPI_STATUS_IGNORE, ierr) 5536 ABI_CHECK_MPI(ierr,"WRITE_ALL") 5537 ABI_FREE(buffer1_cplx) 5538 end if 5539 5540 ABI_FREE(elw2slk) 5541 ! 5542 ! Number of columns and rows that have been written. 5543 ! Used to write the Fortran markers and to increment the offset. 5544 nrows_w = nrows_glob 5545 ncols_w = ncols_glob 5546 if (PRESENT(glob_subarray)) then 5547 nrows_w = glob_subarray(1,2) - glob_subarray(1,1) + 1 5548 ncols_w = glob_subarray(2,2) - glob_subarray(2,1) + 1 5549 if (.not.firstchar(uplo, ["A"])) then 5550 ABI_ERROR("glob_subarray should not be used when uplo/=All") 5551 end if 5552 end if 5553 5554 !TODO check whether slk_single_fview_write can report an offset to reduce the extent. 5555 if (is_fortran_file) then ! Collective writing of the Fortran markers. 5556 nfrec = ncols_w 5557 ABI_MALLOC(bsize_frecord,(nfrec)) 5558 if (firstchar(uplo, ["A"])) then 5559 bsize_frecord = nrows_w * bsize_elm 5560 else if (firstchar(uplo, ["U"])) then 5561 bsize_frecord = (/(col_glob * bsize_elm, col_glob=1,nfrec)/) 5562 else if (firstchar(uplo, ["L"])) then 5563 bsize_frecord = (/(col_glob * bsize_elm, col_glob=nfrec,1,-1)/) 5564 else 5565 ABI_ERROR("Wrong uplo") 5566 end if 5567 call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,nfrec,bsize_frecord,ierr) 5568 ABI_CHECK(ierr==0,"Error while writing Fortran markers") 5569 ABI_FREE(bsize_frecord) 5570 end if 5571 5572 if (do_open) then 5573 ! Close the file. 5574 call MPI_FILE_CLOSE(my_fh, ierr) 5575 ABI_CHECK_MPI(ierr,"FILE_CLOSE") 5576 end if 5577 5578 ! Increment the offset 5579 if (PRESENT(offset)) then 5580 if (firstchar(uplo, ["A"])) then 5581 offset = offset + nrows_w*ncols_w*bsize_elm 5582 if (is_fortran_file) offset = offset + ncols_w*2*xmpio_bsize_frm 5583 else if (firstchar(uplo, ["U","L"])) then 5584 offset = offset + ( (Slk_mat%sizeb_global(2) * (Slk_mat%sizeb_global(2))+1)/2 ) * bsize_elm 5585 if (is_fortran_file) offset = offset + Slk_mat%sizeb_global(2)*2*xmpio_bsize_frm 5586 else 5587 ABI_ERROR("Wrong uplo") 5588 end if 5589 end if 5590 5591 call xmpi_barrier(comm) 5592 RETURN 5593 5594 #else 5595 ABI_ERROR("MPI-IO support not activated") 5596 #endif 5597 5598 end subroutine slk_write
m_slk/slkmat_check_shape [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_check_shape
FUNCTION
Debugging tool to test the local shape `lshape` of the local buffer. Return exit status in `ok` and error message in `msg`.
SOURCE
809 logical function slkmat_check_local_shape(mat, lshape, msg) result (ok) 810 811 !Arguments ------------------------------------ 812 class(basemat_t),intent(in) :: mat 813 integer,intent(in) :: lshape(2) 814 character(len=*),intent(out) :: msg 815 816 ! ********************************************************************* 817 818 msg = "" 819 ok = all(mat%sizeb_local == lshape) 820 if (.not. ok) then 821 msg = sjoin("mat%sizeb_local:", ltoa(mat%sizeb_local), " not equal to input local lshape ", ltoa(lshape)) 822 return 823 end if 824 825 select type (mat) 826 class is (matrix_scalapack) 827 if (allocated(mat%buffer_cplx)) then 828 ok = all(shape(mat%buffer_cplx) == lshape) 829 if (.not. ok) then 830 msg = sjoin("shape(buffer_cplx):", ltoa(shape(mat%buffer_cplx)), " != input local lshape ", ltoa(lshape)); return 831 end if 832 else if (allocated(mat%buffer_real)) then 833 ok = all(shape(mat%buffer_real) == lshape) 834 if (.not. ok) then 835 msg = sjoin("shape(buffer_real):", ltoa(shape(mat%buffer_real)), " != input local lshape ", ltoa(lshape)); return 836 end if 837 end if 838 839 class is (slkmat_sp_t) 840 ! Same piece of code as above. May use include file! 841 if (allocated(mat%buffer_cplx)) then 842 ok = all(shape(mat%buffer_cplx) == lshape) 843 if (.not. ok) then 844 msg = sjoin("shape(buffer_cplx):", ltoa(shape(mat%buffer_cplx)), " != input local lshape ", ltoa(lshape)); return 845 end if 846 else if (allocated(mat%buffer_real)) then 847 ok = all(shape(mat%buffer_real) == lshape) 848 if (.not. ok) then 849 msg = sjoin("shape(buffer_real):", ltoa(shape(mat%buffer_real)), " != input local lshape ", ltoa(lshape)); return 850 end if 851 end if 852 853 class default 854 ABI_ERROR("Wrong class") 855 end select 856 857 end function slkmat_check_local_shape
m_slk/slkmat_print [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_print
FUNCTION
Print info on scalapack matrix.
INPUTS
[unit]=Unit number (default: std_out) [header]=title for info [prtvol]=Verbosity level (default: 0)
SOURCE
747 subroutine slkmat_print(mat, header, unit, prtvol) 748 749 !Arguments ------------------------------------ 750 class(basemat_t),intent(in) :: mat 751 character(len=*),optional,intent(in) :: header 752 integer,optional,intent(in) :: prtvol, unit 753 754 !Local variables------------------------------- 755 integer :: unt, my_prtvol, grid_dims(2) 756 character(len=50) :: matrix_dtype 757 character(len=5000) :: msg 758 759 ! ********************************************************************* 760 761 unt = std_out; if (present(unit)) unt =unit 762 my_prtvol = 0; if (present(prtvol)) my_prtvol = prtvol 763 764 msg = ' ==== Info on matrix_scalapack ==== ' 765 if (present(header)) msg=' ==== '//trim(adjustl(header))//' ==== ' 766 call wrtout(unt, msg) 767 768 matrix_dtype = "undefined" 769 select type (mat) 770 class is (matrix_scalapack) 771 if (allocated(mat%buffer_real)) matrix_dtype = "real" 772 if (allocated(mat%buffer_cplx)) matrix_dtype = "complex" 773 class is (slkmat_sp_t) 774 if (allocated(mat%buffer_real)) matrix_dtype = "real" 775 if (allocated(mat%buffer_cplx)) matrix_dtype = "complex" 776 class default 777 ABI_ERROR("Wrong class") 778 end select 779 780 grid_dims = [-1, -1] 781 if (associated(mat%processor)) grid_dims = mat%processor%grid%dims 782 783 write(msg,'(5(3a),a,f8.1,a)') & 784 " matrix_dtype ..... ", trim(matrix_dtype), ch10, & 785 " sizeb_global ..... ", trim(ltoa(mat%sizeb_global)), ch10, & 786 " sizeb_local ...... ", trim(ltoa(mat%sizeb_local)), ch10, & 787 " sizeb_blocs ...... ", trim(ltoa(mat%sizeb_blocs)), ch10, & 788 " processor grid .. ", trim(ltoa(grid_dims)), ch10, & 789 " memory (Mb) ...... ", mat%locmem_mb(), ch10 790 call wrtout(unt, msg) 791 792 !if (prtvol > 10) call mat%write(unit) 793 794 end subroutine slkmat_print
m_slk/slkmat_sp_collect_cplx [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_collect_cplx
FUNCTION
Return on all processors the complex submatrix of shape (mm, nn) starting at position ija. NB: `out_carr` is allocated by the routine. If optional argument request is use, the routine uses non-blocking BCAST and client code is supposed to wait before accessing out_carr.
SOURCE
5201 subroutine slkmat_sp_collect_cplx(in_mat, mm, nn, ija, out_carr, request) 5202 5203 !Arguments ------------------------------------ 5204 class(slkmat_sp_t),intent(in) :: in_mat 5205 integer,intent(in) :: mm, nn, ija(2) 5206 complex(sp) ABI_ASYNC, allocatable,intent(out) :: out_carr(:,:) 5207 integer ABI_ASYNC, optional,intent(out) :: request 5208 5209 !Local variables------------------------------- 5210 integer,parameter :: master = 0 5211 integer :: ierr 5212 type(processor_scalapack) :: self_processor 5213 type(slkmat_sp_t) :: out_mat 5214 5215 ! ************************************************************************* 5216 5217 ABI_CHECK(allocated(in_mat%buffer_cplx), "buffer_cplx is not allocated") 5218 5219 if (in_mat%processor%grid%nbprocs == 1) then 5220 ! Copy buffer and return 5221 ABI_MALLOC(out_carr, (mm, nn)) 5222 out_carr(:,:) = in_mat%buffer_cplx(ija(1):ija(1)+mm-1, ija(2):ija(2)+nn-1); return 5223 end if 5224 5225 ! Two-step algorithm: 5226 ! 1) Use pzgemr2d to collect submatrix on master. 5227 ! 2) Master brodacasts submatrix. 5228 5229 if (in_mat%processor%myproc == master) then 5230 call self_processor%init(xmpi_comm_self) 5231 call out_mat%init(mm, nn, self_processor, in_mat%istwf_k, size_blocs=[mm, nn]) 5232 else 5233 out_mat%descript%tab(CTXT_) = -1 5234 end if 5235 5236 #ifdef HAVE_LINALG_SCALAPACK 5237 call pcgemr2d(mm, nn, & 5238 in_mat%buffer_cplx, ija(1), ija(2), in_mat%descript%tab, & 5239 out_mat%buffer_cplx, 1, 1, out_mat%descript%tab, & 5240 in_mat%processor%grid%ictxt) 5241 #endif 5242 5243 if (in_mat%processor%myproc == master) then 5244 ABI_MOVE_ALLOC(out_mat%buffer_cplx, out_carr) 5245 call out_mat%free() 5246 call self_processor%free() 5247 else 5248 ABI_MALLOC(out_carr, (mm, nn)) 5249 end if 5250 5251 if (present(request)) then 5252 call xmpi_ibcast(out_carr, master, in_mat%processor%comm, request, ierr) 5253 else 5254 call xmpi_bcast(out_carr, master, in_mat%processor%comm, ierr) 5255 end if 5256 5257 end subroutine slkmat_sp_collect_cplx
m_slk/slkmat_sp_copy [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_copy
FUNCTION
Copy in_mat to out_mat. If empty is True, the values in the local buffer are not copied. Default: False
SOURCE
1094 subroutine slkmat_sp_copy(in_mat, out_mat, empty) 1095 1096 !Arguments ------------------------------------ 1097 class(slkmat_sp_t),intent(in) :: in_mat 1098 class(slkmat_sp_t),intent(out) :: out_mat 1099 logical,optional,intent(in) :: empty 1100 1101 !Local variables------------------------------- 1102 logical :: empty__ 1103 1104 ! ********************************************************************* 1105 1106 call out_mat%init(in_mat%sizeb_global(1), in_mat%sizeb_global(2), in_mat%processor, in_mat%istwf_k, & 1107 size_blocs=in_mat%sizeb_blocs) 1108 1109 empty__ = .False.; if (present(empty)) empty__ = empty 1110 if (.not. empty__) then 1111 if (in_mat%istwf_k == 1) then 1112 out_mat%buffer_cplx = in_mat%buffer_cplx 1113 else 1114 out_mat%buffer_real = in_mat%buffer_real 1115 end if 1116 end if 1117 1118 end subroutine slkmat_sp_copy
m_slk/slkmat_sp_heev [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_heev
FUNCTION
slkmat_sp_heev computes selected eigenvalues and, optionally, eigenvectors of an Hermitian matrix A. A * X = lambda * X
INPUTS
JOBZ (global input) CHARACTER*1 Specifies whether or not to compute the eigenvectors: = "N": Compute eigenvalues only. = "V": Compute eigenvalues and eigenvectors. UPLO (global input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = "U": Upper triangular = "L": Lower triangular mat=The object storing the local buffer in SINGLE PRECISION, the array descriptor, the PBLAS context. vec=The distributed eigenvectors. Not referenced if JOBZ="N"
OUTPUT
W (global output) array, dimension (N) where N is the rank of the global matrix. On normal exit, the first M entries contain the selected eigenvalues in ascending order.
SIDE EFFECTS
If JOBZ="V", the local buffer vec%buffer_cplx will contain part of the distributed eigenvectors. On exit, the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed.
SOURCE
3627 subroutine slkmat_sp_heev(mat, jobz, uplo, vec, w, & 3628 mat_size, ija, ijz) ! Optional 3629 3630 !Arguments ------------------------------------ 3631 !scalars 3632 class(slkmat_sp_t),intent(inout) :: mat 3633 character(len=*),intent(in) :: jobz, uplo 3634 class(slkmat_sp_t),intent(inout) :: vec 3635 !arrays 3636 real(sp),intent(out) :: w(:) 3637 integer,optional,intent(in) :: mat_size, ija(2), ijz(2) 3638 3639 #ifdef HAVE_LINALG_SCALAPACK 3640 !Local variables ------------------------------ 3641 !scalars 3642 integer :: lwork, lrwork, info, nn 3643 !arrays 3644 integer :: ija__(2), ijz__(2) 3645 real(sp),allocatable :: rwork_sp(:) 3646 complex(sp),allocatable :: work_sp(:) 3647 3648 !************************************************************************ 3649 3650 ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated") 3651 3652 nn = mat%sizeb_global(2); if (present(mat_size)) nn = mat_size 3653 ija__ = [1, 1]; if (present(ija)) ija__ = ija 3654 ijz__ = [1, 1]; if (present(ijz)) ijz__ = ijz 3655 3656 ! Get optimal size of workspace. 3657 lwork = - 1; lrwork = -1 3658 ABI_MALLOC(work_sp, (1)) 3659 ABI_MALLOC(rwork_sp, (1)) 3660 3661 !call pzheev(jobz, uplo, n, a, ia, ja, desca, w, z, iz, jz, descz, work, lwork, rwork, lrwork, info) 3662 3663 call PCHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, & 3664 w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_sp, lwork, rwork_sp, lrwork, info) 3665 ABI_CHECK(info == 0, sjoin("Error in the calculation of the workspace size, info:", itoa(info))) 3666 3667 lwork = NINT(real(work_sp(1))); lrwork= NINT(rwork_sp(1)) !*2 3668 ABI_FREE(work_sp) 3669 ABI_FREE(rwork_sp) 3670 3671 ! MG: Nov 23 2011. On my mac with the official scalapack package, rwork(1) is not large enough and causes a SIGFAULT. 3672 if (firstchar(jobz, ['V'])) then 3673 if (lrwork < 2*nn + 2*nn-2) lrwork = 2*nn + 2*nn-2 3674 else if (firstchar(jobz, ['N'])) then 3675 if (lrwork < 2*nn) lrwork = 2*nn 3676 end if 3677 !write(std_out,*)lwork,lrwork 3678 3679 ! Solve the problem. 3680 ABI_MALLOC(work_sp, (lwork)) 3681 ABI_MALLOC(rwork_sp, (lrwork)) 3682 3683 call PCHEEV(jobz, uplo, nn, mat%buffer_cplx, ija__(1), ija__(2), mat%descript%tab, & 3684 w, vec%buffer_cplx, ijz__(1), ijz__(2), vec%descript%tab, work_sp, lwork, rwork_sp, lrwork, info) 3685 ABI_CHECK(info == 0, sjoin("PCHEEV returned info:", itoa(info))) 3686 ABI_FREE(work_sp) 3687 ABI_FREE(rwork_sp) 3688 #endif 3689 3690 end subroutine slkmat_sp_heev
m_slk/slkmat_sp_hpd_invert [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_hpd_invert
FUNCTION
Compute the inverse of an Hermitian positive definite matrix.
INPUTS
uplo: global input = 'U': Upper triangle of sub( A ) is stored; = 'L': Lower triangle of sub( A ) is stored. [full]: If full PBLAS matrix is neeeded. Default: True
SIDE EFFECTS
mat= The object storing the local buffer, the array descriptor, the context, etc. On entry, this array contains the local pieces of the N-by-N Hermitian distributed matrix sub( A ) to be factored. If UPLO = 'U', the leading N-by-N upper triangular part of sub( A ) contains the upper triangular part of the matrix, and its strictly lower triangular part is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of sub( A ) contains the lower triangular part of the distribu- ted matrix, and its strictly upper triangular part is not referenced. On exit, the local pieces of the upper or lower triangle of the (Hermitian) inverse of sub( A )
SOURCE
4481 subroutine slkmat_sp_hpd_invert(mat, uplo, full) 4482 4483 !Arguments ------------------------------------ 4484 character(len=*),intent(in) :: uplo 4485 class(slkmat_sp_t),intent(inout) :: mat 4486 logical,optional,intent(in) :: full 4487 4488 #ifdef HAVE_LINALG_SCALAPACK 4489 !Local variables ------------------------------ 4490 !scalars 4491 integer :: info, mm, il1, il2, iglob1, iglob2 4492 type(slkmat_sp_t) :: work_mat 4493 logical :: full__ 4494 4495 !************************************************************************ 4496 4497 ABI_CHECK(allocated(mat%buffer_cplx), "buffer_cplx not allocated") 4498 4499 ! ZPOTRF computes the Cholesky factorization of a complex Hermitian positive definite. 4500 ! A = U**H * U, if UPLO = 'U', or 4501 ! A = L * L**H, if UPLO = 'L', 4502 mm = mat%sizeb_global(1) 4503 call PCPOTRF(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info) 4504 ABI_CHECK(info == 0, sjoin("PCPOTRF returned info:", itoa(info))) 4505 4506 ! PZPOTRI computes the inverse of a complex Hermitian positive definite 4507 ! distributed matrix sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the 4508 ! Cholesky factorization sub( A ) = U**H*U or L*L**H computed by PZPOTRF. 4509 call PCPOTRI(uplo, mm, mat%buffer_cplx, 1, 1, mat%descript%tab, info) 4510 ABI_CHECK(info == 0, sjoin("PCPOTRI returned info:", itoa(info))) 4511 4512 full__ = .True.; if (present(full)) full__ = full 4513 if (full__) then 4514 ! Only the uplo part contains the inverse so we need to fill the other triangular part. 4515 ! 1) Fill the missing triangle with zeros and copy results to work_mat 4516 ! 2) Call pzgeadd to compute: sub(C) := beta*sub(C) + alpha*op(sub(A)) 4517 ! 3) Divide diagonal elements by two. 4518 4519 do il2=1,mat%sizeb_local(2) 4520 iglob2 = mat%loc2gcol(il2) 4521 do il1=1,mat%sizeb_local(1) 4522 iglob1 = mat%loc2grow(il1) 4523 if (uplo == "L" .and. iglob2 > iglob1) mat%buffer_cplx(il1, il2) = zero_sp 4524 if (uplo == "U" .and. iglob2 < iglob1) mat%buffer_cplx(il1, il2) = zero_sp 4525 end do 4526 end do 4527 4528 call mat%copy(work_mat, empty=.False.) 4529 4530 ! call pzgeadd(trans, m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc) 4531 ! sub(C) := beta*sub(C) + alpha*op(sub(A)) 4532 call pcgeadd("C", mm, mm, cone_sp, work_mat%buffer_cplx, 1, 1, work_mat%descript%tab, & 4533 cone_sp, mat%buffer_cplx, 1, 1, mat%descript%tab) 4534 call work_mat%free() 4535 4536 do il2=1,mat%sizeb_local(2) 4537 iglob2 = mat%loc2gcol(il2) 4538 do il1=1,mat%sizeb_local(1) 4539 iglob1 = mat%loc2grow(il1) 4540 if (iglob2 == iglob1) mat%buffer_cplx(il1, il2) = 0.5_sp * mat%buffer_cplx(il1, il2) 4541 end do 4542 end do 4543 end if ! full__ 4544 #endif 4545 4546 end subroutine slkmat_sp_hpd_invert
m_slk/slkmat_sp_ptrans [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_ptrans
FUNCTION
Transposes a matrix sub( C ) := beta*sub( C ) + alpha*op( sub( A ) ) where sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), sub( A ) denotes A(IA:IA+N-1,JA:JA+M-1), and, op( X ) = X'. Thus, op( sub( A ) ) denotes A(IA:IA+N-1,JA:JA+M-1)'. Beta is a scalar, sub( C ) is an m by n submatrix, and sub( A ) is an n by m submatrix.
INPUTS
[ija(2)]: (global) The row and column indices in the distributed matrix in_mat indicating the first row and the first column of the submatrix sub(A), respectively. [ijc(2)]: (global) The row and column indices in the distributed matrix out_mat indicating the first row and the first column of the submatrix sub(C), respectively. [free]: True to deallocate in_mat. Default: False
SOURCE
4687 subroutine slkmat_sp_ptrans(in_mat, trans, out_mat, & 4688 out_gshape, ija, ijc, size_blocs, alpha, beta, free) ! optional 4689 4690 !Arguments ------------------------------------ 4691 class(slkmat_sp_t),intent(inout) :: in_mat 4692 character(len=1),intent(in) :: trans 4693 class(slkmat_sp_t),intent(inout) :: out_mat 4694 integer,optional,intent(in) :: out_gshape(2), size_blocs(2), ija(2), ijc(2) 4695 complex(sp),optional,intent(in) :: alpha, beta 4696 logical,optional,intent(in) :: free 4697 4698 !Local variables------------------------------- 4699 integer :: sb, mm, nn, size_blocs__(2) 4700 real(sp) :: ralpha__, rbeta__ 4701 integer :: ija__(2), ijc__(2) 4702 complex(sp) :: calpha__, cbeta__ 4703 4704 ! ************************************************************************* 4705 4706 ija__ = [1, 1]; if (present(ija)) ija__ = ija 4707 ijc__ = [1, 1]; if (present(ijc)) ijc__ = ijc 4708 4709 ! transposed output (sub)matrix has shape (nn, mm) 4710 if (present(out_gshape)) then 4711 nn = out_gshape(1) 4712 mm = out_gshape(2) 4713 else 4714 nn = in_mat%sizeb_global(2) 4715 mm = in_mat%sizeb_global(1) 4716 end if 4717 4718 if (present(size_blocs)) then 4719 size_blocs__ = size_blocs 4720 else 4721 ! FIXME: This can cause problems if I start to use round-robin distribution in GWR!!!!! 4722 size_blocs__(1) = in_mat%sizeb_global(2) 4723 sb = in_mat%sizeb_global(1) / in_mat%processor%grid%dims(2) 4724 if (mod(in_mat%sizeb_global(1), in_mat%processor%grid%dims(2)) /= 0) sb = sb + 1 4725 size_blocs__(2) = sb 4726 !size_blocs__(2) = in_mat%sizeb_blocs(1); size_blocs__(1) = in_mat%sizeb_blocs(2) 4727 end if 4728 4729 call out_mat%init(nn, mm, in_mat%processor, in_mat%istwf_k, size_blocs=size_blocs__) 4730 4731 ! prototype: call pdtran(m, n, alpha, a, ia, ja, desca, beta, c, ic, jc, descc) 4732 4733 if (allocated(in_mat%buffer_cplx)) then 4734 #ifdef HAVE_LINALG_SCALAPACK 4735 select case (trans) 4736 case ("N") 4737 ! sub(C) := beta*sub(C) + alpha*sub(A)', 4738 calpha__ = cone_sp; if (present(alpha)) calpha__ = alpha 4739 cbeta__ = czero_sp; if (present(beta)) cbeta__ = beta 4740 call pctranu(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), & 4741 in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab) 4742 4743 case ("C") 4744 ! sub(C) := beta * sub(C) + alpha * conjg(sub(A)') 4745 calpha__ = cone_sp; if (present(alpha)) calpha__ = alpha 4746 cbeta__ = czero_sp; if (present(beta)) cbeta__ = beta 4747 call pctranc(nn, mm, calpha__, in_mat%buffer_cplx, ija__(1), ija__(2), & 4748 in_mat%descript%tab, cbeta__, out_mat%buffer_cplx, ijc__(1), ijc__(2), out_mat%descript%tab) 4749 4750 case default 4751 ABI_ERROR(sjoin("Invalid value for trans:", trans)) 4752 end select 4753 4754 else if (allocated(in_mat%buffer_real)) then 4755 ralpha__ = one_sp; if (present(alpha)) ralpha__ = real(alpha) 4756 rbeta__ = zero_sp; if (present(beta)) rbeta__ = real(beta) 4757 call pstran(nn, mm, ralpha__, in_mat%buffer_real, ija__(1), ija__(2), & 4758 in_mat%descript%tab, rbeta__, out_mat%buffer_real, ijc__(1), ijc__(2), out_mat%descript%tab) 4759 #endif 4760 else 4761 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 4762 end if 4763 4764 if (present(free)) then 4765 if (free) call in_mat%free() 4766 end if 4767 4768 end subroutine slkmat_sp_ptrans
m_slk/slkmat_sp_set_head_and_wings [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_set_head_and_wings
FUNCTION
Set head and the wings of the matrix starting from global arrays.
SOURCE
998 subroutine slkmat_sp_set_head_and_wings(mat, head, low_wing, up_wing) 999 1000 !Arguments ------------------------------------ 1001 class(slkmat_sp_t),intent(inout) :: mat 1002 complex(sp),intent(in) :: head, low_wing(mat%sizeb_global(1)), up_wing(mat%sizeb_global(2)) 1003 1004 !Local variables------------------------------- 1005 integer :: il_g1, il_g2, iglob1, iglob2 1006 logical :: is_cplx 1007 1008 ! ********************************************************************* 1009 1010 is_cplx = allocated(mat%buffer_cplx) 1011 1012 do il_g2=1,mat%sizeb_local(2) 1013 iglob2 = mat%loc2gcol(il_g2) 1014 do il_g1=1,mat%sizeb_local(1) 1015 iglob1 = mat%loc2grow(il_g1) 1016 1017 if (iglob1 == 1 .or. iglob2 == 1) then 1018 if (iglob1 == 1 .and. iglob2 == 1) then 1019 if (is_cplx) then 1020 mat%buffer_cplx(il_g1, il_g2) = head 1021 else 1022 mat%buffer_real(il_g1, il_g2) = real(head) 1023 end if 1024 else if (iglob1 == 1) then 1025 if (is_cplx) then 1026 mat%buffer_cplx(il_g1, il_g2) = up_wing(iglob2) 1027 else 1028 mat%buffer_real(il_g1, il_g2) = real(up_wing(iglob2)) 1029 end if 1030 else if (iglob2 == 1) then 1031 if (is_cplx) then 1032 mat%buffer_cplx(il_g1, il_g2) = low_wing(iglob1) 1033 else 1034 mat%buffer_real(il_g1, il_g2) = real(low_wing(iglob1)) 1035 end if 1036 end if 1037 end if 1038 1039 end do 1040 end do 1041 1042 end subroutine slkmat_sp_set_head_and_wings
m_slk/slkmat_sp_t [ Types ]
NAME
slkmat_sp_t
FUNCTION
high-level interface to ScaLAPACK matrix (single precision version)
SOURCE
287 type, public, extends(basemat_t) :: slkmat_sp_t 288 289 real(sp),allocatable :: buffer_real(:,:) 290 ! local part of the (real) matrix. 291 ! The istwf_k option passed to the constructor defines whether we have a real or complex matrix 292 293 complex(sp),allocatable :: buffer_cplx(:,:) 294 ! local part of the (complex) matrix 295 296 contains 297 298 procedure :: copy => slkmat_sp_copy 299 ! Copy object 300 301 procedure :: take_from => slkmat_sp_take_from 302 ! Take values from source 303 304 procedure :: ptrans => slkmat_sp_ptrans 305 ! Transpose matrix 306 307 procedure :: set_head_and_wings => slkmat_sp_set_head_and_wings 308 ! Set head and the wings of the matrix starting from global arrays. 309 310 !procedure :: cut => slk_cut 311 ! Extract submatrix and create new matrix with `size_blocs` and `processor` 312 313 procedure :: collect_cplx => slkmat_sp_collect_cplx 314 ! Return on all processors the submatrix of shape (mm, nn) starting at position ija. 315 316 procedure :: heev => slkmat_sp_heev 317 ! Compute eigenvalues and, optionally, eigenvectors of an Hermitian matrix A. A * X = lambda * X 318 319 procedure :: hpd_invert => slkmat_sp_hpd_invert 320 ! Inverse of a Hermitian positive definite matrix. 321 322 end type slkmat_sp_t
m_slk/slkmat_sp_take_from [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
slkmat_sp_take_from
FUNCTION
Take values from source NB: This routine should be called by all procs owning mat and source.
INPUTS
[free]: True if source should be deallocated. Default: False
OUTPUT
SOURCE
5046 subroutine slkmat_sp_take_from(out_mat, source, & 5047 ija, ijb, free) ! optional 5048 5049 !Arguments ------------------------------------ 5050 class(slkmat_sp_t),intent(inout) :: out_mat 5051 class(slkmat_sp_t),intent(inout) :: source 5052 integer,optional,intent(in) :: ija(2), ijb(2) 5053 logical,optional,intent(in) :: free 5054 5055 !Local variables------------------------------- 5056 integer :: mm, nn 5057 character(len=500) :: msg 5058 integer :: ija__(2), ijb__(2) 5059 5060 ! ************************************************************************* 5061 5062 ! prototype 5063 !call pzgemr2d(m, n, a, ia, ja, desca, b, ib, jb, descb, ictxt) 5064 5065 ! Take care when context A is disjoint from context B. The general rules for which parameters need to be set are: 5066 ! 5067 ! - All calling processes must have the correct m and n. 5068 ! - Processes in context A must correctly define all parameters describing A. 5069 ! - Processes in context B must correctly define all parameters describing B. 5070 ! - Processes which are not members of context A must pass ctxt_a = -1 and need not set other parameters describing A. 5071 ! - Processes which are not members of contextB must pass ctxt_b = -1 and need not set other parameters describing B. 5072 5073 mm = source%sizeb_global(1) 5074 nn = source%sizeb_global(2) 5075 5076 ija__ = [1, 1]; if (present(ija)) ija__ = ija 5077 ijb__ = [1, 1]; if (present(ijb)) ijb__ = ijb 5078 5079 if (all(out_mat%sizeb_global == -1)) then 5080 out_mat%descript%tab(CTXT_) = -1 5081 else 5082 ABI_CHECK_IEQ(out_mat%istwf_k, source%istwf_k, "istwfk_mat /= istwfk_source") 5083 if (any(out_mat%sizeb_global /= source%sizeb_global)) then 5084 msg = sjoin("Matrices should have same global shape but out_mat:", ltoa(out_mat%sizeb_global), & 5085 "source:", ltoa(source%sizeb_global)) 5086 ABI_ERROR(msg) 5087 end if 5088 end if 5089 5090 if (allocated(source%buffer_cplx)) then 5091 #ifdef HAVE_LINALG_SCALAPACK 5092 call pcgemr2d(mm, nn, & 5093 source%buffer_cplx, ija__(1), ija__(2), source%descript%tab, & 5094 out_mat%buffer_cplx, ijb__(1), ijb__(2), out_mat%descript%tab, & 5095 source%processor%grid%ictxt) 5096 5097 else if (allocated(source%buffer_real)) then 5098 call psgemr2d(mm, nn, & 5099 source%buffer_real, ija__(1), ija__(2), source%descript%tab, & 5100 out_mat%buffer_real,ijb__(1), ijb__(2), out_mat%descript%tab, & 5101 source%processor%grid%ictxt) 5102 #endif 5103 else 5104 ABI_ERROR("Neither buffer_cplx nor buffer_real are allocated!") 5105 end if 5106 5107 if (present(free)) then 5108 if (free) call source%free() 5109 end if 5110 5111 end subroutine slkmat_sp_take_from
m_slk/solve_gevp_complex [ Functions ]
[ Top ] [ m_slk ] [ Functions ]
NAME
solve_gevp_complex
FUNCTION
Calculation of eigenvalues and eigenvectors: A * X = lambda * B * X complex and real cases.
INPUTS
processor= descriptor of a processor matrix1= first ScaLAPACK matrix (matrix A) matrix2= second ScaLAPACK matrix (matrix B) comm= MPI communicator istwf_k= 2 if we have a real matrix else complex. [use_gpu_elpa]= Flag to activate the use of GPU (ELPA only)
SIDE EFFECTS
results= ScaLAPACK matrix coming out of the operation eigen= eigenvalues of the matrix
SOURCE
2880 #ifdef HAVE_LINALG_ELPA 2881 2882 subroutine solve_gevp_complex(na,nev,na_rows,na_cols,nblk,a,b,ev,z,tmp1,tmp2, & 2883 & my_prow,my_pcol,np_rows,np_cols,sc_desc,comm,& 2884 & use_gpu_elpa) ! Optional parameter 2885 2886 !-Arguments 2887 integer,intent(in) :: na 2888 integer,intent(in) :: nev 2889 integer,intent(in) :: na_rows,na_cols 2890 integer,intent(in) :: nblk 2891 integer,intent(in) :: my_pcol,my_prow 2892 integer,intent(in) :: np_cols,np_rows 2893 integer,intent(in) :: sc_desc(9) 2894 integer,intent(in) :: comm 2895 integer,optional,intent(in) :: use_gpu_elpa 2896 real*8 :: ev(na) 2897 complex*16 :: a(na_rows,na_cols),b(na_rows,na_cols),z(na_rows,na_cols) 2898 complex*16 :: tmp1(na_rows,na_cols),tmp2(na_rows,na_cols) 2899 !-Local variables 2900 integer :: i, n_col, n_row, use_gpu_elpa_ 2901 integer,external :: indxl2g,numroc 2902 complex*16, parameter :: CZERO = (0.d0,0.d0), CONE = (1.d0,0.d0) 2903 type(elpa_hdl_t) :: elpa_hdl 2904 2905 ! ************************************************************************* 2906 2907 use_gpu_elpa_=0 2908 #ifdef HAVE_LINALG_ELPA 2909 if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa 2910 #endif 2911 2912 ! Allocate ELPA handle 2913 call elpa_func_allocate(elpa_hdl,blacs_ctx=sc_desc(CTXT_),gpu=use_gpu_elpa_) 2914 call elpa_func_set_matrix(elpa_hdl,na,nblk,nev,na_rows,na_cols) 2915 call elpa_func_get_communicators(elpa_hdl,comm,my_prow,my_pcol) 2916 2917 call elpa_func_solve_gevp_2stage(elpa_hdl,a,b,z,ev,nev) 2918 2919 call elpa_func_deallocate(elpa_hdl) 2920 2921 end subroutine solve_gevp_complex 2922 2923 !---------------------------------------------------------------------- 2924 2925 subroutine solve_gevp_real(na,nev,na_rows,na_cols,nblk,a,b,ev,z,tmp1,tmp2, & 2926 & my_prow,my_pcol,np_rows,np_cols,sc_desc,comm, & 2927 & use_gpu_elpa) ! Optional argument 2928 2929 !-Arguments 2930 integer,intent(in) :: na 2931 integer,intent(in) :: nev 2932 integer,intent(in) :: na_rows,na_cols 2933 integer,intent(in) :: nblk 2934 integer,intent(in) :: my_pcol,my_prow 2935 integer,intent(in) :: np_cols,np_rows 2936 integer,intent(in) :: sc_desc(9) 2937 integer,intent(in) :: comm 2938 integer,optional,intent(in) :: use_gpu_elpa 2939 real*8 :: ev(na) 2940 real*8 :: a(na_rows,na_cols),b(na_rows,na_cols),z(na_rows,na_cols) 2941 real*8::tmp1(na_rows,na_cols),tmp2(na_rows,na_cols) 2942 !-Local variables 2943 integer :: i, n_col, n_row, use_gpu_elpa_ 2944 integer,external :: indxl2g,numroc 2945 type(elpa_hdl_t) :: elpa_hdl 2946 2947 ! ************************************************************************* 2948 2949 use_gpu_elpa_=0 2950 #ifdef HAVE_LINALG_ELPA 2951 if (present(use_gpu_elpa)) use_gpu_elpa_=use_gpu_elpa 2952 #endif 2953 2954 ! Allocate ELPA handle 2955 call elpa_func_allocate(elpa_hdl,blacs_ctx=sc_desc(CTXT_),gpu=use_gpu_elpa_) 2956 call elpa_func_set_matrix(elpa_hdl,na,nblk,nev,na_rows,na_cols) 2957 call elpa_func_get_communicators(elpa_hdl,comm,my_prow,my_pcol) 2958 2959 !FIXME Need to figure out why generalized_eigenvectors doesn't work in this real case 2960 ! while it is fine with complex case 2961 if(.false.) then 2962 call elpa_func_solve_gevp_2stage(elpa_hdl,a,b,tmp1,ev,nev) 2963 else 2964 ! 1. Calculate Cholesky factorization of Matrix B = U**T * U 2965 ! and invert triangular matrix U 2966 call elpa_func_cholesky(elpa_hdl,b) 2967 call elpa_func_invert_triangular(elpa_hdl,b) 2968 ! 2. Calculate U**-T * A * U**-1 2969 ! 2a. tmp1 = U**-T * A 2970 call elpa_func_hermitian_multiply(elpa_hdl,'U','L',na,b,a,na_rows,na_cols,tmp1,na_rows,na_cols) 2971 ! 2b. tmp2 = tmp1**T 2972 call pdtran(na,na,1.d0,tmp1,1,1,sc_desc,0.d0,tmp2,1,1,sc_desc) 2973 ! 2c. A = U**-T * tmp2 ( = U**-T * Aorig * U**-1 ) 2974 call elpa_func_hermitian_multiply(elpa_hdl,'U','U',na,b,tmp2,na_rows,na_cols,a,na_rows,na_cols) 2975 ! A is only set in the upper half, solve_evp_real needs a full matrix 2976 ! Set lower half from upper half 2977 call pdtran(na,na,1.d0,a,1,1,sc_desc,0.d0,tmp1,1,1,sc_desc) 2978 do i=1,na_cols 2979 ! Get global column corresponding to i and number of local rows up to 2980 ! and including the diagonal, these are unchanged in A 2981 n_col = indxl2g(i, nblk, my_pcol, 0, np_cols) 2982 n_row = numroc (n_col, nblk, my_prow, 0, np_rows) 2983 a(n_row+1:na_rows,i) = tmp1(n_row+1:na_rows,i) 2984 enddo 2985 ! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1 2986 ! Eigenvectors go to tmp1 2987 call elpa_func_solve_evp_1stage(elpa_hdl,a,tmp1,ev,nev) 2988 ! 4. Backtransform eigenvectors: Z = U**-1 * tmp1 2989 ! hermitian_multiply needs the transpose of U**-1, thus tmp2 = (U**-1)**T 2990 call pdtran(na,na,1.d0,b,1,1,sc_desc,0.d0,tmp2,1,1,sc_desc) 2991 call elpa_func_hermitian_multiply(elpa_hdl,'L','N',nev,tmp2,tmp1,na_rows,na_cols,z,na_rows,na_cols) 2992 end if 2993 2994 call elpa_func_deallocate(elpa_hdl) 2995 2996 end subroutine solve_gevp_real