TABLE OF CONTENTS
ABINIT/m_xgScalapack [ Modules ]
NAME
m_xgScalapack
FUNCTION
COPYRIGHT
Copyright (C) 2017-2024 ABINIT group (J. Bieder) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
15 #if defined HAVE_CONFIG_H 16 #include "config.h" 17 #endif 18 19 #include "abi_common.h" 20 21 module m_xgScalapack 22 23 use defs_basis, only : std_err, std_out, dp, ABI_GPU_DISABLED, ABI_GPU_OPENMP 24 use m_abicore 25 use m_xmpi 26 use m_errors 27 use m_slk 28 use m_xg 29 use m_xomp 30 use m_time, only: timab 31 32 #ifdef HAVE_MPI2 33 use mpi 34 #endif 35 36 implicit none 37 38 #ifdef HAVE_MPI1 39 include 'mpif.h' 40 #endif 41 42 43 private 44 45 integer, parameter :: M__SLK = 1 46 integer, parameter :: M__ROW = 1 47 integer, parameter :: M__COL = 2 48 integer, parameter :: M__UNUSED = 4 49 integer, parameter :: M__WORLD = 5 50 integer, parameter :: M__NDATA = 5 51 integer, parameter :: M__tim_init = 1690 52 integer, parameter :: M__tim_free = 1691 53 integer, parameter :: M__tim_heev = 1692 54 integer, parameter :: M__tim_hegv = 1693 55 integer, parameter :: M__tim_scatter = 1694 56 57 integer, parameter, public :: SLK_AUTO = -1 58 integer, parameter, public :: SLK_FORCED = 1 59 integer, parameter, public :: SLK_DISABLED = 0 60 integer, save :: M__CONFIG = SLK_AUTO 61 integer, save :: M__MAXDIM = 1000 62 63 type, public :: xgScalapack_t 64 integer :: comms(M__NDATA) 65 integer :: rank(M__NDATA) 66 integer :: size(M__NDATA) 67 integer :: coords(2) 68 integer :: ngroup 69 integer :: verbosity 70 type(grid_scalapack) :: grid 71 end type xgScalapack_t 72 73 public :: xgScalapack_init 74 public :: xgScalapack_free 75 public :: xgScalapack_heev 76 public :: xgScalapack_hegv 77 public :: xgScalapack_config 78 contains
m_xgScalapack/xgScalapack_init [ Functions ]
[ Top ] [ m_xgScalapack ] [ Functions ]
NAME
xgScalapack_init
FUNCTION
Init the scalapack communicator for next operations. If the comm has too many cpus, then take only a subgroup of this comm
INPUTS
OUTPUT
SOURCE
94 subroutine xgScalapack_init(xgScalapack,comm,maxDim,verbosity,gpu_option,usable) 95 96 type(xgScalapack_t), intent(inout) :: xgScalapack 97 integer , intent(in ) :: comm 98 integer , intent(in ) :: maxDim 99 integer , intent(in ) :: verbosity 100 logical , intent(in ) :: gpu_option 101 logical , intent( out) :: usable 102 double precision :: tsec(2) 103 #ifdef HAVE_LINALG_MKL_THREADS 104 integer :: mkl_get_max_threads 105 #endif 106 #ifdef HAVE_LINALG_OPENBLAS_THREADS 107 integer :: openblas_get_num_threads 108 #endif 109 integer :: nthread 110 #ifdef HAVE_LINALG_SCALAPACK 111 integer :: maxProc 112 integer :: nproc 113 integer :: ngroup 114 integer :: subgroup 115 integer :: mycomm(2) 116 integer :: ierr 117 integer :: test_row 118 integer :: test_col 119 #else 120 ABI_UNUSED(comm) 121 ABI_UNUSED(maxDim) 122 #endif 123 124 call timab(M__tim_init,1,tsec) 125 126 xgScalapack%comms = xmpi_comm_null 127 xgScalapack%rank = xmpi_undefined_rank 128 xgScalapack%verbosity = verbosity 129 130 nthread = 1 131 #ifdef HAVE_LINALG_MKL_THREADS 132 nthread = mkl_get_max_threads() 133 #elif HAVE_LINALG_OPENBLAS_THREADS 134 nthread = openblas_get_num_threads() 135 #else 136 nthread = xomp_get_num_threads(open_parallel=.true.) 137 if ( nthread == 0 ) nthread = 1 138 #endif 139 140 #ifdef HAVE_LINALG_SCALAPACK 141 142 nproc = xmpi_comm_size(comm) 143 xgScalapack%comms(M__WORLD) = comm 144 xgScalapack%rank(M__WORLD) = xmpi_comm_rank(comm) 145 xgScalapack%size(M__WORLD) = nproc 146 147 maxProc = (maxDim / (M__MAXDIM*nthread))+1 ! ( M__MAXDIM x M__MAXDIM matrice per MPI ) 148 if ( M__CONFIG > 0 .and. M__CONFIG <= nproc ) then 149 maxProc = M__CONFIG 150 else if ( maxProc > nproc ) then 151 maxProc = nproc 152 end if 153 154 if ( maxProc == 1 .or. M__CONFIG == SLK_DISABLED) then 155 usable = .false. 156 return 157 else if ( nthread > 1 ) then ! disable scalapack with threads since it is not threadsafe 158 ! This should be check with new elpa version en MPI+OpenMP 159 if ( M__CONFIG > 0 ) then 160 ABI_WARNING("xgScalapack turned off because you have threads") 161 end if 162 usable = .false. 163 return 164 else 165 usable = .true. 166 maxProc = 2*((maxProc+1)/2) ! Round to next even number 167 end if 168 169 if ( xgScalapack%verbosity > 0 ) then 170 write(std_out,*) " xgScalapack will use", maxProc, "/", nproc, "MPIs" 171 end if 172 173 ngroup = nproc/maxProc 174 xgScalapack%ngroup = ngroup 175 176 if ( maxProc < nproc ) then 177 if ( xgScalapack%rank(M__WORLD) < maxProc*ngroup ) then 178 subgroup = xgScalapack%rank(M__WORLD)/maxProc 179 mycomm(1) = M__SLK 180 mycomm(2) = M__UNUSED 181 else 182 subgroup = ngroup+1 183 mycomm(1) = M__UNUSED 184 mycomm(2) = M__SLK 185 end if 186 call MPI_Comm_split(comm, subgroup, xgScalapack%rank(M__WORLD), xgScalapack%comms(mycomm(1)),ierr) 187 if ( ierr /= 0 ) then 188 ABI_ERROR("Error splitting communicator") 189 end if 190 xgScalapack%comms(mycomm(2)) = xmpi_comm_null 191 xgScalapack%rank(mycomm(1)) = xmpi_comm_rank(xgScalapack%comms(mycomm(1))) 192 xgScalapack%rank(mycomm(2)) = xmpi_undefined_rank 193 xgScalapack%size(mycomm(1)) = xmpi_comm_size(xgScalapack%comms(mycomm(1))) 194 xgScalapack%size(mycomm(2)) = nproc - xgScalapack%size(mycomm(1)) 195 else 196 call MPI_Comm_dup(comm,xgScalapack%comms(M__SLK),ierr) 197 if ( ierr /= 0 ) then 198 ABI_ERROR("Error duplicating communicator") 199 end if 200 xgScalapack%rank(M__SLK) = xmpi_comm_rank(xgScalapack%comms(M__SLK)) 201 xgScalapack%size(M__SLK) = nproc 202 end if 203 204 if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then 205 call xgScalapack%grid%init(xgScalapack%size(M__SLK), xgScalapack%comms(M__SLK), gpu_option) 206 call BLACS_GridInfo(xgScalapack%grid%ictxt, & 207 xgScalapack%grid%dims(M__ROW), xgScalapack%grid%dims(M__COL),& 208 xgScalapack%coords(M__ROW), xgScalapack%coords(M__COL)) 209 210 !These values are the same as those computed by BLACS_GRIDINFO 211 !except in the case where the myproc argument is not the local proc 212 test_row = INT((xgScalapack%rank(M__SLK)) / xgScalapack%grid%dims(2)) 213 test_col = MOD((xgScalapack%rank(M__SLK)), xgScalapack%grid%dims(2)) 214 if ( test_row /= xgScalapack%coords(M__ROW) ) then 215 ABI_WARNING("Row id mismatch") 216 end if 217 if ( test_col /= xgScalapack%coords(M__COL) ) then 218 ABI_WARNING("Col id mismatch") 219 end if 220 end if 221 222 #else 223 usable = .false. 224 #endif 225 226 call timab(M__tim_init,2,tsec) 227 228 end subroutine xgScalapack_init 229 230 subroutine xgScalapack_config(myconfig,maxDim) 231 232 integer, intent(in) :: myconfig 233 integer, intent(in) :: maxDim 234 if ( myconfig == SLK_AUTO) then 235 M__CONFIG = myconfig 236 ABI_COMMENT("xgScalapack in auto mode") 237 else if ( myconfig == SLK_DISABLED) then 238 M__CONFIG = myconfig 239 ABI_COMMENT("xgScalapack disabled") 240 else if ( myconfig > 0) then 241 M__CONFIG = myconfig 242 ABI_COMMENT("xgScalapack enabled") 243 else 244 ABI_WARNING("Bad value for xgScalapack config -> autodetection") 245 M__CONFIG = SLK_AUTO 246 end if 247 if ( maxDim > 0 ) then 248 M__MAXDIM = maxDim 249 end if 250 251 end subroutine xgScalapack_config 252 253 function toProcessorScalapack(xgScalapack) result(processor) 254 255 type(xgScalapack_t), intent(in) :: xgScalapack 256 type(processor_scalapack) :: processor 257 258 processor%myproc = xgScalapack%rank(M__SLK) 259 processor%comm = xgScalapack%comms(M__SLK) 260 processor%coords = xgScalapack%coords 261 processor%grid = xgScalapack%grid 262 end function toProcessorScalapack 263 264 !This is for testing purpose. 265 !May not be optimal since I do not control old implementation but at least gives a reference. 266 subroutine xgScalapack_heev(xgScalapack,matrixA,eigenvalues,gpu_option) 267 use, intrinsic :: iso_c_binding 268 type(xgScalapack_t), intent(inout) :: xgScalapack 269 type(xgBlock_t) , intent(inout) :: matrixA 270 type(xgBlock_t) , intent(inout) :: eigenvalues 271 integer, optional , intent(in) :: gpu_option 272 #ifdef HAVE_LINALG_SCALAPACK 273 double precision, pointer :: matrix(:,:) !(cplex*nbli_global,nbco_global) 274 double precision, pointer :: eigenvalues_tmp(:,:) 275 double precision, pointer :: vector(:) 276 double precision :: tsec(2) 277 integer :: cplex 278 integer :: istwf_k 279 integer :: nbli_global, nbco_global 280 type(c_ptr) :: cptr 281 integer :: req(2), status(MPI_STATUS_SIZE,2), ierr 282 integer :: l_gpu_option,l_use_gpu_elpa 283 #endif 284 285 #ifdef HAVE_LINALG_SCALAPACK 286 call timab(M__tim_heev,1,tsec) 287 288 l_gpu_option=ABI_GPU_DISABLED 289 if (present(gpu_option)) then 290 l_gpu_option = gpu_option 291 end if 292 l_use_gpu_elpa=0 293 #ifdef HAVE_LINALG_ELPA 294 if (l_gpu_option/=ABI_GPU_DISABLED) l_use_gpu_elpa=1 295 #endif 296 297 ! Keep only working processors 298 if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then 299 300 call xgBlock_getSize(eigenvalues,nbli_global,nbco_global) 301 if ( cols(matrixA) /= nbli_global ) then 302 ABI_ERROR("Number of eigen values differ from number of vectors") 303 end if 304 305 if ( space(matrixA) == SPACE_C ) then 306 cplex = 2 307 istwf_k = 1 308 else 309 cplex = 1 310 istwf_k = 2 311 endif 312 313 call xgBlock_getSize(matrixA,nbli_global,nbco_global) 314 315 if(l_gpu_option==ABI_GPU_OPENMP) then 316 call xgBlock_copy_from_gpu(matrixA) 317 call xgBlock_copy_from_gpu(eigenvalues) 318 end if 319 320 call xgBlock_reverseMap(matrixA,matrix,nbli_global,nbco_global) 321 call xgBlock_reverseMap(eigenvalues,eigenvalues_tmp,nbco_global,1) 322 cptr = c_loc(eigenvalues_tmp) 323 call c_f_pointer(cptr,vector,(/ nbco_global /)) 324 325 call compute_eigen1(xgScalapack%comms(M__SLK), & 326 toProcessorScalapack(xgScalapack), & 327 cplex,nbli_global,nbco_global,matrix,vector,istwf_k,& 328 use_gpu_elpa=l_use_gpu_elpa) 329 330 end if 331 332 call timab(M__tim_heev,2,tsec) 333 334 req(1:2)=-1 335 call xgScalapack_scatter(xgScalapack,matrixA,req(1)) 336 call xgScalapack_scatter(xgScalapack,eigenvalues,req(2)) 337 #ifdef HAVE_MPI 338 if ( any(req/=-1) ) then 339 call MPI_WaitAll(2,req,status,ierr) 340 if ( ierr /= 0 ) then 341 ABI_ERROR("Error waiting data") 342 endif 343 end if 344 #endif 345 346 if(l_gpu_option==ABI_GPU_OPENMP) then 347 call xgBlock_copy_to_gpu(matrixA) 348 call xgBlock_copy_to_gpu(eigenvalues) 349 end if 350 351 #else 352 ABI_ERROR("ScaLAPACK support not available") 353 ABI_UNUSED(xgScalapack%verbosity) 354 ABI_UNUSED(matrixA%normal) 355 ABI_UNUSED(eigenvalues%normal) 356 #endif 357 358 end subroutine xgScalapack_heev 359 360 !This is for testing purpose. 361 !May not be optimal since I do not control old implementation but at least gives a reference. 362 subroutine xgScalapack_hegv(xgScalapack,matrixA,matrixB,eigenvalues,gpu_option) 363 use, intrinsic :: iso_c_binding 364 type(xgScalapack_t), intent(inout) :: xgScalapack 365 type(xgBlock_t) , intent(inout) :: matrixA 366 type(xgBlock_t) , intent(inout) :: matrixB 367 type(xgBlock_t) , intent(inout) :: eigenvalues 368 integer, optional , intent(in) :: gpu_option 369 #ifdef HAVE_LINALG_SCALAPACK 370 double precision, pointer :: matrix1(:,:) !(cplex*nbli_global,nbco_global) 371 double precision, pointer :: matrix2(:,:) !(cplex*nbli_global,nbco_global) 372 double precision, pointer :: eigenvalues_tmp(:,:) 373 double precision, pointer :: vector(:) 374 double precision :: tsec(2) 375 integer :: cplex 376 integer :: istwf_k 377 integer :: nbli_global, nbco_global 378 type(c_ptr) :: cptr 379 integer :: req(2), status(MPI_STATUS_SIZE,2),ierr 380 integer :: l_gpu_option,l_use_gpu_elpa 381 #endif 382 383 #ifdef HAVE_LINALG_SCALAPACK 384 call timab(M__tim_hegv,1,tsec) 385 386 l_gpu_option=ABI_GPU_DISABLED 387 if (present(gpu_option)) then 388 l_gpu_option = gpu_option 389 end if 390 l_use_gpu_elpa=0 391 #ifdef HAVE_LINALG_ELPA 392 if (l_gpu_option/=ABI_GPU_DISABLED) l_use_gpu_elpa=1 393 #endif 394 395 ! Keep only working processors 396 if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then 397 398 call xgBlock_getSize(eigenvalues,nbli_global,nbco_global) 399 if ( cols(matrixA) /= cols(matrixB) ) then 400 ABI_ERROR("Matrix A and B don't have the same number of vectors") 401 end if 402 403 if ( cols(matrixA) /= nbli_global ) then 404 ABI_ERROR("Number of eigen values differ from number of vectors") 405 end if 406 407 if ( space(matrixA) == SPACE_C ) then 408 cplex = 2 409 istwf_k = 1 410 else 411 cplex = 1 412 istwf_k = 2 413 endif 414 415 call xgBlock_getSize(matrixA,nbli_global,nbco_global) 416 417 if(l_gpu_option==ABI_GPU_OPENMP) then 418 call xgBlock_copy_from_gpu(matrixA) 419 call xgBlock_copy_from_gpu(matrixB) 420 call xgBlock_copy_from_gpu(eigenvalues) 421 end if 422 423 call xgBlock_reverseMap(matrixA,matrix1,nbli_global,nbco_global) 424 call xgBlock_reverseMap(matrixB,matrix2,nbli_global,nbco_global) 425 call xgBlock_reverseMap(eigenvalues,eigenvalues_tmp,nbco_global,1) 426 cptr = c_loc(eigenvalues_tmp) 427 call c_f_pointer(cptr,vector,(/ nbco_global /)) 428 429 call compute_eigen2(xgScalapack%comms(M__SLK), & 430 toProcessorScalapack(xgScalapack), & 431 cplex,nbli_global,nbco_global,matrix1,matrix2,vector,istwf_k,& 432 use_gpu_elpa=l_use_gpu_elpa) 433 end if 434 435 call timab(M__tim_hegv,2,tsec) 436 437 req(1:2)=-1 438 call xgScalapack_scatter(xgScalapack,matrixA,req(1)) 439 call xgScalapack_scatter(xgScalapack,eigenvalues,req(2)) 440 #ifdef HAVE_MPI 441 if ( any(req/=-1) ) then 442 call MPI_WaitAll(2,req,status,ierr) 443 if ( ierr /= 0 ) then 444 ABI_ERROR("Error waiting data") 445 endif 446 end if 447 #endif 448 449 if(l_gpu_option==ABI_GPU_OPENMP) then 450 call xgBlock_copy_to_gpu(matrixA) 451 call xgBlock_copy_to_gpu(eigenvalues) 452 end if 453 454 #else 455 ABI_ERROR("ScaLAPACK support not available") 456 ABI_UNUSED(xgScalapack%verbosity) 457 ABI_UNUSED(matrixA%normal) 458 ABI_UNUSED(matrixB%normal) 459 ABI_UNUSED(eigenvalues%normal) 460 #endif 461 462 end subroutine xgScalapack_hegv 463 464 465 subroutine xgScalapack_scatter(xgScalapack,matrix,req) 466 467 type(xgScalapack_t), intent(in ) :: xgScalapack 468 type(xgBlock_t) , intent(inout) :: matrix 469 integer , intent( out) :: req 470 double precision, pointer :: tab(:,:) 471 double precision :: tsec(2) 472 integer :: cols, rows 473 integer :: ierr 474 integer :: sendto, receivefrom 475 integer :: lap 476 477 call timab(M__tim_scatter,1,tsec) 478 479 call xgBlock_getSize(matrix,rows,cols) 480 call xgBlock_reverseMap(matrix,tab,rows,cols) 481 482 ! If we did the he(e|g)v and we are the first group 483 if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null .and. xgScalapack%rank(M__WORLD)<xgScalapack%size(M__SLK) ) then 484 lap = xgScalapack%ngroup 485 sendto = xgScalapack%rank(M__WORLD) + lap*xgScalapack%size(M__SLK) 486 if ( sendto < xgScalapack%size(M__WORLD) ) then 487 !do while ( sendto < xgScalapack%size(M__WORLD) ) 488 !call xmpi_send(tab,sendto,sendto,xgScalapack%comms(M__WORLD),ierr) 489 call xmpi_isend(tab,sendto,sendto,xgScalapack%comms(M__WORLD),req,ierr) 490 !write(*,*) xgScalapack%rank(M__WORLD), "sends to", sendto 491 if ( ierr /= 0 ) then 492 ABI_ERROR("Error sending data") 493 end if 494 !lap = lap+1 495 !sendto = xgScalapack%rank(M__WORLD) + lap*xgScalapack%size(M__SLK) 496 !end do 497 end if 498 else if ( xgScalapack%comms(M__UNUSED) /= xmpi_comm_null ) then 499 receivefrom = MODULO(xgScalapack%rank(M__WORLD), xgScalapack%size(M__SLK)) 500 if ( receivefrom >= 0 ) then 501 !call xmpi_recv(tab,receivefrom,xgScalapack%rank(M__WORLD),xgScalapack%comms(M__WORLD),ierr) 502 call xmpi_irecv(tab,receivefrom,xgScalapack%rank(M__WORLD),xgScalapack%comms(M__WORLD),req,ierr) 503 !write(*,*) xgScalapack%rank(M__WORLD), "receive from", receivefrom 504 if ( ierr /= 0 ) then 505 ABI_ERROR("Error receiving data") 506 end if 507 end if 508 !else 509 !ABI_BUG("Error scattering data") 510 end if 511 512 call timab(M__tim_scatter,2,tsec) 513 514 end subroutine xgScalapack_scatter 515 516 517 subroutine xgScalapack_free(xgScalapack) 518 519 type(xgScalapack_t), intent(inout) :: xgScalapack 520 double precision :: tsec(2) 521 #ifdef HAVE_LINALG_SCALAPACK 522 integer :: ierr 523 #endif 524 525 call timab(M__tim_free,1,tsec) 526 #ifdef HAVE_LINALG_SCALAPACK 527 if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then 528 call BLACS_GridExit(xgScalapack%grid%ictxt) 529 call MPI_Comm_free(xgScalapack%comms(M__SLK),ierr) 530 end if 531 if ( xgScalapack%comms(M__UNUSED) /= xmpi_comm_null ) then 532 call MPI_Comm_free(xgScalapack%comms(M__UNUSED),ierr) 533 end if 534 #else 535 ABI_UNUSED(xgScalapack%verbosity) 536 #endif 537 call timab(M__tim_free,2,tsec) 538 539 end subroutine xgScalapack_free 540 541 end module m_xgScalapack