TABLE OF CONTENTS
- ABINIT/m_chebfi2
- m_chebfi2/chebfi_allocateAll
- m_chebfi2/chebfi_ampfactor
- m_chebfi2/chebfi_computeNextOrderChebfiPolynom
- m_chebfi2/chebfi_free
- m_chebfi2/chebfi_init
- m_chebfi2/chebfi_memInfo
- m_chebfi2/chebfi_oracle1
- m_chebfi2/chebfi_poly1
- m_chebfi2/chebfi_rayleighRitzQuotients
- m_chebfi2/chebfi_run
- m_chebfi2/chebfi_swapInnerBuffers
ABINIT/m_chebfi2 [ Functions ]
NAME
m_chebfi2
FUNCTION
This module contains the types and routines used to apply the Chebyshev filtering method (2021 implementation using xG abstraction layer) It mainly defines a 'chebfi' datatypes and associated methods.
COPYRIGHT
Copyright (C) 2018-2024 ABINIT group (BS) This file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
SOURCE
19 #if defined HAVE_CONFIG_H 20 #include "config.h" 21 #endif 22 23 #include "abi_common.h" 24 25 ! nvtx related macro definition 26 #include "nvtx_macros.h" 27 28 module m_chebfi2 29 30 use defs_basis 31 use defs_abitypes 32 use m_abicore 33 use m_errors 34 use m_time, only : timab 35 36 use m_cgtools 37 use m_xg 38 use m_xgTransposer 39 use m_xg_ortho_RR 40 41 use m_xmpi 42 use m_xomp 43 #ifdef HAVE_OPENMP 44 use omp_lib 45 #endif 46 47 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL) 48 use m_gpu_toolbox, only : CPU_DEVICE_ID, gpu_device_synchronize 49 #endif 50 51 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS) 52 use m_nvtx_data 53 #endif 54 55 implicit none 56 57 private 58 59 !Several (private) parameters 60 !------------------------------------------------- 61 62 integer, parameter :: tim_init = 1751 63 integer, parameter :: tim_free = 1752 64 ! integer, parameter :: tim_run = 1753 65 integer, parameter :: tim_getAX_BX = 1754 66 integer, parameter :: tim_invovl = 1755 67 integer, parameter :: tim_residu = 1756 68 integer, parameter :: tim_RR = 1757 69 ! integer, parameter :: tim_pcond = 1758 70 integer, parameter :: tim_RR_q = 1759 71 integer, parameter :: tim_next_p = 1760 72 integer, parameter :: tim_swap = 1761 73 integer, parameter :: tim_amp_f = 1762 74 integer, parameter :: tim_alltoall = 1763 75 integer, parameter :: tim_X_NP_init = 1769 76 integer, parameter :: tim_AX_BX_init = 1770 77 78 !Public 'chebfi' datatype 79 !------------------------------------------------- 80 81 type, public :: chebfi_t 82 integer :: space 83 integer :: spacedim ! Space dimension for one vector 84 integer :: total_spacedim ! Maybe not needed 85 integer :: neigenpairs ! Number of eigen values/vectors we want 86 integer :: nline ! Number of line to perform 87 integer :: spacecom ! Communicator for MPI 88 real(dp) :: tolerance ! Tolerance on the residu to stop the minimization 89 real(dp) :: ecut ! Ecut for Chebfi oracle 90 91 integer :: paral_kgb ! MPI parallelization variables 92 integer :: bandpp 93 integer :: comm_cols 94 integer :: comm_rows 95 96 logical :: paw 97 integer :: eigenProblem !1 (A*x = (lambda)*B*x), 2 (A*B*x = (lambda)*x), 3 (B*A*x = (lambda)*x) 98 integer :: istwf_k 99 100 ! when GPU is enabled, currently OpenMP is not fully supported, abinit is launched 101 ! with OMP_NUM_THREADS=1, but we may locally increase the number of OpenMP threads 102 ! wherever it is safe to do; in that case we use gpu_kokkos_nthrd to specify 103 ! the number of OpenMP threads. This value is controlled by dtset variable 104 ! dtset%gpu_kokkos_nthrd 105 integer :: gpu_option 106 integer :: gpu_kokkos_nthrd = 1 ! only used if gpu is enabled, number of OpenMP threads used 107 integer :: me_g0 108 109 !ARRAYS 110 type(xgBlock_t) :: X 111 112 type(xg_t) :: X_NP 113 type(xgBlock_t) :: X_next 114 type(xgBlock_t) :: X_prev 115 116 type(xg_t) :: AX 117 type(xg_t) :: BX 118 119 type(xgBlock_t) :: xXColsRows 120 type(xgBlock_t) :: xAXColsRows 121 type(xgBlock_t) :: xBXColsRows 122 123 type(xgTransposer_t) :: xgTransposerX 124 type(xgTransposer_t) :: xgTransposerAX 125 type(xgTransposer_t) :: xgTransposerBX 126 127 type(xgBlock_t) :: eigenvalues 128 129 !SWAP POINTERS 130 type(xgBlock_t) :: X_swap 131 type(xgBlock_t) :: AX_swap 132 type(xgBlock_t) :: BX_swap 133 134 end type chebfi_t 135 136 !Public methods associated to 'chebfi' datatype 137 !------------------------------------------------- 138 public :: chebfi_init 139 public :: chebfi_free 140 public :: chebfi_memInfo 141 public :: chebfi_run 142 143 CONTAINS !========================================================================================
m_chebfi2/chebfi_allocateAll [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_allocateAll
FUNCTION
Allocate all memory spaces in a 'chebfi' datastructure.
INPUTS
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
258 subroutine chebfi_allocateAll(chebfi) 259 260 implicit none 261 262 ! Arguments ------------------------------------ 263 type(chebfi_t) , intent(inout) :: chebfi 264 265 ! Local variables------------------------------- 266 ! scalars 267 integer :: neigenpairs 268 integer :: space 269 integer :: spacedim 270 integer :: total_spacedim, ierr 271 ! arrays 272 real(dp) :: tsec(2) 273 274 ! ********************************************************************* 275 276 space = chebfi%space 277 spacedim = chebfi%spacedim 278 neigenpairs = chebfi%neigenpairs 279 280 call chebfi_free(chebfi) 281 282 call timab(tim_X_NP_init,1,tsec) 283 if (chebfi%paral_kgb == 0) then 284 chebfi%total_spacedim = spacedim 285 call xg_init(chebfi%X_NP,space,spacedim,2*neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option) !regular arrays 286 call xg_setBlock(chebfi%X_NP, chebfi%X_next, 1, spacedim, neigenpairs) 287 call xg_setBlock(chebfi%X_NP, chebfi%X_prev, neigenpairs+1, spacedim, neigenpairs) 288 else 289 total_spacedim = spacedim 290 call xmpi_sum(total_spacedim,chebfi%spacecom,ierr) 291 chebfi%total_spacedim = total_spacedim 292 call xg_init(chebfi%X_NP,space,total_spacedim,2*chebfi%bandpp,chebfi%spacecom,gpu_option=chebfi%gpu_option) !transposed arrays 293 call xg_setBlock(chebfi%X_NP, chebfi%X_next, 1, total_spacedim, chebfi%bandpp) 294 call xg_setBlock(chebfi%X_NP, chebfi%X_prev, chebfi%bandpp+1, total_spacedim, chebfi%bandpp) 295 end if 296 call timab(tim_X_NP_init,2,tsec) 297 298 call timab(tim_AX_BX_init,1,tsec) 299 !transposer will handle these arrays automatically 300 call xg_init(chebfi%AX,space,spacedim,neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option) 301 call xg_init(chebfi%BX,space,spacedim,neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option) 302 call timab(tim_AX_BX_init,2,tsec) 303 304 end subroutine chebfi_allocateAll
m_chebfi2/chebfi_ampfactor [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_ampfactor
FUNCTION
Compute amplification factor
INPUTS
eig (:,:)= eigenvalues lambda_minus,lambda_plus= nline_bands(:)= order of Chebyshev polynom for each band
OUTPUT
SIDE EFFECTS
residu<type(xgBlock_t)>= vector of residuals chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
961 subroutine chebfi_ampfactor(chebfi,eig,lambda_minus,lambda_plus,nline_bands) 962 963 implicit none 964 965 ! Arguments ------------------------------------ 966 integer, intent(in ) :: nline_bands(:) 967 real(dp), pointer, intent(in ) :: eig(:,:) 968 real(dp), intent(in ) :: lambda_minus 969 real(dp), intent(in ) :: lambda_plus 970 type(chebfi_t), intent(inout) :: chebfi 971 972 ! Local variables------------------------------- 973 ! scalars 974 integer :: iband,nbands 975 real(dp) :: ampfactor 976 real(dp) :: eig_per_band 977 type(xgBlock_t) :: X_part 978 type(xgBlock_t) :: AX_part 979 type(xgBlock_t) :: BX_part 980 981 ! ********************************************************************* 982 983 if (chebfi%paral_kgb == 0) then 984 nbands = chebfi%neigenpairs 985 else 986 nbands = chebfi%bandpp 987 end if 988 989 do iband = 1, nbands 990 991 eig_per_band = eig(1,iband) 992 993 !cheb_poly1(x, n, a, b) 994 ampfactor = cheb_poly1(eig_per_band, nline_bands(iband), lambda_minus, lambda_plus) 995 996 if(abs(ampfactor) < 1e-3) ampfactor = 1e-3 !just in case, avoid amplifying too much 997 998 call xgBlock_setBlock(chebfi%xXColsRows, X_part, iband, chebfi%total_spacedim, 1) 999 call xgBlock_setBlock(chebfi%xAXColsRows, AX_part, iband, chebfi%total_spacedim, 1) 1000 1001 call xgBlock_scale(X_part, 1/ampfactor, 1) 1002 call xgBlock_scale(AX_part, 1/ampfactor, 1) 1003 1004 if(chebfi%paw) then 1005 call xgBlock_setBlock(chebfi%xBXColsRows, BX_part, iband, chebfi%total_spacedim, 1) 1006 call xgBlock_scale(BX_part, 1/ampfactor, 1) 1007 end if 1008 end do 1009 1010 end subroutine chebfi_ampfactor
m_chebfi2/chebfi_computeNextOrderChebfiPolynom [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_computeNextOrderChebfiPolynom
FUNCTION
From P_n(B-^1.A)|X> (where P_n is the Chebyshev polynom of order n), computes P_n+1(B-^1.A)|X>
INPUTS
iline=order of polynom center= one_over_r,two_over_r= getBm1X= pointer to the function giving B^-1|X> B is typically the overlap operator S
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
838 subroutine chebfi_computeNextOrderChebfiPolynom(chebfi,iline,center,one_over_r,two_over_r,getBm1X) 839 840 implicit none 841 842 !Arguments ------------------------------------ 843 real(dp) , intent(in) :: center 844 integer , intent(in) :: iline 845 real(dp) , intent(in) :: one_over_r 846 real(dp) , intent(in) :: two_over_r 847 type(chebfi_t) , intent(inout) :: chebfi 848 interface 849 subroutine getBm1X(X,Bm1X,transposer) 850 use m_xg, only : xgBlock_t 851 use m_xgTransposer !, only: xgTransposer_t 852 type(xgBlock_t), intent(inout) :: X 853 type(xgBlock_t), intent(inout) :: Bm1X 854 type(xgTransposer_t), optional, intent(inout) :: transposer 855 end subroutine getBm1X 856 end interface 857 858 !Local variables------------------------------- 859 real(dp) :: tsec(2) 860 861 ! ********************************************************************* 862 863 if (chebfi%paw) then 864 call timab(tim_invovl, 1, tsec) 865 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_BM1X) 866 call getBm1X(chebfi%xAXColsRows, chebfi%X_next, chebfi%xgTransposerX) 867 ABI_NVTX_END_RANGE() 868 call timab(tim_invovl, 2, tsec) 869 else 870 call xgBlock_copy(chebfi%xAXColsRows,chebfi%X_next) 871 end if 872 873 ABI_NVTX_START_RANGE(NVTX_INVOVL_POST3) 874 call xgBlock_scale(chebfi%xXColsRows, center, 1) !scale by center 875 876 !(B-1 * A * Psi^i-1 - c * Psi^i-1) 877 call xgBlock_saxpy(chebfi%X_next, dble(-1.0), chebfi%xXColsRows) 878 879 !Psi^i-1 = 1/c * Psi^i-1 880 call xgBlock_scale(chebfi%xXColsRows, 1/center, 1) !counter scale by 1/center 881 882 if (iline == 0) then 883 call xgBlock_scale(chebfi%X_next, one_over_r, 1) 884 else 885 call xgBlock_scale(chebfi%X_next, two_over_r, 1) 886 887 call xgBlock_saxpy(chebfi%X_next, dble(-1.0), chebfi%X_prev) 888 end if 889 890 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL) 891 if (chebfi%gpu_option==ABI_GPU_KOKKOS) then 892 call gpu_device_synchronize() 893 end if 894 #endif 895 ABI_NVTX_END_RANGE() 896 897 end subroutine chebfi_computeNextOrderChebfiPolynom
m_chebfi2/chebfi_free [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_free
FUNCTION
Destroy a 'chebfi' datastructure.
INPUTS
OUTPUT
arraymem(2)= memory information
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
326 subroutine chebfi_free(chebfi) 327 328 implicit none 329 330 !Arguments ------------------------------------ 331 type(chebfi_t) , intent(inout) :: chebfi 332 333 ! ********************************************************************* 334 335 call xg_free(chebfi%X_NP) 336 337 call xg_free(chebfi%AX) 338 call xg_free(chebfi%BX) 339 340 !call xg_finalize() 341 342 end subroutine chebfi_free
m_chebfi2/chebfi_init [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_init
FUNCTION
Initialize a 'chebfi' datastructure.
INPUTS
bandpp= number of 'bands' handled by a processor eigenProblem= type of eigenpb: 1 (A*x = (lambda)*B*x), 2 (A*B*x = (lambda)*x), 3 (B*A*x = (lambda)*x) istwf_k= parameter that describes the storage of wfs me_g0= 1 if this processors treats G=0, 0 otherwise neigenpairs= number of requested eigenvectors/eigenvalues nline= Chebyshev polynomial level (.i.e. number of H applications) comm_rows= "rows" communicator comm_cols= "cols" communicator paral_kgb= flag controlling (k,g,bands) parallelization space= defines in which space we are (columns, rows, etc.) spacecom= MPI communicator spacedim= space dimension for one vector paw= flag. TRUE if current calculation ses the PAW approach ecut= plane-wave cut-off energy tolerance= tolerance criterion on the residu to stop the minimization
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
177 subroutine chebfi_init(chebfi,neigenpairs,spacedim,tolerance,ecut,paral_kgb,bandpp, & 178 nline,space,eigenProblem,istwf_k,spacecom,me_g0,paw,comm_rows,comm_cols,& 179 gpu_option,gpu_kokkos_nthrd) 180 181 implicit none 182 183 ! Arguments ------------------------------------ 184 integer , intent(in ) :: bandpp 185 integer , intent(in ) :: eigenProblem 186 integer , intent(in ) :: istwf_k 187 integer , intent(in ) :: me_g0 188 integer , intent(in ) :: neigenpairs 189 integer , intent(in ) :: nline 190 integer , intent(in ) :: comm_cols 191 integer , intent(in ) :: comm_rows 192 integer , intent(in ) :: paral_kgb 193 integer , intent(in ) :: space 194 integer , intent(in ) :: spacecom 195 integer , intent(in ) :: spacedim 196 integer , intent(in ) :: gpu_option 197 logical , intent(in ) :: paw 198 real(dp) , intent(in ) :: ecut 199 real(dp) , intent(in ) :: tolerance 200 type(chebfi_t), intent(inout) :: chebfi 201 integer , intent(in ), optional :: gpu_kokkos_nthrd 202 203 ! Local variables------------------------------- 204 real(dp) :: tsec(2) 205 206 ! ********************************************************************* 207 208 call timab(tim_init,1,tsec) 209 210 chebfi%space = space 211 chebfi%neigenpairs = neigenpairs 212 chebfi%spacedim = spacedim 213 chebfi%tolerance = tolerance 214 chebfi%ecut = ecut 215 chebfi%paral_kgb = paral_kgb 216 chebfi%comm_cols = comm_cols 217 chebfi%bandpp = bandpp 218 chebfi%comm_rows = comm_rows 219 chebfi%nline = nline 220 chebfi%spacecom = spacecom 221 chebfi%eigenProblem = eigenProblem 222 chebfi%istwf_k = istwf_k 223 chebfi%me_g0 = me_g0 224 chebfi%paw = paw 225 chebfi%gpu_option = gpu_option 226 227 if (present(gpu_kokkos_nthrd)) then 228 chebfi%gpu_kokkos_nthrd = gpu_kokkos_nthrd 229 else 230 chebfi%gpu_kokkos_nthrd = 1 231 end if 232 233 call chebfi_allocateAll(chebfi) 234 235 call timab(tim_init,2,tsec) 236 237 end subroutine chebfi_init
m_chebfi2/chebfi_memInfo [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_memInfo
FUNCTION
Provides memory information about a 'chebfi' datastructure.
INPUTS
bandpp= number of 'bands' handled by a processor neigenpairs= number of requested eigenvectors/eigenvalues paral_kgb= flag controlling (k,g,bands) parallelization space= defines in which space we are (columns, rows, etc.) spacedim= dimension of MPI communicator total_spacedim= size of global KGB communicator (typically 'banspinorfft' comm.)
OUTPUT
arraymem(2)= memory information
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
370 function chebfi_memInfo(neigenpairs,spacedim,space,paral_kgb,total_spacedim,bandpp) result(arraymem) 371 372 implicit none 373 374 !Arguments ------------------------------------ 375 integer, intent(in ) :: bandpp 376 integer, intent(in ) :: neigenpairs 377 integer, intent(in ) :: paral_kgb 378 integer, intent(in ) :: space 379 integer, intent(in ) :: spacedim 380 integer, intent(in ) :: total_spacedim 381 382 !Local variables------------------------------- 383 !scalars 384 real(dp) :: memX 385 real(dp) :: memX_next 386 real(dp) :: memX_prev 387 real(dp) :: memAX 388 real(dp) :: memBX 389 !Transposer variables 390 real(dp) :: memX_CR 391 real(dp) :: memAX_CR 392 real(dp) :: memBX_CR 393 !chebfi_rayleighRitz function variables 394 real(dp) :: memA_und_X 395 real(dp) :: memB_und_X 396 real(dp) :: memEigenvalues 397 real(dp) :: cplx 398 !arrays 399 real(dp) :: arraymem(2) 400 401 ! ********************************************************************* 402 cplx = 1 403 if ( space == SPACE_C ) cplx = 2 !for now only complex 404 405 !Permanent in chebfi 406 memX = cplx * kind(1.d0) * spacedim * neigenpairs 407 408 if (paral_kgb == 0) then 409 memX_next = cplx * kind(1.d0) * spacedim * neigenpairs 410 memX_prev = cplx * kind(1.d0) * spacedim * neigenpairs 411 else 412 memX_next = cplx * kind(1.d0) * total_spacedim * bandpp 413 memX_prev = cplx * kind(1.d0) * total_spacedim * bandpp 414 end if 415 416 memAX = cplx * kind(1.d0) * spacedim * neigenpairs 417 memBX = cplx * kind(1.d0) * spacedim * neigenpairs 418 419 !Transposer colrow array 420 if (paral_kgb == 1) then 421 memX_CR = cplx * kind(1.d0) * total_spacedim * bandpp 422 memAX_CR = cplx * kind(1.d0) * total_spacedim * bandpp 423 memBX_CR = cplx * kind(1.d0) * total_spacedim * bandpp 424 else 425 memX_CR = 0 426 memAX_CR = 0 427 memBX_CR = 0 428 end if 429 430 !chebfi_rayleighRitz function variables 431 memA_und_X = cplx * kind(1.d0) * neigenpairs * neigenpairs 432 memB_und_X = cplx * kind(1.d0) * neigenpairs * neigenpairs 433 memEigenvalues = kind(1.d0) * neigenpairs 434 435 arraymem(1) = memX + memX_next + memX_prev + & 436 memAX + memBX + memX_CR + memAX_CR + memBX_CR 437 arraymem(2) = memA_und_X + memB_und_X + memEigenvalues 438 439 end function chebfi_memInfo
m_chebfi2/chebfi_oracle1 [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_oracle1
FUNCTION
Compute order of Chebyshev polynom necessary to converge to a given tol
INPUTS
xx= input variable aa= left bound of the interval bb= right bound of the interval tol= needed precision nmax= max number of iterations
OUTPUT
SIDE EFFECTS
SOURCE
1035 function cheb_oracle1(xx,aa,bb,tol,nmax) result(nn) 1036 1037 implicit none 1038 1039 ! Arguments ------------------------------------ 1040 integer :: nn 1041 integer, intent(in) :: nmax 1042 real(dp), intent(in) :: xx,aa,bb 1043 real(dp), intent(in) :: tol 1044 1045 ! Local variables------------------------------- 1046 integer :: ii 1047 real(dp) :: yy,yim1,xred,temp 1048 1049 ! ************************************************************************* 1050 1051 xred = (xx-(aa+bb)/2)/(bb-aa)*2 1052 yy = xred 1053 yim1 = 1 !ONE 1054 1055 nn = nmax 1056 if(1/(yy**2) < tol) then 1057 nn = 1 1058 else 1059 do ii=2, nmax-1 1060 temp = yy 1061 yy = 2*xred*yy - yim1 1062 yim1 = temp 1063 if(1/(yy**2) < tol) then 1064 nn = ii 1065 exit 1066 end if 1067 end do 1068 end if 1069 1070 end function cheb_oracle1
m_chebfi2/chebfi_poly1 [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_poly1
FUNCTION
Compute Chebyshev polynomial???
INPUTS
xx= input variable aa= left bound of the interval bb= right bound of the interval nn=
OUTPUT
SIDE EFFECTS
SOURCE
1094 function cheb_poly1(xx,nn,aa,bb) result(yy) 1095 1096 implicit none 1097 1098 ! Arguments ------------------------------------ 1099 integer, intent(in) :: nn 1100 real(dp), intent(in) :: xx, aa, bb 1101 real(dp) :: yy 1102 1103 ! Local variables------------------------------- 1104 integer :: ii 1105 real(dp) :: xred,yim1,temp 1106 1107 ! ************************************************************************* 1108 1109 xred = (xx-(aa+bb)/2)/(bb-aa)*2 1110 yy = xred 1111 yim1 = 1 1112 do ii= 2, nn 1113 temp = yy 1114 yy = 2*xred*yy - yim1 1115 yim1 = temp 1116 end do 1117 1118 end function cheb_poly1
m_chebfi2/chebfi_rayleighRitzQuotients [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_rayleighRitzQuotients
FUNCTION
Compute the Rayleigh-Ritz quotients.
INPUTS
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm maxeig= highest eigenvalue mineig= lowest eigenvalue DivResults= Rayleigh-Ritz quotients
SOURCE
770 subroutine chebfi_rayleighRitzQuotients(chebfi,maxeig,mineig,DivResults) 771 772 implicit none 773 774 !Arguments ------------------------------------ 775 real(dp), intent(inout) :: maxeig 776 real(dp), intent(inout) :: mineig 777 type(chebfi_t), intent(inout) :: chebfi 778 type(xgBlock_t), intent(inout) :: DivResults 779 780 !Local variables------------------------------- 781 !scalars 782 type(xg_t)::Results1 783 type(xg_t)::Results2 784 !arrays 785 integer :: maxeig_pos(2) 786 integer :: mineig_pos(2) 787 788 ! ********************************************************************* 789 790 !Doesnt work with npfft (ncols=1 in the formula below) ??? 791 792 if (chebfi%paral_kgb == 0) then 793 call xg_init(Results1, chebfi%space, chebfi%neigenpairs, 1, gpu_option=chebfi%gpu_option) 794 call xg_init(Results2, chebfi%space, chebfi%neigenpairs, 1, gpu_option=chebfi%gpu_option) 795 else 796 call xg_init(Results1, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option) 797 call xg_init(Results2, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option) 798 end if 799 800 call xgBlock_colwiseDotProduct(chebfi%xXColsRows, chebfi%xAXColsRows, Results1%self) 801 802 !PAW 803 call xgBlock_colwiseDotProduct(chebfi%xXColsRows, chebfi%xBXColsRows, Results2%self) 804 805 call xgBlock_colwiseDivision(Results1%self, Results2%self, DivResults, & 806 & maxeig, maxeig_pos, mineig, mineig_pos) 807 808 call xg_free(Results1) 809 call xg_free(Results2) 810 811 end subroutine chebfi_rayleighRitzQuotients
m_chebfi2/chebfi_run [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_run
FUNCTION
Apply the Chebyshev Filtering algorithm on a set of vectors.
INPUTS
mpi_enreg = information about MPI parallelization getAX_BX= pointer to the function giving A|X> and B|X> A is typically the Hamiltonian H, and B the overlap operator S getBm1X= pointer to the function giving B^-1|X> B is typically the overlap operator S pcond= pointer to the function used to apply the preconditioning
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm eigen= Full eigenvalues (initial values on entry) residu= residuals, i.e. norm of (A-lambdaB)|X> X0= Full set of vectors (initial values on entry)
SOURCE
469 subroutine chebfi_run(chebfi,X0,getAX_BX,getBm1X,pcond,eigen,residu,nspinor) 470 471 implicit none 472 473 !Arguments ------------------------------------ 474 type(chebfi_t) , intent(inout) :: chebfi 475 integer, intent(in) :: nspinor 476 type(xgBlock_t), intent(inout) :: X0 477 type(xgBlock_t), intent(inout) :: eigen 478 type(xgBlock_t), intent(inout) :: residu 479 interface 480 subroutine getAX_BX(X,AX,BX,transposer) 481 use m_xg, only : xgBlock_t 482 use m_xgTransposer !, only: xgTransposer_t 483 type(xgBlock_t), intent(inout) :: X 484 type(xgBlock_t), intent(inout) :: AX 485 type(xgBlock_t), intent(inout) :: BX 486 type(xgTransposer_t), optional, intent(inout) :: transposer 487 end subroutine getAX_BX 488 end interface 489 interface 490 subroutine getBm1X(X,Bm1X,transposer) 491 use m_xg, only : xgBlock_t 492 use m_xgTransposer !, only: xgTransposer_t 493 type(xgBlock_t), intent(inout) :: X 494 type(xgBlock_t), intent(inout) :: Bm1X 495 type(xgTransposer_t), optional, intent(inout) :: transposer 496 end subroutine getBm1X 497 end interface 498 interface 499 subroutine pcond(W) 500 use m_xg, only : xgBlock_t 501 type(xgBlock_t), intent(inout) :: W 502 end subroutine pcond 503 end interface 504 505 !Local variables------------------------------- 506 !scalars 507 integer :: spacedim 508 integer :: neigenpairs 509 integer :: nline,nline_max 510 integer :: iline, iband, ierr 511 ! integer :: comm_fft_save,comm_band_save !FFT and BAND MPI communicators from rest of Abinit, to be saved 512 real(dp) :: tolerance 513 real(dp) :: maxeig, maxeig_global 514 real(dp) :: mineig, mineig_global 515 real(dp) :: lambda_minus 516 real(dp) :: lambda_plus 517 real(dp) :: one_over_r 518 real(dp) :: two_over_r 519 real(dp) :: center 520 real(dp) :: radius 521 type(xg_t) :: DivResults 522 !arrays 523 real(dp) :: tsec(2) 524 !Pointers similar to old Chebfi 525 integer,allocatable :: nline_bands(:) !Oracle variable 526 real(dp),pointer :: eig(:,:) 527 528 ! ********************************************************************* 529 530 ! call timab(tim_run,1,tsec) 531 532 spacedim = chebfi%spacedim 533 neigenpairs = chebfi%neigenpairs 534 nline = chebfi%nline 535 chebfi%eigenvalues = eigen 536 537 if (chebfi%paral_kgb == 0) then 538 ABI_MALLOC(nline_bands,(neigenpairs)) 539 call xg_init(DivResults, chebfi%space, neigenpairs, 1, gpu_option=chebfi%gpu_option) 540 else 541 ABI_MALLOC(nline_bands,(chebfi%bandpp)) 542 call xg_init(DivResults, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option) 543 end if 544 545 tolerance = chebfi%tolerance 546 lambda_plus = chebfi%ecut 547 chebfi%X = X0 548 549 ! Transpose 550 if (chebfi%paral_kgb == 1) then 551 552 call xgTransposer_constructor(chebfi%xgTransposerX,chebfi%X,chebfi%xXColsRows,nspinor,& 553 STATE_LINALG,TRANS_ALL2ALL,chebfi%comm_rows,chebfi%comm_cols,0,0,gpu_option=chebfi%gpu_option) 554 555 ! !save existing ABinit communicators 556 ! comm_fft_save = mpi_enreg%comm_fft 557 ! comm_band_save = mpi_enreg%comm_band 558 559 ! !set new communicators from Transposer so it can interact with getghc 560 ! !transpose correctly 561 ! mpi_enreg%comm_fft = xgTransposer_getComm(chebfi%xgTransposerX, 2) 562 ! mpi_enreg%comm_band = xgTransposer_getComm(chebfi%xgTransposerX, 3) 563 564 call xgTransposer_copyConstructor(chebfi%xgTransposerAX,chebfi%xgTransposerX,chebfi%AX%self,chebfi%xAXColsRows,STATE_LINALG) 565 call xgTransposer_copyConstructor(chebfi%xgTransposerBX,chebfi%xgTransposerX,chebfi%BX%self,chebfi%xBXColsRows,STATE_LINALG) 566 567 chebfi%xgTransposerX%gpu_kokkos_nthrd = chebfi%gpu_kokkos_nthrd 568 chebfi%xgTransposerAX%gpu_kokkos_nthrd = chebfi%gpu_kokkos_nthrd 569 chebfi%xgTransposerBX%gpu_kokkos_nthrd = chebfi%gpu_kokkos_nthrd 570 571 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_TRANSPOSE) 572 call xgTransposer_transpose(chebfi%xgTransposerX,STATE_COLSROWS) 573 chebfi%xgTransposerAX%state = STATE_COLSROWS 574 chebfi%xgTransposerBX%state = STATE_COLSROWS 575 !call xgTransposer_transpose(chebfi%xgTransposerAX,STATE_COLSROWS) 576 !call xgTransposer_transpose(chebfi%xgTransposerBX,STATE_COLSROWS) 577 ABI_NVTX_END_RANGE() 578 else 579 call xgBlock_setBlock(chebfi%X, chebfi%xXColsRows, 1, spacedim, neigenpairs) !use xXColsRows instead of X notion 580 call xgBlock_setBlock(chebfi%AX%self, chebfi%xAXColsRows, 1, spacedim, neigenpairs) !use xAXColsRows instead of AX notion 581 call xgBlock_setBlock(chebfi%BX%self, chebfi%xBXColsRows, 1, spacedim, neigenpairs) 582 end if 583 584 call timab(tim_getAX_BX,1,tsec) 585 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_AX_BX) 586 call getAX_BX(chebfi%xXColsRows,chebfi%xAXColsRows,chebfi%xBXColsRows,chebfi%xgTransposerX) 587 ABI_NVTX_END_RANGE() 588 call timab(tim_getAX_BX,2,tsec) 589 590 if (chebfi%paral_kgb == 1) then 591 call xmpi_barrier(chebfi%spacecom) 592 end if 593 594 !********************* Compute Rayleigh quotients for every band, and set lambda equal to the largest one ***** 595 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_RRQ) 596 597 ! NOTICE : the following lines are kept for reference 598 ! they are no longer necessary as chebfi_rayleighRitzQuotients can fully run on GPU 599 ! it is no longer necessary to issue a data prefetch, data are already present on device 600 601 ! if (chebfi%gpu_option == ABI_GPU_KOKKOS) then 602 ! #if defined(HAVE_GPU_CUDA) 603 ! call xgBlock_prefetch_async(chebfi%xXColsRows, CPU_DEVICE_ID) 604 ! call xgBlock_prefetch_async(chebfi%xAXColsRows, CPU_DEVICE_ID) 605 ! call xgBlock_prefetch_async(chebfi%xBXColsRows, CPU_DEVICE_ID) 606 ! #endif 607 ! end if 608 609 call timab(tim_RR_q, 1, tsec) 610 call chebfi_rayleighRitzQuotients(chebfi, maxeig, mineig, DivResults%self) !OK 611 call timab(tim_RR_q, 2, tsec) 612 613 if (chebfi%paral_kgb == 1) then 614 call xmpi_max(maxeig,maxeig_global,chebfi%spacecom,ierr) 615 call xmpi_min(mineig,mineig_global,chebfi%spacecom,ierr) 616 else 617 maxeig_global = maxeig 618 mineig_global = mineig 619 end if 620 ABI_NVTX_END_RANGE() 621 622 lambda_minus = maxeig_global 623 624 nline_max = cheb_oracle1(mineig_global, lambda_minus, lambda_plus, 1D-16, 40) 625 626 if (chebfi%paral_kgb == 0) then 627 call xgBlock_reverseMap(DivResults%self,eig,1,neigenpairs) 628 do iband=1, neigenpairs !TODO TODO 629 ! !nline necessary to converge to tolerance 630 ! !nline_tolwfr = cheb_oracle1(dble(eig(iband*2-1,1)), lambda_minus, lambda_plus, tolerance / resids_filter(iband), nline) 631 ! !nline necessary to decrease residual by a constant factor 632 ! !nline_decrease = cheb_oracle1(dble(eig(iband*2-1,1)), lambda_minus, lambda_plus, 0.1D, dtset%nline) 633 ! !nline_bands(iband) = MAX(MIN(nline_tolwfr, nline_decrease, nline_max, chebfi%nline), 1) 634 nline_bands(iband) = nline ! fiddle with this to use locking 635 end do 636 else 637 call xgBlock_reverseMap(DivResults%self,eig,1,chebfi%bandpp) 638 do iband=1, chebfi%bandpp !TODO TODO 639 nline_bands(iband) = nline ! fiddle with this to use locking 640 end do 641 end if 642 643 center = (lambda_plus + lambda_minus)*0.5 644 radius = (lambda_plus - lambda_minus)*0.5 645 646 one_over_r = 1/radius 647 two_over_r = 2/radius 648 649 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_CORE) 650 do iline = 0, nline - 1 651 652 call timab(tim_next_p,1,tsec) 653 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_NEXT_ORDER) 654 call chebfi_computeNextOrderChebfiPolynom(chebfi, iline, center, one_over_r, two_over_r, getBm1X) 655 ABI_NVTX_END_RANGE() 656 call timab(tim_next_p,2,tsec) 657 658 call timab(tim_swap,1,tsec) 659 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_SWAP_BUF) 660 if (chebfi%paral_kgb == 0) then 661 call chebfi_swapInnerBuffers(chebfi, spacedim, neigenpairs) 662 else 663 call chebfi_swapInnerBuffers(chebfi, chebfi%total_spacedim, chebfi%bandpp) 664 end if 665 ABI_NVTX_END_RANGE() 666 call timab(tim_swap,2,tsec) 667 668 !A * Psi 669 call timab(tim_getAX_BX,1,tsec) 670 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_AX_BX) 671 call getAX_BX(chebfi%xXColsRows,chebfi%xAXColsRows,chebfi%xBXColsRows,chebfi%xgTransposerX) 672 ABI_NVTX_END_RANGE() 673 call timab(tim_getAX_BX,2,tsec) 674 675 end do ! iline 676 ABI_NVTX_END_RANGE() 677 678 if (chebfi%paral_kgb == 1) then 679 call xmpi_barrier(chebfi%spacecom) 680 end if 681 682 call timab(tim_amp_f,1,tsec) 683 call chebfi_ampfactor(chebfi, eig, lambda_minus, lambda_plus, nline_bands) 684 call timab(tim_amp_f,2,tsec) 685 686 call xg_free(DivResults) 687 ABI_FREE(nline_bands) 688 689 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_TRANSPOSE) 690 if (chebfi%paral_kgb == 1) then 691 call xmpi_barrier(chebfi%spacecom) 692 693 call xgTransposer_transpose(chebfi%xgTransposerX, STATE_LINALG) 694 call xgTransposer_transpose(chebfi%xgTransposerAX,STATE_LINALG) 695 call xgTransposer_transpose(chebfi%xgTransposerBX,STATE_LINALG) 696 697 if (xmpi_comm_size(chebfi%spacecom) == 1) then !only one MPI proc reset buffers to right addresses (because of X-Xcolwise swaps) 698 call xgBlock_setBlock(chebfi%xXColsRows, chebfi%X, 1, spacedim, neigenpairs) 699 call xgBlock_setBlock(chebfi%xAXColsRows, chebfi%AX%self, 1, spacedim, neigenpairs) 700 call xgBlock_setBlock(chebfi%xBXColsRows, chebfi%BX%self, 1, spacedim, neigenpairs) 701 end if 702 else 703 call xgBlock_setBlock(chebfi%xXColsRows, chebfi%X, 1, spacedim, neigenpairs) 704 call xgBlock_setBlock(chebfi%xAXColsRows, chebfi%AX%self, 1, spacedim, neigenpairs) 705 call xgBlock_setBlock(chebfi%xBXColsRows, chebfi%BX%self, 1, spacedim, neigenpairs) 706 end if 707 ABI_NVTX_END_RANGE() 708 709 ABI_NVTX_START_RANGE(NVTX_CHEBFI2_RR) 710 call xg_RayleighRitz(chebfi%X,chebfi%AX%self,chebfi%BX%self,chebfi%eigenvalues,ierr,0,tim_RR,chebfi%gpu_option,solve_ax_bx=.true.) 711 ABI_NVTX_END_RANGE() 712 713 call timab(tim_residu, 1, tsec) 714 if (chebfi%paw) then 715 call xgBlock_colwiseCymax(chebfi%AX%self,chebfi%eigenvalues,chebfi%BX%self,chebfi%AX%self) 716 else 717 call xgBlock_colwiseCymax(chebfi%AX%self,chebfi%eigenvalues,chebfi%X,chebfi%AX%self) 718 end if 719 720 ! call timab(tim_pcond,1,tsec) 721 call pcond(chebfi%AX%self) 722 ! call timab(tim_pcond,2,tsec) 723 724 call xgBlock_colwiseNorm2(chebfi%AX%self, residu) 725 call timab(tim_residu, 2, tsec) 726 727 call xgBlock_copy(chebfi%X,X0) 728 729 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL) 730 if (chebfi%gpu_option==ABI_GPU_KOKKOS) then 731 call gpu_device_synchronize() 732 end if 733 #endif 734 735 if (chebfi%paral_kgb == 1) then 736 call xgTransposer_free(chebfi%xgTransposerX) 737 call xgTransposer_free(chebfi%xgTransposerAX) 738 call xgTransposer_free(chebfi%xgTransposerBX) 739 ! !Reset communicators to original Abinit values for rest of ABinit 740 ! mpi_enreg%comm_fft = comm_fft_save 741 ! mpi_enreg%comm_band = comm_band_save 742 end if 743 744 ! call timab(tim_run,2,tsec) 745 746 end subroutine chebfi_run
m_chebfi2/chebfi_swapInnerBuffers [ Functions ]
[ Top ] [ m_chebfi2 ] [ Functions ]
NAME
chebfi_swapInnerBuffers
FUNCTION
Swap buffers inside a 'chebfi' datastructure.
INPUTS
neigenpairs= number of requested eigenvectors/eigenvalues spacedim= space dimension for one vector
OUTPUT
SIDE EFFECTS
chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
SOURCE
920 subroutine chebfi_swapInnerBuffers(chebfi,spacedim,neigenpairs) 921 922 implicit none 923 924 ! Arguments ------------------------------------ 925 integer , intent(in ) :: spacedim 926 integer , intent(in ) :: neigenpairs 927 type(chebfi_t) , intent(inout) :: chebfi 928 929 ! ********************************************************************* 930 931 call xgBlock_setBlock(chebfi%X_prev, chebfi%X_swap, 1, spacedim, neigenpairs) !X_swap = X_prev 932 call xgBlock_setBlock(chebfi%xXColsRows, chebfi%X_prev, 1, spacedim, neigenpairs) !X_prev = xXColsRows 933 call xgBlock_setBlock(chebfi%X_next, chebfi%xXColsRows, 1, spacedim, neigenpairs) !xXColsRows = X_next 934 call xgBlock_setBlock(chebfi%X_swap, chebfi%X_next, 1, spacedim, neigenpairs) !X_next = X_swap 935 936 end subroutine chebfi_swapInnerBuffers