TABLE OF CONTENTS


ABINIT/m_chebfi2 [ Functions ]

[ Top ] [ Functions ]

NAME

 m_chebfi2

FUNCTION

 This module contains the types and routines used to apply the
 Chebyshev filtering method (2021 implementation using xG abstraction layer)
 It mainly defines a 'chebfi' datatypes and associated methods.

COPYRIGHT

 Copyright (C) 2018-2024 ABINIT group (BS)
 This file is distributed under the terms of the
 gnu general public license, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 for the initials of contributors, see ~abinit/doc/developers/contributors.txt .

SOURCE

 19 #if defined HAVE_CONFIG_H
 20 #include "config.h"
 21 #endif
 22 
 23 #include "abi_common.h"
 24 
 25 ! nvtx related macro definition
 26 #include "nvtx_macros.h"
 27 
 28 module m_chebfi2
 29 
 30  use defs_basis
 31  use defs_abitypes
 32  use m_abicore
 33  use m_errors
 34  use m_time, only : timab
 35 
 36  use m_cgtools
 37  use m_xg
 38  use m_xgTransposer
 39  use m_xg_ortho_RR
 40 
 41  use m_xmpi
 42  use m_xomp
 43 #ifdef HAVE_OPENMP
 44  use omp_lib
 45 #endif
 46 
 47 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
 48  use m_gpu_toolbox, only : CPU_DEVICE_ID, gpu_device_synchronize
 49 #endif
 50 
 51 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
 52  use m_nvtx_data
 53 #endif
 54 
 55  implicit none
 56 
 57  private
 58 
 59 !Several (private) parameters
 60 !-------------------------------------------------
 61 
 62  integer, parameter :: tim_init         = 1751
 63  integer, parameter :: tim_free         = 1752
 64 ! integer, parameter :: tim_run          = 1753
 65  integer, parameter :: tim_getAX_BX     = 1754
 66  integer, parameter :: tim_invovl       = 1755
 67  integer, parameter :: tim_residu       = 1756
 68  integer, parameter :: tim_RR           = 1757
 69 ! integer, parameter :: tim_pcond        = 1758
 70  integer, parameter :: tim_RR_q         = 1759
 71  integer, parameter :: tim_next_p       = 1760
 72  integer, parameter :: tim_swap         = 1761
 73  integer, parameter :: tim_amp_f        = 1762
 74  integer, parameter :: tim_alltoall     = 1763
 75  integer, parameter :: tim_X_NP_init    = 1769
 76  integer, parameter :: tim_AX_BX_init   = 1770
 77 
 78 !Public 'chebfi' datatype
 79 !-------------------------------------------------
 80 
 81  type, public :: chebfi_t
 82    integer :: space
 83    integer :: spacedim                      ! Space dimension for one vector
 84    integer :: total_spacedim                ! Maybe not needed
 85    integer :: neigenpairs                   ! Number of eigen values/vectors we want
 86    integer :: nline                         ! Number of line to perform
 87    integer :: spacecom                      ! Communicator for MPI
 88    real(dp) :: tolerance            ! Tolerance on the residu to stop the minimization
 89    real(dp) :: ecut                 ! Ecut for Chebfi oracle
 90 
 91    integer :: paral_kgb                     ! MPI parallelization variables
 92    integer :: bandpp
 93    integer :: comm_cols
 94    integer :: comm_rows
 95 
 96    logical :: paw
 97    integer :: eigenProblem   !1 (A*x = (lambda)*B*x), 2 (A*B*x = (lambda)*x), 3 (B*A*x = (lambda)*x)
 98    integer :: istwf_k
 99 
100    ! when GPU is enabled, currently OpenMP is not fully supported, abinit is launched
101    ! with OMP_NUM_THREADS=1, but we may locally increase the number of OpenMP threads
102    ! wherever it is safe to do; in that case we use gpu_kokkos_nthrd to specify
103    ! the number of OpenMP threads. This value is controlled by dtset variable
104    ! dtset%gpu_kokkos_nthrd
105    integer :: gpu_option
106    integer :: gpu_kokkos_nthrd = 1 ! only used if gpu is enabled, number of OpenMP threads used
107    integer :: me_g0
108 
109    !ARRAYS
110    type(xgBlock_t) :: X
111 
112    type(xg_t) :: X_NP
113    type(xgBlock_t) :: X_next
114    type(xgBlock_t) :: X_prev
115 
116    type(xg_t) :: AX
117    type(xg_t) :: BX
118 
119    type(xgBlock_t) :: xXColsRows
120    type(xgBlock_t) :: xAXColsRows
121    type(xgBlock_t) :: xBXColsRows
122 
123    type(xgTransposer_t) :: xgTransposerX
124    type(xgTransposer_t) :: xgTransposerAX
125    type(xgTransposer_t) :: xgTransposerBX
126 
127    type(xgBlock_t) :: eigenvalues
128 
129    !SWAP POINTERS
130    type(xgBlock_t) :: X_swap
131    type(xgBlock_t) :: AX_swap
132    type(xgBlock_t) :: BX_swap
133 
134   end type chebfi_t
135 
136 !Public methods associated to 'chebfi' datatype
137 !-------------------------------------------------
138  public :: chebfi_init
139  public :: chebfi_free
140  public :: chebfi_memInfo
141  public :: chebfi_run
142 
143  CONTAINS  !========================================================================================

m_chebfi2/chebfi_allocateAll [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_allocateAll

FUNCTION

 Allocate all memory spaces in a 'chebfi' datastructure.

INPUTS

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

258 subroutine chebfi_allocateAll(chebfi)
259 
260  implicit none
261 
262  ! Arguments ------------------------------------
263  type(chebfi_t)  , intent(inout) :: chebfi
264 
265  ! Local variables-------------------------------
266  ! scalars
267  integer  :: neigenpairs
268  integer  :: space
269  integer  :: spacedim
270  integer  :: total_spacedim, ierr
271  ! arrays
272  real(dp) :: tsec(2)
273 
274 ! *********************************************************************
275 
276  space       = chebfi%space
277  spacedim    = chebfi%spacedim
278  neigenpairs = chebfi%neigenpairs
279 
280  call chebfi_free(chebfi)
281 
282  call timab(tim_X_NP_init,1,tsec)
283  if (chebfi%paral_kgb == 0) then
284    chebfi%total_spacedim = spacedim
285    call xg_init(chebfi%X_NP,space,spacedim,2*neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option) !regular arrays
286    call xg_setBlock(chebfi%X_NP, chebfi%X_next,             1, spacedim, neigenpairs)
287    call xg_setBlock(chebfi%X_NP, chebfi%X_prev, neigenpairs+1, spacedim, neigenpairs)
288  else
289    total_spacedim = spacedim
290    call xmpi_sum(total_spacedim,chebfi%spacecom,ierr)
291    chebfi%total_spacedim = total_spacedim
292    call xg_init(chebfi%X_NP,space,total_spacedim,2*chebfi%bandpp,chebfi%spacecom,gpu_option=chebfi%gpu_option) !transposed arrays
293    call xg_setBlock(chebfi%X_NP, chebfi%X_next,               1, total_spacedim, chebfi%bandpp)
294    call xg_setBlock(chebfi%X_NP, chebfi%X_prev, chebfi%bandpp+1, total_spacedim, chebfi%bandpp)
295  end if
296  call timab(tim_X_NP_init,2,tsec)
297 
298  call timab(tim_AX_BX_init,1,tsec)
299  !transposer will handle these arrays automatically
300  call xg_init(chebfi%AX,space,spacedim,neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option)
301  call xg_init(chebfi%BX,space,spacedim,neigenpairs,chebfi%spacecom,gpu_option=chebfi%gpu_option)
302  call timab(tim_AX_BX_init,2,tsec)
303 
304 end subroutine chebfi_allocateAll

m_chebfi2/chebfi_ampfactor [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_ampfactor

FUNCTION

 Compute amplification factor

INPUTS

 eig (:,:)= eigenvalues
 lambda_minus,lambda_plus=
 nline_bands(:)= order of Chebyshev polynom for each band

OUTPUT

SIDE EFFECTS

  residu<type(xgBlock_t)>= vector of residuals
  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

 961 subroutine chebfi_ampfactor(chebfi,eig,lambda_minus,lambda_plus,nline_bands)
 962 
 963   implicit none
 964 
 965   ! Arguments ------------------------------------
 966   integer,           intent(in   ) :: nline_bands(:)
 967   real(dp), pointer, intent(in   ) :: eig(:,:)
 968   real(dp),          intent(in   ) :: lambda_minus
 969   real(dp),          intent(in   ) :: lambda_plus
 970   type(chebfi_t),    intent(inout) :: chebfi
 971 
 972   ! Local variables-------------------------------
 973   ! scalars
 974   integer         :: iband,nbands
 975   real(dp)        :: ampfactor
 976   real(dp)        :: eig_per_band
 977   type(xgBlock_t) :: X_part
 978   type(xgBlock_t) :: AX_part
 979   type(xgBlock_t) :: BX_part
 980 
 981   ! *********************************************************************
 982 
 983   if (chebfi%paral_kgb == 0) then
 984     nbands = chebfi%neigenpairs
 985   else
 986     nbands = chebfi%bandpp
 987   end if
 988 
 989   do iband = 1, nbands
 990 
 991     eig_per_band = eig(1,iband)
 992 
 993     !cheb_poly1(x, n, a, b)
 994     ampfactor = cheb_poly1(eig_per_band, nline_bands(iband), lambda_minus, lambda_plus)
 995 
 996     if(abs(ampfactor) < 1e-3) ampfactor = 1e-3 !just in case, avoid amplifying too much
 997 
 998     call xgBlock_setBlock(chebfi%xXColsRows, X_part, iband, chebfi%total_spacedim, 1)
 999     call xgBlock_setBlock(chebfi%xAXColsRows, AX_part, iband, chebfi%total_spacedim, 1)
1000 
1001     call xgBlock_scale(X_part, 1/ampfactor, 1)
1002     call xgBlock_scale(AX_part, 1/ampfactor, 1)
1003 
1004     if(chebfi%paw) then
1005       call xgBlock_setBlock(chebfi%xBXColsRows, BX_part, iband, chebfi%total_spacedim, 1)
1006       call xgBlock_scale(BX_part, 1/ampfactor, 1)
1007     end if
1008   end do
1009 
1010 end subroutine chebfi_ampfactor

m_chebfi2/chebfi_computeNextOrderChebfiPolynom [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_computeNextOrderChebfiPolynom

FUNCTION

 From P_n(B-^1.A)|X> (where P_n is the Chebyshev polynom of order n),
   computes P_n+1(B-^1.A)|X>

INPUTS

  iline=order of polynom
  center=
  one_over_r,two_over_r=
  getBm1X= pointer to the function giving B^-1|X>
           B is typically the overlap operator S

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

838 subroutine chebfi_computeNextOrderChebfiPolynom(chebfi,iline,center,one_over_r,two_over_r,getBm1X)
839 
840  implicit none
841 
842 !Arguments ------------------------------------
843  real(dp)       , intent(in) :: center
844  integer        , intent(in) :: iline
845  real(dp)       , intent(in) :: one_over_r
846  real(dp)       , intent(in) :: two_over_r
847  type(chebfi_t) , intent(inout) :: chebfi
848  interface
849    subroutine getBm1X(X,Bm1X,transposer)
850      use m_xg, only : xgBlock_t
851      use m_xgTransposer !, only: xgTransposer_t
852      type(xgBlock_t), intent(inout) :: X
853      type(xgBlock_t), intent(inout) :: Bm1X
854      type(xgTransposer_t), optional, intent(inout) :: transposer
855    end subroutine getBm1X
856  end interface
857 
858  !Local variables-------------------------------
859  real(dp) :: tsec(2)
860 
861  ! *********************************************************************
862 
863  if (chebfi%paw) then
864    call timab(tim_invovl, 1, tsec)
865    ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_BM1X)
866    call getBm1X(chebfi%xAXColsRows, chebfi%X_next, chebfi%xgTransposerX)
867    ABI_NVTX_END_RANGE()
868    call timab(tim_invovl, 2, tsec)
869  else
870    call xgBlock_copy(chebfi%xAXColsRows,chebfi%X_next)
871  end if
872 
873  ABI_NVTX_START_RANGE(NVTX_INVOVL_POST3)
874  call xgBlock_scale(chebfi%xXColsRows, center, 1) !scale by center
875 
876  !(B-1 * A * Psi^i-1 - c * Psi^i-1)
877  call xgBlock_saxpy(chebfi%X_next, dble(-1.0), chebfi%xXColsRows)
878 
879  !Psi^i-1  = 1/c * Psi^i-1
880  call xgBlock_scale(chebfi%xXColsRows, 1/center, 1) !counter scale by 1/center
881 
882  if (iline == 0) then
883    call xgBlock_scale(chebfi%X_next, one_over_r, 1)
884  else
885    call xgBlock_scale(chebfi%X_next, two_over_r, 1)
886 
887    call xgBlock_saxpy(chebfi%X_next, dble(-1.0), chebfi%X_prev)
888  end if
889 
890 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
891  if (chebfi%gpu_option==ABI_GPU_KOKKOS) then
892    call gpu_device_synchronize()
893  end if
894 #endif
895  ABI_NVTX_END_RANGE()
896 
897 end subroutine chebfi_computeNextOrderChebfiPolynom

m_chebfi2/chebfi_free [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_free

FUNCTION

 Destroy a 'chebfi' datastructure.

INPUTS

OUTPUT

  arraymem(2)= memory information

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

326 subroutine chebfi_free(chebfi)
327 
328  implicit none
329 
330 !Arguments ------------------------------------
331  type(chebfi_t) , intent(inout) :: chebfi
332 
333 ! *********************************************************************
334 
335  call xg_free(chebfi%X_NP)
336 
337  call xg_free(chebfi%AX)
338  call xg_free(chebfi%BX)
339 
340 !call xg_finalize()
341 
342 end subroutine chebfi_free

m_chebfi2/chebfi_init [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_init

FUNCTION

 Initialize a 'chebfi' datastructure.

INPUTS

  bandpp= number of 'bands' handled by a processor
  eigenProblem= type of eigenpb: 1 (A*x = (lambda)*B*x), 2 (A*B*x = (lambda)*x), 3 (B*A*x = (lambda)*x)
  istwf_k= parameter that describes the storage of wfs
  me_g0= 1 if this processors treats G=0, 0 otherwise
  neigenpairs= number of requested eigenvectors/eigenvalues
  nline= Chebyshev polynomial level (.i.e. number of H applications)
  comm_rows= "rows" communicator
  comm_cols= "cols" communicator
  paral_kgb= flag controlling (k,g,bands) parallelization
  space= defines in which space we are (columns, rows, etc.)
  spacecom= MPI communicator
  spacedim= space dimension for one vector
  paw= flag. TRUE if current calculation ses the PAW approach
  ecut= plane-wave cut-off energy
  tolerance= tolerance criterion on the residu to stop the minimization

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

177 subroutine chebfi_init(chebfi,neigenpairs,spacedim,tolerance,ecut,paral_kgb,bandpp, &
178                        nline,space,eigenProblem,istwf_k,spacecom,me_g0,paw,comm_rows,comm_cols,&
179                        gpu_option,gpu_kokkos_nthrd)
180 
181  implicit none
182 
183  ! Arguments ------------------------------------
184  integer       , intent(in   ) :: bandpp
185  integer       , intent(in   ) :: eigenProblem
186  integer       , intent(in   ) :: istwf_k
187  integer       , intent(in   ) :: me_g0
188  integer       , intent(in   ) :: neigenpairs
189  integer       , intent(in   ) :: nline
190  integer       , intent(in   ) :: comm_cols
191  integer       , intent(in   ) :: comm_rows
192  integer       , intent(in   ) :: paral_kgb
193  integer       , intent(in   ) :: space
194  integer       , intent(in   ) :: spacecom
195  integer       , intent(in   ) :: spacedim
196  integer       , intent(in   ) :: gpu_option
197  logical       , intent(in   ) :: paw
198  real(dp)      , intent(in   ) :: ecut
199  real(dp)      , intent(in   ) :: tolerance
200  type(chebfi_t), intent(inout) :: chebfi
201  integer       , intent(in   ), optional :: gpu_kokkos_nthrd
202 
203  ! Local variables-------------------------------
204  real(dp)                      :: tsec(2)
205 
206  ! *********************************************************************
207 
208  call timab(tim_init,1,tsec)
209 
210  chebfi%space = space
211  chebfi%neigenpairs = neigenpairs
212  chebfi%spacedim    = spacedim
213  chebfi%tolerance   = tolerance
214  chebfi%ecut        = ecut
215  chebfi%paral_kgb   = paral_kgb
216  chebfi%comm_cols   = comm_cols
217  chebfi%bandpp      = bandpp
218  chebfi%comm_rows   = comm_rows
219  chebfi%nline       = nline
220  chebfi%spacecom    = spacecom
221  chebfi%eigenProblem = eigenProblem
222  chebfi%istwf_k      = istwf_k
223  chebfi%me_g0        = me_g0
224  chebfi%paw          = paw
225  chebfi%gpu_option = gpu_option
226 
227  if (present(gpu_kokkos_nthrd)) then
228     chebfi%gpu_kokkos_nthrd = gpu_kokkos_nthrd
229  else
230     chebfi%gpu_kokkos_nthrd = 1
231  end if
232 
233  call chebfi_allocateAll(chebfi)
234 
235  call timab(tim_init,2,tsec)
236 
237 end subroutine chebfi_init

m_chebfi2/chebfi_memInfo [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_memInfo

FUNCTION

 Provides memory information about a 'chebfi' datastructure.

INPUTS

  bandpp= number of 'bands' handled by a processor
  neigenpairs= number of requested eigenvectors/eigenvalues
  paral_kgb= flag controlling (k,g,bands) parallelization
  space= defines in which space we are (columns, rows, etc.)
  spacedim= dimension of MPI communicator
  total_spacedim= size of global KGB communicator (typically 'banspinorfft' comm.)

OUTPUT

  arraymem(2)= memory information

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

370 function chebfi_memInfo(neigenpairs,spacedim,space,paral_kgb,total_spacedim,bandpp) result(arraymem)
371 
372  implicit none
373 
374 !Arguments ------------------------------------
375  integer, intent(in   ) :: bandpp
376  integer, intent(in   ) :: neigenpairs
377  integer, intent(in   ) :: paral_kgb
378  integer, intent(in   ) :: space
379  integer, intent(in   ) :: spacedim
380  integer, intent(in   ) :: total_spacedim
381 
382 !Local variables-------------------------------
383 !scalars
384  real(dp) :: memX
385  real(dp) :: memX_next
386  real(dp) :: memX_prev
387  real(dp) :: memAX
388  real(dp) :: memBX
389 !Transposer variables
390  real(dp) :: memX_CR
391  real(dp) :: memAX_CR
392  real(dp) :: memBX_CR
393 !chebfi_rayleighRitz function variables
394  real(dp) :: memA_und_X
395  real(dp) :: memB_und_X
396  real(dp) :: memEigenvalues
397  real(dp) :: cplx
398 !arrays
399  real(dp) :: arraymem(2)
400 
401 ! *********************************************************************
402  cplx = 1
403  if ( space == SPACE_C ) cplx = 2 !for now only complex
404 
405  !Permanent in chebfi
406  memX = cplx * kind(1.d0) * spacedim * neigenpairs
407 
408  if (paral_kgb == 0) then
409    memX_next = cplx * kind(1.d0) * spacedim * neigenpairs
410    memX_prev = cplx * kind(1.d0) * spacedim * neigenpairs
411  else
412    memX_next = cplx * kind(1.d0) * total_spacedim * bandpp
413    memX_prev = cplx * kind(1.d0) * total_spacedim * bandpp
414  end if
415 
416  memAX = cplx * kind(1.d0) * spacedim * neigenpairs
417  memBX = cplx * kind(1.d0) * spacedim * neigenpairs
418 
419  !Transposer colrow array
420  if (paral_kgb == 1) then
421    memX_CR = cplx * kind(1.d0) * total_spacedim * bandpp
422    memAX_CR = cplx * kind(1.d0) * total_spacedim * bandpp
423    memBX_CR = cplx * kind(1.d0) * total_spacedim * bandpp
424  else
425    memX_CR = 0
426    memAX_CR = 0
427    memBX_CR = 0
428  end if
429 
430  !chebfi_rayleighRitz function variables
431  memA_und_X = cplx * kind(1.d0) * neigenpairs * neigenpairs
432  memB_und_X = cplx * kind(1.d0) * neigenpairs * neigenpairs
433  memEigenvalues = kind(1.d0) * neigenpairs
434 
435  arraymem(1) = memX + memX_next + memX_prev + &
436                memAX + memBX + memX_CR + memAX_CR + memBX_CR
437  arraymem(2) = memA_und_X + memB_und_X + memEigenvalues
438 
439 end function chebfi_memInfo

m_chebfi2/chebfi_oracle1 [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_oracle1

FUNCTION

 Compute order of Chebyshev polynom necessary to converge to a given tol

INPUTS

  xx= input variable
  aa= left bound of the interval
  bb= right bound of the interval
  tol= needed precision
  nmax= max number of iterations

OUTPUT

SIDE EFFECTS

SOURCE

1035 function cheb_oracle1(xx,aa,bb,tol,nmax) result(nn)
1036 
1037   implicit none
1038 
1039   ! Arguments ------------------------------------
1040   integer              :: nn
1041   integer,  intent(in) :: nmax
1042   real(dp), intent(in) :: xx,aa,bb
1043   real(dp), intent(in) :: tol
1044 
1045   ! Local variables-------------------------------
1046   integer :: ii
1047   real(dp) :: yy,yim1,xred,temp
1048 
1049   ! *************************************************************************
1050 
1051   xred = (xx-(aa+bb)/2)/(bb-aa)*2
1052   yy = xred
1053   yim1 = 1 !ONE
1054 
1055   nn = nmax
1056   if(1/(yy**2) < tol) then
1057     nn = 1
1058   else
1059     do ii=2, nmax-1
1060       temp = yy
1061       yy = 2*xred*yy - yim1
1062       yim1 = temp
1063       if(1/(yy**2) < tol) then
1064         nn = ii
1065         exit
1066       end if
1067     end do
1068   end if
1069 
1070 end function cheb_oracle1

m_chebfi2/chebfi_poly1 [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_poly1

FUNCTION

 Compute Chebyshev polynomial???

INPUTS

  xx= input variable
  aa= left bound of the interval
  bb= right bound of the interval
  nn=

OUTPUT

SIDE EFFECTS

SOURCE

1094 function cheb_poly1(xx,nn,aa,bb) result(yy)
1095 
1096   implicit none
1097 
1098   ! Arguments ------------------------------------
1099   integer,  intent(in) :: nn
1100   real(dp), intent(in) :: xx, aa, bb
1101   real(dp)             :: yy
1102 
1103   ! Local variables-------------------------------
1104   integer  :: ii
1105   real(dp) :: xred,yim1,temp
1106 
1107   ! *************************************************************************
1108 
1109   xred = (xx-(aa+bb)/2)/(bb-aa)*2
1110   yy = xred
1111   yim1 = 1
1112   do ii= 2, nn
1113     temp = yy
1114     yy = 2*xred*yy - yim1
1115     yim1 = temp
1116   end do
1117 
1118 end function cheb_poly1

m_chebfi2/chebfi_rayleighRitzQuotients [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_rayleighRitzQuotients

FUNCTION

 Compute the Rayleigh-Ritz quotients.

INPUTS

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
  maxeig= highest eigenvalue
  mineig= lowest eigenvalue
  DivResults= Rayleigh-Ritz quotients

SOURCE

770 subroutine chebfi_rayleighRitzQuotients(chebfi,maxeig,mineig,DivResults)
771 
772  implicit none
773 
774 !Arguments ------------------------------------
775  real(dp), intent(inout) :: maxeig
776  real(dp), intent(inout) :: mineig
777  type(chebfi_t), intent(inout) :: chebfi
778  type(xgBlock_t), intent(inout) :: DivResults
779 
780 !Local variables-------------------------------
781 !scalars
782  type(xg_t)::Results1
783  type(xg_t)::Results2
784 !arrays
785  integer :: maxeig_pos(2)
786  integer :: mineig_pos(2)
787 
788 ! *********************************************************************
789 
790 !Doesnt work with npfft (ncols=1 in the formula below) ???
791 
792  if (chebfi%paral_kgb == 0) then
793    call xg_init(Results1, chebfi%space, chebfi%neigenpairs, 1, gpu_option=chebfi%gpu_option)
794    call xg_init(Results2, chebfi%space, chebfi%neigenpairs, 1, gpu_option=chebfi%gpu_option)
795  else
796    call xg_init(Results1, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option)
797    call xg_init(Results2, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option)
798  end if
799 
800  call xgBlock_colwiseDotProduct(chebfi%xXColsRows, chebfi%xAXColsRows, Results1%self)
801 
802 !PAW
803  call xgBlock_colwiseDotProduct(chebfi%xXColsRows, chebfi%xBXColsRows, Results2%self)
804 
805  call xgBlock_colwiseDivision(Results1%self, Results2%self, DivResults, &
806    & maxeig, maxeig_pos, mineig, mineig_pos)
807 
808  call xg_free(Results1)
809  call xg_free(Results2)
810 
811 end subroutine chebfi_rayleighRitzQuotients

m_chebfi2/chebfi_run [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_run

FUNCTION

 Apply the Chebyshev Filtering algorithm on a set of vectors.

INPUTS

  mpi_enreg = information about MPI parallelization
  getAX_BX= pointer to the function giving A|X> and B|X>
            A is typically the Hamiltonian H, and B the overlap operator S
  getBm1X= pointer to the function giving B^-1|X>
           B is typically the overlap operator S
  pcond= pointer to the function used to apply the preconditioning

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm
  eigen= Full eigenvalues (initial values on entry)
  residu= residuals, i.e. norm of (A-lambdaB)|X>
  X0= Full set of vectors (initial values on entry)

SOURCE

469 subroutine chebfi_run(chebfi,X0,getAX_BX,getBm1X,pcond,eigen,residu,nspinor)
470 
471  implicit none
472 
473 !Arguments ------------------------------------
474  type(chebfi_t) , intent(inout) :: chebfi
475  integer,         intent(in)    :: nspinor
476  type(xgBlock_t), intent(inout) :: X0
477  type(xgBlock_t), intent(inout) :: eigen
478  type(xgBlock_t), intent(inout) :: residu
479  interface
480    subroutine getAX_BX(X,AX,BX,transposer)
481      use m_xg, only : xgBlock_t
482      use m_xgTransposer !, only: xgTransposer_t
483      type(xgBlock_t), intent(inout) :: X
484      type(xgBlock_t), intent(inout) :: AX
485      type(xgBlock_t), intent(inout) :: BX
486      type(xgTransposer_t), optional, intent(inout) :: transposer
487    end subroutine getAX_BX
488  end interface
489  interface
490    subroutine getBm1X(X,Bm1X,transposer)
491      use m_xg, only : xgBlock_t
492      use m_xgTransposer !, only: xgTransposer_t
493      type(xgBlock_t), intent(inout) :: X
494      type(xgBlock_t), intent(inout) :: Bm1X
495      type(xgTransposer_t), optional, intent(inout) :: transposer
496    end subroutine getBm1X
497  end interface
498  interface
499    subroutine pcond(W)
500      use m_xg, only : xgBlock_t
501      type(xgBlock_t), intent(inout)           :: W
502    end subroutine pcond
503  end interface
504 
505 !Local variables-------------------------------
506 !scalars
507  integer :: spacedim
508  integer :: neigenpairs
509  integer :: nline,nline_max
510  integer :: iline, iband, ierr
511 ! integer :: comm_fft_save,comm_band_save !FFT and BAND MPI communicators from rest of Abinit, to be saved
512  real(dp) :: tolerance
513  real(dp) :: maxeig, maxeig_global
514  real(dp) :: mineig, mineig_global
515  real(dp) :: lambda_minus
516  real(dp) :: lambda_plus
517  real(dp) :: one_over_r
518  real(dp) :: two_over_r
519  real(dp) :: center
520  real(dp) :: radius
521  type(xg_t) :: DivResults
522 !arrays
523  real(dp) :: tsec(2)
524  !Pointers similar to old Chebfi
525  integer,allocatable :: nline_bands(:) !Oracle variable
526  real(dp),pointer :: eig(:,:)
527 
528 ! *********************************************************************
529 
530 ! call timab(tim_run,1,tsec)
531 
532  spacedim = chebfi%spacedim
533  neigenpairs = chebfi%neigenpairs
534  nline = chebfi%nline
535  chebfi%eigenvalues = eigen
536 
537  if (chebfi%paral_kgb == 0) then
538    ABI_MALLOC(nline_bands,(neigenpairs))
539    call xg_init(DivResults, chebfi%space, neigenpairs, 1, gpu_option=chebfi%gpu_option)
540  else
541    ABI_MALLOC(nline_bands,(chebfi%bandpp))
542    call xg_init(DivResults, chebfi%space, chebfi%bandpp, 1, gpu_option=chebfi%gpu_option)
543  end if
544 
545  tolerance = chebfi%tolerance
546  lambda_plus = chebfi%ecut
547  chebfi%X = X0
548 
549  ! Transpose
550  if (chebfi%paral_kgb == 1) then
551 
552    call xgTransposer_constructor(chebfi%xgTransposerX,chebfi%X,chebfi%xXColsRows,nspinor,&
553      STATE_LINALG,TRANS_ALL2ALL,chebfi%comm_rows,chebfi%comm_cols,0,0,gpu_option=chebfi%gpu_option)
554 
555 !   !save existing ABinit communicators
556 !   comm_fft_save = mpi_enreg%comm_fft
557 !   comm_band_save = mpi_enreg%comm_band
558 
559 !   !set new communicators from Transposer so it can interact with getghc
560 !   !transpose correctly
561 !   mpi_enreg%comm_fft = xgTransposer_getComm(chebfi%xgTransposerX, 2)
562 !   mpi_enreg%comm_band = xgTransposer_getComm(chebfi%xgTransposerX, 3)
563 
564    call xgTransposer_copyConstructor(chebfi%xgTransposerAX,chebfi%xgTransposerX,chebfi%AX%self,chebfi%xAXColsRows,STATE_LINALG)
565    call xgTransposer_copyConstructor(chebfi%xgTransposerBX,chebfi%xgTransposerX,chebfi%BX%self,chebfi%xBXColsRows,STATE_LINALG)
566 
567    chebfi%xgTransposerX%gpu_kokkos_nthrd  = chebfi%gpu_kokkos_nthrd
568    chebfi%xgTransposerAX%gpu_kokkos_nthrd = chebfi%gpu_kokkos_nthrd
569    chebfi%xgTransposerBX%gpu_kokkos_nthrd = chebfi%gpu_kokkos_nthrd
570 
571    ABI_NVTX_START_RANGE(NVTX_CHEBFI2_TRANSPOSE)
572    call xgTransposer_transpose(chebfi%xgTransposerX,STATE_COLSROWS)
573    chebfi%xgTransposerAX%state = STATE_COLSROWS
574    chebfi%xgTransposerBX%state = STATE_COLSROWS
575    !call xgTransposer_transpose(chebfi%xgTransposerAX,STATE_COLSROWS)
576    !call xgTransposer_transpose(chebfi%xgTransposerBX,STATE_COLSROWS)
577    ABI_NVTX_END_RANGE()
578  else
579    call xgBlock_setBlock(chebfi%X, chebfi%xXColsRows, 1, spacedim, neigenpairs)   !use xXColsRows instead of X notion
580    call xgBlock_setBlock(chebfi%AX%self, chebfi%xAXColsRows, 1, spacedim, neigenpairs)   !use xAXColsRows instead of AX notion
581    call xgBlock_setBlock(chebfi%BX%self, chebfi%xBXColsRows, 1, spacedim, neigenpairs)
582  end if
583 
584  call timab(tim_getAX_BX,1,tsec)
585  ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_AX_BX)
586  call getAX_BX(chebfi%xXColsRows,chebfi%xAXColsRows,chebfi%xBXColsRows,chebfi%xgTransposerX)
587  ABI_NVTX_END_RANGE()
588  call timab(tim_getAX_BX,2,tsec)
589 
590  if (chebfi%paral_kgb == 1) then
591    call xmpi_barrier(chebfi%spacecom)
592  end if
593 
594 !********************* Compute Rayleigh quotients for every band, and set lambda equal to the largest one *****
595  ABI_NVTX_START_RANGE(NVTX_CHEBFI2_RRQ)
596 
597  ! NOTICE : the following lines are kept for reference
598  ! they are no longer necessary as chebfi_rayleighRitzQuotients can fully run on GPU
599  ! it is no longer necessary to issue a data prefetch, data are already present on device
600 
601 !  if (chebfi%gpu_option == ABI_GPU_KOKKOS) then
602 ! #if defined(HAVE_GPU_CUDA)
603 !    call xgBlock_prefetch_async(chebfi%xXColsRows,  CPU_DEVICE_ID)
604 !    call xgBlock_prefetch_async(chebfi%xAXColsRows, CPU_DEVICE_ID)
605 !    call xgBlock_prefetch_async(chebfi%xBXColsRows, CPU_DEVICE_ID)
606 ! #endif
607 !  end if
608 
609  call timab(tim_RR_q, 1, tsec)
610  call chebfi_rayleighRitzQuotients(chebfi, maxeig, mineig, DivResults%self) !OK
611  call timab(tim_RR_q, 2, tsec)
612 
613  if (chebfi%paral_kgb == 1) then
614    call xmpi_max(maxeig,maxeig_global,chebfi%spacecom,ierr)
615    call xmpi_min(mineig,mineig_global,chebfi%spacecom,ierr)
616  else
617    maxeig_global = maxeig
618    mineig_global = mineig
619  end if
620  ABI_NVTX_END_RANGE()
621 
622  lambda_minus = maxeig_global
623 
624  nline_max = cheb_oracle1(mineig_global, lambda_minus, lambda_plus, 1D-16, 40)
625 
626  if (chebfi%paral_kgb == 0) then
627    call xgBlock_reverseMap(DivResults%self,eig,1,neigenpairs)
628    do iband=1, neigenpairs !TODO TODO
629   ! !nline necessary to converge to tolerance
630   ! !nline_tolwfr = cheb_oracle1(dble(eig(iband*2-1,1)), lambda_minus, lambda_plus, tolerance / resids_filter(iband), nline)
631   ! !nline necessary to decrease residual by a constant factor
632   ! !nline_decrease = cheb_oracle1(dble(eig(iband*2-1,1)), lambda_minus, lambda_plus, 0.1D, dtset%nline)
633   ! !nline_bands(iband) = MAX(MIN(nline_tolwfr, nline_decrease, nline_max, chebfi%nline), 1)
634      nline_bands(iband) = nline ! fiddle with this to use locking
635    end do
636  else
637    call xgBlock_reverseMap(DivResults%self,eig,1,chebfi%bandpp)
638    do iband=1, chebfi%bandpp !TODO TODO
639      nline_bands(iband) = nline ! fiddle with this to use locking
640    end do
641  end if
642 
643  center = (lambda_plus + lambda_minus)*0.5
644  radius = (lambda_plus - lambda_minus)*0.5
645 
646  one_over_r = 1/radius
647  two_over_r = 2/radius
648 
649  ABI_NVTX_START_RANGE(NVTX_CHEBFI2_CORE)
650  do iline = 0, nline - 1
651 
652    call timab(tim_next_p,1,tsec)
653    ABI_NVTX_START_RANGE(NVTX_CHEBFI2_NEXT_ORDER)
654    call chebfi_computeNextOrderChebfiPolynom(chebfi, iline, center, one_over_r, two_over_r, getBm1X)
655    ABI_NVTX_END_RANGE()
656    call timab(tim_next_p,2,tsec)
657 
658    call timab(tim_swap,1,tsec)
659    ABI_NVTX_START_RANGE(NVTX_CHEBFI2_SWAP_BUF)
660    if (chebfi%paral_kgb == 0) then
661      call chebfi_swapInnerBuffers(chebfi, spacedim, neigenpairs)
662    else
663      call chebfi_swapInnerBuffers(chebfi, chebfi%total_spacedim, chebfi%bandpp)
664    end if
665    ABI_NVTX_END_RANGE()
666    call timab(tim_swap,2,tsec)
667 
668    !A * Psi
669    call timab(tim_getAX_BX,1,tsec)
670    ABI_NVTX_START_RANGE(NVTX_CHEBFI2_GET_AX_BX)
671    call getAX_BX(chebfi%xXColsRows,chebfi%xAXColsRows,chebfi%xBXColsRows,chebfi%xgTransposerX)
672    ABI_NVTX_END_RANGE()
673    call timab(tim_getAX_BX,2,tsec)
674 
675  end do ! iline
676  ABI_NVTX_END_RANGE()
677 
678  if (chebfi%paral_kgb == 1) then
679    call xmpi_barrier(chebfi%spacecom)
680  end if
681 
682  call timab(tim_amp_f,1,tsec)
683  call chebfi_ampfactor(chebfi, eig, lambda_minus, lambda_plus, nline_bands)
684  call timab(tim_amp_f,2,tsec)
685 
686  call xg_free(DivResults)
687  ABI_FREE(nline_bands)
688 
689  ABI_NVTX_START_RANGE(NVTX_CHEBFI2_TRANSPOSE)
690  if (chebfi%paral_kgb == 1) then
691    call xmpi_barrier(chebfi%spacecom)
692 
693    call xgTransposer_transpose(chebfi%xgTransposerX, STATE_LINALG)
694    call xgTransposer_transpose(chebfi%xgTransposerAX,STATE_LINALG)
695    call xgTransposer_transpose(chebfi%xgTransposerBX,STATE_LINALG)
696 
697    if (xmpi_comm_size(chebfi%spacecom) == 1) then !only one MPI proc reset buffers to right addresses (because of X-Xcolwise swaps)
698      call xgBlock_setBlock(chebfi%xXColsRows,  chebfi%X,       1, spacedim, neigenpairs)
699      call xgBlock_setBlock(chebfi%xAXColsRows, chebfi%AX%self, 1, spacedim, neigenpairs)
700      call xgBlock_setBlock(chebfi%xBXColsRows, chebfi%BX%self, 1, spacedim, neigenpairs)
701    end if
702  else
703    call xgBlock_setBlock(chebfi%xXColsRows,  chebfi%X,       1, spacedim, neigenpairs)
704    call xgBlock_setBlock(chebfi%xAXColsRows, chebfi%AX%self, 1, spacedim, neigenpairs)
705    call xgBlock_setBlock(chebfi%xBXColsRows, chebfi%BX%self, 1, spacedim, neigenpairs)
706  end if
707  ABI_NVTX_END_RANGE()
708 
709  ABI_NVTX_START_RANGE(NVTX_CHEBFI2_RR)
710  call xg_RayleighRitz(chebfi%X,chebfi%AX%self,chebfi%BX%self,chebfi%eigenvalues,ierr,0,tim_RR,chebfi%gpu_option,solve_ax_bx=.true.)
711  ABI_NVTX_END_RANGE()
712 
713  call timab(tim_residu, 1, tsec)
714  if (chebfi%paw) then
715    call xgBlock_colwiseCymax(chebfi%AX%self,chebfi%eigenvalues,chebfi%BX%self,chebfi%AX%self)
716  else
717    call xgBlock_colwiseCymax(chebfi%AX%self,chebfi%eigenvalues,chebfi%X,chebfi%AX%self)
718  end if
719 
720 ! call timab(tim_pcond,1,tsec)
721  call pcond(chebfi%AX%self)
722 ! call timab(tim_pcond,2,tsec)
723 
724  call xgBlock_colwiseNorm2(chebfi%AX%self, residu)
725  call timab(tim_residu, 2, tsec)
726 
727  call xgBlock_copy(chebfi%X,X0)
728 
729 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
730    if (chebfi%gpu_option==ABI_GPU_KOKKOS) then
731      call gpu_device_synchronize()
732    end if
733 #endif
734 
735  if (chebfi%paral_kgb == 1) then
736    call xgTransposer_free(chebfi%xgTransposerX)
737    call xgTransposer_free(chebfi%xgTransposerAX)
738    call xgTransposer_free(chebfi%xgTransposerBX)
739 !   !Reset communicators to original Abinit values for rest of ABinit
740 !   mpi_enreg%comm_fft = comm_fft_save
741 !   mpi_enreg%comm_band = comm_band_save
742  end if
743 
744 ! call timab(tim_run,2,tsec)
745 
746 end subroutine chebfi_run

m_chebfi2/chebfi_swapInnerBuffers [ Functions ]

[ Top ] [ m_chebfi2 ] [ Functions ]

NAME

 chebfi_swapInnerBuffers

FUNCTION

 Swap buffers inside a 'chebfi' datastructure.

INPUTS

  neigenpairs= number of requested eigenvectors/eigenvalues
  spacedim= space dimension for one vector

OUTPUT

SIDE EFFECTS

  chebfi <type(chebfi_t)>=all data used to apply Chebyshev Filtering algorithm

SOURCE

920 subroutine chebfi_swapInnerBuffers(chebfi,spacedim,neigenpairs)
921 
922   implicit none
923 
924   ! Arguments ------------------------------------
925   integer        , intent(in   ) :: spacedim
926   integer        , intent(in   ) :: neigenpairs
927   type(chebfi_t) , intent(inout) :: chebfi
928 
929   ! *********************************************************************
930 
931   call xgBlock_setBlock(chebfi%X_prev,     chebfi%X_swap,     1, spacedim, neigenpairs) !X_swap = X_prev
932   call xgBlock_setBlock(chebfi%xXColsRows, chebfi%X_prev,     1, spacedim, neigenpairs) !X_prev = xXColsRows
933   call xgBlock_setBlock(chebfi%X_next,     chebfi%xXColsRows, 1, spacedim, neigenpairs) !xXColsRows = X_next
934   call xgBlock_setBlock(chebfi%X_swap,     chebfi%X_next,     1, spacedim, neigenpairs) !X_next = X_swap
935 
936 end subroutine chebfi_swapInnerBuffers