TABLE OF CONTENTS


ABINIT/m_lobpcgwf [ Functions ]

[ Top ] [ Functions ]

NAME

 m_lobpcgwf

FUNCTION

 This routine updates the whole wave functions at a given k-point,
 using the lobpcg method
 for a given spin-polarization, from a fixed hamiltonian
 but might also simply compute eigenvectors and eigenvalues at this k point.
 it will also update the matrix elements of the hamiltonian.

COPYRIGHT

 Copyright (C) 1998-2022 ABINIT group (JB)
 this file is distributed under the terms of the
 gnu general public license, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 for the initials of contributors, see ~abinit/doc/developers/contributors.txt .

SOURCE

 21 #if defined HAVE_CONFIG_H
 22 #include "config.h"
 23 #endif
 24 
 25 #include "abi_common.h"
 26 
 27 module m_lobpcgwf
 28 
 29  use defs_basis
 30  use m_abicore
 31  use m_lobpcg
 32  use m_xmpi
 33  use m_errors
 34  use m_time
 35  use m_xomp
 36  use m_fstrings
 37  use m_xg
 38  use m_lobpcg2
 39  use m_dtset
 40 
 41  use defs_abitypes, only : mpi_type
 42  use m_hamiltonian, only : gs_hamiltonian_type
 43  use m_pawcprj,     only : pawcprj_type
 44  use m_nonlop,      only : nonlop
 45  use m_prep_kgb,    only : prep_getghc, prep_nonlop
 46  use m_getghc,      only : multithreaded_getghc
 47  use m_cgtools,     only : dotprod_g
 48 
 49  use, intrinsic :: iso_c_binding
 50 
 51  private
 52 
 53  integer, parameter :: l_tim_getghc=5
 54  double precision, parameter :: inv_sqrt2 = 1/sqrt2
 55 
 56  ! For use in getghc_gsc1
 57  integer,save  :: l_cpopt
 58  integer,save  :: l_icplx
 59  integer,save  :: l_istwf
 60  integer,save  :: l_npw
 61  integer,save  :: l_nspinor
 62  logical,save  :: l_paw
 63  integer,save  :: l_prtvol
 64  integer,save  :: l_sij_opt
 65  real(dp), allocatable,save ::  l_pcon(:)
 66  type(mpi_type),pointer,save :: l_mpi_enreg
 67  type(gs_hamiltonian_type),pointer,save :: l_gs_hamk
 68 
 69  public :: lobpcgwf2
 70 
 71  contains
 72 
 73 subroutine lobpcgwf2(cg,dtset,eig,enl_out,gs_hamk,kinpw,mpi_enreg,&
 74 &                   nband,npw,nspinor,prtvol,resid)
 75 
 76  implicit none
 77 
 78 !Arguments ------------------------------------
 79  integer,intent(in) :: nband,npw,prtvol,nspinor
 80  type(gs_hamiltonian_type),target,intent(inout) :: gs_hamk
 81  type(dataset_type)              ,intent(in   ) :: dtset
 82  type(mpi_type)           ,target,intent(inout) :: mpi_enreg
 83  real(dp)                 ,target,intent(inout) :: cg(2,nspinor*nband*npw)!,gsc(2,nspinor*nband*npw)
 84  real(dp)                        ,intent(in   ) :: kinpw(npw)
 85  real(dp)                 ,target,intent(  out) :: resid(nband)
 86  real(dp)                        ,intent(  out) :: enl_out(nband)
 87  real(dp)                 ,target,intent(  out) :: eig(nband)
 88 
 89 !Local variables-------------------------------
 90 
 91  type(xgBlock_t) :: xgx0
 92  type(xgBlock_t) :: xgeigen
 93  type(xgBlock_t) :: xgresidu
 94  type(lobpcg_t) :: lobpcg
 95 
 96  integer :: space, blockdim,  nline
 97  integer :: ipw
 98 
 99  integer :: nthreads
100 
101  double precision :: lobpcgMem(2)
102  double precision :: localmem
103 
104  integer, parameter :: tim_lobpcgwf2 = 1650
105  double precision :: cputime, walltime
106  double precision :: tsec(2)
107 
108  type(c_ptr) :: cptr
109  real(dp), pointer :: eig_ptr(:,:) => NULL()
110  real(dp), pointer :: resid_ptr(:,:) => NULL()
111 
112  ! Important things for NC
113  integer,parameter :: choice=1, paw_opt=0, signs=1
114  type(pawcprj_type) :: cprj_dum(gs_hamk%natom,0)
115  integer :: iband, shift
116  real(dp) :: gsc_dummy(0,0)
117  real(dp), allocatable :: l_gvnlxc(:,:)
118 
119 ! *********************************************************************
120 
121 
122 !###########################################################################
123 !################ INITIALISATION  ##########################################
124 !###########################################################################
125 
126  call timab(tim_lobpcgwf2,1,tsec)
127  cputime = abi_cpu_time()
128  walltime = abi_wtime()
129 
130  ! Set module variables
131  l_paw = (gs_hamk%usepaw==1)
132  l_cpopt=-1;l_sij_opt=0;if (l_paw) l_sij_opt=1
133  l_istwf=gs_hamk%istwf_k
134  l_npw = npw
135  l_nspinor = nspinor
136  l_prtvol = prtvol
137  l_mpi_enreg => mpi_enreg
138  l_gs_hamk => gs_hamk
139 
140 !Variables
141  nline=dtset%nline
142  blockdim=l_mpi_enreg%nproc_band*l_mpi_enreg%bandpp
143 
144 !Depends on istwfk
145  if ( l_istwf == 2 ) then ! Real only
146    ! SPACE_CR mean that we have complex numbers but no re*im terms only re*re
147    ! and im*im so that a vector of complex is consider as a long vector of real
148    ! therefore the number of data is (2*npw*nspinor)*nband
149    ! This space is completely equivalent to SPACE_R but will correctly set and
150    ! get the array data into the xgBlock
151    space = SPACE_CR
152    l_icplx = 2
153 
154  else ! complex
155    space = SPACE_C
156    l_icplx = 1
157  end if
158 
159  ! Memory info
160  if ( prtvol >= 3 ) then
161    lobpcgMem = lobpcg_memInfo(nband,l_icplx*l_npw*l_nspinor,blockdim,space)
162    localMem = (l_npw+2*l_npw*l_nspinor*blockdim+2*nband)*kind(1.d0)
163    write(std_out,'(1x,A,F10.6,1x,A)') "Each MPI process calling lobpcg should need around ", &
164    (localMem+sum(lobpcgMem))/1e9, &
165    "GB of peak memory as follows :"
166    write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in lobpcgwf : ", &
167    (localMem)/1e9, "GB"
168    write(std_out,'(4x,A,F10.6,1x,A)') "Permanent memory in m_lobpcg : ", &
169    (lobpcgMem(1))/1e9, "GB"
170    write(std_out,'(4x,A,F10.6,1x,A)') "Temporary memory in m_lobpcg : ", &
171    (lobpcgMem(2))/1e9, "GB"
172  end if
173 
174  !For preconditionning
175  ABI_MALLOC(l_pcon,(1:l_icplx*npw))
176  !$omp parallel do schedule(static), shared(l_pcon,kinpw)
177  do ipw=1-1,l_icplx*npw-1
178    if(kinpw(ipw/l_icplx+1)>huge(0.0_dp)*1.d-11) then
179      l_pcon(ipw+1)=0.d0
180    else
181      l_pcon(ipw+1) = (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1)))) &
182 &    / (27+kinpw(ipw/l_icplx+1)*(18+kinpw(ipw/l_icplx+1)*(12+8*kinpw(ipw/l_icplx+1))) + 16*kinpw(ipw/l_icplx+1)**4)
183    end if
184  end do
185 
186  ! Local variables for lobpcg
187  !call xg_init(xgx0,space,icplx*npw*nspinor,nband)
188  call xgBlock_map(xgx0,cg,space,l_icplx*l_npw*l_nspinor,nband,l_mpi_enreg%comm_bandspinorfft)
189  if ( l_istwf == 2 ) then ! Real only
190    ! Scale cg
191    call xgBlock_scale(xgx0,sqrt2,1)
192    ! This is possible since the memory in cg and xgx0 is the same
193    ! Don't know yet how to deal with this with xgBlock
194    if(l_mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * inv_sqrt2
195  end if
196 
197  !call xg_init(xgeigen,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft)
198  ! Trick the with C to change rank of arrays (:) to (:,:)
199  cptr = c_loc(eig)
200  call c_f_pointer(cptr,eig_ptr,(/ nband,1 /))
201  call xgBlock_map(xgeigen,eig_ptr,SPACE_R,nband,1)
202 
203  !call xg_init(xgresidu,SPACE_R,nband,1,l_mpi_enreg%comm_bandspinorfft)
204  ! Trick the with C to change rank of arrays (:) to (:,:)
205  cptr = c_loc(resid)
206  call c_f_pointer(cptr,resid_ptr,(/ nband,1 /))
207  call xgBlock_map(xgresidu,resid_ptr,SPACE_R,nband,1)
208 
209  !ABI_MALLOC(l_gvnlxc,(2,l_npw*l_nspinor*blockdim))
210 
211  call lobpcg_init(lobpcg,nband, l_icplx*l_npw*l_nspinor, blockdim,dtset%tolwfr,nline,space, l_mpi_enreg%comm_bandspinorfft)
212 
213 !###########################################################################
214 !################    RUUUUUUUN    ##########################################
215 !###########################################################################
216 
217  ! Run lobpcg
218  call lobpcg_run(lobpcg,xgx0,getghc_gsc,precond,xgeigen,xgresidu,prtvol)
219 
220  ! Free preconditionning since not needed anymore
221  ABI_FREE(l_pcon)
222 
223  ! Scale back
224  if(l_istwf == 2) then
225    call xgBlock_scale(xgx0,inv_sqrt2,1)
226    if(l_mpi_enreg%me_g0 == 1) cg(:, 1:npw*nspinor*nband:npw) = cg(:, 1:npw*nspinor*nband:npw) * sqrt2
227  end if
228 
229  ! Compute enlout (nonlocal energy for each band if necessary) This is the best
230  ! quick and dirty trick to compute this part in NC. gvnlxc cannot be part of
231  ! lobpcg algorithm
232  if ( .not. l_paw ) then
233    !Check l_gvnlxc size
234    !if ( size(l_gvnlxc) < 2*nband*l_npw*l_nspinor ) then
235    !if ( size(l_gvnlxc) /= 0 ) then
236    !  ABI_FREE(l_gvnlxc)
237      !ABI_MALLOC(l_gvnlxc,(2,nband*l_npw*l_nspinor))
238    ABI_MALLOC(l_gvnlxc,(0,0))
239 
240    !Call nonlop
241    if (mpi_enreg%paral_kgb==0) then
242 
243      call nonlop(choice,l_cpopt,cprj_dum,enl_out,l_gs_hamk,0,eig,mpi_enreg,nband,1,paw_opt,&
244 &                signs,gsc_dummy,l_tim_getghc,cg,l_gvnlxc)
245 
246    else
247      do iband=1,nband/blockdim
248        shift = (iband-1)*blockdim*l_npw*l_nspinor
249       call prep_nonlop(choice,l_cpopt,cprj_dum, &
250 &       enl_out((iband-1)*blockdim+1:iband*blockdim),l_gs_hamk,0,&
251 &       eig((iband-1)*blockdim+1:iband*blockdim),blockdim,mpi_enreg,1,paw_opt,signs,&
252 &       gsc_dummy,l_tim_getghc, &
253 &       cg(:,shift+1:shift+blockdim*l_npw*l_nspinor),&
254 !&       l_gvnlxc(:,shift+1:shift+blockdim*l_npw*l_nspinor),&
255 &       l_gvnlxc(:,:),&
256 &       already_transposed=.false.)
257      end do
258    end if
259    !Compute enlout
260 !   do iband=1,nband
261 !     shift = npw*nspinor*(iband-1)
262 !       call dotprod_g(enl_out(iband),dprod_i,l_gs_hamk%istwf_k,npw*nspinor,1,cg(:, shift+1:shift+npw*nspinor),&
263 !  &     l_gvnlxc(:, shift+1:shift+npw*nspinor),mpi_enreg%me_g0,mpi_enreg%comm_bandspinorfft)
264 !   end do
265    ABI_FREE(l_gvnlxc)
266  end if
267 
268 
269  ! Free lobpcg
270  call lobpcg_free(lobpcg)
271 
272 !###########################################################################
273 !################    SORRY IT'S ALREADY FINISHED : )  ######################
274 !###########################################################################
275 
276 
277  call timab(tim_lobpcgwf2,2,tsec)
278  cputime = abi_cpu_time() - cputime
279  walltime = abi_wtime() - walltime
280  nthreads = xomp_get_num_threads(open_parallel = .true.)
281  if ( cputime/walltime/dble(nthreads) < 0.75 .and. (int(cputime/walltime)+1) /= nthreads) then
282    if ( prtvol >= 3 ) then
283      write(std_out,'(a)',advance='no') sjoin(" Lobpcg took", sec2str(cputime), "of cpu time")
284      write(std_out,*) sjoin("for a wall time of", sec2str(walltime))
285      write(std_out,'(a,f6.2)') " -> Ratio of ", cputime/walltime
286    end if
287    ABI_COMMENT(sjoin("You should set the number of threads to something close to",itoa(int(cputime/walltime)+1)))
288  end if
289 
290 
291  DBG_EXIT("COLL")
292 
293 end subroutine lobpcgwf2
294 
295 
296  subroutine getghc_gsc(X,AX,BX)
297    use m_xg, only : xg_t, xgBlock_get, xgBlock_set, xgBlock_getSize, xgBlock_t
298 #ifdef HAVE_OPENMP
299    use omp_lib
300 #endif
301   type(xgBlock_t), intent(inout) :: X
302   type(xgBlock_t), intent(inout) :: AX
303   type(xgBlock_t), intent(inout) :: BX
304   integer         :: blockdim
305   integer         :: spacedim
306   type(pawcprj_type) :: cprj_dum(l_gs_hamk%natom,0)
307   double precision :: dum
308   double precision, parameter :: inv_sqrt2 = 1/sqrt2
309   double precision, pointer :: cg(:,:)
310   double precision, pointer :: ghc(:,:)
311   double precision, pointer :: gsc(:,:)
312   double precision, allocatable :: l_gvnlxc(:,:)
313 
314   call xgBlock_getSize(X,spacedim,blockdim)
315   spacedim = spacedim/l_icplx
316 
317 
318   !call xgBlock_get(X,cg(:,1:blockdim*spacedim),0,spacedim)
319   call xgBlock_reverseMap(X,cg,l_icplx,spacedim*blockdim)
320   call xgBlock_reverseMap(AX,ghc,l_icplx,spacedim*blockdim)
321   call xgBlock_reverseMap(BX,gsc,l_icplx,spacedim*blockdim)
322 
323   ! scale back cg
324   if(l_istwf == 2) then
325     !cg(:,1:spacedim*blockdim) = cg(:,1:spacedim*blockdim) * inv_sqrt2
326     call xgBlock_scale(X,inv_sqrt2,1)
327     if(l_mpi_enreg%me_g0 == 1) cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * sqrt2
328   end if
329 
330   !if ( size(l_gvnlxc) < 2*blockdim*spacedim ) then
331   !  ABI_FREE(l_gvnlxc)
332   !  ABI_MALLOC(l_gvnlxc,(2,blockdim*spacedim))
333   !end if
334   ABI_MALLOC(l_gvnlxc,(0,0))
335 
336   if (l_mpi_enreg%paral_kgb==0) then
337 
338     call multithreaded_getghc(l_cpopt,cg(:,1:blockdim*spacedim),cprj_dum,ghc,gsc(:,1:blockdim*spacedim),&
339       l_gs_hamk,l_gvnlxc,dum, l_mpi_enreg,blockdim,l_prtvol,l_sij_opt,l_tim_getghc,0)
340 
341   else
342     call prep_getghc(cg(:,1:blockdim*spacedim),l_gs_hamk,l_gvnlxc,ghc,gsc(:,1:blockdim*spacedim),dum,blockdim,l_mpi_enreg,&
343 &                     l_prtvol,l_sij_opt,l_cpopt,cprj_dum,already_transposed=.false.)
344   end if
345 
346   ! scale cg, ghc, gsc
347   if ( l_istwf == 2 ) then
348     !cg(:,1:spacedim*blockdim) = cg(:,1:spacedim*blockdim) * sqrt2
349     !ghc(:,1:spacedim*blockdim) = ghc(:,1:spacedim*blockdim) * sqrt2
350     call xgBlock_scale(X,sqrt2,1)
351     call xgBlock_scale(AX,sqrt2,1)
352     if(l_mpi_enreg%me_g0 == 1) then
353       cg(:, 1:spacedim*blockdim:l_npw) = cg(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
354       ghc(:, 1:spacedim*blockdim:l_npw) = ghc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
355     endif
356     if(l_paw) then
357       !gsc(:,1:spacedim*blockdim) = gsc(:,1:spacedim*blockdim) * sqrt2
358       call xgBlock_scale(BX,sqrt2,1)
359       if(l_mpi_enreg%me_g0 == 1) gsc(:, 1:spacedim*blockdim:l_npw) = gsc(:, 1:spacedim*blockdim:l_npw) * inv_sqrt2
360     end if
361   end if
362 
363   ABI_FREE(l_gvnlxc)
364 
365   if ( .not. l_paw ) call xgBlock_copy(X,BX)
366 
367   !call xgBlock_set(AX,ghc,0,spacedim)
368   !call xgBlock_set(BX,gsc(:,1:blockdim*spacedim),0,spacedim)
369  end subroutine getghc_gsc
370 
371  subroutine precond(W)
372    use m_xg, only : xg_t, xgBlock_colwiseMul
373    type(xgBlock_t), intent(inout) :: W
374    integer :: ispinor
375    !integer :: cplx
376 
377    ! precondition resid_vec
378    do ispinor = 1,l_nspinor
379      !do cplx = 1, l_icplx
380      call xgBlock_colwiseMul(W,l_pcon,l_icplx*l_npw*(ispinor-1))
381       !end do
382    end do
383 
384  end subroutine precond
385 
386  end module m_lobpcgwf