TABLE OF CONTENTS


ABINIT/forstr [ Functions ]

[ Top ] [ Functions ]

NAME

 forstr

FUNCTION

 Drives the computation of forces and/or stress tensor

INPUTS

  atindx1(natom)=index table for atoms, inverse of atindx
  cg(2,mcg)=wavefunctions (may be read from disk instead of input)
  cprj(natom,mcprj*usecprj)=<p_lmn|Cnk> coefficients for each WF |Cnk> and each NL proj |p_lmn>
  dtefield <type(efield_type)> = variables related to Berry phase
  dtset <type(dataset_type)>=all input variables in this dataset
   | berryopt  = 4: electric field is on -> add the contribution of the
   |                - \Omega E.P term to the total energy
   |          /= 4: electric field is off
   |  from Etot(npw) data (at fixed geometry), used for making
   |  Pulay correction to stress tensor (hartree).  Should be <=0.
   | ecut=cut-off energy for plane wave basis sphere (Ha)
   | ecutsm=smearing energy for plane wave kinetic energy (Ha)
   | effmass_free=effective mass for electrons (1. in common case)
   | efield = cartesian coordinates of the electric field in atomic units
   | ionmov=governs the movement of atoms (see help file)
   | densfor_pred=governs the mixed electronic-atomic part of the preconditioner
   | istwfk(nkpt)=input option parameter that describes the storage of wfs
   | kptns(3,nkpt)=reduced coordinates of k points in Brillouin zone
   | mband=maximum number of bands
   | mgfft=maximum size of 1D FFTs
   | mkmem=maximum number of k points in core memory
   | mpw = maximum number of plane waves
   | natom=number of atoms in cell
   | nband(nkpt*nsppol)=number of bands to be included in summation at each k point
   | nfft=(effective) number of FFT grid points (for this processor)
   | ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
   | nkpt=number of k points in Brillouin zone
   | nloalg(3)=governs the choice of the algorithm for non-local operator.
   | nspden=number of spin-density components
   | nsppol=1 for unpolarized, 2 for spin-polarized
   | pawprtvol=control print volume and debugging output for PAW
   | prtvol=integer controlling volume of printed output
   | symafm(nsym)=(anti)ferromagnetic part of symmetry operations
   | tfkinfunc=1 if use of Thomas-Fermi kinetic functional
   |          =2 if use of recursion method
   | typat(natom)=type integer for each atom in cell
   | wtk(nkpt)=weights associated with various k points
   | nsym=number of symmetries in space group
  energies <type(energies_type)>=all part of total energy.
   | e_localpsp(IN)=local psp energy (hartree)
   | e_hartree(IN)=Hartree part of total energy (hartree units)
   | e_corepsp(IN)=psp core-core energy
   | e_kinetic(IN)=kinetic energy part of total energy.
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  fock <type(fock_type)>= quantities to calculate Fock exact exchange
  grchempottn(3,natom)=d(E_chemical potential)/d(xred) (hartree)
  grcondft(3,natom)=d(E_constrainedDFT)/d(xred) (hartree)
  grewtn(3,natom)=d(Ewald)/d(xred) (hartree)
  grvdw(3,ngrvdw)=gradients of energy due to Van der Waals DFT-D dispersion (hartree)
  gsqcut=cutoff value on G**2 for (large) sphere inside FFT box.
                       gsqcut=(boxcut**2)*ecut/(2._dp*(Pi**2)
  indsym(4,nsym,natom)=index showing transformation of atom labels
                       under symmetry operations (computed in symatm)
  kg(3,mpw*mkmem)=reduced (integer) coordinates of G vecs in basis sphere
  kxc(nfft,nkxc)=exchange-correlation kernel, needed only if nkxc>0
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
  mgfftf= -PAW ONLY- maximum size of 1D FFTs for the fine grid
         (mgfftf=mgfft for norm-conserving potential runs)
  mpi_enreg=information about MPI parallelization
  my_natom=number of atoms treated by current processor
  n3xccc=dimension of the xccc3d array (0 or nfftf).
  nattyp(ntypat)=number of atoms of each type
  nfftf= -PAW ONLY- number of FFT grid points for the fine grid
         (nfftf=nfft for norm-conserving potential runs)
  ngfftf(18)= -PAW ONLY- contain all needed information about 3D FFT for the fine grid
              (ngfftf=ngfft for norm-conserving potential runs)
  ngrvdw=size of grvdw(:,:); can be 0 or natom according to dtset%vdw_xc
  nhat(nfftf,nspden*psps%usepaw)= -PAW only- compensation density
  nkxc=second dimension of the array kxc, see rhotoxc.f for a description
  npwarr(nkpt)=number of planewaves in basis and on boundary for each k
  ntypat=number of types of atoms
  nvresid(nfftf,nspden)=array for the residual of the density/potential
  occ(mband*nkpt*nsppol)=occupancies of bands at various k points
  optfor=1 if computation of forces is required
  optres=0 if the potential residual has to be used for forces corrections
        =1 if the density residual has to be used for forces corrections
  paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  pawang <type(pawang_type)>=paw angular mesh and related data
  pawfgr <type(pawfgr_type)>=fine grid parameters and related data
  pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
  pawrad(ntypat*usepaw) <type(pawrad_type)>=paw radial mesh and related data
  pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
  ph1d(2,3*(2*mgfft+1)*natom)=1-dim structure factor phases
  ph1df(2,3*(2*mgfftf+1)*natom)=-PAW only- 1-dim structure factor phases for the fine grid
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
   | mpsang= 1+maximum angular momentum
  rhog(2,nfftf)=Fourier transform of charge density (bohr^-3)
  rhor(nfftf,nspden)=array for electron density in electrons/bohr**3.
  rprimd(3,3)=dimensional primitive translations in real space (bohr)
  strscondft(6)=cDFT correction to stress
  strsxc(6)=xc correction to stress
  stress_needed=1 if computation of stress tensor is required
  symrec(3,3,nsym)=symmetries in reciprocal space, reduced coordinates
  ucvol=unit cell volume in bohr**3
  usecprj=1 if cprj datastructure is stored in memory
  vhartr(nfftf)=array for holding Hartree potential
  vpsp(nfftf)=array for holding local psp
  vxc(nfftf,nspden)=exchange-correlation potential (hartree) in real space
  vxctau(nfft,nspden,4*usekden)=(only for meta-GGA): derivative of XC energy density
                                wrt kinetic energy density (depsxcdtau)
  xccc3d(n3xccc)=3D core electron density for XC core correction, bohr^-3
  xcctau3d(n3xccc*usekden)=(only for meta-GGA): 3D core electron kinetic energy density for XC core correction
  xred(3,natom)=reduced dimensionless atomic coordinates
  ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point
  ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics

OUTPUT

  ==== if (optfor==1) ====
   diffor=maximal absolute value of changes in the components of
          force between the input and the output.
   favg(3)=mean of the forces before correction for translational symmetry
   fcart(3,natom)=forces in cartesian coordinates (Ha/Bohr)
     at input, previous value of forces,
     at output, new value.
     Note : unlike gred, this array has been corrected by enforcing
     the translational symmetry, namely that the sum of force
     on all atoms is zero.
   forold(3,natom)=cartesian forces of previous SCF cycle (hartree/bohr)
   gred(3,natom)=symmetrized grtn = d(etotal)/d(xred)
   gresid(3,natom)=forces due to the residual of the density/potential
   grhf(3,natom)=Hellman-Feynman derivatives of the total energy
   grxc(9+3*natom)=d(Exc)/d(xred) if core charges are used
   maxfor=maximal absolute value of the output array force.
   synlgr(3,natom)=symmetrized gradients of energy due to nonlocal contributions
  ==== if (stress_needed==1) ====
   strten(6)=components of the stress tensor (hartree/bohr^3) for the
    6 unique components of this symmetric 3x3 tensor:
    Given in order (1,1), (2,2), (3,3), (3,2), (3,1), (2,1).

SIDE EFFECTS

  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation
  forold(3,natom)=cartesian forces of previous SCF cycle (hartree/bohr)
  ===== if psps%usepaw==1
  pawrhoij(my_natom) <type(pawrhoij_type)>= paw rhoij occupancies and related data
    (gradients of rhoij for each atom with respect to atomic positions are computed here)
  wvl <type(wvl_data)>=all wavelets data.

NOTES

  Be careful to the meaning of nfft (size of FFT grids):
   - In case of norm-conserving calculations the FFT grid is the usual FFT grid.
   - In case of PAW calculations:
     Two FFT grids are used; one with nfft points (coarse grid) for
     the computation of wave functions ; one with nfftf points
     (fine grid) for the computation of total density.

SOURCE

259 subroutine forstr(atindx1,cg,cprj,diffor,dtefield,dtset,eigen,electronpositron,energies,favg,fcart,fock,&
260 &                 forold,gred,grchempottn,grcondft,gresid,grewtn,grhf,grvdw,grxc,gsqcut,extfpmd,indsym,&
261 &                 kg,kxc,maxfor,mcg,mcprj,mgfftf,mpi_enreg,my_natom,n3xccc,nattyp,&
262 &                 nfftf,ngfftf,ngrvdw,nhat,nkxc,npwarr,&
263 &                 ntypat,nvresid,occ,optfor,optres,paw_ij,pawang,pawfgr,&
264 &                 pawfgrtab,pawrad,pawrhoij,pawtab,ph1d,ph1df,psps,rhog,rhor,rprimd,stress_needed,&
265 &                 strscondft,strsxc,strten,symrec,synlgr,ucvol,usecprj,vhartr,vpsp,&
266 &                 vxc,vxctau,wvl,xccc3d,xcctau3d,xred,ylm,ylmgr,qvpotzero)
267 
268 !Arguments ------------------------------------
269 !scalars
270  integer,intent(in) :: mcg,mcprj,mgfftf,my_natom,n3xccc,nfftf,ngrvdw,nkxc,ntypat,optfor,optres
271  integer,intent(in) :: stress_needed,usecprj
272  real(dp),intent(in) :: gsqcut,qvpotzero,ucvol
273  real(dp),intent(inout) :: diffor,maxfor
274  type(electronpositron_type),pointer :: electronpositron
275  type(MPI_type),intent(inout) :: mpi_enreg
276  type(efield_type),intent(in) :: dtefield
277  type(dataset_type),intent(in) :: dtset
278  type(energies_type),intent(in) :: energies
279  type(extfpmd_type),pointer,intent(inout) :: extfpmd
280  type(pawang_type),intent(in) :: pawang
281  type(pawfgr_type),intent(in) :: pawfgr
282  type(pseudopotential_type),intent(in) :: psps
283  type(wvl_data),intent(inout) :: wvl
284  type(fock_type),pointer, intent(inout) :: fock
285 !arrays
286  integer,intent(in) :: atindx1(dtset%natom),indsym(4,dtset%nsym,dtset%natom)
287  integer,intent(in) :: kg(3,dtset%mpw*dtset%mkmem),nattyp(ntypat),ngfftf(18)
288  integer,intent(in) :: npwarr(dtset%nkpt),symrec(3,3,dtset%nsym)
289  real(dp),intent(in) :: cg(2,mcg)
290  real(dp),intent(in) :: eigen(dtset%mband*dtset%nkpt*dtset%nsppol)
291  real(dp),intent(in) :: grchempottn(3,dtset%natom),grcondft(3,dtset%natom),grewtn(3,dtset%natom)
292  real(dp),intent(in) :: grvdw(3,ngrvdw),kxc(dtset%nfft,nkxc)
293  real(dp),intent(in) :: occ(dtset%mband*dtset%nkpt*dtset%nsppol)
294  real(dp),intent(in) :: ph1d(2,3*(2*dtset%mgfft+1)*dtset%natom)
295  real(dp),intent(in) :: ph1df(2,3*(2*mgfftf+1)*dtset%natom)
296  real(dp),intent(in) :: rhog(2,nfftf),strscondft(6),strsxc(6),vhartr(nfftf)
297  real(dp),intent(in) :: vpsp(nfftf),vxc(nfftf,dtset%nspden),vxctau(nfftf,dtset%nspden,4*dtset%usekden)
298  real(dp),intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm)
299  real(dp),intent(in) :: ylmgr(dtset%mpw*dtset%mkmem,3,psps%mpsang*psps%mpsang*psps%useylm)
300  real(dp),intent(inout) :: forold(3,dtset%natom)
301  real(dp),intent(inout) :: nhat(nfftf,dtset%nspden*psps%usepaw),rhor(nfftf,dtset%nspden),rprimd(3,3)
302  real(dp),intent(inout) :: xccc3d(n3xccc),xcctau3d(n3xccc*dtset%usekden),xred(3,dtset%natom)
303  real(dp),intent(inout),target :: nvresid(nfftf,dtset%nspden)
304  real(dp),intent(out) :: favg(3)
305  real(dp),intent(inout) :: fcart(3,dtset%natom),gred(3,dtset%natom)
306  real(dp),intent(inout) :: gresid(3,dtset%natom),grhf(3,dtset%natom)
307  real(dp),intent(inout) :: grxc(3,dtset%natom),strten(6),synlgr(3,dtset%natom)
308  type(pawcprj_type),intent(inout) :: cprj(dtset%natom,mcprj*usecprj)
309  type(paw_ij_type),intent(in) :: paw_ij(my_natom*psps%usepaw)
310  type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*psps%usepaw)
311  type(pawrhoij_type),intent(inout) :: pawrhoij(my_natom*psps%usepaw)
312  type(pawrad_type),intent(in) :: pawrad(ntypat*psps%usepaw)
313  type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw)
314 
315 !Local variables-------------------------------
316 !scalars
317  integer :: comm_grid,ifft,ispden,ncpgr,occopt_,optgr,optgr2,option,optnc,optstr,optstr2,iorder_cprj,ctocprj_choice
318  integer :: idir,iatom,unpaw,mcgbz
319  integer,allocatable :: dimcprj(:)
320  real(dp) ::dum,dum1,ucvol_
321  logical :: apply_residual
322 !arrays
323  real(dp),parameter :: k0(3)=(/zero,zero,zero/)
324  real(dp) :: kinstr(6),nlstr(6),tsec(2),strdum(6),gmet(3,3),gprimd(3,3),rmet(3,3)
325  real(dp) :: dummy(0)
326  real(dp),allocatable :: grnl(:),vlocal(:,:),vxc_hf(:,:),xcart(:,:),ylmbz(:,:),ylmgrbz(:,:,:)
327  real(dp), ABI_CONTIGUOUS pointer :: resid(:,:)
328 
329 ! *************************************************************************
330 
331  call timab(910,1,tsec)
332  call timab(911,1,tsec)
333 
334 !Do nothing if nothing is required
335  if (optfor==0.and.stress_needed==0) return
336 
337 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW)
338  if (dtset%usewvl==0) then
339    if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then
340      ABI_BUG(' wrong values for nfft, nfftf !')
341    end if
342    if ((psps%usepaw==1.and.pawfgr%mgfft/=mgfftf).or.(psps%usepaw==0.and.dtset%mgfft/=mgfftf)) then
343      ABI_BUG('wrong values for mgfft, mgfftf!')
344    end if
345  end if
346 
347 !==========================================================================
348 !Here compute terms common to forces and stresses
349 !==========================================================================
350 
351  !output only if (optfor==1) but we have to allocate it
352  ABI_MALLOC(grnl,(3*dtset%natom*optfor))
353  grnl(:)=zero
354 
355 !Compute nonlocal psp + potential Fock ACE parts of forces and stress tensor
356 !-involves summations over wavefunctions at all k points
357  if (dtset%tfkinfunc>0.and.stress_needed==1) then
358    kinstr(1:3)=-two/three*energies%e_kinetic/ucvol ; kinstr(4:6)=zero
359    nlstr(1:6)=zero
360  else if (dtset%usewvl==0) then
361    occopt_=0 ! This means that occ are now fixed
362    if(dtset%usefock==1 .and. associated(fock)) then
363 !     if((dtset%optstress/=0).and.(psps%usepaw==1)) then
364      if((psps%usepaw==1).and.((dtset%optstress/=0).or.(dtset%optforces==2))) then
365        if(dtset%optstress==0) then
366          ctocprj_choice=2
367          ncpgr=3
368        end if
369        if(dtset%optstress/=0) then
370          ctocprj_choice=20*optfor+3*dtset%optstress
371          ncpgr=6*dtset%optstress+3*optfor
372        end if
373        if (allocated(fock%fock_BZ%cwaveocc_prj)) then
374          call pawcprj_free(fock%fock_BZ%cwaveocc_prj)
375          ABI_FREE(fock%fock_BZ%cwaveocc_prj)
376          ABI_MALLOC(fock%fock_BZ%cwaveocc_prj,(dtset%natom,fock%fock_BZ%mcprj))
377          ABI_MALLOC(dimcprj,(dtset%natom))
378          call pawcprj_getdim(dimcprj,dtset%natom,nattyp,dtset%ntypat,dtset%typat,pawtab,'O')
379          call pawcprj_alloc(fock%fock_BZ%cwaveocc_prj,ncpgr,dimcprj)
380          ABI_FREE(dimcprj)
381        end if
382        iatom=-1;idir=0;iorder_cprj=0;unpaw=26
383        call metric(gmet,gprimd,-1,rmet,rprimd,dum)
384        if (fock%fock_BZ%mkpt/=dtset%mkmem.or.(fock%fock_BZ%mpi_enreg%paral_hf ==1)) then
385          ABI_MALLOC(ylmbz,(dtset%mpw*fock%fock_BZ%mkpt,psps%mpsang*psps%mpsang*psps%useylm))
386          ABI_MALLOC(ylmgrbz,(dtset%mpw*fock%fock_BZ%mkpt,3,psps%mpsang*psps%mpsang*psps%useylm))
387          option=1; mcgbz=dtset%mpw*fock%fock_BZ%mkptband*fock%fock_common%my_nsppol
388          call initylmg(gprimd,fock%fock_BZ%kg_bz,fock%fock_BZ%kptns_bz,fock%fock_BZ%mkpt,fock%fock_BZ%mpi_enreg,&
389 &         psps%mpsang,dtset%mpw,fock%fock_BZ%nbandocc_bz,fock%fock_BZ%mkpt,&
390 &         fock%fock_BZ%npwarr,dtset%nsppol,option,rprimd,ylmbz,ylmgrbz)
391          call ctocprj(fock%fock_common%atindx,fock%fock_BZ%cgocc,ctocprj_choice,fock%fock_BZ%cwaveocc_prj,gmet,gprimd,iatom,idir,&
392 &         iorder_cprj,fock%fock_BZ%istwfk_bz,fock%fock_BZ%kg_bz,fock%fock_BZ%kptns_bz,mcgbz,&
393 &         fock%fock_BZ%mcprj,dtset%mgfft,fock%fock_BZ%mkpt,fock%fock_BZ%mpi_enreg,psps%mpsang,&
394 &         dtset%mpw,dtset%natom,nattyp,fock%fock_BZ%nbandocc_bz,dtset%natom,dtset%ngfft,fock%fock_BZ%mkpt,&
395 &         dtset%nloalg,fock%fock_BZ%npwarr,dtset%nspinor,&
396 &         dtset%nsppol,fock%fock_common%my_nsppol,dtset%ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,ucvol,unpaw,&
397 &         xred,ylmbz,ylmgrbz)
398          ABI_FREE(ylmbz)
399          ABI_FREE(ylmgrbz)
400        else
401          call ctocprj(fock%fock_common%atindx,fock%fock_BZ%cgocc,ctocprj_choice,fock%fock_BZ%cwaveocc_prj,gmet,gprimd,iatom,idir,&
402 &         iorder_cprj,fock%fock_BZ%istwfk_bz,fock%fock_BZ%kg_bz,fock%fock_BZ%kptns_bz,mcg,&
403 &         fock%fock_BZ%mcprj,dtset%mgfft,fock%fock_BZ%mkpt,mpi_enreg,psps%mpsang,&
404 &         dtset%mpw,dtset%natom,nattyp,fock%fock_BZ%nbandocc_bz,dtset%natom,dtset%ngfft,fock%fock_BZ%mkpt,&
405 &         dtset%nloalg,fock%fock_BZ%npwarr,dtset%nspinor,&
406 &         dtset%nsppol,fock%fock_common%my_nsppol,dtset%ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,ucvol,unpaw,&
407 &         xred,ylm,ylmgr)
408        end if
409      end if
410    end if
411    call forstrnps(cg,cprj,dtset%ecut,dtset%ecutsm,dtset%effmass_free,eigen,electronpositron,fock,grnl,&
412 &   dtset%istwfk,kg,kinstr,nlstr,dtset%kptns,dtset%mband,mcg,mcprj,dtset%mgfft,dtset%mkmem,&
413 &   mpi_enreg,psps%mpsang,dtset%mpw,my_natom,dtset%natom,dtset%nband,dtset%nfft,dtset%ngfft,&
414 &   dtset%nkpt,dtset%nloalg,npwarr,dtset%nspden,dtset%nspinor,dtset%nsppol,dtset%nsym,ntypat,&
415 &   dtset%nucdipmom,occ,optfor,paw_ij,pawtab,ph1d,psps,rprimd,stress_needed,symrec,dtset%typat,&
416 &   usecprj,dtset%usefock,dtset%gpu_option,dtset%wtk,xred,ylm,ylmgr)
417 !DEBUG
418 !   write(6,*)' after forstrnps, nlstr=',nlstr(1:6)
419 !ENDDEBUG
420  else if (optfor>0) then !WVL
421    ABI_MALLOC(xcart,(3, dtset%natom))
422    call xred2xcart(dtset%natom, rprimd, xcart, xred)
423    call wvl_nl_gradient(grnl, mpi_enreg, dtset%natom, rprimd, wvl, xcart)
424    ABI_FREE(xcart)
425  end if
426 
427  call timab(911,2,tsec)
428  call timab(912,1,tsec)
429 
430 !PAW: add gradients due to Dij derivatives to non-local term
431  if (psps%usepaw==1) then
432    ABI_MALLOC(vlocal,(nfftf,dtset%nspden))
433 
434 !$OMP PARALLEL DO COLLAPSE(2)
435    do ispden=1,min(dtset%nspden,2)
436      do ifft=1,nfftf
437        vlocal(ifft,ispden)=vhartr(ifft)+vxc(ifft,ispden)+vpsp(ifft)
438      end do
439    end do
440    if (dtset%nspden==4) then
441 !$OMP PARALLEL DO COLLAPSE(2)
442      do ispden=3,4
443        do ifft=1,nfftf
444          vlocal(ifft,ispden)=vxc(ifft,ispden)
445        end do
446      end do
447    end if
448    ucvol_=ucvol
449 #if defined HAVE_BIGDFT
450    if (dtset%usewvl==1) ucvol_=product(wvl%den%denspot%dpbox%hgrids)*real(product(wvl%den%denspot%dpbox%ndims),dp)
451 #endif
452    optgr=optfor;optgr2=0;optstr=stress_needed;optstr2=0
453    comm_grid=mpi_enreg%comm_fft;if(dtset%usewvl==1) comm_grid=mpi_enreg%comm_wvl
454    call pawgrnl(atindx1,dtset%nspden,dummy,1,dummy,grnl,gsqcut,mgfftf,my_natom,dtset%natom,&
455 &   nattyp,nfftf,ngfftf,nhat,nlstr,dtset%nspden,dtset%nsym,ntypat,optgr,optgr2,optstr,optstr2,&
456 &   pawang,pawfgrtab,pawrhoij,pawtab,ph1df,psps,k0,rprimd,symrec,dtset%typat,ucvol_,vlocal,vxc,xred,&
457 &   mpi_atmtab=mpi_enreg%my_atmtab, comm_atom=mpi_enreg%comm_atom,mpi_comm_grid=comm_grid)
458 !DEBUG
459 !   write(6,*)' after pawgrnl, nlstr=',nlstr(1:6)
460 !ENDDEBUG
461    ABI_FREE(vlocal)
462 
463  end if
464  call timab(912,2,tsec)
465  call timab(913,1,tsec)
466 
467 !==========================================================================
468 !Here compute forces (if required)
469 !==========================================================================
470  if (optfor==1) then
471    apply_residual=(optres==1 .and. dtset%usewvl==0.and.abs(dtset%densfor_pred)>=1 .and. &
472 &   abs(dtset%densfor_pred)<=6.and.abs(dtset%densfor_pred)/=5)
473 
474 !  If residual is a density residual (and forces from residual asked),
475 !  has to convert it into a potential residual before calling forces routine
476    if (apply_residual) then
477      ABI_MALLOC(resid,(nfftf,dtset%nspden))
478      option=0; if (dtset%densfor_pred<0) option=1
479      optnc=1;if (dtset%nspden==4.and.(abs(dtset%densfor_pred)==4.or.abs(dtset%densfor_pred)==6)) optnc=2
480      call nres2vres(dtset,gsqcut,psps%usepaw,kxc,mpi_enreg,my_natom,nfftf,ngfftf,nhat,&
481 &     nkxc,nvresid,n3xccc,optnc,option,pawang,pawfgrtab,pawrhoij,pawtab,&
482 &     rhor,rprimd,psps%usepaw,resid,xccc3d,xred,vxc)
483    else
484      resid => nvresid
485    end if
486 
487    call forces(atindx1,diffor,dtefield,dtset,favg,fcart,fock,forold,gred,grchempottn,grcondft,gresid,grewtn,&
488 &   grhf,grnl,grvdw,grxc,gsqcut,indsym,maxfor,mgfftf,&
489 &   mpi_enreg,psps%n1xccc,n3xccc,nattyp,&
490 &   nfftf,ngfftf,ngrvdw,ntypat,pawrad,pawtab,ph1df,psps,rhog,&
491 &   rhor,rprimd,symrec,synlgr,dtset%usefock,resid,vxc,vxctau,wvl%descr,wvl%den,xred,&
492 &   electronpositron=electronpositron)
493 
494    if (apply_residual) then
495      ABI_FREE(resid)
496    end if
497  end if
498 
499  call timab(913,2,tsec)
500  call timab(914,1,tsec)
501 
502 !==========================================================================
503 !Here compute stress tensor (if required)
504 !==========================================================================
505 
506  if (stress_needed==1.and.dtset%usewvl==0) then
507 !   if (dtset%usefock==1 .and. associated(fock).and.fock%fock_common%optstr.and.psps%usepaw==0) then
508    if (dtset%usefock==1 .and. associated(fock)) then
509      if (fock%fock_common%optstr) then
510        fock%fock_common%stress(1:3)=fock%fock_common%stress(1:3)-(two*energies%e_fock-energies%e_fock0)/ucvol
511        if (n3xccc>0.and.psps%usepaw==0 .and. &
512 &        (dtset%ixc==41.or.dtset%ixc==42.or.libxc_functionals_is_hybrid())) then
513          ABI_MALLOC(vxc_hf,(nfftf,dtset%nspden))
514 !compute Vxc^GGA(rho_val)
515          call xchybrid_ncpp_cc(dtset,dum,mpi_enreg,nfftf,ngfftf,n3xccc,rhor,rprimd,strdum,dum1,xccc3d,vxc=vxc_hf,optstr=1)
516        end if
517      end if
518    end if
519    call stress(atindx1,dtset%berryopt,dtefield,energies%e_localpsp,dtset%efield,&
520 &   energies%e_hartree,energies%e_corepsp,fock,gsqcut,extfpmd,dtset%ixc,kinstr,mgfftf,&
521 &   mpi_enreg,psps%mqgrid_vl,psps%n1xccc,n3xccc,dtset%natom,nattyp,&
522 &   nfftf,ngfftf,nlstr,dtset%nspden,dtset%nsym,ntypat,psps,pawrad,pawtab,ph1df,&
523 &   dtset%prtvol,psps%qgrid_vl,dtset%red_efieldbar,rhog,rprimd,strten,strscondft,strsxc,symrec,&
524 &   dtset%typat,dtset%usefock,dtset%usekden,psps%usepaw,&
525 &   dtset%vdw_tol,dtset%vdw_tol_3bt,dtset%vdw_xc,psps%vlspl,vxc,vxctau,vxc_hf,&
526 &   psps%xccc1d,xccc3d,xcctau3d,psps%xcccrc,xred,psps%ziontypat,psps%znucltypat,qvpotzero,&
527 &   electronpositron=electronpositron)
528  end if
529 
530 !Memory deallocation
531  ABI_FREE(grnl)
532  if (allocated(vxc_hf)) then
533    ABI_FREE(vxc_hf)
534  end if
535 
536 
537  call timab(914,2,tsec)
538  call timab(910,2,tsec)
539 
540 end subroutine forstr

ABINIT/forstrnps [ Functions ]

[ Top ] [ Functions ]

NAME

 forstrnps

FUNCTION

 Compute nonlocal pseudopotential energy contribution to forces and/or stress tensor
 as well as kinetic energy contribution to stress tensor.

INPUTS

  cg(2,mcg)=wavefunctions (may be read from disk file)
  cprj(natom,mcprj*usecprj)=<p_lmn|Cnk> coefficients for each WF |Cnk> and each NL proj |p_lmn>
  ecut=cut-off energy for plane wave basis sphere (Ha)
  ecutsm=smearing energy for plane wave kinetic energy (Ha)
  effmass_free=effective mass for electrons (1. in common case)
  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation (optional argument)
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  fock <type(fock_type)>= quantities to calculate Fock exact exchange
  istwfk(nkpt)=input option parameter that describes the storage of wfs
  kg(3,mpw*mkmem)=reduced coordinates (integers) of G vecs in basis
  kpt(3,nkpt)=k points in reduced coordinates
  mband=maximum number of bands
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
  mgfft=maximum size of 1D FFTs
  mkmem=number of k points treated by this node.
  mpi_enreg=information about MPI parallelization
  mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
  mpw= maximum number of plane waves
  my_natom=number of atoms treated by current processor
  natom=number of atoms in cell.
  nband(nkpt)=number of bands at each k point
  nfft=number of FFT grid points
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nkpt=number of k points in Brillouin zone
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  npwarr(nkpt)=number of planewaves in basis and boundary at each k
  nspden=Number of spin Density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=1 for unpolarized, 2 for spin-polarized
  nsym=number of elements in symmetry group
  ntypat=number of types of atoms
  nucdipmom(3,my_natom)= nuclear dipole moments
  occ(mband*nkpt*nsppol)=occupation numbers for each band over all k points
  optfor=1 if computation of forces is required
  paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
  ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
  rprimd(3,3)=dimensional primitive translations in real space (bohr)
  stress_needed=1 if computation of stress tensor is required
  symrec(3,3,nsym)=symmetries in reciprocal space (dimensionless)
  typat(natom)=type of each atom
  usecprj=1 if cprj datastructure has been allocated
  gpu_option= GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)
  wtk(nkpt)=weight associated with each k point
  xred(3,natom)=reduced dimensionless atomic coordinates
  ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point
  ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics

OUTPUT

  if (optfor==1)
   grnl(3*natom*optfor)=stores grads of nonlocal energy wrt atomic coordinates
  if (stress_needed==1)
   kinstr(6)=kinetic energy part of stress tensor (hartree/bohr^3)
   Store 6 unique components of symmetric 3x3 tensor in the order
   11, 22, 33, 32, 31, 21
   npsstr(6)=nonlocal pseudopotential energy part of stress tensor
    (hartree/bohr^3)

SOURCE

 614 subroutine forstrnps(cg,cprj,ecut,ecutsm,effmass_free,eigen,electronpositron,fock,&
 615 &  grnl,istwfk,kg,kinstr,npsstr,kpt,mband,mcg,mcprj,mgfft,mkmem,mpi_enreg,mpsang,&
 616 &  mpw,my_natom,natom,nband,nfft,ngfft,nkpt,nloalg,npwarr,nspden,nspinor,nsppol,nsym,&
 617 &  ntypat,nucdipmom,occ,optfor,paw_ij,pawtab,ph1d,psps,rprimd,&
 618 &  stress_needed,symrec,typat,usecprj,usefock,gpu_option,wtk,xred,ylm,ylmgr)
 619 
 620 !Arguments ------------------------------------
 621 !scalars
 622  integer,intent(in) :: mband,mcg,mcprj,mgfft,mkmem,mpsang,mpw,my_natom,natom,nfft,nkpt
 623  integer,intent(in) :: nspden,nsppol,nspinor,nsym,ntypat,optfor,stress_needed
 624  integer,intent(in) :: usecprj,usefock,gpu_option
 625  real(dp),intent(in) :: ecut,ecutsm,effmass_free
 626  type(electronpositron_type),pointer :: electronpositron
 627  type(MPI_type),intent(inout) :: mpi_enreg
 628  type(pseudopotential_type),intent(in) :: psps
 629 !arrays
 630  integer,intent(in) :: istwfk(nkpt),kg(3,mpw*mkmem),nband(nkpt*nsppol)
 631  integer,intent(in) :: ngfft(18),nloalg(3),npwarr(nkpt)
 632  integer,intent(in) :: symrec(3,3,nsym),typat(natom)
 633  real(dp),intent(in) :: cg(2,mcg)
 634  real(dp),intent(in) :: eigen(mband*nkpt*nsppol),kpt(3,nkpt),nucdipmom(3,my_natom)
 635  real(dp),intent(in) :: occ(mband*nkpt*nsppol),ph1d(2,3*(2*mgfft+1)*natom)
 636  real(dp),intent(in) :: rprimd(3,3),wtk(nkpt),xred(3,natom)
 637  real(dp),intent(in) :: ylm(mpw*mkmem,mpsang*mpsang*psps%useylm)
 638  real(dp),intent(in) :: ylmgr(mpw*mkmem,3,mpsang*mpsang*psps%useylm)
 639  real(dp),intent(out) :: grnl(3*natom*optfor),kinstr(6),npsstr(6)
 640  type(pawcprj_type),intent(inout) :: cprj(natom,mcprj*usecprj)
 641  type(paw_ij_type),intent(in) :: paw_ij(my_natom*psps%usepaw)
 642  type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw)
 643  type(fock_type),pointer, intent(inout) :: fock
 644 !Local variables-------------------------------
 645 !scalars
 646  integer,parameter :: tim_rwwf=7
 647  integer :: bandpp,bdtot_index,choice,cpopt,dimffnl,dimffnl_str,iband,iband_cprj,iband_last,ibg,icg,ider,ider_str
 648  integer :: idir,idir_str,ierr,ii,ikg,ikpt,ilm,ipositron,ipw,ishift,isppol,istwf_k
 649  integer :: mband_cprj,me_distrb,my_ikpt,my_nspinor,nband_k,nband_cprj_k,ndat,nkpg
 650  integer :: nnlout,npw_k,paw_opt,signs,spaceComm
 651  integer :: tim_nonlop,tim_nonlop_prep,usecprj_local,use_ACE_old
 652  integer :: blocksize,iblock,iblocksize,ibs,nblockbd
 653  real(dp) :: ar,renorm_factor,dfsm,ecutsm_inv,fact_kin,fsm,htpisq,kgc1
 654  real(dp) :: kgc2,kgc3,kin,xx
 655  type(gs_hamiltonian_type) :: gs_hamk
 656  logical :: compute_gbound,usefock_loc
 657  character(len=500) :: msg
 658  type(fock_common_type),pointer :: fockcommon
 659 !arrays
 660  integer,allocatable :: kg_k(:,:)
 661  real(dp) :: kpoint(3),nonlop_dum(1,1),rmet(3,3),tsec(2)
 662 #if defined HAVE_GPU && defined HAVE_YAKL
 663  real(c_double), ABI_CONTIGUOUS pointer :: cwavef(:,:) => null()
 664 #else
 665  real(dp),allocatable :: cwavef(:,:)
 666 #endif
 667  real(dp),allocatable :: enlout(:),ffnl_sav(:,:,:,:),ffnl_str(:,:,:,:)
 668  real(dp),allocatable :: ghc_dum(:,:),gprimd(:,:),kpg_k(:,:),kpg_k_sav(:,:)
 669  real(dp),allocatable :: kstr1(:),kstr2(:),kstr3(:),kstr4(:),kstr5(:),kstr6(:)
 670  real(dp),allocatable :: lambda(:),occblock(:),ph3d(:,:,:),ph3d_sav(:,:,:)
 671  real(dp),allocatable :: weight(:),ylm_k(:,:),ylmgr_k(:,:,:)
 672  real(dp),allocatable,target :: ffnl(:,:,:,:)
 673  type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null()
 674  type(pawcprj_type),target,allocatable :: cwaveprj(:,:)
 675  type(pawcprj_type),pointer :: cwaveprj_idat(:,:)
 676 
 677 
 678 !*************************************************************************
 679 
 680  ABI_NVTX_START_RANGE(NVTX_FORSTRNPS)
 681  call timab(920,1,tsec) ; call timab(921,-1,tsec)
 682 
 683 !Init mpicomm and me
 684  if(mpi_enreg%paral_kgb==1)then
 685    spaceComm=mpi_enreg%comm_kpt
 686    me_distrb=mpi_enreg%me_kpt
 687  else
 688 !* In case of HF calculation
 689    if (mpi_enreg%paral_hf==1) then
 690      spaceComm=mpi_enreg%comm_kpt
 691      me_distrb=mpi_enreg%me_kpt
 692    else
 693      spaceComm=mpi_enreg%comm_cell
 694      me_distrb=mpi_enreg%me_cell
 695    end if
 696  end if
 697 
 698 !Some constants
 699  ipositron=abs(electronpositron_calctype(electronpositron))
 700  my_nspinor=max(1,nspinor/mpi_enreg%nproc_spinor)
 701 !Smearing of plane wave kinetic energy
 702  ecutsm_inv=zero;if( ecutsm>1.0d-20) ecutsm_inv=1/ecutsm
 703 !htpisq is (1/2) (2 Pi) **2:
 704  htpisq=0.5_dp*(two_pi)**2
 705 
 706 !Check that fock is present if want to use fock option
 707  compute_gbound=.false.
 708  usefock_loc = (usefock==1 .and. associated(fock))
 709 !Arrays initializations
 710  grnl(:)=zero
 711  if (usefock_loc) then
 712    fockcommon =>fock%fock_common
 713    fockcommon%optfor=.false.
 714    fockcommon%optstr=.false.
 715    use_ACE_old=fockcommon%use_ACE
 716    fockcommon%use_ACE=0
 717    if (fockcommon%optfor) compute_gbound=.true.
 718  end if
 719  if (stress_needed==1) then
 720    kinstr(:)=zero;npsstr(:)=zero
 721    if (usefock_loc) then
 722      fockcommon%optstr=.TRUE.
 723      fockcommon%stress=zero
 724      compute_gbound=.true.
 725    end if
 726  end if
 727 
 728  usecprj_local=usecprj
 729 
 730  if ((usefock_loc).and.(psps%usepaw==1)) then
 731    usecprj_local=1
 732    if(optfor==1)then
 733      fockcommon%optfor=.true.
 734      if (.not.allocated(fockcommon%forces_ikpt)) then
 735        ABI_MALLOC(fockcommon%forces_ikpt,(3,natom,mband))
 736      end if
 737      if (.not.allocated(fockcommon%forces)) then
 738        ABI_MALLOC(fockcommon%forces,(3,natom))
 739      end if
 740      fockcommon%forces=zero
 741      compute_gbound=.true.
 742    end if
 743  end if
 744 
 745 !Initialize Hamiltonian (k-independent terms)
 746 
 747  call init_hamiltonian(gs_hamk,psps,pawtab,nspinor,nsppol,nspden,natom,&
 748 & typat,xred,nfft,mgfft,ngfft,rprimd,nloalg,usecprj=usecprj_local,&
 749 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,&
 750 & paw_ij=paw_ij,ph1d=ph1d,electronpositron=electronpositron,fock=fock,&
 751 & nucdipmom=nucdipmom,gpu_option=gpu_option)
 752  rmet = MATMUL(TRANSPOSE(rprimd),rprimd)
 753 
 754 !need to reorder cprj=<p_lmn|Cnk> (from unsorted to atom-sorted)
 755  if (psps%usepaw==1.and.usecprj_local==1) then
 756    call pawcprj_reorder(cprj,gs_hamk%atindx)
 757  end if
 758 
 759 !Common data for "nonlop" routine
 760  signs=1 ; idir=0  ; ishift=0 ; tim_nonlop=4 ; tim_nonlop_prep=12
 761  choice=2*optfor;if (stress_needed==1) choice=10*choice+3
 762  if (optfor==1.and.stress_needed==1)  ishift=6
 763  nnlout=max(1,6*stress_needed+3*natom*optfor)
 764  if (psps%usepaw==0) then
 765    paw_opt=0 ; cpopt=-1
 766  else
 767    paw_opt=2 ; cpopt=-1+3*usecprj_local
 768  end if
 769 
 770  call timab(921,2,tsec)
 771 
 772 !LOOP OVER SPINS
 773  bdtot_index=0;ibg=0;icg=0
 774  do isppol=1,nsppol
 775 
 776 !  Continue to initialize the Hamiltonian (PAW DIJ coefficients)
 777    call gs_hamk%load_spin(isppol,with_nonlocal=.true.)
 778    if (usefock_loc) fockcommon%isppol=isppol
 779 
 780 !  Loop over k points
 781    ikg=0
 782    do ikpt=1,nkpt
 783      if (usefock_loc) fockcommon%ikpt=ikpt
 784      nband_k=nband(ikpt+(isppol-1)*nkpt)
 785      istwf_k=istwfk(ikpt)
 786      npw_k=npwarr(ikpt)
 787      kpoint(:)=kpt(:,ikpt)
 788 
 789      if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then
 790        bdtot_index=bdtot_index+nband_k
 791        cycle
 792      end if
 793 
 794      call timab(922,1,tsec)
 795 
 796 !    Parallelism over FFT and/or bands: define sizes and tabs
 797      if (mpi_enreg%paral_kgb==1) then
 798        my_ikpt=mpi_enreg%my_kpttab(ikpt)
 799        nblockbd=nband_k/(mpi_enreg%nproc_band*mpi_enreg%bandpp)
 800        bandpp=mpi_enreg%bandpp
 801        my_bandfft_kpt => bandfft_kpt(my_ikpt)
 802      else
 803        my_ikpt=ikpt
 804        bandpp=mpi_enreg%bandpp
 805        nblockbd=nband_k/bandpp
 806      end if
 807      blocksize=nband_k/nblockbd
 808      mband_cprj=mband/mpi_enreg%nproc_band
 809      nband_cprj_k=nband_k/mpi_enreg%nproc_band
 810 
 811      if(gpu_option == ABI_GPU_KOKKOS) then
 812 #if defined HAVE_GPU && defined HAVE_YAKL
 813        ABI_MALLOC_MANAGED(cwavef,(/2,npw_k*my_nspinor*blocksize/))
 814 #endif
 815      else
 816        ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize))
 817      end if
 818 
 819      if (psps%usepaw==1.and.usecprj_local==1) then
 820        ABI_MALLOC(cwaveprj,(natom,my_nspinor*bandpp))
 821        call pawcprj_alloc(cwaveprj,0,gs_hamk%dimcprj)
 822      else
 823        ABI_MALLOC(cwaveprj,(0,0))
 824      end if
 825 
 826      if (stress_needed==1) then
 827        ABI_MALLOC(kstr1,(npw_k))
 828        ABI_MALLOC(kstr2,(npw_k))
 829        ABI_MALLOC(kstr3,(npw_k))
 830        ABI_MALLOC(kstr4,(npw_k))
 831        ABI_MALLOC(kstr5,(npw_k))
 832        ABI_MALLOC(kstr6,(npw_k))
 833      end if
 834 
 835      ABI_MALLOC(kg_k,(3,mpw))
 836 !$OMP PARALLEL DO
 837      do ipw=1,npw_k
 838        kg_k(:,ipw)=kg(:,ipw+ikg)
 839      end do
 840 
 841      ABI_MALLOC(ylm_k,(npw_k,mpsang*mpsang*psps%useylm))
 842      if (stress_needed==1) then
 843        ABI_MALLOC(ylmgr_k,(npw_k,3,mpsang*mpsang*psps%useylm))
 844      else
 845        ABI_MALLOC(ylmgr_k,(0,0,0))
 846      end if
 847      if (psps%useylm==1) then
 848 !$OMP PARALLEL DO COLLAPSE(2)
 849        do ilm=1,mpsang*mpsang
 850          do ipw=1,npw_k
 851            ylm_k(ipw,ilm)=ylm(ipw+ikg,ilm)
 852          end do
 853        end do
 854        if (stress_needed==1) then
 855 !$OMP PARALLEL DO COLLAPSE(2)
 856          do ilm=1,mpsang*mpsang
 857            do ii=1,3
 858              do ipw=1,npw_k
 859                ylmgr_k(ipw,ii,ilm)=ylmgr(ipw+ikg,ii,ilm)
 860              end do
 861            end do
 862          end do
 863        end if
 864      end if
 865 
 866 !    Prepare kinetic contribution to stress tensor (Warning : the symmetry
 867 !    has not been broken, like in mkkin.f or kpg3.f . It should be, in order to be coherent).
 868      if (stress_needed==1) then
 869        ABI_MALLOC(gprimd,(3,3))
 870        gprimd=gs_hamk%gprimd
 871 !$OMP PARALLEL DO PRIVATE(fact_kin,ipw,kgc1,kgc2,kgc3,kin,xx,fsm,dfsm) &
 872 !$OMP&SHARED(ecut,ecutsm,ecutsm_inv,gs_hamk,htpisq,kg_k,kpoint,kstr1,kstr2,kstr3,kstr4,kstr5,kstr6,npw_k)
 873        do ipw=1,npw_k
 874 !        Compute Cartesian coordinates of (k+G)
 875          kgc1=gprimd(1,1)*(kpoint(1)+kg_k(1,ipw))+&
 876 &         gprimd(1,2)*(kpoint(2)+kg_k(2,ipw))+&
 877 &         gprimd(1,3)*(kpoint(3)+kg_k(3,ipw))
 878          kgc2=gprimd(2,1)*(kpoint(1)+kg_k(1,ipw))+&
 879 &         gprimd(2,2)*(kpoint(2)+kg_k(2,ipw))+&
 880 &         gprimd(2,3)*(kpoint(3)+kg_k(3,ipw))
 881          kgc3=gprimd(3,1)*(kpoint(1)+kg_k(1,ipw))+&
 882 &         gprimd(3,2)*(kpoint(2)+kg_k(2,ipw))+&
 883 &         gprimd(3,3)*(kpoint(3)+kg_k(3,ipw))
 884          kin=htpisq* ( kgc1**2 + kgc2**2 + kgc3**2 )
 885          fact_kin=1.0_dp
 886          if(kin>ecut-ecutsm)then
 887            if(kin>ecut)then
 888              fact_kin=0.0_dp
 889            else
 890 !            See the routine mkkin.f, for the smearing procedure
 891              xx=(ecut-kin)*ecutsm_inv
 892 !            This kinetic cutoff smoothing function and its xx derivatives
 893 !            were produced with Mathematica and the fortran code has been
 894 !            numerically checked against Mathematica.
 895              fsm=1.0_dp/(xx**2*(3+xx*(1+xx*(-6+3*xx))))
 896              dfsm=-3.0_dp*(-1+xx)**2*xx*(2+5*xx)*fsm**2
 897 !            d2fsm=6.0_dp*xx**2*(9+xx*(8+xx*(-52+xx*(-3+xx*(137+xx*&
 898 !            &                         (-144+45*xx))))))*fsm**3
 899              fact_kin=fsm+kin*(-ecutsm_inv)*dfsm
 900            end if
 901          end if
 902          kstr1(ipw)=fact_kin*kgc1*kgc1
 903          kstr2(ipw)=fact_kin*kgc2*kgc2
 904          kstr3(ipw)=fact_kin*kgc3*kgc3
 905          kstr4(ipw)=fact_kin*kgc3*kgc2
 906          kstr5(ipw)=fact_kin*kgc3*kgc1
 907          kstr6(ipw)=fact_kin*kgc2*kgc1
 908        end do ! ipw
 909        ABI_FREE(gprimd)
 910      end if
 911 
 912 !    Compute (k+G) vectors
 913      nkpg=3*nloalg(3)
 914      ABI_MALLOC(kpg_k,(npw_k,nkpg))
 915      if (nkpg>0) then
 916        call mkkpg(kg_k,kpg_k,kpoint,nkpg,npw_k)
 917      end if
 918 
 919 !    Compute nonlocal form factors ffnl at all (k+G)
 920      ider=0;idir=0;dimffnl=1
 921      if (stress_needed==1) then
 922        ider=1;dimffnl=2+2*psps%useylm
 923      end if
 924      ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,ntypat))
 925      call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,gs_hamk%gmet,gs_hamk%gprimd,&
 926 &     ider,idir,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,psps%lnmax,psps%mpsang,psps%mqgrid_ff,&
 927 &     nkpg,npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,psps%usepaw,psps%useylm,ylm_k,ylmgr_k)
 928      if ((stress_needed==1).and.(usefock_loc).and.(psps%usepaw==1))then
 929        ider_str=1; dimffnl_str=7;idir_str=-7
 930        ABI_MALLOC(ffnl_str,(npw_k,dimffnl_str,psps%lmnmax,ntypat))
 931        call mkffnl(psps%dimekb,dimffnl_str,psps%ekb,ffnl_str,psps%ffspl,gs_hamk%gmet,gs_hamk%gprimd,&
 932 &       ider_str,idir_str,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,psps%lnmax,psps%mpsang,psps%mqgrid_ff,&
 933 &       nkpg,npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,psps%usepaw,psps%useylm,ylm_k,ylmgr_k)
 934      end if
 935 
 936 !    Load k-dependent part in the Hamiltonian datastructure
 937 !     - Compute 3D phase factors
 938 !     - Prepare various tabs in case of band-FFT parallelism
 939 !     - Load k-dependent quantities in the Hamiltonian
 940      ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk))
 941      call gs_hamk%load_k(kpt_k=kpoint,istwf_k=istwf_k,npw_k=npw_k,&
 942 &     kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,ph3d_k=ph3d,compute_gbound=compute_gbound,compute_ph3d=.true.)
 943 
 944 !    Load band-FFT tabs (transposed k-dependent arrays)
 945      if (mpi_enreg%paral_kgb==1) then
 946        call bandfft_kpt_savetabs(my_bandfft_kpt,ffnl=ffnl_sav,ph3d=ph3d_sav,kpg=kpg_k_sav)
 947        call prep_bandfft_tabs(gs_hamk,ikpt,mkmem,mpi_enreg)
 948        call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, &
 949 &       kg_k     =my_bandfft_kpt%kg_k_gather, &
 950 &       kpg_k    =my_bandfft_kpt%kpg_k_gather, &
 951 &       ffnl_k   =my_bandfft_kpt%ffnl_gather, &
 952 &       ph3d_k   =my_bandfft_kpt%ph3d_gather,compute_gbound=compute_gbound)
 953      end if
 954 
 955 !    If OpenMP GPU, load "hamiltonian" on GPU device
 956      if (gpu_option == ABI_GPU_OPENMP) then
 957        if(mpi_enreg%paral_kgb==0) then
 958          call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
 959        else if(gs_hamk%istwf_k==1) then
 960          call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=my_bandfft_kpt%kg_k_gather)
 961        else if(gs_hamk%istwf_k==2) then
 962          call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=my_bandfft_kpt%kg_k_gather_sym)
 963        else
 964          ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
 965        end if
 966      end if
 967 
 968 !    Setup gemm_nonlop
 969      if (gemm_nonlop_use_gemm) then
 970        gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt
 971        if ( gpu_option == ABI_GPU_DISABLED) then
 972          call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
 973              gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
 974              gs_hamk%ucvol,  gs_hamk%ffnl_k, gs_hamk%ph3d_k, gs_hamk%kpt_k, &
 975              gs_hamk%kg_k, gs_hamk%kpg_k, &
 976              compute_grad_strain=(stress_needed>0),compute_grad_atom=(optfor>0))
 977        else if ( gpu_option == ABI_GPU_OPENMP) then
 978          if(mpi_enreg%paral_kgb==0) then
 979            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp)
 980          else if(gs_hamk%istwf_k==1) then
 981            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather)
 982          else if(gs_hamk%istwf_k==2) then
 983            call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym)
 984          else
 985            ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !")
 986          end if
 987          call make_gemm_nonlop_ompgpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
 988              gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, &
 989              gs_hamk%ucvol,  gs_hamk%ffnl_k, gs_hamk%ph3d_k, gs_hamk%kpt_k, &
 990              gs_hamk%kg_k, gs_hamk%kpg_k, &
 991              compute_grad_strain=(stress_needed>0),compute_grad_atom=(optfor>0))
 992        end if
 993      end if
 994 
 995 !    Loop over (blocks of) bands; accumulate forces and/or stresses
 996 !    The following is now wrong. In sequential, nblockbd=nband_k/bandpp
 997 !    blocksize= bandpp (JB 2016/04/16)
 998 !    Note that in sequential mode iblock=iband, nblockbd=nband_k and blocksize=1
 999      ABI_MALLOC(lambda,(blocksize))
1000      ABI_MALLOC(occblock,(blocksize))
1001      ABI_MALLOC(weight,(blocksize))
1002      ABI_MALLOC(enlout,(nnlout*blocksize))
1003      occblock=zero;weight=zero;enlout(:)=zero
1004      if (usefock_loc) then
1005        if (fockcommon%optstr) then
1006          ABI_MALLOC(fockcommon%stress_ikpt,(6,nband_k))
1007          fockcommon%stress_ikpt=zero
1008        end if
1009      end if
1010      if ((usefock_loc).and.(psps%usepaw==1)) then
1011        if (fockcommon%optfor) then
1012          fockcommon%forces_ikpt=zero
1013        end if
1014      end if
1015 
1016      call timab(922,2,tsec)
1017 
1018      do iblock=1,nblockbd
1019 
1020        iband=(iblock-1)*blocksize+1;iband_last=min(iband+blocksize-1,nband_k)
1021        iband_cprj=(iblock-1)*bandpp+1
1022        if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,iband,iband_last,isppol,me_distrb)) cycle
1023 
1024 !      Select occupied bandsddk
1025        occblock(:)=occ(1+(iblock-1)*blocksize+bdtot_index:iblock*blocksize+bdtot_index)
1026        if( abs(maxval(occblock))>=tol8 ) then
1027          call timab(923,1,tsec)
1028          weight(:)=wtk(ikpt)*occblock(:)
1029 
1030 !        gs_hamk%ffnl_k is changed in fock_getghc, so that it is necessary to restore it when stresses are to be calculated.
1031          if ((stress_needed==1).and.(usefock_loc).and.(psps%usepaw==1))then
1032            call gs_hamk%load_k(ffnl_k=ffnl)
1033          end if
1034 
1035 !        Load contribution from n,k
1036          cwavef(:,1:npw_k*my_nspinor*blocksize)=&
1037 &         cg(:,1+(iblock-1)*npw_k*my_nspinor*blocksize+icg:iblock*npw_k*my_nspinor*blocksize+icg)
1038          if (psps%usepaw==1.and.usecprj_local==1) then
1039            call pawcprj_get(gs_hamk%atindx1,cwaveprj,cprj,natom,iband_cprj,ibg,ikpt,0,isppol,&
1040 &           mband_cprj,mkmem,natom,bandpp,nband_cprj_k,my_nspinor,nsppol,0,&
1041 &           mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb)
1042          end if
1043 
1044          call timab(923,2,tsec)
1045          call timab(924,-1,tsec)
1046 
1047          lambda(1:blocksize)= eigen(1+(iblock-1)*blocksize+bdtot_index:iblock*blocksize+bdtot_index)
1048          ABI_NVTX_START_RANGE(NVTX_FORSTR_NONLOP)
1049          if (mpi_enreg%paral_kgb/=1) then
1050            call nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir,lambda,mpi_enreg,blocksize,nnlout,&
1051 &           paw_opt,signs,nonlop_dum,tim_nonlop,cwavef,cwavef)
1052          else
1053            ! here we MUST pass option gpu_option=ABI_GPU_DISABLED, as cwavef here is a host memory buffer
1054            call prep_nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir,lambda,blocksize,&
1055 &           mpi_enreg,nnlout,paw_opt,signs,nonlop_dum,tim_nonlop_prep,cwavef,cwavef,&
1056 &           already_transposed=.False.,gpu_option=ABI_GPU_DISABLED)
1057          end if
1058          ABI_NVTX_END_RANGE()
1059          if ((stress_needed==1).and.(usefock_loc).and.(psps%usepaw==1))then
1060            call gs_hamk%load_k(ffnl_k=ffnl_str)
1061          end if
1062 
1063 !        Accumulate non-local contributions from n,k
1064          if (optfor==1) then
1065            do iblocksize=1,blocksize
1066              ibs=nnlout*(iblocksize-1)
1067              grnl(1:3*natom)=grnl(1:3*natom)+weight(iblocksize)*enlout(ibs+1+ishift:ibs+3*natom+ishift)
1068            end do
1069          end if
1070          if (stress_needed==1) then
1071            do iblocksize=1,blocksize
1072              ibs=nnlout*(iblocksize-1)
1073              npsstr(1:6)=npsstr(1:6) + weight(iblocksize)*enlout(ibs+1:ibs+6)
1074            end do
1075          end if
1076 
1077          call timab(924,2,tsec)
1078 
1079 #if defined HAVE_GPU && defined HAVE_YAKL
1080          if(gpu_option==ABI_GPU_KOKKOS) then
1081            ! the following is done on CPU, so prefetch wave functions from device to host (for efficiency)
1082            call gpu_data_prefetch_async(C_LOC(cwavef), INT(2, c_size_t)*npw_k*my_nspinor*blocksize, CPU_DEVICE_ID)
1083            call gpu_device_synchronize()
1084          end if
1085 #endif
1086 
1087 !        Accumulate stress tensor kinetic contributions
1088          if (stress_needed==1) then
1089            call timab(925,1,tsec)
1090            do iblocksize=1,blocksize
1091              call meanvalue_g(ar,kstr1,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1092 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1093 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1094              kinstr(1)=kinstr(1)+weight(iblocksize)*ar
1095              call meanvalue_g(ar,kstr2,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1096 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1097 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1098              kinstr(2)=kinstr(2)+weight(iblocksize)*ar
1099              call meanvalue_g(ar,kstr3,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1100 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1101 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1102              kinstr(3)=kinstr(3)+weight(iblocksize)*ar
1103              call meanvalue_g(ar,kstr4,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1104 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1105 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1106              kinstr(4)=kinstr(4)+weight(iblocksize)*ar
1107              call meanvalue_g(ar,kstr5,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1108 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1109 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1110              kinstr(5)=kinstr(5)+weight(iblocksize)*ar
1111              call meanvalue_g(ar,kstr6,0,istwf_k,mpi_enreg,npw_k,my_nspinor,&
1112 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),&
1113 &             cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),0)
1114              kinstr(6)=kinstr(6)+weight(iblocksize)*ar
1115            end do
1116            call timab(925,2,tsec)
1117          end if
1118 
1119 !        Accumulate stress tensor and forces for the Fock part
1120          if (usefock_loc) then
1121            if(fockcommon%optstr.or.fockcommon%optfor) then
1122              call timab(926,1,tsec)
1123              if (mpi_enreg%paral_kgb==1) then
1124                msg='forsrtnps: Paral_kgb is not yet implemented for fock stresses'
1125                ABI_BUG(msg)
1126              end if
1127              ndat=mpi_enreg%bandpp
1128              if (gs_hamk%usepaw==0) cwaveprj_idat => cwaveprj
1129              ABI_MALLOC(ghc_dum,(0,0))
1130              do iblocksize=1,blocksize
1131                fockcommon%ieigen=(iblock-1)*blocksize+iblocksize
1132                fockcommon%iband=(iblock-1)*blocksize+iblocksize
1133                if (gs_hamk%usepaw==1) then
1134                  cwaveprj_idat => cwaveprj(:,(iblocksize-1)*my_nspinor+1:iblocksize*my_nspinor)
1135                end if
1136                call fock_getghc(cwavef(:,1+(iblocksize-1)*npw_k*my_nspinor:iblocksize*npw_k*my_nspinor),cwaveprj_idat,&
1137 &               ghc_dum,gs_hamk,mpi_enreg)
1138                if (fockcommon%optstr) then
1139                  fockcommon%stress(:)=fockcommon%stress(:)+weight(iblocksize)*fockcommon%stress_ikpt(:,fockcommon%ieigen)
1140                end if
1141                if (fockcommon%optfor) then
1142                  fockcommon%forces(:,:)=fockcommon%forces(:,:)+weight(iblocksize)*fockcommon%forces_ikpt(:,:,fockcommon%ieigen)
1143                end if
1144              end do
1145              ABI_FREE(ghc_dum)
1146              call timab(926,2,tsec)
1147            end if
1148          end if ! usefock_loc
1149        end if
1150        if ( gpu_option == ABI_GPU_OPENMP) then
1151          call ompgpu_free_hamilt_buffers()
1152        end if
1153 
1154      end do ! End of loop on block of bands
1155 
1156      call timab(927,1,tsec)
1157 
1158 !    Restore the bandfft tabs
1159      if (mpi_enreg%paral_kgb==1) then
1160        call bandfft_kpt_restoretabs(my_bandfft_kpt,ffnl=ffnl_sav,ph3d=ph3d_sav,kpg=kpg_k_sav)
1161      end if
1162 
1163 !    Increment indexes
1164      bdtot_index=bdtot_index+nband_k
1165      if (mkmem/=0) then
1166        ibg=ibg+my_nspinor*nband_cprj_k
1167        icg=icg+npw_k*my_nspinor*nband_k
1168        ikg=ikg+npw_k
1169      end if
1170 
1171      if (usefock_loc) then
1172        if (fockcommon%optstr) then
1173          ABI_FREE(fockcommon%stress_ikpt)
1174        end if
1175      end if
1176 
1177      if (psps%usepaw==1) then
1178        call pawcprj_free(cwaveprj)
1179      end if
1180      ABI_FREE(cwaveprj)
1181 
1182      if(gpu_option == ABI_GPU_KOKKOS) then
1183 #if defined HAVE_GPU && defined HAVE_YAKL
1184        ABI_FREE_MANAGED(cwavef)
1185 #endif
1186      else
1187        ABI_FREE(cwavef)
1188      end if
1189 
1190      ABI_FREE(lambda)
1191      ABI_FREE(occblock)
1192      ABI_FREE(weight)
1193      ABI_FREE(enlout)
1194      ABI_FREE(ffnl)
1195      ABI_FREE(kg_k)
1196      ABI_FREE(kpg_k)
1197      ABI_FREE(ylm_k)
1198      ABI_FREE(ylmgr_k)
1199      ABI_FREE(ph3d)
1200      if (stress_needed==1) then
1201        ABI_FREE(kstr1)
1202        ABI_FREE(kstr2)
1203        ABI_FREE(kstr3)
1204        ABI_FREE(kstr4)
1205        ABI_FREE(kstr5)
1206        ABI_FREE(kstr6)
1207      end if
1208      if ((stress_needed==1).and.(usefock_loc).and.(psps%usepaw==1))then
1209        ABI_FREE(ffnl_str)
1210      end if
1211 
1212      call timab(927,2,tsec)
1213 
1214    end do ! End k point loop
1215  end do ! End loop over spins
1216 
1217  call timab(928,1,tsec)
1218 
1219 !Stress is equal to dE/d_strain * (1/ucvol)
1220  npsstr(:)=npsstr(:)/gs_hamk%ucvol
1221 
1222 !Parallel case: accumulate (n,k) contributions
1223  if (xmpi_paral==1) then
1224 !  Forces
1225    if (optfor==1) then
1226      call timab(65,1,tsec)
1227      call xmpi_sum(grnl,spaceComm,ierr)
1228      call timab(65,2,tsec)
1229      if ((usefock_loc).and.(psps%usepaw==1)) then
1230        call xmpi_sum(fockcommon%forces,spaceComm,ierr)
1231      end if
1232    end if
1233 !  Stresses
1234    if (stress_needed==1) then
1235      call timab(65,1,tsec)
1236      call xmpi_sum(kinstr,spaceComm,ierr)
1237      call xmpi_sum(npsstr,spaceComm,ierr)
1238      if (usefock_loc) then
1239        if (fockcommon%optstr) then
1240          call xmpi_sum(fockcommon%stress,spaceComm,ierr)
1241        end if
1242      end if
1243      call timab(65,2,tsec)
1244    end if
1245  end if
1246 
1247 !Do final normalizations and symmetrizations of stress tensor contributions
1248  if (stress_needed==1) then
1249    renorm_factor=-(two_pi**2)/effmass_free/gs_hamk%ucvol
1250    kinstr(:)=kinstr(:)*renorm_factor
1251    if (nsym>1) then
1252      call stresssym(gs_hamk%gprimd,nsym,kinstr,symrec)
1253      call stresssym(gs_hamk%gprimd,nsym,npsstr,symrec)
1254      if (usefock_loc) then
1255        if (fockcommon%optstr) then
1256          call stresssym(gs_hamk%gprimd,nsym,fockcommon%stress,symrec)
1257        end if
1258      end if
1259    end if
1260  end if
1261 
1262 !Need to reorder cprj=<p_lmn|Cnk> (from atom-sorted to unsorted)
1263  if (psps%usepaw==1.and.usecprj_local==1) then
1264    call pawcprj_reorder(cprj,gs_hamk%atindx1)
1265  end if
1266 
1267 !Deallocate temporary space
1268  call gs_hamk%free()
1269  if (usefock_loc) then
1270    fockcommon%use_ACE=use_ACE_old
1271  end if
1272  call timab(928,2,tsec) ; call timab(920,-2,tsec)
1273  ABI_NVTX_END_RANGE()
1274 
1275 end subroutine forstrnps

ABINIT/m_forstr [ Modules ]

[ Top ] [ Modules ]

NAME

  m_forstr

FUNCTION

COPYRIGHT

  Copyright (C) 1998-2024 ABINIT group (DCA, XG, GMR, AF, AR, MB, MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 ! nvtx related macro definition
23 #include "nvtx_macros.h"
24 
25 module m_forstr
26 
27  use defs_basis
28  use defs_wvltypes
29  use m_abicore
30  use m_efield
31  use m_errors
32  use m_xmpi
33  use m_fock
34  use m_cgtools
35  use m_xcdata
36  use m_dtset
37  use m_extfpmd
38  use m_ompgpu_utils
39 
40  use defs_datatypes,     only : pseudopotential_type
41  use defs_abitypes,      only : MPI_type
42  use m_time,             only : timab
43  use m_geometry,         only : xred2xcart, metric, stresssym
44  use m_energies,         only : energies_type
45  use m_pawang,           only : pawang_type
46  use m_pawrad,           only : pawrad_type
47  use m_pawtab,           only : pawtab_type
48  use m_paw_ij,           only : paw_ij_type
49  use m_pawfgrtab,        only : pawfgrtab_type
50  use m_pawrhoij,         only : pawrhoij_type
51  use m_pawfgr,           only : pawfgr_type
52  use m_pawcprj,          only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_get, pawcprj_reorder, pawcprj_getdim
53  use m_paw_dfpt,         only : pawgrnl
54  use libxc_functionals,  only : libxc_functionals_is_hybrid
55  use m_stress,           only : stress
56  use m_forces,           only : forces
57  use m_initylmg,         only : initylmg
58  use m_xchybrid,         only : xchybrid_ncpp_cc
59  use m_kg,               only : mkkpg
60  use m_hamiltonian,      only : init_hamiltonian, gs_hamiltonian_type !,K_H_KPRIME
61  use m_electronpositron, only : electronpositron_type, electronpositron_calctype
62  use m_bandfft_kpt,      only : bandfft_kpt, bandfft_kpt_type, prep_bandfft_tabs, &
63 &                               bandfft_kpt_savetabs, bandfft_kpt_restoretabs
64  use m_spacepar,         only : meanvalue_g, hartre
65  use m_mkffnl,           only : mkffnl
66  use m_mpinfo,           only : proc_distrb_cycle
67  use m_nonlop,           only : nonlop
68  use m_gemm_nonlop,      only : make_gemm_nonlop,gemm_nonlop_use_gemm, &
69 &                               gemm_nonlop_ikpt_this_proc_being_treated
70  use m_gemm_nonlop_ompgpu,  only : make_gemm_nonlop_ompgpu
71  use m_fock_getghc,      only : fock_getghc
72  use m_prep_kgb,         only : prep_nonlop
73  use m_paw_nhat,         only : pawmknhat
74  use m_rhotoxc,          only : rhotoxc
75  use m_dfpt_mkvxc,       only : dfpt_mkvxc, dfpt_mkvxc_noncoll
76  use m_cgprj,            only : ctocprj
77  use m_psolver,          only : psolver_hartree
78  use m_wvl_psi,          only : wvl_nl_gradient
79  use m_fft,              only : fourdp
80  use, intrinsic :: iso_c_binding,      only : c_loc,c_f_pointer,c_double,c_size_t
81 
82 #if defined(HAVE_GPU_CUDA) && defined(HAVE_YAKL)
83  use gator_mod
84  use m_gpu_toolbox, only : CPU_DEVICE_ID, gpu_device_synchronize, gpu_data_prefetch_async
85 #endif
86 
87 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS)
88  use m_nvtx_data
89 #endif
90 
91  implicit none
92 
93  private

ABINIT/nres2vres [ Functions ]

[ Top ] [ Functions ]

NAME

 nres2vres

FUNCTION

 Convert a density residual into a potential residual
 using a first order formula:
     V^res(r)=dV/dn.n^res(r)
             =V_hartree(n^res)(r) + Kxc.n^res(r)

INPUTS

 dtset <type(dataset_type)>=all input variables in this dataset
  | icoulomb=0 periodic treatment of Hartree potential, 1 use of Poisson solver
  | natom= number of atoms in cell
  | nspden=number of spin-density components
  | ntypat=number of atom types
  | typat(natom)=type (integer) for each atom
 gsqcut=cutoff value on G**2 for sphere inside fft box
 izero=if 1, unbalanced components of Vhartree(g) are set to zero
 kxc(nfft,nkxc)=exchange-correlation kernel, needed only if nkxc>0
 mpi_enreg=information about MPI parallelization
 my_natom=number of atoms treated by current processor
 nfft=(effective) number of FFT grid points (for this processor)
 ngfft(18)=contain all needed information about 3D FFT
 nhat(nfft,nspden*usepaw)= -PAW only- compensation density
 nkxc=second dimension of the array kxc, see rhotoxc.F90 for a description
 nresid(nfft,nspden)= the input density residual
 n3xccc=dimension of the xccc3d array (0 or nfft).
 optnc=option for non-collinear magnetism (nspden=4):
       1: the whole 2x2 Vres matrix is computed
       2: only Vres^{11} and Vres^{22} are computed
 optxc=0 if LDA part of XC kernel has only to be taken into account (even for GGA)
       1 if XC kernel has to be fully taken into
      -1 if XC kernel does not have to be taken into account
 pawang <type(pawang_type)>=paw angular mesh and related data
 pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
 pawrhoij(my_natom*usepaw) <type(pawrhoij_type)>= paw rhoij occupancies and related data
 pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
 rhor(nfft,nspden)=electron density in real space
                   (used only if Kxc was not computed before)
 rprimd(3,3)=dimensional primitive translation vectors (bohr)
 usepaw= 0 for non paw calculation; =1 for paw calculation
 xccc3d(n3xccc)=3D core electron density for XC core correction (bohr^-3)
 xred(3,natom)=reduced dimensionless atomic coordinates

 === optional inputs ===
 vxc(cplex*nfft,nspden)=XC GS potential

OUTPUT

 vresid(nfft,nspden)= the output potential residual

SOURCE

1331 subroutine nres2vres(dtset,gsqcut,izero,kxc,mpi_enreg,my_natom,nfft,ngfft,nhat,&
1332 &                 nkxc,nresid,n3xccc,optnc,optxc,pawang,pawfgrtab,pawrhoij,pawtab,&
1333 &                 rhor,rprimd,usepaw,vresid,xccc3d,xred,vxc)
1334 
1335 !Arguments ------------------------------------
1336 !scalars
1337  integer,intent(in) :: izero,my_natom,n3xccc,nfft,nkxc,optnc,optxc,usepaw
1338  real(dp),intent(in) :: gsqcut
1339  type(MPI_type),intent(in) :: mpi_enreg
1340  type(dataset_type),intent(in) :: dtset
1341  type(pawang_type),intent(in) :: pawang
1342 !arrays
1343  integer,intent(in) :: ngfft(18)
1344  real(dp),intent(in) :: kxc(nfft,nkxc),nresid(nfft,dtset%nspden)
1345  real(dp),intent(in) :: rhor(nfft,dtset%nspden),rprimd(3,3),xccc3d(n3xccc),xred(3,dtset%natom)
1346  real(dp),intent(inout) :: nhat(nfft,dtset%nspden*usepaw)
1347  real(dp),intent(out) :: vresid(nfft,dtset%nspden)
1348  type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*usepaw)
1349  type(pawrhoij_type),intent(in) :: pawrhoij(my_natom*usepaw)
1350  type(pawtab_type),intent(in) :: pawtab(dtset%ntypat*usepaw)
1351  real(dp),intent(in) :: vxc(nfft,dtset%nspden) !FR TODO:cplex?
1352 
1353 !Local variables-------------------------------
1354 !scalars
1355  integer :: cplex,ider,idir,ipert,ispden,nhatgrdim,nkxc_cur,option,me,nproc,comm,usexcnhat
1356  logical :: has_nkxc_gga,non_magnetic_xc
1357  real(dp) :: dum,energy,m_norm_min,ucvol,vxcavg
1358  character(len=500) :: message
1359  type(xcdata_type) :: xcdata
1360 !arrays
1361  integer :: nk3xc
1362  real(dp) :: dummy6(6),gmet(3,3),gprimd(3,3),qq(3),rmet(3,3)
1363  real(dp),allocatable :: dummy(:),kxc_cur(:,:),nhatgr(:,:,:)
1364  real(dp),allocatable :: nresg(:,:),rhor0(:,:),vhres(:)
1365 
1366 ! *************************************************************************
1367 
1368 !Compatibility tests:
1369  has_nkxc_gga=(nkxc==7.or.nkxc==19)
1370 
1371  if(optxc<-1.or.optxc>1)then
1372    write(message,'(a,i0)')' Wrong value for optxc ',optxc
1373    ABI_BUG(message)
1374  end if
1375 
1376  if((optnc/=1.and.optnc/=2).or.(dtset%nspden/=4.and.optnc/=1))then
1377    write(message,'(a,i0)')' Wrong value for optnc ',optnc
1378    ABI_BUG(message)
1379  end if
1380 
1381  if(dtset%icoulomb==1.and.optxc/=-1)then
1382    write(message,'(a)')' This routine is not compatible with icoulomb==1 and optxc/=-1 !'
1383    ABI_BUG(message)
1384  end if
1385 
1386  if(dtset%nspden==4.and.dtset%xclevel==2.and.optxc==1.and.(.not.has_nkxc_gga))then
1387    ABI_ERROR(' Wrong values for optxc and nkxc !')
1388  end if
1389 
1390  qq=zero
1391  nkxc_cur=0
1392  m_norm_min=EPSILON(0.0_dp)**2
1393  usexcnhat=0;if (usepaw==1) usexcnhat=maxval(pawtab(1:dtset%ntypat)%usexcnhat)
1394  non_magnetic_xc=(dtset%usepaw==1.and.mod(abs(dtset%usepawu),10)==4)
1395  if (dtset%xclevel==1.or.optxc==0) nkxc_cur= 2*min(dtset%nspden,2)-1 ! LDA: nkxc=1,3
1396  if (dtset%xclevel==2.and.optxc==1)nkxc_cur=12*min(dtset%nspden,2)-5 ! GGA: nkxc=7,19
1397  ABI_MALLOC(vhres,(nfft))
1398 
1399 !Compute different geometric tensor, as well as ucvol, from rprimd
1400  call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
1401 
1402 !Compute density residual in reciprocal space
1403  if (dtset%icoulomb==0) then
1404    ABI_MALLOC(nresg,(2,nfft))
1405    ABI_MALLOC(dummy,(nfft))
1406    dummy(:)=nresid(:,1)
1407    call fourdp(1,nresg,dummy,-1,mpi_enreg,nfft,1,ngfft,0)
1408    ABI_FREE(dummy)
1409  end if
1410 
1411 !For GGA, has to recompute gradients of nhat
1412  nhatgrdim=0
1413  if ((nkxc==nkxc_cur.and.has_nkxc_gga).or.(optxc==-1.and.has_nkxc_gga).or.&
1414 & (optxc/=-1.and.nkxc/=nkxc_cur)) then
1415    if (usepaw==1.and.dtset%xclevel==2.and.usexcnhat>0.and.dtset%pawnhatxc>0) then
1416      nhatgrdim=1
1417      ABI_MALLOC(nhatgr,(nfft,dtset%nspden,3))
1418      ider=1;cplex=1;ipert=0;idir=0
1419      call pawmknhat(dum,cplex,ider,idir,ipert,izero,gprimd,my_natom,dtset%natom,&
1420 &     nfft,ngfft,nhatgrdim,dtset%nspden,dtset%ntypat,pawang,pawfgrtab,&
1421 &     nhatgr,nhat,pawrhoij,pawrhoij,pawtab,qq,rprimd,ucvol,dtset%usewvl,xred,&
1422 &     comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,&
1423 &     comm_fft=mpi_enreg%comm_fft,paral_kgb=dtset%paral_kgb,me_g0=mpi_enreg%me_g0,&
1424 &     distribfft=mpi_enreg%distribfft,mpi_comm_wvl=mpi_enreg%comm_wvl)
1425    else
1426      ABI_MALLOC(nhatgr,(0,0,0))
1427    end if
1428  else
1429    ABI_MALLOC(nhatgr,(0,0,0))
1430  end if
1431 
1432  ABI_MALLOC(dummy,(0))
1433 !First case: Kxc has already been computed
1434 !-----------------------------------------
1435  if (nkxc==nkxc_cur.or.optxc==-1) then
1436 
1437 !  Compute VH(n^res)(r)
1438    if (dtset%icoulomb == 0) then
1439      call hartre(1,gsqcut,dtset%icutcoul,izero,mpi_enreg,nfft,ngfft,&
1440                  &dtset%nkpt,dtset%rcut,nresg,rprimd,dtset%vcutgeo,vhres)
1441    else
1442      comm=mpi_enreg%comm_cell
1443      nproc=xmpi_comm_size(comm)
1444      me=xmpi_comm_rank(comm)
1445      call psolver_hartree(energy, (/ rprimd(1,1) / dtset%ngfft(1), &
1446 &     rprimd(2,2) / dtset%ngfft(2), rprimd(3,3) / dtset%ngfft(3) /), dtset%icoulomb, &
1447 &     me, comm, dtset%nfft, dtset%ngfft(1:3), nproc, dtset%nscforder, dtset%nspden, &
1448 &     nresid(:,1), vhres, dtset%usewvl)
1449    end if
1450 
1451 !  Compute Kxc(r).n^res(r)
1452    if (optxc/=-1) then
1453 
1454 !    Collinear magnetism or non-polarized
1455      if (dtset%nspden/=4) then
1456        !Note: imposing usexcnhat=1 avoid nhat to be substracted
1457        call dfpt_mkvxc(1,dtset%ixc,kxc,mpi_enreg,nfft,ngfft,nhat,usepaw,nhatgr,nhatgrdim,&
1458 &       nkxc,non_magnetic_xc,dtset%nspden,0,2,qq,nresid,rprimd,1,vresid,dummy)
1459      else
1460 !FR    call routine for Non-collinear magnetism
1461        ABI_MALLOC(rhor0,(nfft,dtset%nspden))
1462        rhor0(:,:)=rhor(:,:)-nresid(:,:)
1463        !Note: imposing usexcnhat=1 avoid nhat to be substracted
1464        call dfpt_mkvxc_noncoll(1,dtset%ixc,kxc,mpi_enreg,nfft,ngfft,nhat,usepaw,nhat,usepaw,nhatgr,nhatgrdim,&
1465 &       nkxc,non_magnetic_xc,dtset%nspden,0,2,2,qq,rhor0,nresid,rprimd,1,vxc,vresid,xccc3d)
1466        ABI_FREE(rhor0)
1467      end if
1468 
1469    else
1470      vresid=zero
1471    end if
1472 
1473  end if
1474 
1475 !2nd case: Kxc has to be computed
1476 !--------------------------------
1477  if (nkxc/=nkxc_cur.and.optxc/=-1) then
1478 
1479 !  Has to use the "initial" density to compute Kxc
1480    ABI_MALLOC(rhor0,(nfft,dtset%nspden))
1481    rhor0(:,:)=rhor(:,:)-nresid(:,:)
1482 
1483 !  Compute VH(n^res) and XC kernel (Kxc) together
1484    ABI_MALLOC(kxc_cur,(nfft,nkxc_cur))
1485 
1486    option=2;if (dtset%xclevel==2.and.optxc==0) option=12
1487 
1488    call hartre(1,gsqcut,dtset%icutcoul,izero,mpi_enreg,nfft,ngfft,&
1489                &dtset%nkpt,dtset%rcut,nresg,rprimd,dtset%vcutgeo,vhres)
1490    call xcdata_init(xcdata,dtset=dtset)
1491 
1492 !  To be adjusted for the call to rhotoxc
1493    nk3xc=1
1494    call rhotoxc(energy,kxc_cur,mpi_enreg,nfft,ngfft,&
1495 &   nhat,usepaw,nhatgr,nhatgrdim,nkxc_cur,nk3xc,non_magnetic_xc,n3xccc,option,&
1496 &   rhor0,rprimd,dummy6,usexcnhat,vresid,vxcavg,xccc3d,xcdata,vhartr=vhres)  !vresid=work space
1497    if (dtset%nspden/=4)  then
1498      ABI_FREE(rhor0)
1499    end if
1500 
1501 !  Compute Kxc(r).n^res(r)
1502 
1503    if (dtset%nspden/=4) then
1504 !    Collinear magnetism or non-polarized
1505      call dfpt_mkvxc(1,dtset%ixc,kxc_cur,mpi_enreg,nfft,ngfft,nhat,usepaw,nhatgr,nhatgrdim,&
1506 &     nkxc_cur,non_magnetic_xc,dtset%nspden,0,2,qq,nresid,rprimd,1,vresid,dummy)
1507    else
1508 !    Non-collinear magnetism
1509      ABI_MALLOC(rhor0,(nfft,dtset%nspden))
1510      rhor0(:,:)=rhor(:,:)-nresid(:,:)
1511      call dfpt_mkvxc_noncoll(1,dtset%ixc,kxc_cur,mpi_enreg,nfft,ngfft,nhat,usepaw,nhat,usepaw,nhatgr,nhatgrdim,&
1512 &     nkxc,non_magnetic_xc,dtset%nspden,0,2,2,qq,rhor0,nresid,rprimd,1,vxc,vresid,xccc3d)
1513      ABI_FREE(rhor0)
1514    end if
1515 
1516    ABI_FREE(kxc_cur)
1517  end if
1518 
1519  !if (nhatgrdim>0)  then
1520  ABI_FREE(nhatgr)
1521  !end if
1522 
1523 !Assemble potential residual: V^res(r)=VH(n^res)(r) + Kxc(r).n^res(r)
1524 !--------------------------------------------------------------------
1525  do ispden=1,dtset%nspden/optnc
1526    vresid(:,ispden)=vresid(:,ispden)+vhres(:)
1527  end do
1528 
1529  if (dtset%icoulomb==0)  then
1530    ABI_FREE(nresg)
1531  end if
1532  ABI_FREE(vhres)
1533  ABI_FREE(dummy)
1534 
1535 end subroutine nres2vres