TABLE OF CONTENTS


ABINIT/energy [ Functions ]

[ Top ] [ Functions ]

NAME

 energy

FUNCTION

 Compute electronic energy terms
 energies%e_eigenvalues, ek and enl from arbitrary (orthonormal) provided wf,
 ehart, enxc, and eei from provided density and potential,
 energies%e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
 energies%e_zeeman=Zeeman spin energy from applied magnetic field -m.B
 ek=kinetic energy, ehart=Hartree electron-electron energy,
 enxc,enxcdc=exchange-correlation energies, eei=local pseudopotential energy,
 enl=nonlocal pseudopotential energy
 Also, compute new density from provided wfs, after the evaluation
 of ehart, enxc, and eei.
 WARNING XG180913 : At present, Fock energy not computed !

 NOTE that this routine is callned in m_scfcv_core only when nstep == 0

INPUTS

  [add_tfw]=flag controling the addition of Weiszacker gradient correction to Thomas-Fermi kin energy
  cg(2,mpw*nspinor*mband*mkmem*nsppol)=<G|Cnk>=Fourier coefficients of wavefunction
  dtset <type(dataset_type)>=all input variables for this dataset
   | mband=maximum number of bands
   | mgfft=maximum size of 1D FFTs
   | mkmem=number of k points treated by this node.
   | mpw=maximum dimension for number of planewaves
   | natom=number of atoms in unit cell
   | nfft=(effective) number of FFT grid points (for this processor)
   | nkpt=number of k points
   | nspden=number of spin-density components
   | nsppol=1 for unpolarized, 2 for polarized
   | nspinor=number of spinorial components
   | nsym=number of symmetry elements in space group (at least 1)
   | occopt=option for occupancies
   | tsmear=smearing energy or temperature (if metal)
  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
  gsqcut=G^2 cutoff from gsqcut=ecut/(2 Pi^2)
  indsym(4,nsym,natom)=indirect indexing array for atom labels
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  kg(3,mpw*mkmem)=work array for coordinates of G vectors in basis
  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
  mpi_enreg=information about MPI parallelization
  my_natom=number of atoms treated by current processor
  nfftf= -PAW ONLY- number of FFT grid points for the fine grid
         (nfftf=nfft for norm-conserving potential runs)
  ngfftf(18)= -PAW ONLY- contain all needed information about 3D FFT for the fine grid
              (ngfftf=ngfft for norm-conserving potential runs)
  nhatgr(nfft,nspden,3*nhatgrdim)= -PAW only- cartesian gradients of compensation density
  nhatgrdim= -PAW only- 0 if nhatgr array is not used ; 1 otherwise
  npwarr(nkpt)=number of planewaves at each k point, and boundary
  n3xccc=dimension of the xccc3d array (0 or nfftf).
  occ(mband*nkpt*nsppol)=occupation numbers of bands (usually 2) at each k point
  optene=option for the computation of total energy (direct scheme or double-counting scheme)
  paw_ij(natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels
  pawang <type(pawang_type)>=paw angular mesh and related data
  pawfgr(natom) <type(pawfgr_type)>=fine grid parameters and related data
  pawfgrtab(natom) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid
  pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
  ph1d(2,3*(2*mgfft+1)*natom)=phase information related to structure factor
  psps <type(pseudopotential_type)>=variables related to pseudopotentials
   | mpsang= 1+maximum angular momentum for nonlocal pseudopotentials
   | ntypat=number of types of atoms in cell
  rprimd(3,3)=dimensional real space primitive translations (bohr)
  symrec(3,3,nsym)=symmetry operations in reciprocal space
  usexcnhat= -PAW only- flag controling use of compensation density in Vxc
  vpsp(nfftf)=local pseudopotential in real space (hartree)
  wfs <type(wvl_projector_type)>=wavefunctions information for wavelets.
  wvl <type(wvl_internal_type)>=wavelets internal data
  xccc3d(n3xccc)=3D core electron density for XC core correction (bohr^-3)
  xred(3,natom)=reduced coordinates of atoms (dimensionless)
  ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point

OUTPUT

  compch_fft=-PAW only- compensation charge inside spheres computed over fine fft grid
  etotal=total energy (hartree):
    - computed by direct scheme if optene=0 or 2
    - computed by double-counting scheme if optene=1 or 3
  resid(mband*nkpt*nsppol)=residuals for each band over all k points (hartree^2)
  strsxc(6)=exchange-correlation contribution to stress tensor
  vhartr(nfftf)=work space to hold Hartree potential in real space (hartree)
  vtrial(nfftf,nspden)=total local potential (hartree)
  vxc(nfftf,nspden)=work space to hold Vxc(r) in real space (hartree)
  [vxctau(nfft,nspden,4*usekden)]=(only for meta-GGA): derivative of XC energy density
    with respect to kinetic energy density (depsxcdtau). The arrays vxctau contains also
    the gradient of vxctau (gvxctau) in vxctau(:,:,2:4)

SIDE EFFECTS

  electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation (optional argument)
  energies <type(energies_type)>=all part of total energy.
   | entropy(IN)=entropy due to the occupation number smearing (if metal)
   | e_chempot(IN)=energy from spatially varying chemical potential (hartree)
   | e_ewald(IN)=Ewald energy (hartree)
   | e_vdw_dftd(IN)=VdW DFT-D energy
   | e_corepsp(IN)=psp core-core energy
   | e_paw(IN)=PAW spherical part energy
   | e_pawdc(IN)=PAW spherical part double-counting energy
   | e_eigenvalues(OUT)=Sum of the eigenvalues - Band energy (Hartree)
   | e_hartree(OUT)=Hartree part of total energy (hartree units)
   | e_kinetic(OUT)=kinetic energy part of total energy.
   | e_nlpsp_vfock(OUT)=nonlocal psp + potential Fock ACE part of total energy.
   | e_xc(OUT)=exchange-correlation energy (hartree)
  ==== if optene==0, 2 or 3
   | e_localpsp(OUT)=local psp energy (hartree)
  ==== if optene==1, 2 or 3
   | e_xcdc(OUT)=exchange-correlation double-counting energy (hartree)
  rhog(2,nfftf)=work space for rho(G); save intact on return (? MT 08-12-2008: is that true now ?)
  rhor(nfftf,nspden)=work space for rho(r); save intact on return (? MT 08-12-2008: is that true now ?)
  taug(2,nfftf*dtset%usekden)=array for Fourier transform of kinetic energy density
  taur(nfftf,nspden*dtset%usekden)=array for kinetic energy density
  nspinor should not be modified in the call of rdnpw
  === if psps%usepaw==1 ===
    nhat(nfftf,nspden*usepaw)= compensation charge density
    pawrhoij(natom) <type(pawrhoij_type)>= paw rhoij occupancies and related data

NOTES

  Be careful to the meaning of nfft (size of FFT grids):
   - In case of norm-conserving calculations the FFT grid is the usual FFT grid.
   - In case of PAW calculations:
     Two FFT grids are used; one with nfft points (coarse grid) for
     the computation of wave functions ; one with nfftf points
     (fine grid) for the computation of total density.

  There is a large amount of overhead in the way this routine do the computation of the energy !
  For example, the density has already been precomputed, so why to compute it again here ??

SOURCE

214 subroutine energy(cg,compch_fft,constrained_dft,dtset,electronpositron,&
215 & energies,eigen,etotal,gsqcut,extfpmd,indsym,irrzon,kg,mcg,mpi_enreg,my_natom,nfftf,ngfftf,nhat,&
216 & nhatgr,nhatgrdim,npwarr,n3xccc,occ,optene,paw_dmft,paw_ij,pawang,pawfgr,&
217 & pawfgrtab,pawrhoij,pawtab,phnons,ph1d,psps,resid,rhog,rhor,rprimd,strsxc,symrec,&
218 & taug,taur,usexcnhat,vhartr,vtrial,vpsp,vxc,wfs,wvl,wvl_den,wvl_e,xccc3d,xred,ylm,&
219 & add_tfw,vxctau) ! optional argument
220 
221 !Arguments ------------------------------------
222 !scalars
223  integer,intent(in) :: mcg,my_natom,n3xccc,nfftf,nhatgrdim,optene,usexcnhat
224  logical,intent(in),optional :: add_tfw
225  real(dp),intent(in) :: gsqcut
226  real(dp),intent(out) :: compch_fft,etotal
227  type(MPI_type),intent(inout) :: mpi_enreg
228  type(constrained_dft_t),intent(in) :: constrained_dft
229  type(dataset_type),intent(in) :: dtset
230  type(electronpositron_type),pointer :: electronpositron
231  type(energies_type),intent(inout) :: energies
232  type(extfpmd_type),pointer,intent(inout) :: extfpmd
233  type(paw_dmft_type), intent(inout) :: paw_dmft
234  type(pawang_type),intent(in) :: pawang
235  type(pawfgr_type),intent(in) :: pawfgr
236  type(pseudopotential_type),intent(in) :: psps
237  type(wvl_internal_type), intent(in) :: wvl
238  type(wvl_wf_type),intent(inout) :: wfs
239  type(wvl_denspot_type), intent(inout) :: wvl_den
240  type(wvl_energy_terms),intent(inout) ::wvl_e
241 !arrays
242 !nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
243  integer, intent(in) :: indsym(4,dtset%nsym,dtset%natom)
244  integer :: irrzon(dtset%nfft**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)),kg(3,dtset%mpw*dtset%mkmem)
245  integer, intent(in) :: ngfftf(18),npwarr(dtset%nkpt),symrec(3,3,dtset%nsym)
246  real(dp), intent(in) :: cg(2,mcg),eigen(dtset%mband*dtset%nkpt*dtset%nsppol)
247  real(dp), intent(in) :: occ(dtset%mband*dtset%nkpt*dtset%nsppol),ph1d(2,3*(2*dtset%mgfft+1)*dtset%natom)
248  real(dp), intent(inout) :: nhat(nfftf,dtset%nspden*psps%usepaw)
249  real(dp),intent(in) :: nhatgr(nfftf,dtset%nspden,3*nhatgrdim)
250 !nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise
251  real(dp), intent(in) :: phnons(2,dtset%nfft**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4))
252  real(dp), intent(inout) :: resid(dtset%mband*dtset%nkpt*dtset%nsppol)
253  real(dp), intent(inout) :: rhog(2,nfftf),rhor(nfftf,dtset%nspden)
254  real(dp), intent(inout) :: taug(2,nfftf*dtset%usekden),taur(nfftf,dtset%nspden*dtset%usekden)
255  real(dp), intent(out) :: strsxc(6)
256  real(dp), intent(in) :: rprimd(3,3),vpsp(nfftf),xccc3d(n3xccc),xred(3,dtset%natom)
257  real(dp), intent(out) :: vhartr(nfftf),vtrial(nfftf,dtset%nspden),vxc(nfftf,dtset%nspden)
258  real(dp),intent(out),optional,target :: vxctau(nfftf,dtset%nspden,4*dtset%usekden)
259  real(dp), intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm)
260  type(paw_ij_type), intent(in) :: paw_ij(my_natom*psps%usepaw)
261  type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom)
262  type(pawrhoij_type),target,intent(inout) :: pawrhoij(my_natom*psps%usepaw)
263  type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw)
264 
265 !Local variables-------------------------------
266 !scalars
267  integer :: bdtot_index,blocksize,choice,cplex,cplex_rhoij,cpopt,dimffnl
268  integer :: iband,iband_last,iblock,iblocksize,icg,ider,idir,ierr,ifft,ikg,ikpt,ilm
269  integer :: ipert,ipositron,iresid,ispden,isppol,istwf_k,izero
270  integer :: me_distrb,mpi_comm_sphgrid,my_ikpt,my_nspinor,n1,n2,n3,n4,n5,n6
271  integer :: nband_k,nblockbd,nfftotf,nkpg,nkxc,nk3xc,nnlout,npw_k,nspden_rhoij,option
272  integer :: option_rhoij,paw_opt,signs,spaceComm,tim_mkrho,tim_nonlop
273  logical :: add_tfw_,paral_atom,usetimerev,with_vxctau
274  logical :: non_magnetic_xc,wvlbigdft=.false.
275  real(dp) :: dotr,doti,eeigk,ekk,enlk,evxc,e_xcdc_vxctau,ucvol,ucvol_local,vxcavg
276  !character(len=500) :: message
277  type(gs_hamiltonian_type) :: gs_hamk
278  type(xcdata_type) :: xcdata
279 !arrays
280  integer,allocatable :: kg_k(:,:)
281  real(dp) :: gmet(3,3),gprimd(3,3),kpg_dum(0,0),kpoint(3),nonlop_out(1,1)
282  real(dp) :: qpt(3),rhodum(1),rmet(3,3),tsec(2),ylmgr_dum(1,1,1),vzeeman(4)
283  real(dp) :: magvec(dtset%nspden)
284  real(dp),target :: vxctau_dum(0,0,0)
285  real(dp),allocatable :: buffer(:)
286  real(dp),allocatable :: cwavef(:,:),eig_k(:),enlout(:),ffnl(:,:,:,:),ffnl_sav(:,:,:,:)
287  real(dp),allocatable :: kinpw(:),kinpw_sav(:),kxc(:,:),occ_k(:),occblock(:)
288  real(dp),allocatable :: ph3d(:,:,:),ph3d_sav(:,:,:)
289  real(dp),allocatable :: resid_k(:),rhowfg(:,:),rhowfr(:,:),vlocal(:,:,:,:)
290  real(dp),allocatable :: vxctaulocal(:,:,:,:,:),ylm_k(:,:),v_constr_dft_r(:,:)
291  real(dp),pointer :: vxctau_(:,:,:)
292  type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null()
293  type(pawcprj_type),target,allocatable :: cwaveprj(:,:)
294  type(pawcprj_type),pointer :: cwaveprj_gat(:,:)
295  type(pawrhoij_type),pointer :: pawrhoij_unsym(:)
296 
297 ! *************************************************************************
298 
299  DBG_ENTER("COLL")
300 
301 !Check that usekden is not 0 if want to use vxctau
302  with_vxctau = (present(vxctau).and.dtset%usekden/=0)
303  vxctau_ => vxctau_dum ; if (with_vxctau) vxctau_ => vxctau
304 
305 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW)
306  nfftotf=PRODUCT(ngfftf(1:3))
307  if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then
308    ABI_BUG('wrong values for nfft, nfftf!')
309  end if
310 
311 !If usewvl: wvlbigdft indicates that the BigDFT workflow will be followed
312  wvlbigdft=(dtset%usewvl==1 .and. dtset%wvl_bigdft_comp==1)
313 
314  call timab(59,1,tsec)
315 
316 !Data for parallelism
317  spaceComm=mpi_enreg%comm_cell
318  if(mpi_enreg%paral_kgb==1) spaceComm=mpi_enreg%comm_kpt
319  if(mpi_enreg%paral_hf==1) spaceComm=mpi_enreg%comm_kpt
320  mpi_comm_sphgrid=mpi_enreg%comm_fft
321  if(dtset%usewvl==1) then
322    spaceComm=mpi_enreg%comm_wvl
323    mpi_comm_sphgrid=mpi_enreg%comm_wvl
324  end if
325  me_distrb=mpi_enreg%me_kpt
326  my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor)
327  paral_atom=(my_natom/=dtset%natom)
328 
329 !Compute gmet, gprimd and ucvol from rprimd
330  call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
331  if (dtset%usewvl == 0) then
332    ucvol_local = ucvol
333 #if defined HAVE_BIGDFT
334  else
335 !  We need to tune the volume when wavelets are used because, not
336 !  all FFT points are used.
337 !  ucvol_local = (half * dtset%wvl_hgrid) ** 3 * ngfft(1)*ngfft(2)*ngfft(3)
338    ucvol_local = product(wvl_den%denspot%dpbox%hgrids) * real(product(wvl_den%denspot%dpbox%ndims), dp)
339 #endif
340  end if
341 
342 !Compute Hxc potential from density
343  option=1;nkxc=0
344  ipositron=electronpositron_calctype(electronpositron)
345  add_tfw_=.false.;if (present(add_tfw)) add_tfw_=add_tfw
346  non_magnetic_xc=(dtset%usepaw==1.and.mod(abs(dtset%usepawu),10)==4)
347 
348  if (ipositron/=1) then
349 
350    if (dtset%icoulomb == 0) then
351 !    Use the periodic solver to compute Hxc.
352      call hartre(1,gsqcut,dtset%icutcoul,psps%usepaw,mpi_enreg,nfftf,ngfftf,&
353                  &dtset%nkpt,dtset%rcut,rhog,rprimd,dtset%vcutgeo,vhartr)
354      call xcdata_init(xcdata,dtset=dtset)
355      ABI_MALLOC(kxc,(1,nkxc))
356 !    to be adjusted for the call to rhotoxc
357      nk3xc=1
358      if (ipositron==0) then
359        call rhotoxc(energies%e_xc,kxc, &
360 &       mpi_enreg,nfftf,ngfftf,nhat,psps%usepaw,nhatgr,nhatgrdim, &
361 &       nkxc,nk3xc,non_magnetic_xc,n3xccc,option,rhor,rprimd,strsxc, &
362 &       usexcnhat,vxc,vxcavg,xccc3d,xcdata,taur=taur,vhartr=vhartr, &
363 &       vxctau=vxctau_,exc_vdw_out=energies%e_xc_vdw,add_tfw=add_tfw_)
364      else
365        call rhotoxc(energies%e_xc,kxc, &
366 &       mpi_enreg,nfftf,ngfftf,nhat,psps%usepaw,nhatgr,nhatgrdim, &
367 &       nkxc,nk3xc,non_magnetic_xc,n3xccc,option,rhor,rprimd,strsxc, &
368 &       usexcnhat,vxc,vxcavg,xccc3d,xcdata, &
369 &       electronpositron=electronpositron,taur=taur,vhartr=vhartr, &
370 &       vxctau=vxctau_,exc_vdw_out=energies%e_xc_vdw,add_tfw=add_tfw_)
371      end if
372      ABI_FREE(kxc)
373    else if (dtset%usewvl == 0) then
374 !    Use the free boundary solver.
375      call psolver_rhohxc(energies%e_hartree, energies%e_xc, evxc, &
376 &     dtset%icoulomb, dtset%ixc, mpi_enreg, nfftf, &
377 &     ngfftf,nhat,psps%usepaw,&
378 &     dtset%nscforder,dtset%nspden,n3xccc,rhor,rprimd, &
379 &     usexcnhat,psps%usepaw,dtset%usewvl,&
380 &     vhartr, vxc, vxcavg, wvl,wvl_den,wvl_e,&
381 &     xccc3d,dtset%xclevel,dtset%xc_denpos)
382    end if
383  else
384    energies%e_xc=zero
385    call rhohxcpositron(electronpositron,gprimd,kxc,mpi_enreg,nfftf,ngfftf,nhat,nkxc,dtset%nspden,n3xccc,&
386 &   dtset%paral_kgb,rhor,strsxc,ucvol,usexcnhat,psps%usepaw,vhartr,vxc,vxcavg,xccc3d,dtset%xc_denpos)
387  end if
388  if (ipositron/=0) then
389    call dotprod_vn(1,rhor,electronpositron%e_hartree,doti,nfftf,nfftotf,1,1,electronpositron%vha_ep,&
390 &   ucvol,mpi_comm_sphgrid=mpi_comm_sphgrid)
391    vhartr=vhartr+electronpositron%vha_ep
392  end if
393 
394 !Total local potential (for either spin channel) is
395 !Hartree + local psp + Vxc(spin), minus its mean
396 !(Note : this potential should agree with the input vtrial)
397  do ispden=1,min(dtset%nspden,2)
398    do ifft=1,nfftf
399      vtrial(ifft,ispden)=vhartr(ifft)+vpsp(ifft)+vxc(ifft,ispden)
400    end do
401  end do
402  if (dtset%nspden==4) then
403    do ifft=1,nfftf
404      vtrial(ifft,3:4)=vxc(ifft,3:4)
405    end do
406  end if
407 
408 !Add the vzeeman pot in the trial pot
409 !Vzeeman might have to be allocated correctly --> to be checked
410  if (any(abs(dtset%zeemanfield(:))>tol8)) then
411    vzeeman(:) = zero
412    if(dtset%nspden==2)then
413 !TODO: check this against rhotov and setvtr, where the potential is -1/2 and +1/2 for the 2 spin components.
414 ! see comment by SPr in rhotov
415 ! TODO: check this 1/2 factor is for the electron spin magnetic moment.
416      vzeeman(1) = -half*dtset%zeemanfield(3) ! For collinear ispden=1 potential is v_upup
417      vzeeman(2) = +half*dtset%zeemanfield(3) ! For collinear ispden=2 potential is v_dndn
418    end if
419    if(dtset%nspden==4)then
420      vzeeman(1)=-half*dtset%zeemanfield(3)
421      vzeeman(2)= half*dtset%zeemanfield(3)
422      vzeeman(3)=-half*dtset%zeemanfield(1)
423      vzeeman(4)= half*dtset%zeemanfield(2)
424    end if
425    magvec = zero
426    do ispden=1,dtset%nspden
427      do ifft=1,nfftf
428 !TODO: the full cell magnetization will need extra PAW terms, and is certainly calculated elsewhere.
429 !The calculation of the zeeman energy can be moved there
430        magvec(ispden) = magvec(ispden) + rhor(ifft,ispden)
431        vtrial(ifft,ispden)=vtrial(ifft,ispden)+vzeeman(ispden)
432      end do
433    end do
434    if(dtset%nspden==2)then
435      energies%e_zeeman = -half*dtset%zeemanfield(3)*(two*magvec(2)-magvec(1)) !  diff rho = rhoup-rhodown = 2 rhoup - rho
436    else if(dtset%nspden==4)then
437      energies%e_zeeman = -half * (dtset%zeemanfield(1)*magvec(2)& ! x
438 &                                +dtset%zeemanfield(2)*magvec(3)& ! y
439 &                                +dtset%zeemanfield(3)*magvec(4)) ! z
440    end if
441  end if
442 
443 !Compute the constrained potential for the magnetic moments
444 !NOTE: here in energy.F90 rhor and vtrial are given on nfftf grid
445 !the values coming from mag_penalty may be different from those calculated
446 !calling mag_penalty with nfft in setvtr and rhotov
447  if (dtset%magconon==1.or.dtset%magconon==2) then
448    ABI_MALLOC(v_constr_dft_r, (nfftf,dtset%nspden))
449    v_constr_dft_r = zero
450    call mag_penalty(constrained_dft,mpi_enreg,rhor,v_constr_dft_r,xred)
451 !   call mag_penalty(dtset%natom, dtset%spinat, dtset%nspden, dtset%magconon, dtset%magcon_lambda, rprimd, &
452 !&   mpi_enreg, nfftf, dtset%ngfft, dtset%ntypat, dtset%ratsph, rhor, &
453 !&   dtset%typat, v_constr_dft_r, xred)
454    do ispden=1,dtset%nspden
455      do ifft=1,nfftf
456        vtrial(ifft,ispden)=vtrial(ifft,ispden)+v_constr_dft_r(ifft,ispden)
457      end do
458    end do
459    ABI_FREE(v_constr_dft_r)
460  end if
461 
462 !Compute Hartree energy - use up+down rhor
463  if (ipositron/=1) then
464    call dotprod_vn(1,rhor,energies%e_hartree ,doti,nfftf,nfftotf,1,1,vhartr,&
465 &   ucvol_local,mpi_comm_sphgrid=mpi_comm_sphgrid)
466    if (ipositron==0) energies%e_hartree=half*energies%e_hartree
467    if (ipositron==2) energies%e_hartree = half *(energies%e_hartree-electronpositron%e_hartree)
468  else
469    energies%e_hartree=zero
470  end if
471 
472 !Compute local psp energy - use up+down rhor
473  if (optene/=1) then
474    call dotprod_vn(1,rhor,energies%e_localpsp,doti,nfftf,nfftotf,1,1,vpsp,&
475 &   ucvol_local,mpi_comm_sphgrid=mpi_comm_sphgrid)
476  end if
477 
478 !Compute DC-xc energy - use up+down rhor
479  if (optene>0) then
480    if (ipositron/=1) then
481      if (psps%usepaw==0.or.usexcnhat/=0) then
482        call dotprod_vn(1,rhor,energies%e_xcdc,doti,nfftf,nfftotf,dtset%nspden,1,vxc,&
483 &       ucvol_local,mpi_comm_sphgrid=mpi_comm_sphgrid)
484        if (with_vxctau)then
485          call dotprod_vn(1,taur,e_xcdc_vxctau,doti,nfftf,nfftotf,dtset%nspden,1,vxctau(:,:,1),&
486 &         ucvol_local,mpi_comm_sphgrid=mpi_comm_sphgrid)
487          energies%e_xcdc=energies%e_xcdc+e_xcdc_vxctau
488        end if
489      else
490        ABI_MALLOC(rhowfr,(nfftf,dtset%nspden))
491        rhowfr=rhor-nhat
492        call dotprod_vn(1,rhowfr,energies%e_xcdc,doti,nfftf,nfftotf,dtset%nspden,1,vxc,&
493 &       ucvol_local,mpi_comm_sphgrid=mpi_comm_sphgrid)
494        ABI_FREE(rhowfr)
495      end if
496      if (ipositron==2) energies%e_xcdc=energies%e_xcdc-electronpositron%e_xcdc
497    else
498      energies%e_xcdc=zero
499    end if
500  end if
501 
502  energies%e_eigenvalues=zero
503  energies%e_kinetic=zero
504  energies%e_nlpsp_vfock=zero
505  energies%e_fock0=zero
506  bdtot_index=0
507  icg=0
508 
509  n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3)
510  n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6)
511 
512 !============================================
513 !==== Initialize most of the Hamiltonian ====
514 !============================================
515 !1) Allocate all arrays and initialize quantities that do not depend on k and spin.
516 !2) Perform the setup needed for the non-local factors:
517 !* Norm-conserving: Constant kleimann-Bylander energies are copied from psps to gs_hamk.
518 !* PAW: Initialize the overlap coefficients and allocate the Dij coefficients.
519 
520  call init_hamiltonian(gs_hamk,psps,pawtab,dtset%nspinor,dtset%nsppol,dtset%nspden,&
521 & dtset%natom,dtset%typat,xred,dtset%nfft,dtset%mgfft,dtset%ngfft,rprimd,dtset%nloalg,&
522 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,&
523 & paw_ij=paw_ij,ph1d=ph1d,electronpositron=electronpositron,&
524 & nucdipmom=dtset%nucdipmom,use_gpu_cuda=dtset%use_gpu_cuda)
525 
526  ABI_MALLOC(vlocal,(n4,n5,n6,gs_hamk%nvloc))
527  if (with_vxctau) then
528    ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4))
529  end if
530 
531 !PAW: additional initializations
532  if (psps%usepaw==1) then
533    ABI_MALLOC(cwaveprj,(dtset%natom,my_nspinor))
534    call pawcprj_alloc(cwaveprj,0,gs_hamk%dimcprj)
535    if (mpi_enreg%paral_spinor==1) then
536      ABI_MALLOC(cwaveprj_gat,(dtset%natom,dtset%nspinor))
537      call pawcprj_alloc(cwaveprj_gat,0,gs_hamk%dimcprj)
538    else
539      cwaveprj_gat => cwaveprj
540    end if
541    if (paral_atom) then
542      ABI_MALLOC(pawrhoij_unsym,(dtset%natom))
543      call pawrhoij_inquire_dim(cplex_rhoij=cplex_rhoij,nspden_rhoij=nspden_rhoij,&
544 &                nspden=dtset%nspden,spnorb=dtset%pawspnorb,cpxocc=dtset%pawcpxocc)
545      call pawrhoij_alloc(pawrhoij_unsym,cplex_rhoij,nspden_rhoij,dtset%nspinor,&
546 &     dtset%nsppol,dtset%typat,pawtab=pawtab,use_rhoijp=0,use_rhoij_=1)
547    else
548      pawrhoij_unsym => pawrhoij
549      call pawrhoij_init_unpacked(pawrhoij_unsym)
550    end if
551    option_rhoij=1
552    usetimerev=(dtset%kptopt>0.and.dtset%kptopt<3)
553  else
554    ABI_MALLOC(cwaveprj,(0,0))
555  end if
556 
557 !LOOP OVER SPINS
558  do isppol=1,dtset%nsppol
559    ikg=0
560 
561    ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin.
562    ! Also take into account the spin.
563 
564    call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
565                                  dtset%nspden, gs_hamk%nvloc, 1, pawfgr, mpi_enreg, vtrial, vlocal)
566    call gs_hamk%load_spin(isppol,vlocal=vlocal,with_nonlocal=.true.)
567 
568    if (with_vxctau) then
569      call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, &
570                                    dtset%nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal)
571      call gs_hamk%load_spin(isppol, vxctaulocal=vxctaulocal)
572    end if
573 
574 !  Loop over k points
575    do ikpt=1,dtset%nkpt
576      nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt)
577      istwf_k=dtset%istwfk(ikpt)
578      npw_k=npwarr(ikpt)
579 
580 !    Skip this k-point if not the proper processor
581      if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then
582        resid(1+bdtot_index : nband_k+bdtot_index) = zero
583        bdtot_index=bdtot_index+nband_k
584        cycle
585      end if
586 
587 !    Parallelism over FFT and/or bands: define sizes and tabs
588      if (mpi_enreg%paral_kgb==1) then
589        my_ikpt=mpi_enreg%my_kpttab(ikpt)
590        nblockbd=nband_k/(mpi_enreg%nproc_band*mpi_enreg%bandpp)
591        my_bandfft_kpt => bandfft_kpt(my_ikpt)
592      else
593        my_ikpt=ikpt
594        nblockbd=nband_k/mpi_enreg%bandpp
595        !if (nband_k/=nblockbd*mpi_enreg%nproc_fft) nblockbd=nblockbd+1
596      end if
597      blocksize=nband_k/nblockbd
598 
599      ABI_MALLOC(eig_k,(nband_k))
600      ABI_MALLOC(occ_k,(nband_k))
601      ABI_MALLOC(resid_k,(nband_k))
602      ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize))
603      resid_k(:)=zero
604      kpoint(:)=dtset%kptns(:,ikpt)
605      occ_k(:)=occ(1+bdtot_index:nband_k+bdtot_index)
606      eig_k(:)=eigen(1+bdtot_index:nband_k+bdtot_index)
607      if (minval(eig_k)>1.d100) eig_k=zero
608      eeigk=zero ; ekk=zero ; enlk=zero
609 
610      ABI_MALLOC(kg_k,(3,npw_k))
611      kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
612 
613      ABI_MALLOC(ylm_k,(npw_k,psps%mpsang*psps%mpsang*psps%useylm))
614      if (psps%useylm==1) then
615        do ilm=1,psps%mpsang*psps%mpsang
616          ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm)
617        end do
618      end if
619 
620 !    Compute kinetic energy
621      ABI_MALLOC(kinpw,(npw_k))
622      call mkkin(dtset%ecut,dtset%ecutsm,dtset%effmass_free,gmet,kg_k,kinpw,kpoint,npw_k,0,0)
623 
624 !    Compute kinetic energy of each band
625      do iblock=1,nblockbd
626        do iblocksize=1,blocksize
627          iband=(iblock-1)*blocksize+iblocksize
628          if (abs(occ_k(iband))>tol8) then
629            cwavef(1:2,1:npw_k*my_nspinor)= &
630 &           cg(:,1+(iband-1)*npw_k*my_nspinor+icg:iband*npw_k*my_nspinor+icg)
631            call meanvalue_g(dotr,kinpw,0,istwf_k,mpi_enreg,npw_k,my_nspinor,cwavef,cwavef,0)
632            energies%e_kinetic=energies%e_kinetic+dtset%wtk(ikpt)*occ_k(iband)*dotr
633          end if
634        end do
635      end do
636 
637 !    Compute nonlocal form factors ffnl at all (k+G):
638      ider=0;dimffnl=1;nkpg=0
639      ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,psps%ntypat))
640      call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,&
641 &     gmet,gprimd,ider,ider,psps%indlmn,kg_k,kpg_dum,kpoint,psps%lmnmax,&
642 &     psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,&
643 &     npw_k,psps%ntypat,psps%pspso,psps%qgrid_ff,rmet,&
644 &     psps%usepaw,psps%useylm,ylm_k,ylmgr_dum)
645 
646 !    Load k-dependent part in the Hamiltonian datastructure
647 !     - Compute 3D phase factors
648 !     - Prepare various tabs in case of band-FFT parallelism
649 !     - Load k-dependent quantities in the Hamiltonian
650      ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk))
651      call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,&
652 &     kinpw_k=kinpw,kg_k=kg_k,ffnl_k=ffnl,ph3d_k=ph3d,&
653 &     compute_ph3d=.true.,compute_gbound=(mpi_enreg%paral_kgb/=1))
654 
655 !    Load band-FFT tabs (transposed k-dependent arrays)
656      if (mpi_enreg%paral_kgb==1) then
657        call bandfft_kpt_savetabs(my_bandfft_kpt,ffnl=ffnl_sav,ph3d=ph3d_sav,kinpw=kinpw_sav)
658        call prep_bandfft_tabs(gs_hamk,ikpt,dtset%mkmem,mpi_enreg)
659        call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, &
660 &       gbound_k =my_bandfft_kpt%gbound, &
661 &       kinpw_k  =my_bandfft_kpt%kinpw_gather, &
662 &       kg_k     =my_bandfft_kpt%kg_k_gather, &
663 &       ffnl_k   =my_bandfft_kpt%ffnl_gather, &
664 &       ph3d_k   =my_bandfft_kpt%ph3d_gather)
665      end if
666 
667 !    Setup gemm_nonlop
668      if (gemm_nonlop_use_gemm) then
669        gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt
670        call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, &
671 &       gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, gs_hamk%ucvol, gs_hamk%ffnl_k,&
672 &       gs_hamk%ph3d_k, gs_hamk%kpt_k, gs_hamk%kg_k, gs_hamk%kpg_k)
673      end if
674 
675 #if defined HAVE_GPU_CUDA
676      if (dtset%use_gpu_cuda==1) then
677        call gpu_update_ffnl_ph3d(ph3d,size(ph3d),ffnl,size(ffnl))
678      end if
679 #endif
680 
681 !    Compute nonlocal psp energy (NCPP) or Rhoij (PAW)
682      ABI_MALLOC(enlout,(blocksize))
683      ABI_MALLOC(occblock,(blocksize))
684      do iblock=1,nblockbd
685        iband=(iblock-1)*blocksize+1;iband_last=min(iband+blocksize-1,nband_k)
686        if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,iband,iband_last,isppol,me_distrb)) cycle
687 
688 !      Select occupied bands
689        occblock(:)=occ(1+(iblock-1)*blocksize+bdtot_index:iblock*blocksize+bdtot_index)
690        if(abs(maxval(occblock))>=tol8 ) then
691          cwavef(:,1:npw_k*my_nspinor*blocksize)=&
692 &         cg(:,1+(iblock-1)*npw_k*my_nspinor*blocksize+icg:iblock*npw_k*my_nspinor*blocksize+icg)
693 
694          choice=1-gs_hamk%usepaw ; signs=1 ; idir=0 ; nnlout=blocksize
695          paw_opt=gs_hamk%usepaw;cpopt=gs_hamk%usepaw-1
696 
697          if (mpi_enreg%paral_kgb/=1) then
698            tim_nonlop=3
699            call nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir,(/zero/),mpi_enreg,blocksize,nnlout,&
700 &           paw_opt,signs,nonlop_out,tim_nonlop,cwavef,cwavef)
701          else
702            tim_nonlop=14
703            call prep_nonlop(choice,cpopt,cwaveprj,enlout,gs_hamk,idir,(/zero/),blocksize,&
704 &           mpi_enreg,nnlout,paw_opt,signs,nonlop_out,tim_nonlop,cwavef,cwavef)
705          end if
706 
707          do iblocksize=1,blocksize
708            iband=(iblock-1)*blocksize+iblocksize
709            energies%e_eigenvalues=energies%e_eigenvalues+dtset%wtk(ikpt)*occ_k(iband)*eig_k(iband)
710 !          WARNING : the Fock contribution is NOT computed !!!
711            energies%e_nlpsp_vfock=energies%e_nlpsp_vfock+dtset%wtk(ikpt)*occ_k(iband)*enlout(iblocksize)
712          end do
713 
714 !        PAW: accumulate rhoij
715          if (psps%usepaw==1) then
716            cplex=merge(1,2,istwf_k>1)
717            if (mpi_enreg%paral_spinor==1) then
718              call pawcprj_gather_spin(cwaveprj,cwaveprj_gat,dtset%natom,1,my_nspinor,dtset%nspinor,&
719 &             mpi_enreg%comm_spinor,ierr)
720              call pawaccrhoij(gs_hamk%atindx,cplex,cwaveprj_gat,cwaveprj_gat,0,isppol,dtset%natom,dtset%natom,&
721 &             dtset%nspinor,occ_k(iband),option_rhoij,pawrhoij_unsym,usetimerev,dtset%wtk(ikpt))
722            else
723              call pawaccrhoij(gs_hamk%atindx,cplex,cwaveprj,cwaveprj,0,isppol,dtset%natom,dtset%natom,&
724 &             dtset%nspinor,occ_k(iband),option_rhoij,pawrhoij_unsym,usetimerev,dtset%wtk(ikpt))
725            end if
726          end if
727 
728 !        End loop on bands
729        end if
730      end do
731 
732 !    Compute residual of each band (for informative purposes)
733      call mkresi(cg,eig_k,gs_hamk,icg,ikpt,isppol,mcg,mpi_enreg,nband_k,dtset%prtvol,resid_k)
734      resid(1+bdtot_index : nband_k+bdtot_index) = resid_k(:)
735 
736 !    Restore the bandfft tabs
737      if (mpi_enreg%paral_kgb==1) then
738        call bandfft_kpt_restoretabs(my_bandfft_kpt,ffnl=ffnl_sav,ph3d=ph3d_sav,kinpw=kinpw_sav)
739      end if
740 
741 !    Incremente indexes
742      bdtot_index=bdtot_index+nband_k
743      if (dtset%mkmem/=0) then
744        icg=icg+npw_k*my_nspinor*nband_k
745        ikg=ikg+npw_k
746      end if
747 
748 #if defined HAVE_GPU_CUDA
749      if(dtset%use_gpu_cuda==1) then
750        call gpu_finalize_ffnl_ph3d()
751      end if
752 #endif
753 
754      ABI_FREE(eig_k)
755      ABI_FREE(occ_k)
756      ABI_FREE(resid_k)
757      ABI_FREE(enlout)
758      ABI_FREE(occblock)
759      ABI_FREE(ffnl)
760      ABI_FREE(kinpw)
761      ABI_FREE(ph3d)
762      ABI_FREE(cwavef)
763      ABI_FREE(kg_k)
764      ABI_FREE(ylm_k)
765 
766 !    End loops on isppol and ikpt
767    end do
768  end do
769 
770  call gs_hamk%free()
771 
772  if(xmpi_paral==1)then
773 !  Accumulate enl eeig and ek on all proc.
774    ABI_MALLOC(buffer,(3+dtset%mband*dtset%nkpt*dtset%nsppol))
775    buffer(1)=energies%e_nlpsp_vfock ; buffer(2)=energies%e_kinetic ; buffer(3)=energies%e_eigenvalues
776    do iresid=1,dtset%mband*dtset%nkpt*dtset%nsppol
777      buffer(iresid+3)=resid(iresid)
778    end do
779    call timab(48,1,tsec)
780    call xmpi_sum(buffer,spaceComm,ierr)
781    call timab(48,2,tsec)
782    energies%e_nlpsp_vfock=buffer(1) ; energies%e_kinetic=buffer(2) ; energies%e_eigenvalues=buffer(3)
783    do iresid=1,dtset%mband*dtset%nkpt*dtset%nsppol
784      resid(iresid)=buffer(iresid+3)
785    end do
786    ABI_FREE(buffer)
787 !  Accumulate rhoij_
788    if (psps%usepaw==1) then
789      call pawrhoij_mpisum_unpacked(pawrhoij_unsym,spaceComm,comm2=mpi_enreg%comm_band)
790    end if
791  end if
792 
793 !Compute total (free) energy
794  if (optene==0.or.optene==2) then
795    etotal = energies%e_kinetic + energies%e_hartree + energies%e_xc + &
796 !&   energies%e_nlpsp_vfock - energies%e_fock0 +
797 !   Should compute the e_fock0 energy !! Also, the Fock contribution to e_nlpsp_vfock
798 &   energies%e_nlpsp_vfock + energies%e_localpsp + energies%e_corepsp
799    if (psps%usepaw==1) etotal=etotal + energies%e_paw
800  else if (optene==1.or.optene==3) then
801    etotal = energies%e_eigenvalues - energies%e_hartree + energies%e_xc - &
802 &   energies%e_xcdc + energies%e_corepsp - energies%e_corepspdc
803    if (psps%usepaw==1) etotal=etotal + energies%e_pawdc
804  end if
805  etotal = etotal + energies%e_ewald + energies%e_chempot + energies%e_vdw_dftd
806 !Add the contribution of extfpmd to the entropy
807  if(associated(extfpmd)) then
808    energies%entropy=energies%entropy+extfpmd%entropy
809    energies%e_extfpmd=extfpmd%e_kinetic
810    energies%edc_extfpmd=extfpmd%edc_kinetic
811    if(optene==0.or.optene==2) etotal=etotal+energies%e_extfpmd
812    if(optene==1.or.optene==3) etotal=etotal+energies%edc_extfpmd
813  end if
814  if(dtset%occopt>=3 .and. dtset%occopt<=8) etotal=etotal-dtset%tsmear*energies%entropy
815 
816 !Additional stuff for electron-positron
817  if (dtset%positron/=0) then
818    if (ipositron==0) then
819      energies%e_electronpositron  =zero
820      energies%edc_electronpositron=zero
821    else
822      energies%e_electronpositron  =electronpositron%e_hartree+electronpositron%e_xc
823      energies%edc_electronpositron=electronpositron%e_hartree+electronpositron%e_xcdc
824      if (psps%usepaw==1) then
825        energies%e_electronpositron  =energies%e_electronpositron  +electronpositron%e_paw
826        energies%edc_electronpositron=energies%edc_electronpositron+electronpositron%e_pawdc
827      end if
828    end if
829    if (optene==0.or.optene==2) electronpositron%e0=etotal
830    if (optene==1.or.optene==3) electronpositron%e0=etotal-energies%edc_electronpositron
831    etotal=electronpositron%e0+energies%e0_electronpositron+energies%e_electronpositron
832  end if
833 
834 !Compute new charge density based on incoming wf
835 !Keep rhor and rhog intact for later use e.g. in stress. (? MT 08-12-2008: is that true now ?)
836 !=== Norm-conserving psps: simply compute rho from WFs
837  !paw_dmft%use_dmft=0 ! dmft not used here
838  !paw_dmft%use_sc_dmft=0 ! dmft not used here
839  if (psps%usepaw==0) then
840    tim_mkrho=3
841    call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,&
842 &   npwarr,occ,paw_dmft,phnons,rhog,rhor,rprimd,tim_mkrho,ucvol,wvl_den,wfs,&
843 &   extfpmd=extfpmd)
844    if(dtset%usekden==1)then
845      call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,&
846 &     npwarr,occ,paw_dmft,phnons,taug,taur,rprimd,tim_mkrho,ucvol,wvl_den,wfs,option=1)
847    end if
848  else
849 !  === PAW case: symmetrize rhoij and add compensation charge density
850    tim_mkrho=3;option=1;choice=1
851    call pawrhoij_symrhoij(pawrhoij,pawrhoij_unsym,choice,gprimd,indsym,0,dtset%natom,dtset%nsym,&
852 &   dtset%ntypat,option,pawang,dtset%pawprtvol,pawtab,rprimd,dtset%symafm,symrec,dtset%typat,&
853 &   comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab)
854    call pawrhoij_free_unpacked(pawrhoij_unsym)
855    if (paral_atom) then
856      call pawrhoij_free(pawrhoij_unsym)
857      ABI_FREE(pawrhoij_unsym)
858    end if
859    ider=0;izero=0;cplex=1;ipert=0;idir=0;qpt(:)=zero
860    call pawmknhat(compch_fft,cplex,ider,idir,ipert,izero,gprimd,&
861 &   my_natom,dtset%natom,nfftf,ngfftf,&
862 &   0,dtset%nspden,dtset%ntypat,pawang,pawfgrtab,rhodum,nhat,pawrhoij,pawrhoij,&
863 &   pawtab,qpt,rprimd,ucvol_local,dtset%usewvl,xred,&
864 &   comm_fft=mpi_enreg%comm_fft,paral_kgb=dtset%paral_kgb,me_g0=mpi_enreg%me_g0,&
865 &   comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,&
866 &   distribfft=mpi_enreg%distribfft,mpi_comm_wvl=mpi_enreg%comm_wvl)
867 
868    ABI_MALLOC(rhowfr,(dtset%nfft,dtset%nspden))
869    ABI_MALLOC(rhowfg,(2,dtset%nfft))
870    rhowfr(:,:)=zero
871    call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,&
872 &   npwarr,occ,paw_dmft,phnons,rhowfg,rhowfr,rprimd,tim_mkrho,ucvol_local,wvl_den,wfs,&
873 &   extfpmd=extfpmd)
874 
875    call transgrid(1,mpi_enreg,dtset%nspden,+1,1,0,dtset%paral_kgb,pawfgr,rhowfg,rhodum,rhowfr,rhor)
876    rhor(:,:)=rhor(:,:)+nhat(:,:)
877    call fourdp(1,rhog,rhor(:,1),-1,mpi_enreg,nfftf,1,ngfftf,0)
878    if(dtset%usekden==1)then
879      call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,&
880 &               rhowfg,rhowfr,rprimd,tim_mkrho,ucvol,wvl_den,wfs,option=1)
881      call transgrid(1,mpi_enreg,dtset%nspden,+1,1,1,dtset%paral_kgb,pawfgr,rhowfg,taug,rhowfr,taur)
882    end if
883    ABI_FREE(rhowfr)
884    ABI_FREE(rhowfg)
885  end if
886 
887  ABI_COMMENT('New density rho(r) made from input wfs')
888 
889  call timab(59,2,tsec)
890 
891  ABI_FREE(vlocal)
892  if (with_vxctau) then
893    ABI_FREE(vxctaulocal)
894  end if
895 
896  if (psps%usepaw==1) then
897    call pawcprj_free(cwaveprj)
898    ABI_FREE(cwaveprj)
899    if (mpi_enreg%paral_spinor==1) then
900      call pawcprj_free(cwaveprj_gat)
901      ABI_FREE(cwaveprj_gat)
902    else
903      nullify(cwaveprj_gat)
904    end if
905  end if
906 
907  DBG_EXIT("COLL")
908 
909 end subroutine energy

ABINIT/m_dft_energy [ Modules ]

[ Top ] [ Modules ]

NAME

  m_dft_energy

FUNCTION

COPYRIGHT

  Copyright (C) 1998-2022 ABINIT group (DCA, XG, GMR, AR, MB, MT, EB)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_dft_energy
23 
24  use defs_basis
25  use defs_wvltypes
26  use m_abicore
27  use m_hamiltonian
28  use m_errors
29  use m_xmpi
30  use m_gemm_nonlop
31  use m_xcdata
32  use m_cgtools
33  use m_dtset
34  use m_extfpmd
35 
36  use defs_datatypes, only : pseudopotential_type
37  use defs_abitypes,      only : MPI_type
38  use m_time,             only : timab
39  use m_geometry,         only : metric
40  use m_kg,               only : mkkin
41  use m_energies,         only : energies_type
42  use m_electronpositron, only : electronpositron_type, electronpositron_calctype, rhohxcpositron
43  use m_bandfft_kpt,      only : bandfft_kpt, bandfft_kpt_type, prep_bandfft_tabs, &
44                                 bandfft_kpt_savetabs, bandfft_kpt_restoretabs
45  use m_pawang,           only : pawang_type
46  use m_pawtab,           only : pawtab_type
47  use m_paw_ij,           only : paw_ij_type
48  use m_pawfgrtab,        only : pawfgrtab_type
49  use m_pawrhoij,         only : pawrhoij_type, pawrhoij_alloc, pawrhoij_free, pawrhoij_init_unpacked, &
50                                 pawrhoij_mpisum_unpacked, pawrhoij_free_unpacked, pawrhoij_inquire_dim, &
51 &                               pawrhoij_symrhoij
52  use m_pawcprj,          only : pawcprj_type,pawcprj_alloc,pawcprj_free,pawcprj_gather_spin
53  use m_pawfgr,           only : pawfgr_type
54  use m_paw_dmft,         only : paw_dmft_type
55  use m_paw_nhat,         only : pawmknhat
56  use m_paw_occupancies,  only : pawaccrhoij
57  use m_fft,              only : fftpac, fourdp
58  use m_spacepar,         only : meanvalue_g, hartre
59  use m_dens,             only : constrained_dft_t,mag_penalty
60  use m_mkrho,            only : mkrho
61  use m_mkffnl,           only : mkffnl
62  use m_getghc,           only : getghc
63  use m_rhotoxc,          only : rhotoxc
64  use m_mpinfo,           only : proc_distrb_cycle
65  use m_nonlop,           only : nonlop
66  use m_fourier_interpol, only : transgrid
67  use m_prep_kgb,         only : prep_getghc, prep_nonlop
68  use m_psolver,          only : psolver_rhohxc
69 
70 #if defined HAVE_GPU_CUDA
71  use m_manage_cuda
72 #endif
73 
74  implicit none
75 
76  private

ABINIT/mkresi [ Functions ]

[ Top ] [ Functions ]

NAME

 mkresi

FUNCTION

 Make residuals from knowledge of wf in G space and application of Hamiltonian.

INPUTS

  cg(2,mcg)=<G|Cnk>=Fourier coefficients of wavefunction
  gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k
  icg=shift to be applied on the location of data in the array cg
  ikpt=index of k-point
  isppol=index of spin
  mcg=second dimension of the cg array
  mpi_enreg=information about MPI parallelization
  nband=number of bands involved in subspace matrix.
  npw=number of planewaves in basis sphere at this k point.
  prtvol=control print volume and debugging output
  usepaw= 0 for non paw calculation; =1 for paw calculation

OUTPUT

  eig_k(nband)$= \langle C_n \mid H \mid C_n \rangle $ for each band.
  resid_k(nband)=residual for each band
   $= \langle C_n \mid H H \mid C_n \rangle- \langle C_n \mid H \mid C_n \rangle^2 $.

SOURCE

 939 subroutine mkresi(cg,eig_k,gs_hamk,icg,ikpt,isppol,mcg,mpi_enreg,nband,prtvol,resid_k)
 940 
 941 !Arguments ------------------------------------
 942 !scalars
 943  integer,intent(in) :: icg,ikpt,isppol,mcg,nband,prtvol
 944  type(MPI_type),intent(inout) :: mpi_enreg
 945  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
 946 !arrays
 947  real(dp),intent(in) :: cg(2,mcg)
 948  real(dp),intent(out) :: eig_k(nband),resid_k(nband)
 949 
 950 !Local variables-------------------------------
 951 !scalars
 952  integer,parameter :: tim_getghc=3
 953  integer :: blocksize,cpopt,iband,iband_last,iblock,iblocksize,ipw,ipw_shift
 954  integer :: my_nspinor,nblockbd,npw_k
 955  real(dp) :: doti,dotr
 956 !arrays
 957  real(dp) :: tsec(2)
 958  real(dp),allocatable,target :: cwavef(:,:),ghc(:,:),gsc(:,:),gvnlxc(:,:)
 959  real(dp), ABI_CONTIGUOUS pointer :: cwavef_ptr(:,:),ghc_ptr(:,:),gsc_ptr(:,:)
 960  type(pawcprj_type) :: cwaveprj(1,1)
 961 
 962 ! *************************************************************************
 963 
 964 !Keep track of total time spent in mkresi
 965  call timab(13,1,tsec)
 966 
 967 !Parallelism over FFT and/or bands: define sizes and tabs
 968  my_nspinor=max(1,gs_hamk%nspinor/mpi_enreg%nproc_spinor)
 969  if (mpi_enreg%paral_kgb==1) then
 970    nblockbd=nband/(mpi_enreg%nproc_band*mpi_enreg%bandpp)
 971  else
 972    nblockbd=nband/mpi_enreg%nproc_fft
 973    if (nband/=nblockbd*mpi_enreg%nproc_fft) nblockbd=nblockbd+1
 974  end if
 975  blocksize=nband/nblockbd
 976 
 977  npw_k=gs_hamk%npw_k
 978  ABI_MALLOC(cwavef,(2,npw_k*my_nspinor))
 979  ABI_MALLOC(ghc,(2,npw_k*my_nspinor))
 980  ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor))
 981  if (gs_hamk%usepaw==1)  then
 982    ABI_MALLOC(gsc,(2,npw_k*my_nspinor))
 983  else
 984    ABI_MALLOC(gsc,(0,0))
 985  end if
 986 
 987 !Loop over (blocks of) bands
 988  do iblock=1,nblockbd
 989    iband=(iblock-1)*blocksize+1;iband_last=min(iband+blocksize-1,nband)
 990    if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,iband,iband_last,isppol,mpi_enreg%me_kpt)) cycle
 991 
 992 !  Load |Cn>
 993    ipw_shift=(iblock-1)*npw_k*my_nspinor*blocksize+icg
 994 !$OMP PARALLEL DO
 995    do ipw=1,npw_k*my_nspinor*blocksize
 996      cwavef(1,ipw)=cg(1,ipw+ipw_shift)
 997      cwavef(2,ipw)=cg(2,ipw+ipw_shift)
 998    end do
 999 
1000 !  Compute H|Cn>
1001    cpopt=-1
1002    if (mpi_enreg%paral_kgb==0) then
1003      call getghc(cpopt,cwavef,cwaveprj,ghc,gsc,gs_hamk,gvnlxc,zero,mpi_enreg,1,&
1004 &     prtvol,gs_hamk%usepaw,tim_getghc,0)
1005    else
1006      call prep_getghc(cwavef,gs_hamk,gvnlxc,ghc,gsc,zero,nband,mpi_enreg,&
1007 &     prtvol,gs_hamk%usepaw,cpopt,cwaveprj,&
1008 &     already_transposed=.false.)
1009    end if
1010 
1011    !call cg_get_eigens(usepaw, istwf_k, npwsp, nband, cg, ghc, gsc, eig, me_g0, comm_bsf)
1012    !call cg_get_residvecs(usepaw, npwsp, nband, eig, cg, ghc, gsc, gwork)
1013    !call cg_norm2g(istwf_k, npwsp, nband, gwork, resid, me_g0, comm_bsf)
1014    ! MG: Communicators are wrongi if paral_kgb. One should use mpi_enreg%comm_bandspinorfft
1015 
1016 !  Compute the residual, <Cn|(H-<Cn|H|Cn>)**2|Cn>:
1017    do iblocksize=1,blocksize
1018      iband=(iblock-1)*blocksize+iblocksize
1019      ipw_shift=(iblocksize-1)*npw_k*my_nspinor
1020      cwavef_ptr => cwavef(:,1+ipw_shift:npw_k*my_nspinor+ipw_shift)
1021      ghc_ptr    => ghc   (:,1+ipw_shift:npw_k*my_nspinor+ipw_shift)
1022 
1023 !    First get eigenvalue <Cn|H|Cn>:
1024      call dotprod_g(dotr,doti,gs_hamk%istwf_k,npw_k*my_nspinor,1,cwavef_ptr,ghc_ptr,&
1025 &     mpi_enreg%me_g0,mpi_enreg%comm_spinorfft)
1026      eig_k(iband)=dotr
1027 
1028 !    Next need <G|(H-S<Cn|H|Cn>)|Cn> (in ghc):
1029      if (gs_hamk%usepaw==0) then
1030 !$OMP PARALLEL DO PRIVATE(ipw) SHARED(cwavef_ptr,ghc_ptr,eig_k,iband,npw_k,my_nspinor)
1031        do ipw=1,npw_k*my_nspinor
1032          ghc_ptr(1,ipw)=ghc_ptr(1,ipw)-eig_k(iband)*cwavef_ptr(1,ipw)
1033          ghc_ptr(2,ipw)=ghc_ptr(2,ipw)-eig_k(iband)*cwavef_ptr(2,ipw)
1034        end do
1035      else
1036        gsc_ptr => gsc(:,1+ipw_shift:npw_k*my_nspinor+ipw_shift)
1037 !$OMP PARALLEL DO PRIVATE(ipw) SHARED(gsc_ptr,ghc_ptr,eig_k,iband,npw_k,my_nspinor)
1038        do ipw=1,npw_k*my_nspinor
1039          ghc_ptr(1,ipw)=ghc_ptr(1,ipw)-eig_k(iband)*gsc_ptr(1,ipw)
1040          ghc_ptr(2,ipw)=ghc_ptr(2,ipw)-eig_k(iband)*gsc_ptr(2,ipw)
1041        end do
1042      end if
1043 
1044 !    Then simply square the result:
1045      call sqnorm_g(dotr,gs_hamk%istwf_k,npw_k*my_nspinor,ghc_ptr,&
1046 &     mpi_enreg%me_g0,mpi_enreg%comm_fft)
1047      resid_k(iband)=dotr
1048 
1049    end do ! iblocksize
1050 
1051  end do ! iblock
1052 
1053  ABI_FREE(cwavef)
1054  ABI_FREE(ghc)
1055  ABI_FREE(gvnlxc)
1056  ABI_FREE(gsc)
1057 
1058  call timab(13,2,tsec)
1059 
1060 end subroutine mkresi