TABLE OF CONTENTS
- ABINIT/cgq_builder
- ABINIT/e_eigen
- ABINIT/m_vtorho
- ABINIT/vtorho
- ABINIT/wvl_comm_eigen
- ABINIT/wvl_nscf_loop
- ABINIT/wvl_nscf_loop_bigdft
- ABINIT/wvl_occ
- ABINIT/wvl_occ_bigdft
ABINIT/cgq_builder [ Functions ]
NAME
cgq_builder
FUNCTION
This routine locates cgq for efield calculations, especially for parallel case
INPUTS
berryflag = logical flag determining use of electric field variables cg(2,mcg)=planewave coefficients of wavefunctions. dtset <type(dataset_type)>=all input variables for this dataset ikpt=index of current k kpt ikpt_loc=index of k point on current processor (see vtorho.F90) isspol=value of spin polarization currently treated me_distrb=current value from spaceComm_distrb (see vtorho.F90) mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol mcgq=size of cgq array (see vtorho.F90) mkgq=size of pwnsfacq array (see vtorho.F90) my_nspinor=nspinor value determined by current // set up nband_k=number of bands at each k point nproc_distrb=nproc from spaceComm_distrb (see vtorho.F90) npwarr(nkpt)=number of planewaves in basis at this k point pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) pwind_alloc = first dimension of pwind spaceComm_distrb=comm_cell from mpi_enreg
OUTPUT
cgq(2,mcgq)=planewave coefficients of wavenfunctions adjacent to cg at ikpt pwnsfacq(2,mkgq)=phase factors for non-symmorphic translations for cg's adjacent to cg(ikpt)
SIDE EFFECTS
Input/Output dtefield <type(efield_type)> = efield variables mpi_enreg=information about MPI parallelization
TODO
NOTES
SOURCE
2523 subroutine cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,& 2524 & me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,& 2525 & npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb) 2526 2527 !Arguments ------------------------------------ 2528 integer,intent(in) :: ikpt,ikpt_loc,isppol,me_distrb,mcg,mcgq,mkgq,my_nspinor,nband_k 2529 integer,intent(in) :: nproc_distrb,pwind_alloc,spaceComm_distrb 2530 logical,intent(in) :: berryflag 2531 type(dataset_type), intent(in) :: dtset 2532 type(efield_type), intent(inout) :: dtefield 2533 type(MPI_type), intent(in) :: mpi_enreg 2534 !arrays 2535 integer,intent(in) :: npwarr(dtset%nkpt) 2536 real(dp),intent(in) :: cg(2,mcg),pwnsfac(2,pwind_alloc) 2537 real(dp),intent(out) :: cgq(2,mcgq),pwnsfacq(2,mkgq) 2538 2539 !Local variables ------------------------- 2540 !scalars 2541 integer :: count,count1,icg1,icg2,dest,his_source 2542 integer :: idir,ierr,ifor,ikg1,ikg2,ikptf,ikpt1f,ikpt1i 2543 integer :: jkpt,jkpt1i,jkptf,jkpt1f,jsppol,my_source,npw_k1,tag 2544 !arrays 2545 integer,allocatable :: flag_send(:,:), flag_receive(:) 2546 real(dp) :: tsec(2) 2547 real(dp),allocatable :: buffer(:,:) 2548 2549 ! ************************************************************************* 2550 2551 if (mcgq==0.or.mkgq==0) return 2552 2553 call timab(983,1,tsec) 2554 2555 !Test compatbility of berryflag 2556 if (berryflag) then 2557 ABI_MALLOC(flag_send,(0:nproc_distrb-1,dtefield%fnkpt)) 2558 end if 2559 ABI_MALLOC(flag_receive,(dtset%nkpt)) 2560 flag_send(:,:) = 0 2561 flag_receive(:) = 0 2562 2563 if (berryflag) ikptf = dtefield%i2fbz(ikpt) 2564 2565 do idir = 1, 3 2566 2567 ! skip idir values for which efield_dot(idir) = 0 2568 if (berryflag .and. abs(dtefield%efield_dot(idir)) < tol12 ) cycle 2569 2570 do ifor = 1, 2 2571 2572 if(berryflag) then 2573 dtefield%sflag(:,ikpt + dtset%nkpt*(isppol - 1),ifor,idir) = 0 2574 ikpt1f = dtefield%ikpt_dk(ikptf,ifor,idir) 2575 ikpt1i = dtefield%indkk_f2ibz(ikpt1f,1) 2576 end if 2577 2578 npw_k1 = npwarr(ikpt1i) 2579 count = npw_k1*my_nspinor*nband_k 2580 my_source = mpi_enreg%proc_distrb(ikpt1i,1,isppol) 2581 2582 do dest = 0, nproc_distrb-1 2583 2584 if ((dest==me_distrb).and.(ikpt_loc <= dtset%mkmem)) then 2585 ! I am dest and have something to do 2586 2587 if ( my_source == me_distrb ) then 2588 ! I am destination and source 2589 2590 if(berryflag) then 2591 ikg1 = dtefield%fkgindex(ikpt1f) 2592 ikg2 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2593 icg1 = dtefield%cgindex(ikpt1i,isppol) 2594 icg2 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2595 end if 2596 2597 pwnsfacq(:,ikg2 + 1:ikg2 + npw_k1) = pwnsfac(:,ikg1 + 1:ikg1 + npw_k1) 2598 cgq(:,icg2 + 1:icg2 + count) = cg(:,icg1 + 1:icg1 + count) 2599 2600 else ! I am the destination but not the source -> receive 2601 ! receive pwnsfacq 2602 if(berryflag) then 2603 tag = ikpt1f + (isppol - 1)*dtefield%fnkpt 2604 ikg1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2605 end if 2606 ABI_MALLOC(buffer,(2,npw_k1)) 2607 call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr) 2608 pwnsfacq(:,ikg1+1:ikg1+npw_k1) = buffer(:,1:npw_k1) 2609 ABI_FREE(buffer) 2610 2611 ! receive cgq if necessary 2612 if(flag_receive(ikpt1i) == 0) then 2613 ABI_MALLOC(buffer,(2,count)) 2614 tag = ikpt1i + (isppol - 1)*dtset%nkpt 2615 call xmpi_recv(buffer,my_source,tag,spaceComm_distrb,ierr) 2616 if(berryflag) icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*dtset%nkpt) 2617 cgq(:,icg1+1:icg1+count) = buffer(:,1:count) 2618 ABI_FREE(buffer) 2619 flag_receive(ikpt1i) = 1 2620 end if ! end if flag_receive == 0 2621 end if ! end tasks if I am the destination 2622 2623 else if (ikpt_loc <= mpi_enreg%mkmem(dest)) then ! dest != me and the dest has a k-point to treat 2624 2625 ! jkpt is the kpt which is being treated by dest (in ibz) 2626 ! jsppol is his isppol 2627 jkpt = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,1) 2628 jsppol = mpi_enreg%kpt_loc2ibz_sp(dest, ikpt_loc,2) 2629 2630 if(jkpt > 0 .and. jsppol > 0) then 2631 2632 if(berryflag) then 2633 jkptf = dtefield%i2fbz(jkpt) 2634 jkpt1f = dtefield%ikpt_dk(jkptf,ifor,idir) 2635 jkpt1i = dtefield%indkk_f2ibz(jkpt1f,1) 2636 end if 2637 his_source = mpi_enreg%proc_distrb(jkpt1i,1,jsppol) 2638 2639 if (his_source == me_distrb) then 2640 2641 ! send 2642 ! pwnsfacq 2643 if(berryflag) then 2644 ikg1 = dtefield%fkgindex(jkpt1f) 2645 tag = jkpt1f + (jsppol - 1)*dtefield%fnkpt 2646 end if 2647 count1 = npwarr(jkpt1i) 2648 ABI_MALLOC(buffer,(2,count1)) 2649 buffer(:,1:count1) = pwnsfac(:,ikg1+1:ikg1+count1) 2650 call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr) 2651 ABI_FREE(buffer) 2652 2653 ! send cgq if necessary 2654 if(flag_send(dest, jkpt1i)==0) then 2655 if(berryflag) icg1 = dtefield%cgindex(jkpt1i,jsppol) 2656 tag = jkpt1i + (jsppol - 1)*dtset%nkpt 2657 count1 = npwarr(jkpt1i)*nband_k*my_nspinor 2658 ABI_MALLOC(buffer,(2,count1)) 2659 buffer(:,1:count1) = cg(:,icg1+1:icg1+count1) 2660 call xmpi_send(buffer,dest,tag,spaceComm_distrb,ierr) 2661 ABI_FREE(buffer) 2662 flag_send(dest, jkpt1i)=1 2663 end if ! if send cgq 2664 2665 end if ! end check that his_source == me 2666 2667 end if ! end check on jkpt > 0 and jsppol > 0 2668 2669 end if ! end check on me = dest else if me != dest 2670 2671 end do ! end loop over dest = 0, nproc-1 2672 2673 end do !end loop over ifor 2674 2675 end do !end loop over idir 2676 2677 call timab(983,2,tsec) 2678 2679 ABI_FREE(flag_send) 2680 ABI_FREE(flag_receive) 2681 2682 end subroutine cgq_builder
ABINIT/e_eigen [ Functions ]
NAME
e_eigen
FUNCTION
Computes eigenvalues energy from eigen, occ, kpt, wtk
INPUTS
eigen(nkpt*nsppol)=eigenvalues mband= maximum number of bands nband(nkpt*nsppol)= number of bands for each k-point and spin nkpt= number of k-points nsppol= number of spin polarization occ(mband*nkpt*nsppol)=occupations wtk(nkpt)= k-point weights
OUTPUT
e_eigenvalues= eigenvalues energy
SOURCE
2290 subroutine e_eigen(eigen,e_eigenvalues,mband,nband,nkpt,nsppol,occ,wtk) 2291 2292 !Arguments ------------------------------------ 2293 integer , intent(in) :: mband,nkpt,nsppol 2294 integer , intent(in) :: nband(nkpt*nsppol) 2295 real(dp) , intent(in) :: eigen(mband*nkpt*nsppol) 2296 real(dp) , intent(in) :: occ(mband*nkpt*nsppol) 2297 real(dp) , intent(in) :: wtk(nkpt) 2298 real(dp) , intent(out) :: e_eigenvalues 2299 2300 !Local variables------------------------------- 2301 integer :: ib,iband,ii,ikpt,isppol,nband_k 2302 real(dp) :: wtk_k 2303 2304 ! ************************************************************************* 2305 2306 DBG_ENTER("COLL") 2307 ii=0;ib=0 2308 do isppol=1,nsppol 2309 do ikpt=1,nkpt 2310 ii=ii+1 2311 nband_k=nband(ii) ; wtk_k=wtk(ii) 2312 do iband=1,nband_k 2313 ib=ib+1 2314 if(abs(occ(ib)) > tol8) then 2315 e_eigenvalues = e_eigenvalues + wtk_k*occ(ib)*eigen(ib) 2316 end if 2317 end do 2318 end do 2319 end do 2320 2321 DBG_EXIT("COLL") 2322 2323 end subroutine e_eigen
ABINIT/m_vtorho [ Modules ]
NAME
m_vtorho
FUNCTION
COPYRIGHT
Copyright (C) 1998-2024 ABINIT group (DCA, XG, GMR, MF, AR, MM, MT, FJ, MB, MT, TR) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 ! nvtx related macro definition 23 #include "nvtx_macros.h" 24 25 module m_vtorho 26 27 use iso_fortran_env, only : int32,int64,real32,real64 28 use defs_basis 29 use defs_wvltypes 30 use m_abicore 31 use m_xmpi 32 use m_ab7_mixing 33 use m_errors 34 use m_wffile 35 use m_efield 36 use m_cgtools 37 use m_gemm_nonlop 38 use m_gemm_nonlop_gpu 39 use m_gemm_nonlop_ompgpu 40 use m_hdr 41 use m_dtset 42 use m_dtfil 43 use m_extfpmd 44 use m_ompgpu_utils 45 46 use defs_datatypes, only : pseudopotential_type 47 use defs_abitypes, only : MPI_type 48 use m_fstrings, only : sjoin, itoa 49 use m_time, only : timab 50 use m_geometry, only : xred2xcart 51 use m_occ, only : newocc 52 use m_pawang, only : pawang_type 53 use m_pawtab, only : pawtab_type 54 use m_paw_ij, only : paw_ij_type 55 use m_pawfgrtab, only : pawfgrtab_type 56 use m_pawrhoij, only : pawrhoij_type, pawrhoij_alloc, pawrhoij_free, pawrhoij_io, pawrhoij_inquire_dim 57 use m_pawcprj, only : pawcprj_type, pawcprj_alloc, pawcprj_free, pawcprj_getdim 58 use m_pawfgr, only : pawfgr_type 59 use m_energies, only : energies_type 60 use m_hamiltonian, only : init_hamiltonian, gs_hamiltonian_type, gspot_transgrid_and_pack 61 use m_bandfft_kpt, only : bandfft_kpt, bandfft_kpt_type, bandfft_kpt_set_ikpt, & 62 bandfft_kpt_savetabs, bandfft_kpt_restoretabs, prep_bandfft_tabs 63 use m_electronpositron, only : electronpositron_type,electronpositron_calctype 64 use m_paw_dmft, only : paw_dmft_type,init_dmft,destroy_dmft,print_dmft,saveocc_dmft 65 use m_paw_correlations, only : setnoccmmp 66 use m_paw_occupancies, only : pawmkrhoij 67 use m_paw_mkrho, only : pawmkrho 68 use m_crystal, only : crystal_init, crystal_t 69 use m_oper, only : oper_type,init_oper,destroy_oper 70 use m_io_tools, only : flush_unit 71 use m_abi2big, only : wvl_occ_abi2big, wvl_rho_abi2big, wvl_occopt_abi2big, wvl_eigen_abi2big 72 use m_fock, only : fock_type, fock_ACE_type, fock_updateikpt, fock_calc_ene 73 use m_invovl, only : make_invovl 74 use m_tddft, only : tddft 75 use m_kg, only : mkkin, mkkpg 76 use m_suscep_stat, only : suscep_stat 77 use m_fft, only : fftpac 78 use m_spacepar, only : symrhg 79 use m_vtowfk, only : vtowfk 80 use m_mkrho, only : mkrho, prtrhomxmn 81 use m_mkffnl, only : mkffnl 82 use m_mpinfo, only : proc_distrb_cycle 83 use m_common, only : prteigrs 84 use m_dmft, only : dmft_solve 85 use m_datafordmft, only : datafordmft 86 use m_fourier_interpol, only : transgrid 87 use m_cgprj, only : ctocprj 88 use m_wvl_rho, only : wvl_mkrho 89 use m_wvl_psi, only : wvl_hpsitopsi, wvl_psitohpsi, wvl_nl_gradient 90 use m_inwffil, only : cg_from_atoms 91 use m_chebfiwf, only : chebfiwf2_blocksize 92 93 #if defined HAVE_GPU_CUDA 94 use m_manage_cuda 95 #endif 96 97 #if defined HAVE_YAKL 98 use gator_mod 99 #endif 100 101 #if defined HAVE_BIGDFT 102 use BigDFT_API, only : last_orthon, evaltoocc, write_energies, eigensystem_info 103 #endif 104 105 #if defined(HAVE_GPU) && defined(HAVE_GPU_MARKERS) 106 use m_nvtx_data 107 #endif 108 109 #ifdef HAVE_FC_ISO_C_BINDING 110 use, intrinsic :: iso_c_binding, only : c_int64_t 111 #endif 112 113 114 implicit none 115 116 private
ABINIT/vtorho [ Functions ]
NAME
vtorho
FUNCTION
This routine compute the new density from a fixed potential (vtrial) but might also simply compute eigenvectors and eigenvalues. The main part of it is a wf update over all k points.
INPUTS
itimes(2)=itime array, contain itime=itimes(1) and itimimage_gstate=itimes(2) from outer loops afford=used to dimension susmat atindx(natom)=index table for atoms (see gstate.f) atindx1(natom)=index table for atoms, inverse of atindx (see gstate.f) cpus= cpu time limit in seconds dbl_nnsclo=if 1, will double the value of dtset%nnsclo dielop= if positive, the dielectric matrix must be computed. dielstrt=number of the step at which the dielectric preconditioning begins. dmatpawu= fixed occupation matrix of correlated orbitals (DFT+U or DMFT only) dtefield <type(efield_type)> = variables related to Berry phase calculations (see initberry.f) dtfil <type(datafiles_type)>=variables related to files dtset <type(dataset_type)>=all input variables for this dataset | mband=maximum number of bands | mgfft=maximum size of 1D FFTs | mkmem =number of k points treated by this node. | mpw=maximum dimensioned size of npw | nfft=(effective) number of FFT grid points (for this processor) | nkpt=number of k points. | nspden=number of spin-density components | nsppol=1 for unpolarized, 2 for spin-polarized | nsym=number of symmetry elements in space group | typat= array of types of the natoms electronpositron <type(electronpositron_type)>=quantities for the electron-positron annihilation etotal=total energy (Ha) - only needed for tddft fock <type(fock_type)>= quantities to calculate Fock exact exchange gbound_diel(2*mgfftdiel+8,2)=G sphere boundary for the dielectric matrix gmet(3,3)=reciprocal space metric tensor in bohr**-2. gprimd(3,3)=dimensional reciprocal space primitive translations (3x3 tensor) and grads wrt atomic coordinates (3*natom) gsqcut=cutoff on (k+G)^2 (bohr^-2) hdr <type(hdr_type)>=the header of wf, den and pot files indsym(4,nsym,natom)=indirect indexing array for atom labels irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data irrzondiel(nfftdiel**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data for diel matrix nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise istep=index of the number of steps in the routine scfcv istep_mix=index of the number of steps for the SCF mixing (can be <istep) kg(3,mpw*mkmem)=reduced planewave coordinates. kg_diel(3,npwdiel)=reduced planewave coordinates for the dielectric matrix. kxc(nfftf,nkxc)=exchange-correlation kernel, needed only if nkxc/=0 . lmax_diel=1+max. value of l angular momentum used for dielectric matrix mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol mgfftdiel=maximum size of 1D FFTs, for the computation of the dielectric matrix mpi_enreg=information about MPI parallelization my_natom=number of atoms treated by current processor natom=number of atoms in cell. nattyp(ntypat)= # atoms of each type. nfftf= -PAW ONLY- number of FFT grid points for the fine grid (nfftf=nfft for norm-conserving potential runs) nfftdiel=number of fft grid points for the computation of the diel matrix ngfftdiel(18)=contain all needed information about 3D FFT, for dielectric matrix, see ~abinit/doc/variables/vargs.htm#ngfft nkxc=second dimension of the array kxc, see rhotoxc.f for a description npwarr(nkpt)=number of planewaves in basis at this k point npwdiel=size of the susmat array. ntypat=number of types of atoms in unit cell. optforces=option for the computation of forces (0: no force;1: forces) optres=0: the new value of the density is computed in place of the input value 1: only the density residual is computed ; the input density is kept paw_dmft <type(paw_dmft_type)>= paw+dmft related data paw_ij(my_natom*usepaw) <type(paw_ij_type)>=paw arrays given on (i,j) channels pawang <type(pawang)>=paw angular mesh and related data pawfgr <type(pawfgr_type)>=fine grid parameters and related data pawfgrtab(my_natom*usepaw) <type(pawfgrtab_type)>=atomic data given on fine rectangular grid pawtab(ntypat*usepaw) <type(pawtab_type)>=paw tabulated starting data phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise phnonsdiel(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases, for diel matr nfft**(1-1/nsym) is 1 if nsym==1, and nfft otherwise ph1d(2,3*(2*mgfft+1)*natom)=one-dimensional structure factor information ph1ddiel(2,3*(2*mgfftdiel+1)*natom*usepaw)=one-dimensional structure factor information for the dielectric matrix psps <type(pseudopotential_type)>=variables related to pseudopotentials pwind(pwind_alloc,2,3) = array used to compute the overlap matrix smat between k-points (see initberry.f) pwind_alloc = first dimension of pwind pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) rmet(3,3)=real space metric (bohr**2) rprimd(3,3)=dimensional primitive vectors symrec(3,3,nsym)=symmetry operations in reciprocal space ucvol=unit cell volume in bohr**3. usecprj=1 if cprj datastructure is stored in memory wffnew,unit numbers for wf disk files. with_vectornd = 1 if vectornd allocated vectornd(with_vectornd*nfftf,nspden,3)=nuclear dipole moment vector potential vtrial(nfftf,nspden)=INPUT potential Vtrial(r). [vxctau(nfft,nspden,4*usekden)]=(only for meta-GGA): derivative of XC energy density with respect to kinetic energy density (depsxcdtau). The arrays vxctau contains also the gradient of vxctau (gvxctau) in vxctau(:,:,2:4) xred(3,natom)=reduced dimensionless atomic coordinates ylm(mpw*mkmem,mpsang*mpsang*useylm)= real spherical harmonics for each G and k point ylmgr(mpw*mkmem,3,mpsang*mpsang*useylm)= gradients of real spherical harmonics ylmdiel(npwdiel,lmax_diel**2)= real spherical harmonics for each G and k point for the dielectric matrix
OUTPUT
compch_fft=-PAW only- compensation charge inside spheres computed over fine fft grid dphase(3) : dphase(idir) = accumulated change in the string-averaged Zak phase along the idir-th direction caused by the update of all the occupied Bloch states at all the k-points (only if finite electric field) eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree) resid(mband*nkpt*nsppol)=residuals for each band over all k points. residm=maximum value from resid array (except for nbdbuf highest bands) susmat(2,npwdiel*afford,nspden,npwdiel,nspden)= the susceptibility (or density-density response) matrix in reciprocal space === if optforces>0 === grnl(3*natom)=stores grads of nonlocal energy wrt length scales ==== if optres==1 nres2=square of the norm of the residual nvresid(nfftf,nspden)=density residual ==== if psps%usepaw==1 cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors: cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector. nhat(nfftf,nspden*psps%usepaw)=compensation charge density on rectangular grid in real space
SIDE EFFECTS
cg(2,mpw*dtset%nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions. At output contains updated wavefunctions coefficients; if nkpt>1, these are kept in a disk file. energies <type(energies_type)>=storage for energies computed here : | e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree) | e_kinetic=kinetic energy part of total energy | e_nlpsp_vfock=nonlocal psp + potential Fock ACE part of total energy | e_fermie=fermi energy (Hartree) occ(mband*nkpt*nsppol)=occupation number for each band for each k. (input if insulator - occopt<3 - ; output if metallic) pawrhoij(my_natom*usepaw) <type(pawrhoij_type)>= paw rhoij occupancies and related data rhog(2,nfftf)=Fourier transform of total electron density rhor(nfftf,nspden)=total electron density (el/bohr**3) taug(2,nfftf*dtset%usekden)=array for Fourier transform of kinetic energy density taur(nfftf,nspden*dtset%usekden)=array for kinetic energy density tauresid(nfftf,nspden*dtset%usekden)=array for kinetic energy density residual wvl <type(wvl_data)>=wavelets structures in case of wavelets basis. ==== if (usepaw==1) ==== cprj(natom,mcprj*usecprj)= wave functions projected with non-local projectors: cprj(n,k,i)=<p_i|Cnk> where p_i is a non-local projector. rmm_diis_status= Status of the RMM-DIIS eigensolver. See m_rmm_diis
NOTES
Be careful to the meaning of nfft (size of FFT grids): - In case of norm-conserving calculations the FFT grid is the usual FFT grid. - In case of PAW calculations: Two FFT grids are used; one with nfft points (coarse grid) for the computation of wave functions ; one with nfftf points (fine grid) for the computation of total density. The total electronic density (rhor,rhog) is divided into two terms: - The density related to WFs =Sum[Psi**2] - The compensation density (nhat) - only in PAW The parallelisation needed for the electric field should be made an independent subroutine, so that this routine could be put back in the 95_drive directory.
SOURCE
295 subroutine vtorho(afford,atindx,atindx1,cg,compch_fft,cprj,cpus,dbl_nnsclo,& 296 & dielop,dielstrt,dmatpawu,dphase,dtefield,dtfil,dtset,& 297 & eigen,electronpositron,energies,etotal,gbound_diel,& 298 & gmet,gprimd,grnl,gsqcut,hdr,extfpmd,indsym,irrzon,irrzondiel,& 299 & istep,istep_mix,itimes,kg,kg_diel,kxc,lmax_diel,mcg,mcprj,mgfftdiel,mpi_enreg,& 300 & my_natom,natom,nattyp,nfftf,nfftdiel,ngfftdiel,nhat,nkxc,& 301 & npwarr,npwdiel,nres2,ntypat,nvresid,occ,optforces,& 302 & optres,paw_dmft,paw_ij,pawang,pawfgr,pawfgrtab,pawrhoij,pawtab,& 303 & phnons,phnonsdiel,ph1d,ph1ddiel,psps,fock,& 304 & pwind,pwind_alloc,pwnsfac,resid,residm,rhog,rhor,& 305 & rmet,rprimd,susmat,symrec,taug,taur,tauresid,& 306 & ucvol,usecprj,wffnew,with_vectornd,vectornd,vtrial,vxctau,wvl,xred,& 307 & ylm,ylmgr,ylmdiel, rmm_diis_status) 308 309 !Arguments ------------------------------- 310 !scalars 311 integer, intent(in) :: afford,dbl_nnsclo,dielop,dielstrt,istep,istep_mix,lmax_diel,mcg,mcprj,mgfftdiel 312 integer, intent(in) :: my_natom,natom,nfftf,nfftdiel,nkxc,npwdiel 313 integer, intent(in) :: ntypat,optforces,optres,pwind_alloc,usecprj,with_vectornd 314 real(dp), intent(in) :: cpus,etotal,gsqcut,ucvol 315 real(dp), intent(out) :: compch_fft,nres2,residm 316 type(MPI_type), intent(inout) :: mpi_enreg 317 type(datafiles_type), intent(in) :: dtfil 318 type(dataset_type), intent(inout) :: dtset 319 type(efield_type), intent(inout) :: dtefield 320 type(electronpositron_type),pointer :: electronpositron 321 type(energies_type), intent(inout) :: energies 322 type(hdr_type), intent(inout) :: hdr 323 type(extfpmd_type),pointer,intent(inout) :: extfpmd 324 type(paw_dmft_type), intent(inout) :: paw_dmft 325 type(pawang_type), intent(in) :: pawang 326 type(pawfgr_type), intent(in) :: pawfgr 327 type(pseudopotential_type), intent(in) :: psps 328 type(fock_type),pointer, intent(inout) :: fock 329 type(wffile_type), intent(inout) :: wffnew 330 type(wvl_data), intent(inout) :: wvl 331 !arrays 332 integer, intent(in) :: atindx(natom),atindx1(natom),gbound_diel(2*mgfftdiel+8,2) 333 integer, intent(in) :: indsym(4,dtset%nsym,natom) 334 integer, intent(in) :: irrzon(dtset%nfft**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 335 integer, intent(in) :: irrzondiel(nfftdiel**(1-1/dtset%nsym),2,(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 336 integer, intent(in) :: itimes(2) 337 integer, intent(in) :: kg(3,dtset%mpw*dtset%mkmem),kg_diel(3,npwdiel),nattyp(ntypat),ngfftdiel(18),npwarr(dtset%nkpt) 338 integer, intent(in) :: pwind(pwind_alloc,2,3),symrec(3,3,dtset%nsym) 339 integer, intent(inout) :: rmm_diis_status(2, dtset%nkpt, dtset%nsppol) 340 real(dp), intent(in) :: dmatpawu(:,:,:,:),gmet(3,3),gprimd(3,3),ph1d(2,3*(2*dtset%mgfft+1)*natom) 341 real(dp), intent(in) :: ph1ddiel(2,(3*(2*mgfftdiel+1)*natom)*psps%usepaw) 342 real(dp), intent(in) :: phnons(2,dtset%nfft**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 343 real(dp), intent(in) :: phnonsdiel(2,nfftdiel**(1-1/dtset%nsym),(dtset%nspden/dtset%nsppol)-3*(dtset%nspden/4)) 344 real(dp), intent(in) :: pwnsfac(2,pwind_alloc),rmet(3,3),rprimd(3,3) 345 real(dp), intent(inout) :: vectornd(with_vectornd*nfftf,dtset%nspden,3),vtrial(nfftf,dtset%nspden) 346 real(dp), intent(inout) :: xred(3,natom) 347 real(dp), intent(in) :: ylm(dtset%mpw*dtset%mkmem,psps%mpsang*psps%mpsang*psps%useylm) 348 real(dp), intent(in) :: ylmgr(dtset%mpw*dtset%mkmem,3,psps%mpsang*psps%mpsang*psps%useylm) 349 real(dp), intent(in) :: ylmdiel(npwdiel,lmax_diel**2) 350 real(dp), intent(out) :: dphase(3),grnl(3*natom) 351 real(dp), intent(inout) :: eigen(dtset%mband*dtset%nkpt*dtset%nsppol) 352 real(dp), intent(out) :: nhat(nfftf,dtset%nspden*psps%usepaw) 353 real(dp), intent(inout) :: resid(dtset%mband*dtset%nkpt*dtset%nsppol) 354 real(dp), intent(out) :: nvresid(nfftf,dtset%nspden) 355 real(dp), intent(out) :: susmat(2,npwdiel*afford,dtset%nspden,npwdiel,dtset%nspden) 356 real(dp), intent(inout) :: cg(2,mcg) 357 real(dp), intent(inout) :: kxc(nfftf,nkxc),occ(dtset%mband*dtset%nkpt*dtset%nsppol) 358 real(dp), intent(inout) :: rhog(2,nfftf),rhor(nfftf,dtset%nspden) 359 real(dp), intent(inout) :: taug(2,nfftf*dtset%usekden),taur(nfftf,dtset%nspden*dtset%usekden) 360 real(dp), intent(inout) :: tauresid(nfftf,dtset%nspden*dtset%usekden) 361 real(dp), intent(inout),optional :: vxctau(nfftf,dtset%nspden,4*dtset%usekden) 362 type(pawcprj_type),pointer,intent(inout) :: cprj(:,:) 363 type(paw_ij_type),intent(inout) :: paw_ij(my_natom*psps%usepaw) 364 type(pawfgrtab_type),intent(inout) :: pawfgrtab(my_natom*psps%usepaw) 365 type(pawrhoij_type),target,intent(inout) :: pawrhoij(my_natom*psps%usepaw) 366 type(pawtab_type),intent(in) :: pawtab(ntypat*psps%usepaw) 367 368 !Local variables------------------------------- 369 !scalars 370 integer,parameter :: level=111,tim_mkrho=2 371 !integer,save :: nwarning=0 372 integer :: bdtot_index,counter,cplex,cplex_rhoij,dimffnl,enunit,iband,iband1,ibdkpt 373 integer :: ibg,icg,ider,idir,ierr,ifft,ifor,ifor1,ii,ikg,ikpt 374 integer :: ikpt_loc,ikpt1,my_ikpt,ikxc,ilm,imagn,index1,iorder_cprj,ipert,iplex 375 integer :: iscf,ispden,isppol,istwf_k,mband_cprj,mbdkpsp,mb2dkpsp 376 integer :: mcgq,mcprj_local,mcprj_tmp,me_distrb,mkgq,mpi_comm_sphgrid 377 integer :: my_nspinor,n1,n2,n3,n4,n5,n6,nband_eff,nbdbuf_eff !mwarning, 378 integer :: nband_k,nband_cprj_k,nbuf,neglect_pawhat,nfftot,nkpg,nkpt1,nnn,nnsclo_now 379 integer :: nproc_distrb,npw_k,nspden_rhoij,option,prtvol,nblk_gemm_nonlop 380 integer :: spaceComm_distrb,usecprj_local,usefock_ACE,usetimerev 381 logical :: berryflag,computesusmat,fixed_occ,has_vectornd 382 logical :: locc_test,paral_atom,remove_inv,usefock,with_vxctau 383 logical :: do_last_ortho,wvlbigdft=.false. 384 real(dp) :: dmft_dftocc 385 real(dp) :: edmft,ebandlda,ebanddmft,ebandldatot,ekindmft,ekindmft2,ekinlda 386 real(dp) :: min_occ,vxcavg_dum,strsxc(6) 387 character(len=500) :: msg 388 type(bandfft_kpt_type),pointer :: my_bandfft_kpt => null() 389 type(gs_hamiltonian_type) :: gs_hamk 390 !arrays 391 integer(int32), ABI_CONTIGUOUS pointer :: kg_k(:,:) => null() 392 real(dp) :: dielar(7),dphase_k(3),kpoint(3),qpt(3),rhodum(1),tsec(2),ylmgr_dum(0,0,0) 393 real(dp),allocatable :: EigMin(:,:),buffer1(:),buffer2(:),cgq(:,:) 394 real(dp),allocatable :: cgrkxc(:,:),doccde(:) 395 real(dp),allocatable :: dphasek(:,:),ek_k(:),ek_k_nd(:,:,:),eknk(:),eknk_nd(:,:,:,:,:),end_k(:) 396 real(dp),allocatable :: enlx_k(:),enlxnk(:),focknk(:),fockfornk(:,:,:),ffnl(:,:,:,:),grnl_k(:,:), xcart(:,:) 397 real(dp),allocatable :: grnlnk(:,:) 398 399 #if defined HAVE_GPU && defined HAVE_YAKL 400 real(c_double), ABI_CONTIGUOUS pointer :: kinpw(:) => null() 401 real(c_double), ABI_CONTIGUOUS pointer :: eig_k(:) => null() 402 #else 403 real(dp),allocatable :: kinpw(:) 404 real(dp),allocatable :: eig_k(:) 405 #endif 406 407 real(dp),allocatable :: kpg_k(:,:),occ_k(:),ph3d(:,:,:) 408 real(dp),allocatable :: pwnsfacq(:,:) 409 410 #if defined HAVE_GPU && defined HAVE_YAKL 411 real(c_double), ABI_CONTIGUOUS pointer :: resid_k(:) => null() 412 real(c_double), ABI_CONTIGUOUS pointer :: rhoaug(:,:,:,:) => null() 413 #else 414 real(dp),allocatable :: resid_k(:) 415 real(dp),allocatable :: rhoaug(:,:,:,:) 416 #endif 417 418 real(dp),allocatable :: rhowfg(:,:),rhowfr(:,:),tauwfg(:,:),tauwfr(:,:) 419 real(dp),allocatable :: vectornd_pac(:,:,:,:,:) 420 421 #if defined HAVE_GPU && defined HAVE_YAKL 422 real(real64), ABI_CONTIGUOUS pointer :: vlocal(:,:,:,:) => null() 423 #else 424 real(dp), allocatable :: vlocal(:,:,:,:) 425 #endif 426 427 real(dp),allocatable :: vxctaulocal(:,:,:,:,:),ylm_k(:,:),zshift(:) 428 type(pawcprj_type),allocatable :: cprj_tmp(:,:) 429 type(pawcprj_type),allocatable,target:: cprj_local(:,:) 430 type(oper_type) :: dft_occup 431 type(pawrhoij_type),pointer :: pawrhoij_unsym(:) 432 type(crystal_t) :: cryst_struc 433 integer :: idum1(0),idum3(0,0,0) 434 real(dp) :: rdum2(0,0),rdum4(0,0,0,0) 435 #if defined HAVE_BIGDFT 436 integer :: occopt_bigdft 437 #endif 438 439 #if defined(HAVE_GPU_CUDA) 440 type(gemm_nonlop_gpu_type) :: gemm_nonlop_gpu_obj 441 #endif 442 443 444 ! ********************************************************************* 445 446 DBG_ENTER("COLL") 447 448 !Keep track of total time spent in vtorho 449 call timab(980,1,tsec) 450 call timab(981,1,tsec) 451 452 !Structured debugging if prtvol==-level 453 prtvol=dtset%prtvol 454 455 ! Electric fields: set flag to turn on various behaviors 456 berryflag = any(dtset%berryopt == [4, 14, 6, 16, 7, 17]) 457 458 !If usewvl: wvlbigdft indicates that the BigDFT workflow will be followed 459 wvlbigdft=(dtset%usewvl==1.and.dtset%wvl_bigdft_comp==1) 460 461 !Several inits 462 n1=dtset%ngfft(1) ; n2=dtset%ngfft(2) ; n3=dtset%ngfft(3) 463 n4=dtset%ngfft(4) ; n5=dtset%ngfft(5) ; n6=dtset%ngfft(6) 464 usecprj_local=0;if (psps%usepaw==1) usecprj_local=1 465 my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor) 466 paral_atom=(my_natom/=natom) 467 compch_fft=-1.d5 468 469 !Check that usekden is not 0 if want to use vxctau 470 with_vxctau = (present(vxctau).and.dtset%usekden/=0) 471 472 !Check that fock is present if want to use fock option 473 usefock = (dtset%usefock==1 .and. associated(fock)) 474 usefock_ACE=0 475 if (usefock) usefock_ACE=fock%fock_common%use_ACE 476 477 !Init MPI 478 spaceComm_distrb=mpi_enreg%comm_cell 479 if (mpi_enreg%paral_kgb==1) spaceComm_distrb=mpi_enreg%comm_kpt 480 if (mpi_enreg%paral_hf ==1) spaceComm_distrb=mpi_enreg%comm_kpt 481 nproc_distrb=xmpi_comm_size(spaceComm_distrb) 482 me_distrb=xmpi_comm_rank(spaceComm_distrb) 483 mpi_comm_sphgrid=mpi_enreg%comm_fft 484 if(dtset%usewvl==1) mpi_comm_sphgrid=mpi_enreg%comm_wvl 485 !if (mpi_enreg%me_img/=0) nwarning=nwarning+1 486 487 !Test size of FFT grids (1 grid in norm-conserving, 2 grids in PAW) 488 if ((psps%usepaw==1.and.pawfgr%nfft/=nfftf).or.(psps%usepaw==0.and.dtset%nfft/=nfftf)) then 489 ABI_BUG('wrong values for nfft, nfftf!') 490 end if 491 492 !Test optforces (to prevent memory overflow) 493 if (optforces/=0.and.optforces/=1) then 494 ABI_BUG(sjoin('wrong value for optforces: ',itoa(optforces))) 495 end if 496 497 iscf=dtset%iscf 498 fixed_occ=(dtset%occopt<3.or.electronpositron_calctype(electronpositron)==1) 499 if(.not. wvlbigdft) then 500 energies%e_eigenvalues = zero 501 energies%e_kinetic = zero 502 energies%e_nucdip = zero 503 energies%e_nlpsp_vfock = zero 504 if (usefock) then 505 energies%e_fock=zero 506 energies%e_fockdc=zero 507 end if 508 grnl(:)=zero 509 if (berryflag) then 510 resid(:) = zero ! JWZ 13 May 2010. resid and eigen need to be fully zeroed each time before use 511 end if 512 ! MG: The previous line is not compatible with the RMM-DIIS since rmm_diis recevies the previous resid_k 513 ! to select the accuracy level. 514 ! For the time being, we set resid to zero if berryflag to avoid breaking the CG solver with E-field 515 ! but it's clear that the treatment of resid should be rationalized and that the previous values should be passed to vtowfk 516 eigen(:) = zero 517 bdtot_index=0 518 ibg=0;icg=0 519 mbdkpsp=dtset%mband*dtset%nkpt*dtset%nsppol 520 if(paw_dmft%use_dmft==1) mb2dkpsp=2*dtset%mband*dtset%mband*dtset%nkpt*dtset%nsppol 521 end if 522 523 if(dtset%usewvl==0) then 524 ABI_MALLOC(eknk,(mbdkpsp)) 525 ABI_MALLOC(enlxnk,(mbdkpsp)) 526 ABI_MALLOC(eknk_nd,(dtset%nsppol,dtset%nkpt,2,dtset%mband,dtset%mband*paw_dmft%use_dmft)) 527 ABI_MALLOC(EigMin,(2,dtset%mband)) 528 ABI_MALLOC(grnlnk,(3*natom,mbdkpsp*optforces)) 529 if (usefock) then 530 ABI_MALLOC(focknk,(mbdkpsp)) 531 focknk=zero 532 if (optforces>0)then 533 ABI_MALLOC(fockfornk,(3,natom,mbdkpsp)) 534 fockfornk=zero 535 end if 536 end if 537 eknk(:)=zero;enlxnk(:)=zero 538 if (optforces>0) grnlnk(:,:)=zero 539 if(paw_dmft%use_dmft==1) eknk_nd=zero 540 end if !usewvl==0 541 542 !Initialize rhor if needed; store old rhor 543 if(iscf>=0 .or. iscf==-3) then 544 if (optres==1) then 545 nvresid=rhor ; tauresid=taur 546 end if 547 ! NC and plane waves 548 if (psps%usepaw==0 .and. dtset%usewvl==0) then 549 rhor=zero ; taur=zero 550 ! PAW 551 else if(psps%usepaw==1) then 552 ABI_MALLOC(rhowfr,(dtset%nfft,dtset%nspden)) 553 ABI_MALLOC(rhowfg,(2,dtset%nfft)) 554 ABI_MALLOC(tauwfr,(dtset%nfft,dtset%nspden*dtset%usekden)) 555 ABI_MALLOC(tauwfg,(2,dtset%nfft*dtset%usekden)) 556 rhowfr(:,:)=zero ; tauwfr(:,:)=zero 557 end if 558 end if 559 560 ! Here we set the max number of non-self-consistent loops nnsclo_now used in vtowfk 561 if (iscf<0) then 562 ! Non self-consistent case 563 nnsclo_now=dtset%nstep 564 else 565 ! Self-consistent case 566 if (dtset%nnsclo>0) then 567 ! Use input variable if specified and > 0 568 nnsclo_now=dtset%nnsclo 569 else if (dtset%nnsclo < 0) then 570 ! imposed during abs(nnsclo) steps 571 nnsclo_now=1 572 if (istep<=abs(dtset%nnsclo)) nnsclo_now=merge(5,dtset%useria,dtset%useria==0) 573 else 574 ! Default branch for self-consistent case. 575 ! Perform 2 NSCF loops for the first two iterations. This is important especially wfs have 576 ! been initialized with random numbers. 577 nnsclo_now = 1 578 if (dtset%usewvl == 0) then 579 ! Plane waves 580 if (istep <= 2 .and. iscf /= 0) nnsclo_now = 2 581 ! MG: I don't understand why we need to perform 2 NSCF loops after the first SCF cycle 582 ! when we are relaxing the structure as the initial density and wavefunctions should be already good enough. 583 ! Here I change the default behavior to avoid the extra loop but only if RMM-DIIS is used. 584 ! XG 20210312 : I prefectly agree with you. This is historical, and should be changed, after testing and update of reference files. 585 if ((itimes(1) > 1 .or. (itimes(2)>1)) .and. dtset%rmm_diis /= 0) nnsclo_now = 1 586 else 587 ! Wavelets 588 if (iscf==0) then 589 nnsclo_now=0 590 else if (istep<=2) then 591 nnsclo_now=3 592 else if (istep<=4) then 593 nnsclo_now=2 594 end if 595 end if 596 end if 597 ! Double the value if required 598 if (dbl_nnsclo==1) nnsclo_now=nnsclo_now*2 599 end if 600 601 if(dtset%wfoptalg==2)nnsclo_now=40 ! UNDER DEVELOPMENT 602 603 if (dtset%prtvol > 0) then 604 write(msg, '(a,i0,a,3(i0,1x))' ) ' vtorho: nnsclo_now = ',nnsclo_now,& 605 ', note that nnsclo, dbl_nnsclo, istep= ',dtset%nnsclo,dbl_nnsclo,istep 606 call wrtout(std_out,msg) 607 else 608 if (nnsclo_now > 1) call wrtout(std_out, sjoin(" Max number of non-self-consistent loops:", itoa(nnsclo_now))) 609 end if 610 611 !==== Initialize most of the Hamiltonian ==== 612 !Allocate all arrays and initialize quantities that do not depend on k and spin. 613 call init_hamiltonian(gs_hamk,psps,pawtab,dtset%nspinor,dtset%nsppol,dtset%nspden,natom,& 614 & dtset%typat,xred,dtset%nfft,dtset%mgfft,dtset%ngfft,rprimd,dtset%nloalg,& 615 & paw_ij=paw_ij,ph1d=ph1d,usecprj=usecprj_local,electronpositron=electronpositron,fock=fock,& 616 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab,mpi_spintab=mpi_enreg%my_isppoltab,& 617 & nucdipmom=dtset%nucdipmom,gpu_option=dtset%gpu_option) 618 619 !Initializations for PAW (projected wave functions) 620 mcprj_local=0 ; mband_cprj=0 621 if (psps%usepaw==1) then 622 mband_cprj=dtset%mband 623 if (dtset%paral_kgb/=0) mband_cprj=mband_cprj/mpi_enreg%nproc_band 624 iorder_cprj=0 ; mcprj_local=mcprj 625 if (usecprj==0) then 626 mcprj_local=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol 627 !This is a check but should always be true since scfcv allocated cprj anyway 628 if (allocated(cprj_local)) then 629 !Was allocated in scfcv so we just destroy and reconstruct it as desired 630 call pawcprj_free(cprj_local) 631 ABI_FREE(cprj_local) 632 end if 633 ABI_MALLOC(cprj_local,(dtset%natom,mcprj_local)) 634 call pawcprj_alloc(cprj_local,0,gs_hamk%dimcprj) 635 cprj => null() 636 cprj => cprj_local 637 end if 638 end if 639 640 call timab(981,2,tsec) 641 642 !=================================================================== 643 ! WAVELETS - Branching with a separate VTORHO procedure 644 !=================================================================== 645 646 if (dtset%usewvl == 1) then 647 #ifndef HAVE_BIGDFT 648 BIGDFT_NOTENABLED_ERROR() 649 #else 650 651 ! do_last_ortho in case of diagonalization scheme 652 if ( wvlbigdft) do_last_ortho=(dtset%iscf/=0) 653 if (.not.wvlbigdft) do_last_ortho=(.true.) 654 655 ABI_MALLOC(xcart,(3, dtset%natom)) 656 call xred2xcart(dtset%natom, rprimd, xcart, xred) 657 658 if(wvlbigdft) then 659 ! NSCF loop for wvlbigdt: 660 call wvl_nscf_loop_bigdft() 661 else 662 ! NSCF loop for WVL: (not wvlbigdft) 663 call wvl_nscf_loop() 664 end if 665 666 ! Eventually orthogonalize WFs now 667 if (do_last_ortho) then 668 call write_energies(ii,0,wvl%e%energs,0.d0,0.d0,"final") 669 call last_orthon(me_distrb, nproc_distrb, ii, wvl%wfs%ks, wvl%e%energs%evsum, .true.) 670 if(wvlbigdft) energies%e_xcdc = wvl%e%energs%evxc 671 ! If occupation numbers are not changed... 672 if (fixed_occ .or. (iscf<0 .and. iscf/=-3)) then 673 call wvl_comm_eigen() 674 end if 675 ! ... or update occupations: 676 if( ( .not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then 677 if(wvlbigdft) then 678 call wvl_occ_bigdft() 679 else 680 ! Communicate eigenvalues: 681 call wvl_comm_eigen() 682 ! Update occ and Fermi level 683 call wvl_occ() 684 end if 685 end if 686 ! This might accelerate convergence: 687 wvl%wfs%ks%diis%energy_min=one 688 wvl%wfs%ks%diis%alpha=two 689 end if !do_last_ortho 690 691 ! Compute eigenvalues energy 692 if(.not. wvlbigdft .and. nnsclo_now>0) then 693 call e_eigen(eigen,energies%e_eigenvalues,dtset%mband,dtset%nband,dtset%nkpt,& 694 & dtset%nsppol,occ,dtset%wtk) 695 else 696 energies%e_eigenvalues = energies%e_kinetic + energies%e_localpsp & 697 & + energies%e_xcdc + two*energies%e_hartree +energies%e_nlpsp_vfock 698 end if 699 700 if (optforces == 1) then ! not compatible with iscf=0 and wvlbigdftcomp=1 + PAW 701 call wvl_nl_gradient(grnl, mpi_enreg, dtset%natom, rprimd, wvl, xcart) 702 end if 703 704 ! For iscf<0 we do not update the density 705 if (dtset%iscf>=0) then !(dtset%iscf>=0 .and. .not. wvlbigdft ) then 706 call wvl_mkrho(dtset,irrzon,mpi_enreg,phnons,rhor,wvl%wfs,wvl%den) 707 end if 708 ABI_FREE(xcart) 709 710 ! Note in WVL+NC: the rest will be skipped. 711 ! For PAW: we will compute Rho_ij at the end. 712 !if(wvlbigdft) return 713 #endif 714 else 715 716 !=================================================================== 717 ! PLANE WAVES - Standard VTORHO procedure 718 !=================================================================== 719 720 ! Electric field: allocate dphasek 721 nkpt1 = dtset%nkpt 722 if ( berryflag ) then 723 ABI_MALLOC(dphasek,(3,dtset%nkpt*dtset%nsppol)) 724 dphasek(:,:) = zero 725 nkpt1 = dtefield%mkmem_max 726 end if 727 728 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 729 #if defined HAVE_GPU && defined HAVE_YAKL 730 ABI_MALLOC_MANAGED(rhoaug, (/n4,n5,n6,gs_hamk%nvloc/)) 731 ABI_MALLOC_MANAGED(vlocal, (/n4,n5,n6,gs_hamk%nvloc/)) 732 #endif 733 else 734 ABI_MALLOC(rhoaug,(n4,n5,n6,gs_hamk%nvloc)) 735 ABI_MALLOC(vlocal,(n4,n5,n6,gs_hamk%nvloc)) 736 end if 737 738 if(with_vxctau) then 739 ABI_MALLOC(vxctaulocal,(n4,n5,n6,gs_hamk%nvloc,4)) 740 end if 741 742 has_vectornd = (with_vectornd .EQ. 1) 743 if(has_vectornd) then 744 ABI_MALLOC(vectornd_pac,(n4,n5,n6,gs_hamk%nvloc,3)) 745 vectornd_pac=zero 746 end if 747 748 nbdbuf_eff = dtset%nbdbuf 749 ! In metallic case, at first iteration, occupations could be 0. So residm should be computed as usual 750 if (dtset%nbdbuf==-101.and..not.fixed_occ.and.istep==1.and.minval(occ)<tol10) then 751 write(msg,*) 'vtorho: nbdbuf is set to 0 for this step' 752 call wrtout(std_out,msg,'COLL') 753 nbdbuf_eff = 0 754 end if 755 756 ! LOOP OVER SPINS 757 do isppol=1,dtset%nsppol 758 call timab(982,1,tsec) 759 760 ikpt_loc = 0 761 ikg=0 762 763 ! Set up local potential vlocal on the coarse FFT mesh from vtrial taking into account the spin. 764 ! Also, continue to initialize the Hamiltonian. 765 766 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 767 dtset%nspden, gs_hamk%nvloc, 1, pawfgr, mpi_enreg, vtrial, vlocal) 768 call gs_hamk%load_spin(isppol, vlocal=vlocal, with_nonlocal=.true.) 769 770 if (with_vxctau) then 771 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 772 dtset%nspden, gs_hamk%nvloc, 4, pawfgr, mpi_enreg, vxctau, vxctaulocal) 773 call gs_hamk%load_spin(isppol, vxctaulocal=vxctaulocal) 774 end if 775 776 rhoaug(:,:,:,:)=zero 777 778 ! if vectornd is present, set it up for addition to gs_hamk similarly to how it's done for 779 ! vtrial. Note that it must be done for the three Cartesian directions. Also, the following 780 ! code assumes explicitly and implicitly that nvloc = 1. This should eventually be generalized. 781 if(has_vectornd) then 782 call gspot_transgrid_and_pack(isppol, psps%usepaw, dtset%paral_kgb, dtset%nfft, dtset%ngfft, nfftf, & 783 & dtset%nspden, gs_hamk%nvloc, 3, pawfgr, mpi_enreg, vectornd, vectornd_pac) 784 call gs_hamk%load_spin(isppol, vectornd=vectornd_pac) 785 end if 786 787 call timab(982,2,tsec) 788 789 ! BIG FAT k POINT LOOP 790 ! MVeithen: I had to modify the structure of this loop in order to implement MPI // of the electric field 791 ! Note that the loop here differs from the similar one in berryphase_new.F90. 792 ! here, ikpt_loc numbers the kpts treated by the current processor. 793 ! in berryphase_new.F90, ikpt_loc ALSO includes info about value of isppol. 794 795 ikpt = 0 796 do while (ikpt_loc < nkpt1) 797 798 call timab(997,1,tsec) 799 800 if ( .not.berryflag ) then 801 ikpt_loc = ikpt_loc + 1 802 ikpt = ikpt_loc 803 my_ikpt = mpi_enreg%my_kpttab(ikpt) 804 else 805 if (ikpt_loc < dtset%mkmem) ikpt = ikpt + 1 806 if ((ikpt > dtset%nkpt).and.(ikpt_loc < dtset%mkmem)) exit 807 my_ikpt=ikpt 808 end if 809 810 dphase_k(:) = zero 811 counter=100*ikpt+isppol 812 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 813 nband_cprj_k=nband_k/mpi_enreg%nproc_band 814 istwf_k=dtset%istwfk(ikpt) 815 npw_k=npwarr(ikpt) 816 817 mcgq=1 ; mkgq=1 818 if (.not.berryflag) then 819 if(proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) then 820 eigen(1+bdtot_index : nband_k+bdtot_index) = zero 821 resid(1+bdtot_index : nband_k+bdtot_index) = zero 822 bdtot_index=bdtot_index+nband_k 823 cycle 824 end if 825 else 826 if ((proc_distrb_cycle(mpi_enreg%proc_distrb,ikpt,1,nband_k,isppol,me_distrb)) .and.(ikpt_loc <= dtset%mkmem)) then 827 eigen(1+bdtot_index : nband_k+bdtot_index) = zero 828 resid(1+bdtot_index : nband_k+bdtot_index) = zero 829 bdtot_index = bdtot_index + nband_k 830 cycle 831 end if 832 ikpt_loc = ikpt_loc + 1 833 mcgq = dtset%mpw*my_nspinor*nband_k*dtefield%nneigh(ikpt) 834 ikg = dtefield%kgindex(ikpt) 835 mkgq = 6*dtset%mpw 836 end if ! berryflag 837 838 call timab(997,2,tsec) 839 840 ! In case of MPI // of a finite field calculation 841 ! build the cgq array that stores the wavefunctions for the 842 ! neighbours of ikpt, and the pwnsfacq array that stores the 843 ! corresponding phase factors (in case of tnons) 844 ABI_MALLOC(cgq,(2,mcgq)) 845 ABI_MALLOC(pwnsfacq,(2,mkgq)) 846 if ( berryflag ) then 847 call cgq_builder(berryflag,cg,cgq,dtefield,dtset,ikpt,ikpt_loc,isppol,mcg,mcgq,& 848 & me_distrb,mkgq,mpi_enreg,my_nspinor,nband_k,nproc_distrb,& 849 & npwarr,pwnsfac,pwnsfacq,pwind_alloc,spaceComm_distrb) 850 if (ikpt_loc > dtset%mkmem) then 851 ABI_FREE(cgq) 852 ABI_FREE(pwnsfacq) 853 cycle 854 end if 855 end if !berryopt 856 857 call timab(984,1,tsec) 858 859 if (mpi_enreg%paral_kgb==1) my_bandfft_kpt => bandfft_kpt(my_ikpt) 860 call bandfft_kpt_set_ikpt(ikpt,mpi_enreg) 861 ! my_ikpt = ikpt 862 ! if (mpi_enreg%paral_kgb==1) then 863 ! my_ikpt = mpi_enreg%my_kpttab(ikpt) 864 ! my_bandfft_kpt => bandfft_kpt(my_ikpt) 865 ! call bandfft_kpt_set_ikpt(ikpt,mpi_enreg) 866 ! end if 867 868 ABI_MALLOC(ek_k,(nband_k)) 869 ABI_MALLOC(end_k,(nband_k)) 870 ABI_MALLOC(enlx_k,(nband_k)) 871 ABI_MALLOC(ek_k_nd,(2,nband_k,nband_k*paw_dmft%use_dmft)) 872 ABI_MALLOC(occ_k,(nband_k)) 873 ABI_MALLOC(zshift,(nband_k)) 874 ABI_MALLOC(grnl_k,(3*natom,nband_k*optforces)) 875 876 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 877 #if defined HAVE_GPU && defined HAVE_YAKL 878 ABI_MALLOC_MANAGED(eig_k,(/nband_k/)) 879 ABI_MALLOC_MANAGED(resid_k,(/nband_k/)) 880 #endif 881 else 882 ABI_MALLOC(eig_k,(nband_k)) 883 ABI_MALLOC(resid_k,(nband_k)) 884 end if 885 886 eig_k(:)=zero 887 ek_k(:)=zero 888 end_k(:)=zero 889 enlx_k(:)=zero 890 if(paw_dmft%use_dmft==1) ek_k_nd(:,:,:)=zero 891 if (optforces>0) grnl_k(:,:)=zero 892 kpoint(:)=dtset%kptns(:,ikpt) 893 occ_k(:)=occ(1+bdtot_index:nband_k+bdtot_index) 894 resid_k(:) = resid(1+bdtot_index : nband_k+bdtot_index) 895 !resid_k(:)=zero 896 zshift(:)=dtset%eshift 897 898 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 899 #if defined HAVE_GPU && defined HAVE_YAKL 900 ABI_MALLOC_MANAGED(kg_k, (/3,npw_k/)) 901 #endif 902 else 903 ABI_MALLOC(kg_k,(3,npw_k)) 904 end if 905 906 ABI_MALLOC(ylm_k,(npw_k,psps%mpsang*psps%mpsang*psps%useylm)) 907 kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg) 908 if (psps%useylm==1) then 909 do ilm=1,psps%mpsang*psps%mpsang 910 ylm_k(1:npw_k,ilm)=ylm(1+ikg:npw_k+ikg,ilm) 911 end do 912 end if 913 914 ! Set up remaining of the Hamiltonian 915 916 ! Compute (1/2) (2 Pi)**2 (k+G)**2: 917 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 918 #if defined HAVE_GPU && defined HAVE_YAKL 919 ABI_MALLOC_MANAGED(kinpw,(/npw_k/)) 920 #endif 921 else 922 ABI_MALLOC(kinpw,(npw_k)) 923 end if 924 925 call mkkin(dtset%ecut,dtset%ecutsm,dtset%effmass_free,gmet,kg_k,kinpw,kpoint,npw_k,0,0) 926 927 ! Compute (k+G) vectors (only if useylm=1) 928 nkpg=3*optforces*dtset%nloalg(3) 929 ABI_MALLOC(kpg_k,(npw_k,nkpg)) 930 if ((mpi_enreg%paral_kgb/=1.or.istep<=1).and.nkpg>0) call mkkpg(kg_k,kpg_k,kpoint,nkpg,npw_k) 931 932 ! Compute nonlocal form factors ffnl at all (k+G): 933 ider=0;idir=0;dimffnl=1 934 ABI_MALLOC(ffnl,(npw_k,dimffnl,psps%lmnmax,ntypat)) 935 if (mpi_enreg%paral_kgb/=1.or.istep<=1) then 936 call mkffnl(psps%dimekb,dimffnl,psps%ekb,ffnl,psps%ffspl,& 937 & gmet,gprimd,ider,idir,psps%indlmn,kg_k,kpg_k,kpoint,psps%lmnmax,& 938 & psps%lnmax,psps%mpsang,psps%mqgrid_ff,nkpg,& 939 & npw_k,ntypat,psps%pspso,psps%qgrid_ff,rmet,& 940 & psps%usepaw,psps%useylm,ylm_k,ylmgr) 941 end if 942 943 ! Load k-dependent part in the Hamiltonian datastructure 944 ! - Compute 3D phase factors 945 ! - Prepare various tabs in case of band-FFT parallelism 946 ! - Load k-dependent quantities in the Hamiltonian 947 ABI_MALLOC(ph3d,(2,npw_k,gs_hamk%matblk)) 948 949 if(usefock_ACE/=0) then 950 call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,& 951 & kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,fockACE_k=fock%fockACE(ikpt,isppol),ph3d_k=ph3d,& 952 & compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),& 953 & compute_gbound=(mpi_enreg%paral_kgb/=1)) 954 else 955 call gs_hamk%load_k(kpt_k=dtset%kptns(:,ikpt),istwf_k=istwf_k,npw_k=npw_k,& 956 & kinpw_k=kinpw,kg_k=kg_k,kpg_k=kpg_k,ffnl_k=ffnl,ph3d_k=ph3d,& 957 & compute_ph3d=(mpi_enreg%paral_kgb/=1.or.istep<=1),& 958 & compute_gbound=(mpi_enreg%paral_kgb/=1)) 959 end if 960 961 ! Load band-FFT tabs (transposed k-dependent arrays) 962 if (mpi_enreg%paral_kgb==1) then 963 if (istep<=1) call prep_bandfft_tabs(gs_hamk,ikpt,dtset%mkmem,mpi_enreg) 964 call gs_hamk%load_k(npw_fft_k=my_bandfft_kpt%ndatarecv, & 965 gbound_k =my_bandfft_kpt%gbound, & 966 kinpw_k =my_bandfft_kpt%kinpw_gather, & 967 kg_k =my_bandfft_kpt%kg_k_gather, & 968 kpg_k =my_bandfft_kpt%kpg_k_gather, & 969 ffnl_k =my_bandfft_kpt%ffnl_gather, & 970 ph3d_k =my_bandfft_kpt%ph3d_gather) 971 end if 972 973 ! If OpenMP GPU, load "hamiltonian" on GPU device 974 if (dtset%gpu_option == ABI_GPU_OPENMP) then 975 if(dtset%paral_kgb==0) then 976 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp) 977 else if(istwf_k==1) then 978 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather) 979 else if(istwf_k==2) then 980 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym) 981 else 982 ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !") 983 end if 984 end if 985 986 if(dtset%wfoptalg == 111 .and. istep <= 1) then 987 gemm_nonlop_nblocks = dtset%gpu_nl_splitsize 988 ! Only compute CHEBFI number of blocks if user didn't set it themselves 989 if(gemm_nonlop_nblocks==1) then 990 call chebfiwf2_blocksize(gs_hamk,mpi_enreg%bandpp,npw_k,nband_k,dtset%nspinor,mpi_enreg%paral_kgb,& 991 & dtset%gpu_option,nblk_gemm_nonlop) 992 gemm_nonlop_nblocks = nblk_gemm_nonlop 993 end if 994 if(gemm_nonlop_nblocks > 1) gemm_nonlop_is_distributed = .true. 995 end if 996 997 ! Build inverse of overlap matrix for chebfi 998 if(psps%usepaw == 1 .and. (dtset%wfoptalg == 1 .or. dtset%wfoptalg == 111) .and. istep <= 1) then 999 ABI_NVTX_START_RANGE(NVTX_INVOVL) 1000 call make_invovl(gs_hamk, dimffnl, ffnl, ph3d, mpi_enreg) 1001 ABI_NVTX_END_RANGE() 1002 end if 1003 1004 ! Setup gemm_nonlop 1005 if (gemm_nonlop_use_gemm) then 1006 !set the global variable indicating to gemm_nonlop where to get its data from 1007 gemm_nonlop_ikpt_this_proc_being_treated = my_ikpt 1008 if (istep <= 1) then 1009 !Init the arrays 1010 !FIXME Settle this 1011 if ( dtset%gpu_option == ABI_GPU_DISABLED) then 1012 call make_gemm_nonlop(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1013 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1014 gs_hamk%ucvol, gs_hamk%ffnl_k,& 1015 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k,& 1016 compute_grad_atom=(optforces>0)) 1017 else if ( dtset%gpu_option == ABI_GPU_LEGACY .or. dtset%gpu_option == ABI_GPU_KOKKOS) then 1018 call make_gemm_nonlop_gpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1019 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1020 gs_hamk%ucvol, gs_hamk%ffnl_k, & 1021 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, & 1022 compute_grad_atom=(optforces>0)) 1023 else if ( dtset%gpu_option == ABI_GPU_OPENMP) then 1024 if(mpi_enreg%paral_kgb==0) then 1025 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp) 1026 else if(gs_hamk%istwf_k==1) then 1027 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather) 1028 else if(gs_hamk%istwf_k==2) then 1029 call ompgpu_load_hamilt_buffers(gs_hamk%kg_k,gs_hamk%kg_kp,kg_k_gather=bandfft_kpt(my_ikpt)%kg_k_gather_sym) 1030 else 1031 ABI_ERROR("istwfk > 2 is not handled with OpenMP GPU offload mode !") 1032 end if 1033 call make_gemm_nonlop_ompgpu(my_ikpt,gs_hamk%npw_fft_k,gs_hamk%lmnmax, & 1034 gs_hamk%ntypat, gs_hamk%indlmn, gs_hamk%nattyp, gs_hamk%istwf_k, & 1035 gs_hamk%ucvol, gs_hamk%ffnl_k, & 1036 gs_hamk%ph3d_k,gs_hamk%kpt_k,gs_hamk%kg_k,gs_hamk%kpg_k, & 1037 compute_grad_atom=(optforces>0)) 1038 end if 1039 end if 1040 end if 1041 1042 #if defined HAVE_GPU_CUDA 1043 if (dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) then 1044 if (mpi_enreg%paral_kgb==1) then 1045 call gpu_update_ffnl_ph3d( & 1046 & my_bandfft_kpt%ph3d_gather, INT(size(my_bandfft_kpt%ph3d_gather),c_int64_t), & 1047 & my_bandfft_kpt%ffnl_gather, INT(size(my_bandfft_kpt%ffnl_gather),c_int64_t) ) 1048 else 1049 call gpu_update_ffnl_ph3d( & 1050 & ph3d, INT(size(ph3d),c_int64_t), & 1051 & ffnl, INT(size(ffnl),c_int64_t) ) 1052 end if 1053 end if 1054 #endif 1055 1056 call timab(984,2,tsec) 1057 1058 ! Update the value of ikpt,isppol in fock_exchange and allocate the memory space to perform HF calculation. 1059 if (usefock) call fock_updateikpt(fock%fock_common,ikpt,isppol) 1060 if (psps%usepaw==1 .and. usefock) then 1061 if ((fock%fock_common%optfor).and.(usefock_ACE==0)) fock%fock_common%forces_ikpt=zero 1062 end if 1063 1064 ! Here we initialize the wavefunctions with atomic orbitals at the first GS iteration of the first 1065 ! relaxation step (if any). 1066 ! NB: Not all the cases are presently supported. 1067 !print *, "istep, itimes(1), wfinit", istep, itimes(1), dtset%wfinit 1068 ! FIXME: This check is not enough as I need to check whether cg have been read from WFK file 1069 if (istep == 1 .and. itimes(1) == 0 .and. dtset%wfinit /= 0) then 1070 call cg_from_atoms(ikpt, isppol, rprimd, xred, kg_k, cg(:,icg+1:), dtset, psps, eig_k, gs_hamk, & 1071 mpi_enreg, nband_k, npw_k, my_nspinor) 1072 end if 1073 1074 ABI_NVTX_START_RANGE(NVTX_VTOWFK) 1075 ! Compute the eigenvalues, wavefunction, residuals, 1076 ! contributions to kinetic energy, nuclear dipole energy, 1077 ! nonlocal energy, forces, 1078 ! and update of rhor to this k-point and this spin polarization. 1079 call vtowfk(cg,cgq,cprj,cpus,dphase_k,dtefield,dtfil,& 1080 & dtset,eig_k,ek_k,ek_k_nd,end_k,enlx_k,fixed_occ,grnl_k,gs_hamk,& 1081 & ibg,icg,ikpt,iscf,isppol,kg_k,kinpw,mband_cprj,mcg,mcgq,mcprj_local,mkgq,& 1082 & mpi_enreg,dtset%mpw,natom,nband_k,nbdbuf_eff,dtset%nkpt,istep,nnsclo_now,npw_k,npwarr,& 1083 & occ_k,optforces,prtvol,pwind,pwind_alloc,pwnsfac,pwnsfacq,resid_k,& 1084 & rhoaug,paw_dmft,dtset%wtk(ikpt),zshift, rmm_diis_status(:,ikpt,isppol)) 1085 ABI_NVTX_END_RANGE() 1086 1087 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included... 1088 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated 1089 ! Note: Should we keep nvhpc-23.1 in eos? 1090 #ifndef FC_NVHPC 1091 call timab(985,1,tsec) 1092 #endif 1093 1094 #if defined HAVE_GPU_CUDA 1095 if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ffnl_ph3d() 1096 #endif 1097 ABI_FREE(ffnl) 1098 1099 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1100 #if defined HAVE_GPU && defined HAVE_YAKL 1101 ABI_FREE_MANAGED(kinpw) 1102 ABI_FREE_MANAGED(kg_k) 1103 #endif 1104 else 1105 ABI_FREE(kinpw) 1106 ABI_FREE(kg_k) 1107 end if 1108 1109 ABI_FREE(kpg_k) 1110 ABI_FREE(ylm_k) 1111 ABI_FREE(ph3d) 1112 ABI_FREE(cgq) 1113 ABI_FREE(pwnsfacq) 1114 1115 ! electric field 1116 if (berryflag) then 1117 dphasek(:,ikpt + (isppol - 1)*dtset%nkpt) = dphase_k(:) 1118 1119 ! The overlap matrices for all first neighbours of ikpt are no more up to date 1120 do idir = 1, 3 1121 do ifor = 1, 2 1122 ikpt1 = dtefield%ikpt_dk(dtefield%i2fbz(ikpt),ifor,idir) 1123 ikpt1 = dtefield%indkk_f2ibz(ikpt1,1) 1124 ifor1 = -1*ifor + 3 ! ifor = 1 -> ifor1 = 2 & ifor = 2 -> ifor1 = 1 1125 dtefield%sflag(:,ikpt1+(isppol-1)*dtset%nkpt,ifor1,idir) = 0 1126 end do 1127 end do 1128 end if ! berryflag 1129 1130 ! Save eigenvalues (hartree), residuals (hartree**2) 1131 eigen(1+bdtot_index : nband_k+bdtot_index) = eig_k(:) 1132 eknk (1+bdtot_index : nband_k+bdtot_index) = ek_k (:) 1133 if(usefock) then 1134 focknk (1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%eigen_ikpt (:) 1135 if (optforces>0) fockfornk(:,:,1+bdtot_index : nband_k+bdtot_index) = fock%fock_common%forces_ikpt(:,:,:) 1136 end if 1137 if(paw_dmft%use_dmft==1) eknk_nd(isppol,ikpt,:,:,:) = ek_k_nd(:,:,:) 1138 resid(1+bdtot_index : nband_k+bdtot_index) = resid_k(:) 1139 if (optforces>0) grnlnk(:,1+bdtot_index : nband_k+bdtot_index) = grnl_k(:,:) 1140 enlxnk(1+bdtot_index : nband_k+bdtot_index) = enlx_k(:) 1141 1142 if(iscf>0 .or. iscf==-3)then 1143 ! Accumulate sum over k points for band, nonlocal and kinetic energies, 1144 ! also accumulate gradients of Enonlocal: 1145 do iband=1,nband_k 1146 if (abs(occ_k(iband))>tol8) then 1147 energies%e_kinetic = energies%e_kinetic + dtset%wtk(ikpt)*occ_k(iband)*ek_k(iband) 1148 energies%e_nucdip = energies%e_nucdip + dtset%wtk(ikpt)*occ_k(iband)*end_k(iband) 1149 energies%e_eigenvalues = energies%e_eigenvalues + dtset%wtk(ikpt)*occ_k(iband)*eig_k(iband) 1150 energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + dtset%wtk(ikpt)*occ_k(iband)*enlx_k(iband) 1151 if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ_k(iband)*grnl_k(:,iband) 1152 if (usefock) then 1153 energies%e_fock=energies%e_fock + half*fock%fock_common%eigen_ikpt(iband)*occ_k(iband)*dtset%wtk(ikpt) 1154 if (usefock_ACE==0) energies%e_fock0=energies%e_fock 1155 endif 1156 end if 1157 end do 1158 1159 ! Calculate Fock contribution to the total energy if required 1160 if ((psps%usepaw==1).and.(usefock)) then 1161 if ((fock%fock_common%optfor).and.(usefock_ACE==0)) then 1162 !WARNING : this routine actually does NOT compute the Fock contrib to total energy, but modifies the force ONLY. 1163 call fock_calc_ene(dtset,fock%fock_common,energies%e_exactX,ikpt,nband_k,occ_k) 1164 end if 1165 end if 1166 end if 1167 1168 if ( dtset%gpu_option == ABI_GPU_OPENMP) then 1169 call ompgpu_free_hamilt_buffers() 1170 end if 1171 ! LB-01/03/2024: Very weird compiler error on eos-nvhpc23.1 if the second call of timab(985,...) is included... 1172 ! Drastic short-term solution : disable this timing for nvhpc... In fact this part is not important unless fock is activated 1173 #ifndef FC_NVHPC 1174 call timab(985,2,tsec) 1175 #endif 1176 1177 ABI_FREE(ek_k) 1178 ABI_FREE(ek_k_nd) 1179 ABI_FREE(end_k) 1180 ABI_FREE(grnl_k) 1181 ABI_FREE(occ_k) 1182 ABI_FREE(zshift) 1183 ABI_FREE(enlx_k) 1184 1185 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1186 #if defined HAVE_GPU && defined HAVE_YAKL 1187 ABI_FREE_MANAGED(eig_k) 1188 ABI_FREE_MANAGED(resid_k) 1189 #endif 1190 else 1191 ABI_FREE(eig_k) 1192 ABI_FREE(resid_k) 1193 end if 1194 1195 ! Keep track of total number of bands (all k points so far, even for k points not treated by me) 1196 bdtot_index=bdtot_index+nband_k 1197 1198 ! Also shift array memory if dtset%mkmem/=0 1199 if (dtset%mkmem/=0) then 1200 ibg=ibg+my_nspinor*nband_cprj_k 1201 icg=icg+npw_k*my_nspinor*nband_k 1202 ikg=ikg+npw_k 1203 end if 1204 1205 end do ! End big k point loop 1206 1207 call timab(986,1,tsec) 1208 1209 if (fixed_occ .and. mpi_enreg%paral_kgb==1) then 1210 call xmpi_sum(rhoaug,mpi_enreg%comm_bandspinorfft,ierr) !Sum the contributions over bands/FFT/spinors 1211 end if 1212 1213 ! Transfer density on augmented fft grid to normal fft grid in real space 1214 ! Also take into account the spin. 1215 if(iscf>0.or.iscf==-3)then 1216 if (psps%usepaw==0) then 1217 call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,1),1) 1218 if(dtset%nspden==4)then 1219 do imagn=2,4 1220 call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhor,rhoaug(:,:,:,imagn),1) 1221 end do 1222 end if 1223 else 1224 call fftpac(isppol,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,1),1) 1225 if(dtset%nspden==4)then 1226 do imagn=2,4 1227 call fftpac(imagn,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,rhowfr,rhoaug(:,:,:,imagn),1) 1228 end do 1229 end if 1230 end if 1231 end if 1232 1233 call timab(986,2,tsec) 1234 1235 end do ! End loop over spins 1236 1237 call timab(988,1,tsec) 1238 if (usefock) then 1239 if (usefock_ACE==0) then 1240 call xmpi_sum(energies%e_fock0,mpi_enreg%comm_kpt,ierr) 1241 end if 1242 if(fock%fock_common%optfor) call xmpi_sum(fock%fock_common%forces,mpi_enreg%comm_kpt,ierr) 1243 end if 1244 ! Electric field: compute string-averaged change in Zak phase 1245 ! along each direction, store it in dphase(idir) 1246 ! ji: it is not convenient to do this anymore. Remove. Set dphase(idir)=0.0_dp. 1247 ! eventually, dphase(idir) will have to go... 1248 if (berryflag) then 1249 dphase(:) = zero 1250 ! In case of MPI // of a finite field calculation, send dphasek to all cpus 1251 call xmpi_sum(dphasek,spaceComm_distrb,ierr) 1252 ABI_FREE(dphasek) 1253 end if ! berryflag 1254 1255 if(dtset%gpu_option==ABI_GPU_KOKKOS) then 1256 #if defined HAVE_GPU && defined HAVE_YAKL 1257 ABI_FREE_MANAGED(rhoaug) 1258 ABI_FREE_MANAGED(vlocal) 1259 #endif 1260 else 1261 ABI_FREE(rhoaug) 1262 ABI_FREE(vlocal) 1263 end if 1264 1265 if(with_vxctau) then 1266 ABI_FREE(vxctaulocal) 1267 end if 1268 if(has_vectornd) then 1269 ABI_FREE(vectornd_pac) 1270 end if 1271 1272 call timab(988,2,tsec) 1273 1274 ABI_MALLOC(doccde,(dtset%mband*dtset%nkpt*dtset%nsppol)) 1275 doccde(:)=zero 1276 1277 ! Treat now varying occupation numbers, in the self-consistent case 1278 if((.not.fixed_occ) .and. (iscf>0.or.iscf==-3)) then 1279 1280 ! Parallel case 1281 if (mpi_enreg%nproc_spkpt>1) then 1282 1283 call timab(989,1,tsec) 1284 1285 ! If needed, exchange the values of eigen,resid,eknk,enlxnk,grnlnk 1286 ABI_MALLOC(buffer1,((4+3*natom*optforces+dtset%usefock+3*natom*dtset%usefock*optforces)*mbdkpsp)) 1287 if(paw_dmft%use_dmft==1) then 1288 ABI_MALLOC(buffer2,(mb2dkpsp*paw_dmft%use_dmft)) 1289 end if 1290 ! Pack eigen,resid,eknk,enlxnk,grnlnk in buffer1 1291 buffer1(1 : mbdkpsp)=eigen(:) 1292 buffer1(1+ mbdkpsp:2*mbdkpsp)=resid(:) 1293 buffer1(1+2*mbdkpsp:3*mbdkpsp)=eknk(:) 1294 buffer1(1+3*mbdkpsp:4*mbdkpsp)=enlxnk(:) 1295 index1=4*mbdkpsp 1296 if (optforces>0) then 1297 buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(grnlnk,(/(3*natom)*mbdkpsp/) ) 1298 index1=index1+3*natom*mbdkpsp 1299 end if 1300 if (usefock) then 1301 buffer1(1+index1:index1+mbdkpsp)=focknk(:) 1302 if (optforces>0) then 1303 index1=index1+mbdkpsp 1304 buffer1(index1+1:index1+3*natom*mbdkpsp)=reshape(fockfornk,(/(3*natom)*mbdkpsp/) ) 1305 end if 1306 end if 1307 if(paw_dmft%use_dmft==1) then 1308 nnn=0 1309 do ikpt=1,dtset%nkpt 1310 do isppol=1,dtset%nsppol 1311 do iband=1,dtset%mband 1312 do iband1=1,dtset%mband 1313 do iplex=1,2 1314 nnn=nnn+1 1315 buffer2(nnn)=eknk_nd(isppol,ikpt,iplex,iband,iband1) 1316 end do 1317 end do 1318 end do 1319 end do 1320 end do 1321 if(nnn.ne.mb2dkpsp) then 1322 write(msg,*)' BUG in vtorho2, buffer2',nnn,mb2dkpsp 1323 ABI_BUG(msg) 1324 end if 1325 end if 1326 ! Build sum of everything 1327 call timab(48,1,tsec) 1328 call xmpi_sum(buffer1,mpi_enreg%comm_kpt,ierr) 1329 ! if(mpi_enreg%paral_kgb/=1.and.paw_dmft%use_dmft==1) then 1330 if(paw_dmft%use_dmft==1) call xmpi_sum(buffer2,mpi_enreg%comm_kpt,ierr) 1331 call timab(48,2,tsec) 1332 1333 ! Unpack eigen,resid,eknk,enlxnk,grnlnk in buffer1 1334 eigen(:) =buffer1(1 : mbdkpsp) 1335 resid(:) =buffer1(1+ mbdkpsp:2*mbdkpsp) 1336 eknk(:) =buffer1(1+2*mbdkpsp:3*mbdkpsp) 1337 enlxnk(:) =buffer1(1+3*mbdkpsp:4*mbdkpsp) 1338 index1=4*mbdkpsp 1339 if (optforces>0) then 1340 grnlnk(:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3*natom,mbdkpsp/) ) 1341 end if 1342 if (usefock) then 1343 focknk(:)=buffer1(1+index1:index1+mbdkpsp) 1344 if (optforces>0) then 1345 index1=index1+mbdkpsp 1346 fockfornk(:,:,:)=reshape(buffer1(index1+1:index1+3*natom*mbdkpsp),(/3,natom,mbdkpsp/) ) 1347 end if 1348 end if 1349 if(paw_dmft%use_dmft==1) then 1350 nnn=0 1351 do ikpt=1,dtset%nkpt 1352 do isppol=1,dtset%nsppol 1353 do iband=1,dtset%mband 1354 do iband1=1,dtset%mband 1355 do iplex=1,2 1356 nnn=nnn+1 1357 eknk_nd(isppol,ikpt,iplex,iband,iband1)=buffer2(nnn) 1358 end do 1359 end do 1360 end do 1361 end do 1362 end do 1363 end if 1364 if(allocated(buffer2)) then 1365 ABI_FREE(buffer2) 1366 end if 1367 ABI_FREE(buffer1) 1368 call timab(989,2,tsec) 1369 1370 end if ! nproc_spkpt>1 1371 1372 ! Compute extfpmd u0 energy shift factor from eigenvalues and kinetic energy. 1373 if(associated(extfpmd)) then 1374 extfpmd%vtrial=vtrial 1375 call extfpmd%compute_shiftfactor(eigen,eknk,dtset%mband,mpi_enreg%me,& 1376 & dtset%nband,dtset%nkpt,dtset%nsppol,dtset%wtk) 1377 end if 1378 1379 ! Compute the new occupation numbers from eigen 1380 call timab(990,1,tsec) 1381 call newocc(doccde,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,& 1382 & dtset%spinmagntarget,dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,& 1383 & dtset%nkpt,dtset%nspinor,dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,& 1384 & dtset%tsmear,dtset%wtk,& 1385 & prtstm=dtset%prtstm,stmbias=dtset%stmbias,extfpmd=extfpmd) 1386 call timab(990,2,tsec) 1387 1388 1389 ! Compute number of free electrons of extfpmd model 1390 if(associated(extfpmd)) then 1391 extfpmd%nelect=zero 1392 call extfpmd%compute_nelect(energies%e_fermie,extfpmd%nelect,dtset%tsmear) 1393 call extfpmd%compute_e_kinetic(energies%e_fermie,dtset%tsmear) 1394 call extfpmd%compute_entropy(energies%e_fermie,dtset%tsmear) 1395 end if 1396 1397 ! !========= DMFT call begin ============================================ 1398 dmft_dftocc=0 1399 if(paw_dmft%use_dmft==1.and.psps%usepaw==1.and.dtset%nbandkss==0) then 1400 call timab(991,1,tsec) 1401 1402 if (dtset%dmftcheck>=0.and.dtset%usedmft>=1.and.(sum(pawtab(:)%upawu)>=tol8.or. & 1403 & sum(pawtab(:)%jpawu)>tol8).and.dtset%dmft_entropy==0) energies%entropy=zero 1404 1405 ! == 0 to a dmft calculation and do not use lda occupations 1406 ! == 1 to a lda calculation with the dmft loop 1407 if(dtset%dmftcheck==-1) dmft_dftocc=1 1408 1409 ! == initialise occnd 1410 paw_dmft%occnd=zero 1411 1412 bdtot_index = 1 1413 do isppol=1,dtset%nsppol 1414 do ikpt=1,dtset%nkpt 1415 do iband=1,dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1416 paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index) 1417 bdtot_index = bdtot_index + 1 1418 end do 1419 end do 1420 end do 1421 1422 1423 if(dmft_dftocc==0) then 1424 if(dtset%occopt/=3) then 1425 ABI_ERROR('occopt should be equal to 3 in dmft') 1426 end if 1427 ! == initialise edmft 1428 if(paw_dmft%use_dmft>=1) edmft = zero 1429 1430 ! Compute residm to check the value 1431 ibdkpt=1 1432 residm=zero 1433 do isppol=1,dtset%nsppol 1434 do ikpt=1,dtset%nkpt 1435 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1436 if (nbdbuf_eff>=0) then 1437 nband_eff=max(1,nband_k-nbdbuf_eff) 1438 residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1))) 1439 else if (nbdbuf_eff==-101) then 1440 residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1))) 1441 else 1442 ABI_ERROR('Bad value of nbdbuf_eff') 1443 end if 1444 ibdkpt=ibdkpt+nband_k 1445 end do 1446 end do 1447 1448 ! Test residm 1449 if (paw_dmft%use_dmft>0 .and. residm>tol4 .and. dtset%dmftcheck>=0) then 1450 if(dtset%dmft_entropy>0) then 1451 write(msg,'(a,e12.3)')& 1452 ' WARNING: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm 1453 call wrtout(std_out,msg) 1454 else 1455 write(msg,'(a,e12.3)')& 1456 ' ERROR: Wavefunctions not converged: DFT+DMFT calculation cannot be carried out safely ',residm 1457 call wrtout(std_out,msg) 1458 write(msg,'(a,i0)')' Action: increase nline and nnsclo',dtset%nstep 1459 ABI_ERROR(msg) 1460 end if 1461 1462 else if (paw_dmft%use_dmft>0 .and. residm>tol10.and. dtset%dmftcheck>=0) then 1463 write(msg,'(3a)')ch10,& 1464 ' Wavefunctions not converged: DFT+DMFT calculation might not be carried out safely ',ch10 1465 ABI_WARNING(msg) 1466 end if 1467 1468 ! == allocate paw_dmft%psichi and paw_dmft%eigen_dft 1469 call init_dmft(dmatpawu,dtset,energies%e_fermie,dtfil%fnameabo_app,& 1470 dtfil%filnam_ds(3),dtset%nspinor,paw_dmft,pawtab,psps,dtset%typat) 1471 call print_dmft(paw_dmft,dtset%pawprtvol) 1472 1473 ! == gather crystal structure date into data "cryst_struc" 1474 remove_inv=.false. 1475 if(dtset%nspden==4) remove_inv=.true. 1476 call crystal_init(dtset%amu_orig(:,1),cryst_struc,dtset%spgroup,natom,dtset%npsp,ntypat, & 1477 dtset%nsym,rprimd,dtset%typat,xred,dtset%ziontypat,dtset%znucl,1,& 1478 dtset%nspden==2.and.dtset%nsppol==1,remove_inv,hdr%title,& 1479 dtset%symrel,dtset%tnons,dtset%symafm) 1480 1481 ! == compute psichi 1482 call xmpi_barrier(spaceComm_distrb) 1483 call init_oper(paw_dmft,dft_occup) 1484 call flush_unit(std_out) 1485 call timab(620,1,tsec) 1486 call datafordmft(cryst_struc,cprj,gs_hamk%dimcprj,dtset,eigen,energies%e_fermie,& 1487 & dft_occup,dtset%mband,mband_cprj,dtset%mkmem,mpi_enreg,& 1488 & dtset%nkpt,my_nspinor,dtset%nsppol,occ,& 1489 & paw_dmft,paw_ij,pawang,pawtab,psps,usecprj_local,dtfil%unpaw) 1490 call timab(620,2,tsec) 1491 call flush_unit(std_out) 1492 1493 1494 ! == solve dmft loop 1495 call xmpi_barrier(spaceComm_distrb) 1496 1497 call dmft_solve(cryst_struc,istep,dft_occup,paw_dmft,pawang,pawtab,dtset%pawprtvol) 1498 edmft=paw_dmft%edmft 1499 energies%e_paw=energies%e_paw+edmft 1500 energies%e_pawdc=energies%e_pawdc+edmft 1501 call flush_unit(std_out) 1502 ! paw_dmft%occnd(:,:,:,:,:)=0.5_dp 1503 1504 ! For compatibility with old test, do not use for calculation 1505 if(dtset%dmft_occnd_imag==0) paw_dmft%occnd(2,:,:,:,:)=zero 1506 1507 ! call print_dmft(paw_dmft,dtset%pawprtvol) 1508 ! if(dtset%paral_kgb==1) then 1509 ! write(msg,'(5a)')ch10,& 1510 !& ' Parallelization over bands is not yet compatible with self-consistency in DMFT ',ch10,& 1511 !& ' Calculation of density does not taken into account non diagonal occupations',ch10 1512 ! call wrtout(std_out,msg) 1513 ! call wrtout(ab_out,msg) 1514 !! ABI_ERROR(msg) 1515 ! if(dtset%nstep>1) then 1516 ! write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep 1517 ! ABI_ERROR(msg) 1518 ! end if 1519 ! residm=zero 1520 ! end if 1521 ! if(dtset%nspinor==2) then 1522 ! call flush_unit(ab_out) 1523 ! write(msg,'(3a)')& 1524 ! & ' Self consistent DFT+DMFT with nspinor==2 is not possible yet ',ch10,& 1525 ! & ' Calculation are restricted to nstep =1' 1526 ! ! ABI_ERROR(msg) 1527 ! if(dtset%nstep>1) then 1528 ! write(msg,'(a,i0)')' Action: use nstep=1 instead of nstep=',dtset%nstep 1529 ! ! ABI_ERROR(msg) 1530 ! endif 1531 ! end if 1532 1533 if(me_distrb==0) then 1534 call saveocc_dmft(paw_dmft) 1535 end if 1536 call destroy_dmft(paw_dmft) 1537 1538 ! == destroy crystal_t cryst_struc 1539 call cryst_struc%free() 1540 call destroy_oper(dft_occup) 1541 end if ! dmft_dftocc 1542 call timab(991,2,tsec) 1543 1544 end if ! usedmft 1545 1546 if(dtset%nbandkss/=0) then 1547 write(msg,'(a,i3,2a,i3,4a)') & 1548 " dtset%nbandkss = ",dtset%nbandkss,ch10,& 1549 " and dtset%usedmft = ",dtset%usedmft,ch10,& 1550 " a DFT loop is carried out without DMFT.",ch10,& 1551 " Only psichi's will be written at convergence of the DFT loop." 1552 call wrtout(std_out,msg) 1553 end if 1554 ! !========= DMFT call end ============================================ 1555 1556 call timab(992,1,tsec) 1557 1558 ! Compute eeig, ek,enl and grnl from the new occ, and the shared eknk,enlxnk,grnlnk 1559 energies%e_eigenvalues = zero 1560 energies%e_kinetic = zero 1561 energies%e_nlpsp_vfock = zero 1562 if (usefock) then 1563 energies%e_fock = zero 1564 if (optforces>0) fock%fock_common%forces=zero 1565 end if 1566 if (optforces>0) grnl(:)=zero 1567 if(paw_dmft%use_dmft>=1) then 1568 ebandlda = zero 1569 ebanddmft = zero 1570 ebandldatot = zero 1571 ekindmft = zero 1572 ekindmft2 = zero 1573 ekinlda = zero 1574 end if 1575 1576 ! Compute new energy terms due to non diagonal occupations and DMFT. 1577 bdtot_index=1 1578 do isppol=1,dtset%nsppol 1579 do ikpt=1,dtset%nkpt 1580 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1581 do iband=1,nband_k 1582 1583 locc_test = abs(occ(bdtot_index))>tol8 1584 ! dmft 1585 if(paw_dmft%use_dmft>=1.and.dtset%nbandkss==0) then 1586 if(paw_dmft%band_in(iband)) then 1587 if( paw_dmft%use_dmft == 1 .and. dmft_dftocc == 1 ) then ! test of the code 1588 paw_dmft%occnd(1,iband,iband,ikpt,isppol)=occ(bdtot_index) 1589 end if 1590 locc_test = abs(paw_dmft%occnd(1,iband,iband,ikpt,isppol))+& 1591 & abs(paw_dmft%occnd(2,iband,iband,ikpt,isppol))>tol8 1592 end if 1593 end if 1594 1595 if (locc_test) then 1596 ! dmft 1597 if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then 1598 ebandldatot=ebandldatot+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1599 if(paw_dmft%band_in(iband)) then 1600 ebandlda=ebandlda+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1601 ekinlda=ekinlda+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1602 occ(bdtot_index)=paw_dmft%occnd(1,iband,iband,ikpt,isppol) 1603 ebanddmft=ebanddmft+dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1604 ekindmft=ekindmft+dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1605 end if 1606 end if 1607 1608 energies%e_eigenvalues = energies%e_eigenvalues + & 1609 & dtset%wtk(ikpt)*occ(bdtot_index)*eigen(bdtot_index) 1610 energies%e_kinetic = energies%e_kinetic + & 1611 & dtset%wtk(ikpt)*occ(bdtot_index)*eknk(bdtot_index) 1612 energies%e_nlpsp_vfock = energies%e_nlpsp_vfock + & 1613 & dtset%wtk(ikpt)*occ(bdtot_index)*enlxnk(bdtot_index) 1614 if (usefock) then 1615 energies%e_fock=energies%e_fock + half*focknk(bdtot_index)*occ(bdtot_index)*dtset%wtk(ikpt) 1616 if (optforces>0) fock%fock_common%forces(:,:)=fock%fock_common%forces(:,:)+& 1617 & dtset%wtk(ikpt)*occ(bdtot_index)*fockfornk(:,:,bdtot_index) 1618 end if 1619 if (optforces>0) grnl(:)=grnl(:)+dtset%wtk(ikpt)*occ(bdtot_index)*grnlnk(:,bdtot_index) 1620 end if 1621 bdtot_index=bdtot_index+1 1622 if(paw_dmft%use_dmft==1.and.dtset%nbandkss==0) then 1623 do iband1=1,nband_k 1624 if(paw_dmft%band_in(iband).and.paw_dmft%band_in(iband1)) then 1625 ! write(std_out,*) "II+", isppol,ikpt,iband,iband1 1626 ekindmft2=ekindmft2 + dtset%wtk(ikpt)*paw_dmft%occnd(1,iband,iband1,ikpt,isppol)*& 1627 & eknk_nd(isppol,ikpt,1,iband,iband1) 1628 ekindmft2=ekindmft2 - dtset%wtk(ikpt)*paw_dmft%occnd(2,iband,iband1,ikpt,isppol)*& 1629 & eknk_nd(isppol,ikpt,2,iband,iband1) 1630 ! write(std_out,*) "II", occnd(1,iband,iband1,ikpt,isppol),eknk_nd(isppol,ikpt,iband,iband1) 1631 end if 1632 end do 1633 end if 1634 end do 1635 end do 1636 end do 1637 1638 if(paw_dmft%use_dmft==1) then 1639 energies%e_kinetic = energies%e_kinetic -ekindmft+ekindmft2 1640 if(abs(dtset%pawprtvol)>=2) then 1641 write(msg,'(4a,7(2x,a,2x,e14.7,a),a)') & 1642 "-----------------------------------------------",ch10,& 1643 "--- Energy for DMFT and tests (in Ha) ",ch10,& 1644 "--- Ebandldatot (Ha.) = ",ebandldatot,ch10,& 1645 "--- Ebandlda (Ha.) = ",ebandlda,ch10,& 1646 "--- Ebanddmft (Ha.) = ",ebanddmft,ch10,& 1647 "--- Ekinlda (Ha.) = ",ekinlda,ch10, & 1648 "--- Ekindmftdiag (Ha.) = ",ekindmft,ch10,& 1649 "--- Ekindmftnondiag(Ha.) = ",ekindmft2,ch10,& 1650 "--- Edmft= (Ha.) = ",edmft,ch10,& 1651 "-----------------------------------------------" 1652 call wrtout(std_out,msg) 1653 end if 1654 ! if(paw_dmft%use_dmft==1.and.mpi_enreg%paral_kgb==1) paw_dmft%use_dmft=0 1655 end if 1656 1657 ABI_NVTX_START_RANGE(NVTX_MKRHO) 1658 if (psps%usepaw==0) then 1659 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1660 & rhog,rhor,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,& 1661 & extfpmd=extfpmd) 1662 else 1663 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1664 & rhowfg,rhowfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,& 1665 & extfpmd=extfpmd) 1666 end if 1667 ABI_NVTX_END_RANGE() 1668 call timab(992,2,tsec) 1669 1670 ! Treat fixed occupation numbers or non-self-consistent case 1671 else 1672 1673 if (mpi_enreg%nproc_spkpt>1) then 1674 1675 call timab(989,1,tsec) 1676 1677 nbuf=2*mbdkpsp+dtset%nfft*dtset%nspden+4+3*natom*optforces 1678 ! If Hartree-Fock calculation, the exact exchange energy is k-dependent. 1679 if(dtset%usefock==1) then 1680 nbuf=nbuf+1 1681 if (optforces>0) nbuf=nbuf+3*natom 1682 end if 1683 if(iscf==-1 .or. iscf==-2)nbuf=2*mbdkpsp 1684 ABI_MALLOC(buffer1,(nbuf)) 1685 ! Pack eigen,resid,rho[wf]r,grnl,enl,ek 1686 buffer1(1:mbdkpsp)=eigen(:) 1687 buffer1(1+mbdkpsp:2*mbdkpsp)=resid(:) 1688 index1=2*mbdkpsp 1689 if(iscf/=-1 .and. iscf/=-2)then 1690 if (psps%usepaw==0) then 1691 buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhor,(/dtset%nfft*dtset%nspden/)) 1692 else 1693 buffer1(index1+1:index1+dtset%nfft*dtset%nspden)=reshape(rhowfr,(/dtset%nfft*dtset%nspden/)) 1694 end if 1695 index1=index1+dtset%nfft*dtset%nspden 1696 buffer1(index1+1) = energies%e_kinetic 1697 buffer1(index1+2) = energies%e_eigenvalues 1698 buffer1(index1+3) = energies%e_nlpsp_vfock 1699 buffer1(index1+4) = energies%e_nucdip 1700 index1=index1+4 1701 ! If Hartree-Fock calculation, save e_fock in buffer1 1702 if (dtset%usefock==1) then 1703 buffer1(index1+1) = energies%e_fock 1704 index1=index1+1 1705 if (optforces>0)then 1706 buffer1(index1+1:index1+3*natom)=reshape(fock%fock_common%forces,(/3*natom/)) 1707 index1=index1+3*natom 1708 end if 1709 end if 1710 if (optforces>0) buffer1(index1+1:index1+3*natom)=grnl(1:3*natom) 1711 end if 1712 1713 ! Build sum of everything 1714 call timab(48,1,tsec) 1715 call xmpi_sum(buffer1,nbuf,mpi_enreg%comm_kpt ,ierr) 1716 call timab(48,2,tsec) 1717 1718 ! Unpack the final result 1719 eigen(:)=buffer1(1:mbdkpsp) 1720 resid(:)=buffer1(1+mbdkpsp:2*mbdkpsp) 1721 index1=2*mbdkpsp 1722 if(iscf/=-1 .and. iscf/=-2)then 1723 if (psps%usepaw==0) then 1724 ii=1 1725 do ispden=1,dtset%nspden 1726 do ifft=1,dtset%nfft 1727 rhor(ifft,ispden)=buffer1(index1+ii) 1728 ii=ii+1 1729 end do 1730 end do 1731 else 1732 ii=1 1733 do ispden=1,dtset%nspden 1734 do ifft=1,dtset%nfft 1735 rhowfr(ifft,ispden)=buffer1(index1+ii) 1736 ii=ii+1 1737 end do 1738 end do 1739 end if 1740 index1=index1+dtset%nfft*dtset%nspden 1741 energies%e_kinetic = buffer1(index1+1) 1742 energies%e_eigenvalues = buffer1(index1+2) 1743 energies%e_nlpsp_vfock = buffer1(index1+3) 1744 energies%e_nucdip = buffer1(index1+4) 1745 index1=index1+4 1746 ! If Hartree-Fock calculation, save e_fock in buffer1 1747 if (dtset%usefock==1) then 1748 energies%e_fock = buffer1(index1+1) 1749 index1=index1+1 1750 if (optforces>0) then 1751 fock%fock_common%forces(:,:)=reshape(buffer1(index1+1:index1+3*natom),(/3,natom/)) 1752 index1=index1+3*natom 1753 end if 1754 end if 1755 if (optforces>0) grnl(1:3*natom)=buffer1(index1+1:index1+3*natom) 1756 end if 1757 ABI_FREE(buffer1) 1758 call timab(989,2,tsec) 1759 1760 end if ! nproc_spkpt>1 1761 1762 ! Compute the highest occupied eigenenergy 1763 if(iscf/=-1 .and. iscf/=-2)then 1764 call timab(993,1,tsec) 1765 energies%e_fermie = -huge(one) 1766 if (dtset%occopt==9) then 1767 energies%e_fermih = -huge(one) 1768 end if 1769 bdtot_index=1 1770 do isppol=1,dtset%nsppol 1771 do ikpt=1,dtset%nkpt 1772 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1773 if (dtset%occopt/=9) then 1774 do iband=1,nband_k 1775 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then 1776 energies%e_fermie=eigen(bdtot_index) 1777 end if 1778 bdtot_index=bdtot_index+1 1779 end do 1780 else 1781 ! In case occopt 9, computing the fermi level for the electrons remaining in the VB = fermi level of holes 1782 do iband=1,dtset%ivalence 1783 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermih+tol10) then 1784 energies%e_fermih=eigen(bdtot_index) 1785 end if 1786 bdtot_index=bdtot_index+1 1787 end do 1788 ! In case occopt 9, computing the fermi level for the electrons thermalized in the conduction bands 1789 do iband=dtset%ivalence+1,nband_k 1790 if(abs(occ(bdtot_index))>tol8 .and. eigen(bdtot_index)>energies%e_fermie+tol10) then 1791 energies%e_fermie=eigen(bdtot_index) 1792 end if 1793 bdtot_index=bdtot_index+1 1794 end do 1795 end if 1796 end do 1797 end do 1798 call xmpi_max(energies%e_fermie,spaceComm_distrb,ierr) 1799 if (dtset%occopt == 9) then 1800 call xmpi_max(energies%e_fermih,spaceComm_distrb,ierr) 1801 end if 1802 call timab(993,2,tsec) 1803 end if 1804 1805 ! If needed, compute rhog, and symmetrizes the density 1806 if (iscf > 0 .or. iscf==-3 ) then 1807 ! energies%e_fermie=zero ! Actually, should determine the maximum of the valence band XG20020802 1808 nfftot=dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3) 1809 1810 call timab(994,1,tsec) 1811 if (psps%usepaw==0) then 1812 call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,& 1813 & dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons) 1814 else 1815 call symrhg(1,gprimd,irrzon,mpi_enreg,dtset%nfft,nfftot,dtset%ngfft,dtset%nspden,& 1816 & dtset%nsppol,dtset%nsym,phnons,rhowfg,rhowfr,rprimd,dtset%symafm,dtset%symrel,dtset%tnons) 1817 end if 1818 call timab(994,2,tsec) 1819 ! We now have both rho(r) and rho(G), symmetrized, and if dtset%nsppol=2 1820 ! we also have the spin-up density, symmetrized, in rhor(:,2). 1821 end if 1822 1823 end if ! End of test on varying or fixed occupation numbers 1824 1825 call timab(994,1,tsec) 1826 1827 ! Compute the kinetic energy density 1828 if(dtset%usekden==1 .and. (iscf > 0 .or. iscf==-3 ) )then 1829 if (psps%usepaw==0) then 1830 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1831 & taug,taur,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1) 1832 else 1833 call mkrho(cg,dtset,gprimd,irrzon,kg,mcg,mpi_enreg,npwarr,occ,paw_dmft,phnons,& 1834 & tauwfg,tauwfr,rprimd,tim_mkrho,ucvol,wvl%den,wvl%wfs,option=1) 1835 end if 1836 end if 1837 1838 ABI_FREE(eknk) 1839 if (usefock) then 1840 ABI_FREE(focknk) 1841 if (optforces>0)then 1842 ABI_FREE(fockfornk) 1843 end if 1844 end if 1845 ABI_FREE(eknk_nd) 1846 ABI_FREE(grnlnk) 1847 ABI_FREE(enlxnk) 1848 1849 ! In the non-self-consistent case, print eigenvalues and residuals 1850 if(iscf<=0 .and. me_distrb==0)then 1851 option=2 ; enunit=1 ; vxcavg_dum=zero 1852 call prteigrs(eigen,enunit,energies%e_fermie,energies%e_fermih,& 1853 & dtfil%fnameabo_app_eig,ab_out,iscf,dtset%kptns,dtset%kptopt,dtset%mband,& 1854 & dtset%nband,nbdbuf_eff,dtset%nkpt,nnsclo_now,dtset%nsppol,occ,dtset%occopt,option,& 1855 & dtset%prteig,prtvol,resid,dtset%tolwfr,vxcavg_dum,dtset%wtk) 1856 end if 1857 1858 ! Find largest residual over bands, k points, and spins, except for nbdbuf highest bands 1859 ibdkpt=1 1860 residm=zero 1861 do isppol=1,dtset%nsppol 1862 do ikpt=1,dtset%nkpt 1863 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1864 if (nbdbuf_eff>=0) then 1865 nband_eff=max(1,nband_k-nbdbuf_eff) 1866 residm=max(residm,maxval(resid(ibdkpt:ibdkpt+nband_eff-1))) 1867 else if (nbdbuf_eff==-101) then 1868 residm=max(residm,maxval(occ(ibdkpt:ibdkpt+nband_k-1)*resid(ibdkpt:ibdkpt+nband_k-1))) 1869 else 1870 ABI_ERROR('Bad value of nbdbuf_eff') 1871 end if 1872 ibdkpt=ibdkpt+nband_k 1873 end do 1874 end do 1875 1876 end if !usewvl==0 1877 1878 !=================================================================== 1879 ! End of PLANE WAVES section 1880 !=================================================================== 1881 1882 !In the self-consistent case, diagnose lack of unoccupied state (for each spin and k-point). 1883 1884 !Print a warning if the number of such messages already written does not exceed mwarning. 1885 ! MG: This is not a good idea as this is a typical mistake done by beginners and we should 1886 ! keep on spamming this warning message in the log file. 1887 !mwarning=5 1888 !if(nwarning<mwarning .and. iscf>=0)then 1889 !nwarning=nwarning+1 1890 1891 if(iscf>=0)then 1892 bdtot_index=1 1893 do isppol=1,dtset%nsppol 1894 do ikpt=1,dtset%nkpt 1895 min_occ=two 1896 nband_k=dtset%nband(ikpt+(isppol-1)*dtset%nkpt) 1897 do iband=1,nband_k 1898 if(occ(bdtot_index)<min_occ)min_occ=occ(bdtot_index) 1899 bdtot_index=bdtot_index+1 1900 end do 1901 if(min_occ>0.01_dp .and. .not. associated(extfpmd))then 1902 if(dtset%nsppol==1)then 1903 write(msg, '(a,i0,3a,f7.3,5a)' )& 1904 'For k-point number: ',ikpt,',',ch10,& 1905 'The minimal occupation factor is: ',min_occ,'.',ch10,& 1906 'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,& 1907 'Action: increase slightly the number of bands.' 1908 else 1909 write(msg, '(a,i0,3a,i0,a,f7.3,5a)' )& 1910 'For k-point number: ',ikpt,', and',ch10,& 1911 'for spin polarization: ',isppol, ' the minimal occupation factor is: ',min_occ,'.',ch10,& 1912 'An adequate monitoring of convergence requires it to be at most 0.01_dp.',ch10,& 1913 'Action: increase slightly the number of bands.' 1914 end if 1915 ABI_WARNING(msg) 1916 exit ! It is enough if one lack of adequate occupation is identified, so exit. 1917 end if 1918 end do 1919 end do 1920 end if 1921 1922 ABI_NVTX_START_RANGE(NVTX_VTORHO_EXTRA) 1923 if (iscf>0.or.iscf==-3 .or. (dtset%usewvl==1 .and. iscf==0)) then 1924 1925 ! PAW: Build new rhoij quantities from new occ then symetrize them 1926 ! Compute and add the compensation density to rhowfr to get the total density 1927 if (psps%usepaw==1) then 1928 call timab(555,1,tsec) 1929 if (paral_atom) then 1930 ABI_MALLOC(pawrhoij_unsym,(natom)) 1931 call pawrhoij_inquire_dim(cplex_rhoij=cplex_rhoij,nspden_rhoij=nspden_rhoij,& 1932 & nspden=dtset%nspden,spnorb=dtset%pawspnorb,cpxocc=dtset%pawcpxocc) 1933 call pawrhoij_alloc(pawrhoij_unsym,cplex_rhoij,nspden_rhoij,dtset%nspinor,& 1934 & dtset%nsppol,dtset%typat,pawtab=pawtab,use_rhoijp=0) 1935 else 1936 pawrhoij_unsym => pawrhoij 1937 end if 1938 if (usecprj_local==1) then 1939 call pawmkrhoij(atindx,atindx1,cprj,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,dtset%mband,mband_cprj,& 1940 & mcprj_local,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,dtset%nspden,dtset%nspinor,& 1941 & dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,dtfil%unpaw,dtset%usewvl,dtset%wtk) 1942 else 1943 mcprj_tmp=my_nspinor*mband_cprj*dtset%mkmem*dtset%nsppol 1944 ABI_MALLOC(cprj_tmp,(natom,mcprj_tmp)) 1945 call pawcprj_alloc(cprj_tmp,0,gs_hamk%dimcprj) 1946 call ctocprj(atindx,cg,1,cprj_tmp,gmet,gprimd,0,0,0,dtset%istwfk,kg,dtset%kptns,& 1947 & mcg,mcprj_tmp,dtset%mgfft,dtset%mkmem,mpi_enreg,psps%mpsang,dtset%mpw,& 1948 & dtset%natom,nattyp,dtset%nband,dtset%natom,dtset%ngfft,dtset%nkpt,dtset%nloalg,& 1949 & npwarr,dtset%nspinor,dtset%nsppol,dtset%nsppol,ntypat,dtset%paral_kgb,ph1d,psps,rmet,dtset%typat,& 1950 & ucvol,dtfil%unpaw,xred,ylm,ylmgr_dum) 1951 call pawmkrhoij(atindx,atindx1,cprj_tmp,gs_hamk%dimcprj,dtset%istwfk,dtset%kptopt,& 1952 & dtset%mband,mband_cprj,mcprj_tmp,dtset%mkmem,mpi_enreg,natom,dtset%nband,dtset%nkpt,& 1953 & dtset%nspden,dtset%nspinor,dtset%nsppol,occ,dtset%paral_kgb,paw_dmft,pawrhoij_unsym,& 1954 & dtfil%unpaw,dtset%usewvl,dtset%wtk) 1955 call pawcprj_free(cprj_tmp) 1956 ABI_FREE(cprj_tmp) 1957 end if 1958 call timab(555,2,tsec) 1959 ! Build symetrized packed rhoij and compensated pseudo density 1960 cplex=1;ipert=0;idir=0;qpt(:)=zero 1961 if(dtset%usewvl==0) then 1962 call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,& 1963 & my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,& 1964 & dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,& 1965 & symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat,rhog=rhog) 1966 if (dtset%usekden==1) then 1967 ! DO WE NEED TAUG? 1968 call transgrid(1,mpi_enreg,dtset%nspden,+1,1,1,dtset%paral_kgb,pawfgr,tauwfg,taug,tauwfr,taur) 1969 end if 1970 else 1971 ! here do not pass rhog, we do not use it 1972 call pawmkrho(1,compch_fft,cplex,gprimd,idir,indsym,ipert,mpi_enreg,& 1973 & my_natom,natom,dtset%nspden,dtset%nsym,ntypat,dtset%paral_kgb,pawang,pawfgr,pawfgrtab,& 1974 & dtset%pawprtvol,pawrhoij,pawrhoij_unsym,pawtab,qpt,rhowfg,rhowfr,rhor,rprimd,dtset%symafm,& 1975 & symrec,dtset%typat,ucvol,dtset%usewvl,xred,pawnhat=nhat) 1976 ! In WVL: copy density to BigDFT object: 1977 call wvl_rho_abi2big(1,rhor,wvl%den) 1978 end if 1979 if (paral_atom) then 1980 call pawrhoij_free(pawrhoij_unsym) 1981 ABI_FREE(pawrhoij_unsym) 1982 end if 1983 end if ! psps%usepaw==1 1984 1985 if(paw_dmft%use_dmft==1) then 1986 ! == check noccmmp 1987 call setnoccmmp(1,0,rdum4,0,0,idum3,my_natom,natom,0,1,dtset%nsppol,0,ntypat,& 1988 & paw_ij,pawang,dtset%pawprtvol,pawrhoij,pawtab,rdum2,idum1,dtset%typat,0,dtset%usepawu,& 1989 & comm_atom=mpi_enreg%comm_atom,mpi_atmtab=mpi_enreg%my_atmtab) 1990 end if 1991 1992 ! Find and print minimum and maximum total electron density and locations 1993 ! Compute density residual (if required) and its squared norm 1994 if (iscf>=0) then 1995 if (psps%usepaw==0) then 1996 call prtrhomxmn(std_out,mpi_enreg,dtset%nfft,dtset%ngfft,dtset%nspden,1,rhor,ucvol=ucvol) 1997 else 1998 call prtrhomxmn(std_out,mpi_enreg,nfftf,pawfgr%ngfft,dtset%nspden,1,rhor,ucvol=ucvol) 1999 end if 2000 if (optres==1) then 2001 nvresid=rhor-nvresid 2002 call sqnorm_v(1,nfftf,nres2,dtset%nspden,optres,nvresid,mpi_comm_sphgrid=mpi_comm_sphgrid) 2003 if (dtset%usekden==1) then 2004 if (optres==1) tauresid=taur-tauresid 2005 endif 2006 end if 2007 end if 2008 2009 end if ! iscf>0 or iscf=-3 2010 ABI_NVTX_END_RANGE() 2011 2012 if(psps%usepaw==1.and.(iscf>=0.or.iscf==-3)) then 2013 ABI_FREE(rhowfr) 2014 ABI_FREE(rhowfg) 2015 if (dtset%usekden==1) then 2016 ABI_FREE(tauwfr) 2017 ABI_FREE(tauwfg) 2018 end if 2019 end if 2020 2021 call timab(994,2,tsec) 2022 2023 if (iscf==-1) then 2024 ! Eventually compute the excited states within tddft 2025 call timab(995,1,tsec) 2026 if (psps%usepaw==1) then 2027 ! In case of PAW calculation, have to transfer kxc from the fine to the coarse grid: 2028 ABI_MALLOC(cgrkxc,(dtset%nfft,nkxc)) 2029 do ikxc=1,nkxc 2030 call transgrid(1,mpi_enreg,1,-1,0,0,dtset%paral_kgb,pawfgr,rhodum,rhodum,cgrkxc(:,ikxc),kxc(:,ikxc)) 2031 end do 2032 call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,& 2033 & kg,cgrkxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,& 2034 & ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew) 2035 ABI_FREE(cgrkxc) 2036 else 2037 call tddft(cg,dtfil,dtset,eigen,etotal,gmet,gprimd,gsqcut,& 2038 & kg,kxc,dtset%mband,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,dtset%nfft,& 2039 & ngfftdiel,dtset%nkpt,nkxc,npwarr,dtset%nspinor,dtset%nsppol,occ,ucvol,wffnew) 2040 end if 2041 call timab(995,2,tsec) 2042 2043 else 2044 ! Eventually compute the susceptibility matrix and the 2045 ! dielectric matrix when istep_mix is equal to 1 or dielstrt 2046 call timab(996,1,tsec) 2047 computesusmat = dtset%testsusmat(dielop, dielstrt, istep_mix) !test if the matrix is to be computed 2048 if(computesusmat) then 2049 dielar(1)=dtset%diecut;dielar(2)=dtset%dielng 2050 dielar(3)=dtset%diemac;dielar(4)=dtset%diemix 2051 dielar(5)=dtset%diegap;dielar(6)=dtset%dielam 2052 dielar(7)=dtset%diemix;if (iscf>=10) dielar(7)=dtset%diemixmag 2053 usetimerev=1 2054 if (psps%usepaw==1.and.dtset%pawspnorb>0.and.dtset%kptopt/=1.and.dtset%kptopt/=2) usetimerev=0 2055 neglect_pawhat=1-dtset%pawsushat 2056 call suscep_stat(atindx,atindx1,cg,cprj,dielar,& 2057 & gs_hamk%dimcprj,doccde,eigen,gbound_diel,gprimd,& 2058 & irrzondiel,dtset%istwfk,kg,kg_diel,lmax_diel,& 2059 & dtset%mband,mcg,mcprj_local,mgfftdiel,dtset%mkmem,mpi_enreg,dtset%mpw,natom,dtset%nband,& 2060 & neglect_pawhat,nfftdiel,ngfftdiel,& 2061 & dtset%nkpt,npwarr,npwdiel,dtset%nspden,dtset%nspinor,dtset%nsppol,dtset%nsym,ntypat,& 2062 & occ,dtset%occopt,pawang,pawtab,phnonsdiel,ph1ddiel,rprimd,& 2063 & susmat,dtset%symafm,dtset%symrel,dtset%tnons,dtset%typat,ucvol,& 2064 & dtfil%unpaw,usecprj_local,psps%usepaw,usetimerev,dtset%wtk,ylmdiel) 2065 end if 2066 call timab(996,2,tsec) 2067 2068 end if ! end condition on iscf 2069 2070 call gs_hamk%free() 2071 2072 if (psps%usepaw==1) then 2073 if (usecprj==0) then 2074 call pawcprj_free(cprj_local) 2075 ABI_FREE(cprj_local) 2076 end if 2077 end if 2078 2079 if(dtset%usewvl==0) then 2080 ABI_FREE(EigMin) 2081 ABI_FREE(doccde) 2082 #if defined HAVE_GPU_CUDA 2083 if(dtset%gpu_option==ABI_GPU_LEGACY .or. dtset%gpu_option==ABI_GPU_KOKKOS) call gpu_finalize_ham_data() 2084 #endif 2085 end if 2086 2087 call timab(980,2,tsec) 2088 2089 DBG_EXIT("COLL") 2090 2091 contains
ABINIT/wvl_comm_eigen [ Functions ]
NAME
wvl_comm_eigen
FUNCTION
Computes occupations for the wavelet case Using BigDFT routines
INPUTS
OUTPUT
SIDE EFFECTS
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2434 subroutine wvl_comm_eigen() 2435 2436 !Arguments ------------------------------------ 2437 2438 !Local variables------------------------------- 2439 #if defined HAVE_BIGDFT 2440 integer:: ikpt,norb,shift 2441 #endif 2442 2443 ! ************************************************************************* 2444 2445 DBG_ENTER("COLL") 2446 2447 #if defined HAVE_BIGDFT 2448 if(wvlbigdft) then 2449 ! Communicates eigenvalues to all procs. 2450 ! This will print out the eigenvalues and Fermi level. 2451 call eigensystem_info(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl,0.d0,& 2452 & wvl%wfs%ks%Lzd%Glr%wfd%nvctr_c+7*wvl%wfs%ks%Lzd%Glr%wfd%nvctr_f,& 2453 & wvl%wfs%ks%orbs,wvl%wfs%ks%psi) 2454 else 2455 ! Send all eigenvalues to all procs. 2456 ! I simply communicate eigenvalues: I do not print them into screen, nor calculate Fermi-level. 2457 norb=wvl%wfs%ks%orbs%norb 2458 if (mpi_enreg%nproc_wvl > 1) then 2459 shift=1 2460 do ikpt = 1, wvl%wfs%ks%orbs%nkpts 2461 call xmpi_bcast(wvl%wfs%ks%orbs%eval(shift:shift+norb-1),wvl%wfs%ks%orbs%ikptproc(ikpt),mpi_enreg%comm_wvl,ierr) 2462 shift=shift+norb 2463 end do 2464 end if 2465 end if 2466 2467 !Copy eigenvalues from BigDFT object to "eigen" 2468 call wvl_eigen_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,eigen,2,wvl%wfs) 2469 2470 #else 2471 BIGDFT_NOTENABLED_ERROR() 2472 #endif 2473 2474 DBG_EXIT("COLL") 2475 2476 end subroutine wvl_comm_eigen 2477 2478 end subroutine vtorho
ABINIT/wvl_nscf_loop [ Functions ]
NAME
wvl_nscf_loop
FUNCTION
Non-self-consistent field cycle in Wavelets See also "wvl_nscf_loop_bigdft"
INPUTS
nnsclo= number of non-self consistent field iterations
OUTPUT
SOURCE
2109 subroutine wvl_nscf_loop() 2110 2111 !Arguments ------------------------------------ 2112 ! integer, intent(in) :: istep,mcprj,nfft,nnsclo 2113 ! real(dp), intent(inout) :: residm 2114 ! type(dataset_type), intent(in) :: dtset 2115 ! type(MPI_type), intent(in) :: mpi_enreg 2116 ! type(energies_type), intent(inout) :: energies 2117 ! type(wvl_data), intent(inout) :: wvl 2118 ! !arrays 2119 ! real(dp), intent(inout) :: xcart(3, dtset%natom) 2120 ! real(dp), dimension(6), intent(out) :: strsxc 2121 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj 2122 2123 !Local variables------------------------------- 2124 integer :: inonsc,ii 2125 integer,parameter :: iscf_=-1 !do not do a SCF cycle 2126 logical,parameter :: do_scf=.false. !do not do a SCF cycle 2127 logical,parameter :: wvlbigdft=.false. 2128 real(dp) :: dum,eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc 2129 2130 ! ************************************************************************* 2131 2132 DBG_ENTER("COLL") 2133 2134 if(nnsclo_now>0) then 2135 do inonsc=1,nnsclo_now 2136 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2137 & istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2138 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2139 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2140 call wvl_hpsitopsi(cprj,dtset,energies,inonsc,mcprj_local,mpi_enreg, & 2141 & residm,wvl,xcart) 2142 if(residm<dtset%tolwfr) exit !Exit loop if converged 2143 end do 2144 2145 else 2146 do ii=1, dtset%nline 2147 ! Direct minimization technique: no diagonalization 2148 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2149 & istep,ii,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2150 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2151 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2152 call wvl_hpsitopsi(cprj,dtset,energies,ii,mcprj_local,mpi_enreg, & 2153 & residm,wvl,xcart) 2154 if(residm<dtset%tolwfr) exit !Exit loop if converged 2155 end do 2156 end if 2157 2158 ! Update energies depending on new WF 2159 energies%e_kinetic=ekin 2160 energies%e_nlpsp_vfock=enl 2161 energies%e_exactX=eexctx 2162 energies%e_sicdc=esicdc 2163 2164 ! Eventually update energies depending on density 2165 if (dtset%iscf<10) then 2166 energies%e_localpsp=eloc 2167 energies%e_hartree=eh 2168 energies%e_xc=exc ; energies%e_xcdc=evxc 2169 else if (nnsclo_now==0) then 2170 energies%e_localpsp=eloc 2171 end if 2172 2173 ! End of nscf iterations 2174 if (do_last_ortho) then 2175 ! !Don't update energies (nscf cycle has been done); just recompute potential 2176 inonsc=nnsclo_now;if (nnsclo_now==0) inonsc=dtset%nline 2177 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc,& 2178 & istep,inonsc,iscf_,mpi_enreg%me_wvl,dtset%natom,& 2179 & nfftf,mpi_enreg%nproc_wvl,dtset%nspden,& 2180 & dum,do_scf,evxc,wvl,wvlbigdft,xcart,strsxc) 2181 end if 2182 2183 DBG_EXIT("COLL") 2184 2185 end subroutine wvl_nscf_loop
ABINIT/wvl_nscf_loop_bigdft [ Functions ]
NAME
wvl_nscf_loop_bigdft
FUNCTION
Non-self-consistent field cycle in Wavelets It follows the BigDFT scheme. See also "wvl_nscf_loop"
INPUTS
nnsclo= number of non-self consistent field iterations
OUTPUT
argout(sizeout)=description
SOURCE
2205 subroutine wvl_nscf_loop_bigdft() 2206 2207 !Arguments ------------------------------------ 2208 ! integer, intent(in) :: istep,mcprj,nfft,nnsclo 2209 ! real(dp), intent(inout) :: residm 2210 ! real(dp), intent(out) :: nres2 2211 ! type(dataset_type), intent(in) :: dtset 2212 ! type(MPI_type), intent(in) :: mpi_enreg 2213 ! type(energies_type), intent(inout) :: energies 2214 ! type(wvl_data), intent(inout) :: wvl 2215 !arrays 2216 ! real(dp), intent(inout) :: xcart(3, dtset%natom) 2217 ! real(dp), dimension(6), intent(out) :: strsxc 2218 ! type(pawcprj_type),dimension(dtset%natom,mcprj),intent(out)::cprj 2219 2220 !Local variables------------------------------- 2221 integer :: inonsc 2222 integer,parameter :: iscf_=-1 !do not do a SCF cycle 2223 logical,parameter :: do_scf=.false. !do not do a SCF cycle 2224 logical,parameter :: wvlbigdft=.true. 2225 real(dp) :: eexctx,eh,ekin,eloc,enl,esicdc,evxc,exc 2226 2227 ! ************************************************************************* 2228 2229 DBG_ENTER("COLL") 2230 2231 call wvl_hpsitopsi(cprj,dtset, energies, istep, mcprj_local,mpi_enreg, & 2232 & residm, wvl,xcart) 2233 2234 if (nnsclo_now>2) then 2235 do inonsc = 2, nnsclo_now-1 2236 call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, & 2237 & energies%e_hartree, energies%e_kinetic, energies%e_localpsp, & 2238 & energies%e_nlpsp_vfock, energies%e_sicdc, istep, inonsc, iscf_, & 2239 & mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,& 2240 & dtset%nspden, nres2, do_scf,energies%e_xcdc, & 2241 & wvl, wvlbigdft, xcart, strsxc) 2242 call wvl_hpsitopsi(cprj,dtset, energies, inonsc, mcprj_local,mpi_enreg, & 2243 & residm, wvl,xcart) 2244 end do 2245 end if 2246 2247 ! End of nscf iterations 2248 if (do_last_ortho.and.nnsclo_now<=1) then 2249 ! !Don't update energies (nscf cycle has been done); just recompute potential 2250 call wvl_psitohpsi(dtset%diemix,eexctx,exc,eh,ekin,eloc,enl,esicdc, & 2251 & istep, 1, iscf_, mpi_enreg%me_wvl, dtset%natom, nfftf, & 2252 & mpi_enreg%nproc_wvl,dtset%nspden, nres2, do_scf,evxc, & 2253 & wvl, wvlbigdft, xcart, strsxc) 2254 else if (do_last_ortho.and.nnsclo_now>1) then 2255 ! !Update energies and potential (nscf cycles are not finished) 2256 call wvl_psitohpsi(dtset%diemix, energies%e_exactX, energies%e_xc, & 2257 & energies%e_hartree,energies%e_kinetic, energies%e_localpsp, & 2258 & energies%e_nlpsp_vfock, energies%e_sicdc, istep, nnsclo_now, iscf_, & 2259 & mpi_enreg%me_wvl, dtset%natom, nfftf, mpi_enreg%nproc_wvl,& 2260 & dtset%nspden, nres2, do_scf,energies%e_xcdc, & 2261 & wvl, wvlbigdft, xcart, strsxc) 2262 end if 2263 2264 DBG_EXIT("COLL") 2265 2266 end subroutine wvl_nscf_loop_bigdft
ABINIT/wvl_occ [ Functions ]
NAME
wvl_occ
FUNCTION
Computes occupations for the wavelet case
INPUTS
OUTPUT
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2342 subroutine wvl_occ() 2343 2344 !Local variables------------------------------- 2345 real(dp):: doccde_(dtset%mband*dtset%nkpt*dtset%nsppol) 2346 ! ************************************************************************* 2347 2348 DBG_ENTER("COLL") 2349 2350 ! Compute the new occupation numbers from eigen 2351 call newocc(doccde_,eigen,energies%entropy,energies%e_fermie,energies%e_fermih,dtset%ivalence,dtset%spinmagntarget,& 2352 & dtset%mband,dtset%nband,dtset%nelect,dtset%ne_qFD,dtset%nh_qFD,dtset%nkpt,dtset%nspinor,& 2353 & dtset%nsppol,occ,dtset%occopt,prtvol,dtset%tphysel,dtset%tsmear,dtset%wtk,& 2354 & prtstm=dtset%prtstm,stmbias=dtset%stmbias) 2355 2356 ! Copy occupations and efermi to BigDFT variables 2357 call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,1,wvl%wfs) 2358 2359 #if defined HAVE_BIGDFT 2360 ! Copy Fermi level to BigDFT variable: 2361 wvl%wfs%ks%orbs%efermi=energies%e_fermie 2362 #endif 2363 2364 DBG_EXIT("COLL") 2365 2366 end subroutine wvl_occ
ABINIT/wvl_occ_bigdft [ Functions ]
NAME
wvl_occ_bigdft
FUNCTION
Computes occupations for the wavelet case Using BigDFT routines
INPUTS
OUTPUT
SIDE EFFECTS
NOTES
for the wvlbigdft case, see the routine 'wvl_occ_bigdft'
SOURCE
2388 subroutine wvl_occ_bigdft() 2389 2390 ! ************************************************************************* 2391 2392 DBG_ENTER("COLL") 2393 2394 ! Transfer occopt from ABINIT to BigDFT 2395 #if defined HAVE_BIGDFT 2396 occopt_bigdft=dtset%occopt 2397 call wvl_occopt_abi2big(occopt_bigdft,occopt_bigdft,1) 2398 2399 !Calculate occupations using BigDFT routine 2400 call evaltoocc(mpi_enreg%me_wvl, mpi_enreg%nproc_wvl, .false., & 2401 & dtset%tsmear, wvl%wfs%ks%orbs, occopt_bigdft) 2402 2403 !Pass e_fermi from BigDFT object to ABINIT variable: 2404 energies%e_fermie = wvl%wfs%ks%orbs%efermi 2405 2406 !Copy occupations from BigDFT to ABINIT variables 2407 call wvl_occ_abi2big(dtset%mband,dtset%nkpt,dtset%nsppol,occ,2,wvl%wfs) 2408 #endif 2409 2410 DBG_EXIT("COLL") 2411 2412 end subroutine wvl_occ_bigdft