TABLE OF CONTENTS
ABINIT/m_cgwf [ Modules ]
NAME
m_cgwf
FUNCTION
Conjugate-gradient eigensolver.
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group (DCA, XG, GMR, MT, MVeithen, ISouza, JIniguez) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_cgwf 23 24 use defs_basis 25 use m_errors 26 use m_xmpi 27 use m_abicore 28 use m_cgtools 29 use m_efield 30 31 use defs_abitypes, only : MPI_type 32 use m_time, only : timab 33 use m_numeric_tools, only : rhophi 34 use m_pawcprj, only : pawcprj_type, pawcprj_alloc, pawcprj_put, pawcprj_copy, & 35 pawcprj_get, pawcprj_mpi_allgather, pawcprj_free, pawcprj_symkn 36 use m_hamiltonian, only : gs_hamiltonian_type 37 use m_fock, only : fock_set_ieigen,fock_set_getghc_call 38 use m_getghc, only : getghc 39 use m_berrytk, only : smatrix 40 use m_nonlop, only : nonlop 41 use m_paw_overlap, only : smatrix_k_paw 42 use m_cgprj, only : getcprj 43 44 implicit none 45 46 private
ABINIT/make_grad_berry [ Functions ]
NAME
make_grad_berry
FUNCTION
compute gradient contribution from berry phase in finite electric field case
INPUTS
cg(2,mcg)=input wavefunctions cgq(2,mcgq) = wavefunctions at neighboring k points cprj_k(natom,nband_k*usepaw)=cprj at this k point dimlmn(natom)=lmn_size for each atom in input order dimlmn_srt(natom)=lmn_size for each atom sorted by type direc(2,npw*nspinor)=gradient vector gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k iband=index of band currently being treated icg=shift to be applied on the location of data in the array cg ikpt=number of the k-point currently being treated isppol=spin polarization currently treated natom=number of atoms in cell. mband =maximum number of bands mpw=maximum dimensioned size of npw mcg=second dimension of the cg array mcgq=second dimension of the cgq array mkgq = second dimension of pwnsfacq nkpt=number of k points mpi_enreg=information about MPI parallelization npw=number of planewaves in basis sphere at given k. nspinor=number of spinorial components of the wavefunctions (on current proc) nsppol=number of spin polarizations pwind(pwind_alloc,2,3) = array used to compute the overlap matrix smat between k-points (see initberry.f) pwind_alloc = first dimension of pwind pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) pwnsfacq(2,mkgq) = phase factors for the nearest neighbours of the current k-point (electric field, MPI //)
OUTPUT
grad_berry(2,npw*nspinor) :: contribution to gradient in finite electric field case
SIDE EFFECTS
dtefield <type(efield_type)> = variables related to Berry phase calculations (see initberry.f)
NOTES
SOURCE
2023 subroutine make_grad_berry(cg,cgq,cprj_k,detovc,dimlmn,dimlmn_srt,direc,dtefield,grad_berry,& 2024 & gs_hamk,iband,icg,ikpt,isppol,mband,mcg,mcgq,mkgq,mpi_enreg,mpw,natom,nkpt,& 2025 & npw,npwarr,nspinor,nsppol,pwind,pwind_alloc,pwnsfac,pwnsfacq) 2026 2027 !Arguments ------------------------------------ 2028 !scalars 2029 integer,intent(in) :: iband,icg,ikpt,isppol,mband,mcg,mcgq 2030 integer,intent(in) :: mkgq,mpw,natom,nkpt,npw,nspinor,nsppol,pwind_alloc 2031 type(gs_hamiltonian_type),intent(in) :: gs_hamk 2032 type(efield_type),intent(inout) :: dtefield 2033 type(MPI_type),intent(in) :: mpi_enreg 2034 2035 !arrays 2036 integer,intent(in) :: dimlmn(natom),dimlmn_srt(natom) 2037 integer,intent(in) :: npwarr(nkpt),pwind(pwind_alloc,2,3) 2038 real(dp),intent(in) :: cg(2,mcg),cgq(2,mcgq) 2039 real(dp),intent(inout) :: direc(2,npw*nspinor) 2040 real(dp),intent(in) :: pwnsfac(2,pwind_alloc),pwnsfacq(2,mkgq) 2041 real(dp),intent(out) :: detovc(2,2,3),grad_berry(2,npw*nspinor) 2042 type(pawcprj_type),intent(in) :: cprj_k(natom,dtefield%mband_occ*gs_hamk%usepaw*dtefield%nspinor) 2043 2044 !Local variables------------------------------- 2045 !scalars 2046 integer :: choice,cpopt,ddkflag,dimenlr1,iatom,icg1,icp2,idum1 2047 integer :: idir,ifor,ikgf,ikptf,ikpt2,ikpt2f,ipw,i_paw_band,ispinor,itrs,itypat,job 2048 integer :: klmn,mcg1_k,mcg_q,nbo,npw_k2,nspinortot,paw_opt,shiftbd,signs 2049 real(dp) :: fac 2050 character(len=500) :: message 2051 !arrays 2052 integer :: pwind_k(npw),sflag_k(dtefield%mband_occ) 2053 real(dp) :: cg1_k(2,npw*nspinor),dtm_k(2),pwnsfac_k(4,mpw) 2054 real(dp) :: smat_k(2,dtefield%mband_occ,dtefield%mband_occ) 2055 real(dp) :: smat_inv(2,dtefield%mband_occ,dtefield%mband_occ),svectout_dum(2,0) 2056 real(dp) :: dummy_enlout(0) 2057 real(dp),allocatable :: cgq_k(:,:),enl_rij(:,:,:,:),grad_berry_ev(:,:) 2058 real(dp),allocatable :: qijbkk(:,:,:,:),smat_k_paw(:,:,:) 2059 ! type(pawcprj_type) :: cprj_dum(1,1) ! was used in on-site dipole, now suppressed 2060 ! 15 June 2012 J Zwanziger 2061 type(pawcprj_type),allocatable :: cprj_kb(:,:),cprj_band_srt(:,:) 2062 type(pawcprj_type),allocatable :: cprj_fkn(:,:),cprj_ikn(:,:) 2063 2064 2065 ! ********************************************************************* 2066 2067 !DBG_ENTER("COLL") 2068 2069 nbo = dtefield%mband_occ 2070 2071 !allocations 2072 2073 !Electric field: compute the gradient of the Berry phase part of the energy functional. 2074 !See PRL 89, 117602 (2002) [[cite:Souza2002]], grad_berry(:,:) is the second term of Eq. (4) 2075 grad_berry(:,:) = zero 2076 job = 11 ; shiftbd = 1 2077 mcg_q = mpw*mband*nspinor 2078 mcg1_k = npw*nspinor 2079 2080 if (gs_hamk%usepaw /= 0) then 2081 dimenlr1 = gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2 2082 ABI_MALLOC(qijbkk,(dimenlr1,natom,nspinor**2,2)) 2083 ABI_MALLOC(enl_rij,(nspinor*dimenlr1,natom,nspinor**2,1)) 2084 ABI_MALLOC(smat_k_paw,(2,nbo,nbo)) 2085 ABI_MALLOC(grad_berry_ev,(2,npw*nspinor)) 2086 enl_rij = zero 2087 qijbkk = zero 2088 smat_k_paw = zero 2089 ABI_MALLOC(cprj_kb,(natom,nbo*nspinor)) 2090 call pawcprj_alloc(cprj_kb,0,dimlmn) 2091 ABI_MALLOC(cprj_band_srt,(natom,nspinor)) 2092 call pawcprj_alloc(cprj_band_srt,0,dimlmn_srt) 2093 if (nkpt /= dtefield%fnkpt) then 2094 ABI_MALLOC(cprj_fkn,(natom,nbo*nspinor)) 2095 ABI_MALLOC(cprj_ikn,(natom,nbo*nspinor)) 2096 call pawcprj_alloc(cprj_fkn,0,dimlmn) 2097 call pawcprj_alloc(cprj_ikn,0,dimlmn) 2098 else 2099 ABI_MALLOC(cprj_fkn,(0,0)) 2100 ABI_MALLOC(cprj_ikn,(0,0)) 2101 end if 2102 else 2103 ABI_MALLOC(qijbkk,(0,0,0,0)) 2104 ABI_MALLOC(enl_rij,(0,0,0,0)) 2105 ABI_MALLOC(smat_k_paw,(0,0,0)) 2106 ABI_MALLOC(grad_berry_ev,(0,0)) 2107 ABI_MALLOC(cprj_kb,(0,0)) 2108 ABI_MALLOC(cprj_band_srt,(0,0)) 2109 ABI_MALLOC(cprj_fkn,(0,0)) 2110 ABI_MALLOC(cprj_ikn,(0,0)) 2111 end if 2112 2113 ikptf = dtefield%i2fbz(ikpt) 2114 ikgf = dtefield%fkgindex(ikptf) ! this is the shift for pwind 2115 2116 do idir = 1, 3 2117 ! skip idir values for which efield_dot(idir)=0 2118 if (abs(dtefield%efield_dot(idir)) < tol12) cycle 2119 ! Implicitly, we use the gradient multiplied by the number of k points in the FBZ 2120 fac = dtefield%efield_dot(idir)*dble(dtefield%fnkpt)/& 2121 & (dble(dtefield%nstr(idir))*four_pi) 2122 do ifor = 1, 2 2123 ! Handle dtefield%i2fbz properly and ask whether t.r.s. is used 2124 ikpt2f = dtefield%ikpt_dk(ikptf,ifor,idir) 2125 if (dtefield%indkk_f2ibz(ikpt2f,6) == 1) then 2126 itrs = 10 2127 else 2128 itrs = 0 2129 end if 2130 ikpt2 = dtefield%indkk_f2ibz(ikpt2f,1) 2131 npw_k2 = npwarr(ikpt2) 2132 ABI_MALLOC(cgq_k,(2,nbo*nspinor*npw_k2)) 2133 pwind_k(1:npw) = pwind(ikgf+1:ikgf+npw,ifor,idir) 2134 pwnsfac_k(1:2,1:npw) = pwnsfac(1:2,ikgf+1:ikgf+npw) 2135 sflag_k(:) = dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir) 2136 smat_k(:,:,:) = dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir) 2137 if (mpi_enreg%nproc_cell > 1) then 2138 icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt) 2139 cgq_k(:,1:nbo*nspinor*npw_k2) = & 2140 & cgq(:,icg1+1:icg1+nbo*nspinor*npw_k2) 2141 idum1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt) 2142 pwnsfac_k(3:4,1:npw_k2) = pwnsfacq(1:2,idum1+1:idum1+npw_k2) 2143 else 2144 icg1 = dtefield%cgindex(ikpt2,isppol) 2145 cgq_k(:,1:nbo*nspinor*npw_k2) = & 2146 & cg(:,icg1+1:icg1+nbo*nspinor*npw_k2) 2147 idum1 = dtefield%fkgindex(ikpt2f) 2148 pwnsfac_k(3:4,1:npw_k2) = pwnsfac(1:2,idum1+1:idum1+npw_k2) 2149 end if 2150 if (gs_hamk%usepaw == 1) then 2151 icp2=nbo*(ikpt2-1)*nspinor 2152 call pawcprj_get(gs_hamk%atindx1,cprj_kb,dtefield%cprj,natom,1,icp2,ikpt,0,isppol,& 2153 & nbo,dtefield%fnkpt,natom,nbo,nbo,nspinor,nsppol,0,& 2154 & mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb) 2155 if (ikpt2 /= ikpt2f) then ! construct cprj_kb by symmetry 2156 call pawcprj_copy(cprj_kb,cprj_ikn) 2157 call pawcprj_symkn(cprj_fkn,cprj_ikn,dtefield%atom_indsym,dimlmn,-1,gs_hamk%indlmn,& 2158 & dtefield%indkk_f2ibz(ikpt2f,2),dtefield%indkk_f2ibz(ikpt2f,6),& 2159 & dtefield%fkptns(:,dtefield%i2fbz(ikpt2)),& 2160 & dtefield%lmax,dtefield%lmnmax,mband,natom,nbo,nspinor,& 2161 & dtefield%nsym,gs_hamk%ntypat,gs_hamk%typat,dtefield%zarot) 2162 call pawcprj_copy(cprj_fkn,cprj_kb) 2163 end if 2164 call smatrix_k_paw(cprj_k,cprj_kb,dtefield,idir,ifor,mband,natom,smat_k_paw,gs_hamk%typat) 2165 end if 2166 2167 icg1 = 0 ; ddkflag = 1 2168 call smatrix(cg,cgq_k,cg1_k,ddkflag,dtm_k,icg,icg1,itrs,& 2169 & job,iband,mcg,mcg_q,mcg1_k,iband,mpw,nbo,dtefield%nband_occ(isppol),& 2170 & npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,& 2171 & shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw) 2172 ABI_FREE(cgq_k) 2173 detovc(:,ifor,idir) = dtm_k(:) !store the determinant of the overlap 2174 if (sqrt(dtm_k(1)*dtm_k(1) + dtm_k(2)*dtm_k(2)) < tol12) then 2175 write(message,'(3a,i5,a,i3,a,a,a)') & 2176 & ' (electric field)',ch10,& 2177 & ' For k-point #',ikpt,' and band # ',iband,',',ch10,& 2178 & ' the determinant of the overlap matrix is found to be 0. Fixing...' 2179 ! REC try this: 2180 write(std_out,*)message,dtm_k(1:2) 2181 if(abs(dtm_k(1))<=1d-12)dtm_k(1)=1d-12 2182 if(abs(dtm_k(2))<=1d-12)dtm_k(2)=1d-12 2183 write(std_out,*)' Changing to:',dtm_k(1:2) 2184 ! REC ABI_BUG(message) 2185 end if 2186 2187 if (gs_hamk%usepaw == 1) then 2188 ! this loop applies discretized derivative of projectors 2189 ! note that qijb_kk is sorted by input atom order, but nonlop wants it sorted by type 2190 do iatom = 1, natom 2191 itypat = gs_hamk%typat(gs_hamk%atindx1(iatom)) 2192 do klmn = 1, dtefield%lmn2_size(itypat) 2193 ! note: D_ij-like terms have 4 spinor components: 11, 22, 12, and 21. Here the qijb is diagonal 2194 ! in spin space so only the first two are nonzero and they are equal 2195 do ispinor = 1, nspinor 2196 qijbkk(klmn,iatom,ispinor,1) = dtefield%qijb_kk(1,klmn,gs_hamk%atindx1(iatom),idir) 2197 qijbkk(klmn, iatom,ispinor,2) = dtefield%qijb_kk(2,klmn,gs_hamk%atindx1(iatom),idir) 2198 if (ifor > 1) qijbkk(klmn,iatom,ispinor,2) = -qijbkk(klmn,iatom,ispinor,2) 2199 end do 2200 end do ! end loop over lmn2_size 2201 end do ! end loop over natom 2202 2203 choice = 1 2204 signs = 2 2205 paw_opt = 1 2206 cpopt = 2 ! use cprj_kb in memory 2207 nspinortot=min(2,nspinor*(1+mpi_enreg%paral_spinor)) 2208 do i_paw_band = 1, nbo 2209 2210 call pawcprj_get(gs_hamk%atindx,cprj_band_srt,cprj_kb,natom,i_paw_band,0,ikpt,1,& 2211 & isppol,nbo,1,natom,1,nbo,nspinor,nsppol,0,& 2212 & mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb) 2213 2214 ! Pass dummy_enlout to avoid aliasing (enl, enlout) 2215 call nonlop(choice,cpopt,cprj_band_srt,dummy_enlout,gs_hamk,idir,(/zero/),mpi_enreg,1,0,& 2216 & paw_opt,signs,svectout_dum,0,direc,grad_berry_ev,enl=qijbkk) 2217 2218 ! Add i*fac*smat_inv(i_paw_band,iband)*grad_berry_ev to the gradient 2219 do ipw = 1, npw*nspinor 2220 2221 grad_berry(1,ipw) = grad_berry(1,ipw) - & 2222 & fac*(smat_inv(2,i_paw_band,iband)*grad_berry_ev(1,ipw) + & 2223 & smat_inv(1,i_paw_band,iband)*grad_berry_ev(2,ipw)) 2224 2225 grad_berry(2,ipw) = grad_berry(2,ipw) + & 2226 & fac*(smat_inv(1,i_paw_band,iband)*grad_berry_ev(1,ipw) - & 2227 & smat_inv(2,i_paw_band,iband)*grad_berry_ev(2,ipw)) 2228 2229 end do 2230 end do 2231 end if ! end if PAW 2232 2233 ! Add i*fac*cg1_k to the gradient 2234 do ipw = 1, npw*nspinor 2235 grad_berry(1,ipw) = grad_berry(1,ipw) - fac*cg1_k(2,ipw) 2236 grad_berry(2,ipw) = grad_berry(2,ipw) + fac*cg1_k(1,ipw) 2237 end do 2238 fac = -1._dp*fac 2239 dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir) = sflag_k(:) 2240 dtefield%sflag(iband,ikpt+(isppol-1)*nkpt,ifor,idir) = 0 2241 dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir) = smat_k(:,:,:) 2242 end do ! ifor 2243 2244 ! if (gs_hamk%usepaw == 1) then 2245 ! ! call nonlop to apply on-site dipole <EV> part to direc 2246 ! ! note that rij is sorted by input atom order, but nonlop wants it sorted by type 2247 ! do iatom = 1, natom 2248 ! itypat = gs_hamk%typat(gs_hamk%atindx1(iatom)) 2249 ! do klmn = 1, dtefield%lmn2_size(itypat) 2250 ! ! note: D_ij-like terms have 4 spinor components: 11, 22, 12, and 21. Here the enl_rij is diagonal 2251 ! ! in spin space so only the first two are nonzero and they are equal 2252 ! do ispinor = 1, nspinor 2253 ! if (nspinor == 1) then 2254 ! enl_rij(klmn,iatom,ispinor) = dtefield%rij(klmn,itypat,idir) 2255 ! else 2256 ! enl_rij(2*klmn-1,iatom,ispinor) = dtefield%rij(klmn,itypat,idir) 2257 ! end if 2258 ! end do 2259 ! end do ! end loop over lmn2_size 2260 ! end do ! end loop over natom 2261 ! cpopt = -1 ! compute cprj inside nonlop because we do not have them for direc 2262 ! call nonlop(choice,cpopt,cprj_dum,dummy_enlout,gs_hamk,idir,zero,mpi_enreg,1,0,& 2263 ! & paw_opt,signs,svectout_dum,0,direc,grad_berry_ev,enl=enl_rij) 2264 ! grad_berry(:,:) = grad_berry(:,:) - dtefield%efield_dot(idir)*grad_berry_ev(:,:)/two_pi 2265 ! end if 2266 2267 end do ! idir 2268 2269 !deallocations 2270 if(gs_hamk%usepaw /= 0) then 2271 call pawcprj_free(cprj_kb) 2272 call pawcprj_free(cprj_band_srt) 2273 if (nkpt /= dtefield%fnkpt) then 2274 call pawcprj_free(cprj_fkn) 2275 call pawcprj_free(cprj_ikn) 2276 end if 2277 end if 2278 ABI_FREE(grad_berry_ev) 2279 ABI_FREE(qijbkk) 2280 ABI_FREE(enl_rij) 2281 ABI_FREE(smat_k_paw) 2282 ABI_FREE(cprj_kb) 2283 ABI_FREE(cprj_band_srt) 2284 ABI_FREE(cprj_fkn) 2285 ABI_FREE(cprj_ikn) 2286 2287 !DBG_EXIT("COLL") 2288 2289 end subroutine make_grad_berry
m_cgwf/cgwf [ Functions ]
[ Top ] [ m_cgwf ] [ Functions ]
NAME
cgwf
FUNCTION
Update all wavefunction |C>, non self-consistently. also compute the corresponding H|C> and Vnl|C> (and S|C> if paw). Uses a conjugate-gradient algorithm. In case of paw, resolves a generalized eigenproblem using an overlap matrix (not used for norm conserving psps).
INPUTS
berryopt == 4/14: electric field is on; all other values, no field is present chkexit= if non-zero, check whether the user wishes to exit cpus = CPU time limit filnam_ds1=name of input file (used for exit checking) gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array gsc ikpt=number of the k-point inonsc=index of non self-consistent loop isppol=spin polarization currently treated mband =maximum number of bands mcg=second dimension of the cg array mcgq=second dimension of the cgq array mgsc=second dimension of the gsc array mkgq = second dimension of pwnsfacq mpi_enreg=information about MPI parallelization mpw=maximum dimensioned size of npw nband=number of bands. nbdblock=number of bands in a block nkpt=number of k points nline=number of line minimizations per band. npw=number of planewaves in basis sphere at given k. npwarr(nkpt)=number of planewaves in basis at this k point nspinor=number of spinorial components of the wavefunctions (on current proc) nsppol=number of spin polarizations ortalg=governs the choice of the algorithm for orthogonalisation. prtvol=control print volume and debugging output pwind(pwind_alloc,2,3) = array used to compute the overlap matrix smat between k-points (see initberry.f) pwind_alloc = first dimension of pwind pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations (see initberry.f) pwnsfacq(2,mkgq) = phase factors for the nearest neighbours of the current k-point (electric field, MPI //) tolrde=tolerance on the ratio of differences of energies (for the line minimisation) tolwfr=tolerance on largest wf residual use_subovl=1 if the overlap matrix is not identity in WFs subspace use_subvnlx=1 if subvnlx has to be computed wfoptalg=govern the choice of algorithm for wf optimisation (0, 1, 10 and 11 : in the present routine, usual CG algorithm ; (2 and 3 : use shifted square Hamiltonian) zshift(nband)=in case wfoptalg is 2 or 3, shift of the Hamiltonian
OUTPUT
dphase_k(3) = change in Zak phase for the current k-point in case berryopt = 4/14,6/16,7/17 (electric (displacement) field) resid(nband)=wf residual for new states=|(H-e)|C>|^2 (hartree^2) subham(nband*(nband+1))=Hamiltonian expressed in the WFs subspace subovl(nband*(nband+1)*use_subovl)=overlap matrix expressed in the WFs subspace subvnlx(nband*(nband+1)*use_subvnlx))=non-local Hamiltonian (if NCPP) plus Fock ACE operator (if usefock_ACE) expressed in the WFs subspace
SIDE EFFECTS
cg(2,mcg) at input =wavefunction <G|C band,k> coefficients for ALL bands at output same as input except that the current band, with number 'band' has been updated dtefield <type(efield_type)> = variables related to Berry phase calculations (see initberry.f) quit= if 1, proceeds to smooth ending of the job. if(gs_hamk%usepaw==1) gsc(2,mgsc)=<G|S|C band,k> coefficients for ALL bands where S is the overlap matrix (used only for paw)
NOTES
1) cg should not be filtered and normalized: it should already be OK at input ! 2) Not sure that that the generalized eigenproblem (when gs_hamk%usepaw=1) is compatible with wfoptalg=2 or 3 (use of shifted square Hamiltonian) - to be verified
SOURCE
140 subroutine cgwf(berryopt,cg,cgq,chkexit,cpus,dphase_k,dtefield,& 141 & filnam_ds1,gsc,gs_hamk,icg,igsc,ikpt,inonsc,& 142 & isppol,mband,mcg,mcgq,mgsc,mkgq,mpi_enreg,& 143 & mpw,nband,nbdblock,nkpt,nline,npw,npwarr,& 144 & nspinor,nsppol,ortalg,prtvol,pwind,& 145 & pwind_alloc,pwnsfac,pwnsfacq,quit,resid,subham,subovl,& 146 & subvnlx,tolrde,tolwfr,use_subovl,use_subvnlx,wfoptalg,zshift) 147 148 !Arguments ------------------------------------ 149 integer,intent(in) :: berryopt,chkexit,icg,igsc,ikpt,inonsc,isppol 150 integer,intent(in) :: mband,mcg,mcgq,mgsc,mkgq,mpw,nband,nbdblock,nkpt,nline 151 integer,intent(in) :: npw,nspinor,nsppol,ortalg,prtvol,pwind_alloc 152 integer,intent(in) :: use_subovl,use_subvnlx,wfoptalg 153 integer,intent(in) :: quit 154 real(dp),intent(in) :: cpus,tolrde,tolwfr 155 character(len=*),intent(in) :: filnam_ds1 156 type(MPI_type),intent(in) :: mpi_enreg 157 type(efield_type),intent(inout) :: dtefield 158 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 159 !arrays 160 integer,intent(in) :: npwarr(nkpt),pwind(pwind_alloc,2,3) 161 real(dp),intent(in) :: cgq(2,mcgq) 162 real(dp),intent(in) :: pwnsfac(2,pwind_alloc),pwnsfacq(2,mkgq),zshift(nband) 163 real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc) 164 real(dp),intent(inout) :: dphase_k(3) 165 real(dp),intent(out) :: subham(nband*(nband+1)),subovl(nband*(nband+1)*use_subovl) 166 real(dp),intent(out) :: subvnlx(nband*(nband+1)*use_subvnlx) 167 real(dp),intent(out) :: resid(nband) 168 169 !Local variables------------------------------- 170 integer,parameter :: level=113,tim_getghc=1,tim_projbd=1,type_calc=0 171 integer,save :: nskip=0 172 integer :: choice,counter,cpopt,ddkflag,dimenlc1,dimenlr1,dimenl2,iat,iatom,itypat 173 integer :: iband,ibandmin,ibandmax,me_g0 174 integer :: ibdblock,iblock,icg1,icg_shift,icp1,icp2,idir,idum1,ierr,ifor,igs,igsc_shift,ii,ikgf 175 integer :: ikpt2,ikpt2f,ikptf,iline,iproc,ipw,ispinor,istwf_k,isubh,isubo,itrs 176 integer :: job,mcg_q,me_distrb,natom,ncpgr,nblock,nproc_distrb,npw_k2 177 integer :: optekin,paw_opt,signs,shiftbd,sij_opt,spaceComm_distrb 178 integer :: useoverlap,wfopta10 179 real(dp) :: chc,costh,deltae,deold,dhc,dhd,diff,dotgg,dotgp,doti,dotr 180 real(dp) :: dphase_aux2,e0,e0_old,e1,e1_old,eval,gamma 181 real(dp) :: lam0,lamold,root,sinth,sintn,swap,tan2th,theta,thetam 182 real(dp) :: xnorm 183 logical :: gen_eigenpb, finite_field 184 character(len=500) :: message 185 integer :: hel(2,3) 186 integer,allocatable :: dimlmn(:),dimlmn_srt(:),ikptf_recv(:),pwind_k(:),sflag_k(:) 187 real(dp) :: bcut(2,3),dphase_aux1(3),dtm_k(2),phase_end(3) 188 real(dp) :: phase_init(3),tsec(2) 189 real(dp),allocatable :: cg1_k(:,:),cgq_k(:,:),conjgr(:,:),cwavef(:,:) 190 real(dp),allocatable :: detovc(:,:,:),detovd(:,:,:),direc(:,:),direc_tmp(:,:) 191 real(dp),allocatable :: gh_direc(:,:),gh_direcws(:,:),ghc(:,:),ghc_all(:,:),ghcws(:,:) 192 real(dp),allocatable :: grad_berry(:,:),grad_total(:,:),gs_direc(:,:) 193 real(dp) :: gsc_dummy(0,0) 194 real(dp),allocatable :: gvnlxc(:,:),gvnlx_direc(:,:),gvnlx_dummy(:,:) 195 real(dp),allocatable :: pcon(:),pwnsfac_k(:,:),scprod(:,:),scwavef(:,:) 196 real(dp),allocatable :: smat_inv(:,:,:),smat_k(:,:,:),smat_k_paw(:,:,:),swork(:,:),vresid(:,:),work(:,:) 197 real(dp),pointer :: kinpw(:) 198 type(pawcprj_type) :: cprj_dum(1,1) 199 type(pawcprj_type),allocatable :: cprj_k(:,:),cprj_kb(:,:) 200 type(pawcprj_type),allocatable :: cprj_direc(:,:),cprj_band_srt(:,:),cprj_gat(:,:) 201 type(pawcprj_type),allocatable :: cprj_fkn(:,:),cprj_ikn(:,:) 202 203 ! ********************************************************************* 204 205 DBG_ENTER("COLL") 206 207 !Starting the routine 208 call timab(22,1,tsec) 209 210 !Touching chkexit, cpus,filnam_ds to avoid warning for abirules. This is dirty... 211 if(chkexit<0)then 212 ABI_BUG('chkexit should be positive!') 213 end if 214 215 if(cpus<0 .and. filnam_ds1=='a')then 216 ABI_BUG('cpus should be positive!') 217 end if 218 219 !====================================================================== 220 !========= LOCAL VARIABLES DEFINITIONS AND ALLOCATIONS ================ 221 !====================================================================== 222 223 !MPI data 224 spaceComm_distrb=mpi_enreg%comm_cell 225 nproc_distrb=xmpi_comm_size(spaceComm_distrb) 226 me_distrb=mpi_enreg%me_kpt 227 me_g0 = mpi_enreg%me_g0 228 229 !if PAW, one has to solve a generalized eigenproblem (H|Psi>=Lambda.S|Psi>) 230 !else, one has to solve a classical eigenproblem (H|Psi>=Lambda.|Psi>) 231 gen_eigenpb=(gs_hamk%usepaw==1) 232 useoverlap=0;if (gen_eigenpb) useoverlap=1 233 234 !Initializations and allocations 235 isubh=1;isubo=1 236 nblock=(nband-1)/nbdblock+1 237 istwf_k=gs_hamk%istwf_k 238 wfopta10=mod(wfoptalg,10) 239 optekin=0;if (wfoptalg>=10) optekin=1 240 natom=gs_hamk%natom 241 cpopt=-1 242 kinpw => gs_hamk%kinpw_k 243 244 ABI_MALLOC(pcon,(npw)) 245 ABI_MALLOC(ghc,(2,npw*nspinor)) 246 ABI_MALLOC(gvnlxc,(2,npw*nspinor)) 247 ABI_MALLOC(conjgr,(2,npw*nspinor)) 248 ABI_MALLOC(cwavef,(2,npw*nspinor)) 249 ABI_MALLOC(direc,(2,npw*nspinor)) 250 ABI_MALLOC(scprod,(2,nband)) 251 252 ABI_MALLOC(gh_direc,(2,npw*nspinor)) 253 ABI_MALLOC(gvnlx_direc,(2,npw*nspinor)) 254 ABI_MALLOC(vresid,(2,npw*nspinor)) 255 256 if (gen_eigenpb) then 257 ABI_MALLOC(gs_direc,(2,npw*nspinor)) 258 else 259 ABI_MALLOC(gs_direc,(0,0)) 260 end if 261 262 if (gen_eigenpb) then 263 ABI_MALLOC(scwavef,(2,npw*nspinor)) 264 ABI_MALLOC(direc_tmp,(2,npw*nspinor)) 265 end if 266 267 if (gen_eigenpb.and.(inonsc==1)) then 268 ABI_MALLOC_OR_DIE(ghc_all,(2,nband*npw*nspinor), ierr) 269 end if 270 271 if (wfopta10==2.or.wfopta10==3) then 272 ABI_MALLOC(work,(2,npw*nspinor)) 273 end if 274 275 if (gen_eigenpb.and.(wfopta10==2.or.wfopta10==3)) then 276 ABI_MALLOC(swork,(2,npw*nspinor)) 277 else 278 ABI_MALLOC(swork,(0,0)) 279 end if 280 281 if (wfopta10==2 .or. wfopta10==3) then 282 ABI_MALLOC(ghcws,(2,npw*nspinor)) 283 ABI_MALLOC(gh_direcws,(2,npw*nspinor)) 284 ABI_MALLOC(gvnlx_dummy,(2,npw*nspinor)) 285 else 286 ABI_MALLOC(gvnlx_dummy,(0,0)) 287 end if 288 289 !if "generalized eigenproblem", not sure of wfoptalg=2,3 algorithms 290 if ((gen_eigenpb).and.(wfopta10==2.or.wfopta10==3)) then 291 write(message, '(a,a,a,a,a)' ) & 292 & 'Conjugate gradient algorithm not tested with',ch10,& 293 & 'wfoptalg=2 or 3 and usepaw==1 !',ch10,& 294 & 'Program will continue at your own risk...' 295 ABI_WARNING(message) 296 end if 297 298 !Electric field: definition of local variables: 299 !detovc(1:2,ifor,idir) determinant of the overlap matrix 300 !S_{nm}(k,k+dk)=<u_{n,k}|u_{m,k+dk}>, with the states at 301 !k as bras (neighbor is specified by ifor and idir) 302 !detovd same as detovc but with <u_{n,k}| replaced by 303 !<D| (search direction) in the band-th line 304 !grad_berry(1:2,ipw) Berry phase term contribution to the gradient vector 305 !hel(ifor,idir) helicity of the ellipse associated w/ (ifor,idir) 306 !bcut(ifor,idir) branch cut of the ellipse associated w/ (ifor,idir) 307 !theta_min optimal angle theta in line_minimization when electric 308 !field is on 309 !grad_total(1:2,ipw) total gradient (zero field term plus Berry phase term) 310 311 finite_field = ( (berryopt == 4) .or. (berryopt == 6) .or. (berryopt == 7) .or. & 312 & (berryopt == 14) .or. (berryopt == 16) .or. (berryopt == 17) ) 313 ncpgr = 0 ! do not think the cprj's here need gradients (no force computation in cgwf) 314 315 if (finite_field) then 316 317 ! ji : These could be a couple of new input variables (but it is OK to define them here) 318 ikptf = dtefield%i2fbz(ikpt) 319 ikgf = dtefield%fkgindex(ikptf) ! this is the shift for pwind 320 mcg_q = mpw*mband*nspinor 321 ABI_MALLOC(detovc,(2,2,3)) 322 ABI_MALLOC(detovd,(2,2,3)) 323 ABI_MALLOC(grad_berry,(2,npw*nspinor)) 324 ABI_MALLOC(cg1_k,(2,mpw)) 325 ABI_MALLOC(cgq_k,(2,mcg_q)) 326 ABI_MALLOC(grad_total,(2,npw*nspinor)) 327 ABI_MALLOC(sflag_k,(mband)) 328 ABI_MALLOC(pwind_k,(mpw)) 329 ABI_MALLOC(pwnsfac_k,(4,mpw)) 330 ABI_MALLOC(smat_k,(2,mband,mband)) 331 ABI_MALLOC(smat_inv,(2,mband,mband)) 332 ! now the special features if using PAW 333 if (gs_hamk%usepaw /= 0) then 334 ABI_MALLOC(smat_k_paw,(2,gs_hamk%usepaw*mband,gs_hamk%usepaw*mband)) 335 smat_k_paw(:,:,:) = zero 336 ! the following are arguments to nonlop used to apply the on-site dipole to direc vector 337 choice = 1 ! only apply projectors 338 paw_opt = 1 ! only apply Dij 339 signs = 2 ! apply nonlop to vector in k space 340 ! following two items are the nonlocal potential strength dij due to the on-site dipoles 341 dimenlc1 = 2*gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2 342 dimenlr1 = gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2 343 dimenl2 = natom 344 ! cprj structures for finite_field case 345 ABI_MALLOC(dimlmn,(natom)) 346 do iatom = 1, natom 347 itypat = gs_hamk%typat(iatom) 348 dimlmn(iatom)=dtefield%lmn_size(itypat) 349 end do 350 ABI_MALLOC(dimlmn_srt,(natom)) 351 iatom = 0 352 do itypat = 1, gs_hamk%ntypat 353 do iat = 1, gs_hamk%nattyp(itypat) 354 iatom = iatom + 1 355 dimlmn_srt(iatom) = dtefield%lmn_size(itypat) 356 end do 357 end do 358 ABI_MALLOC(ikptf_recv,(nproc_distrb)) 359 ABI_MALLOC(cprj_k,(natom,mband*nspinor)) 360 ABI_MALLOC(cprj_kb,(natom,mband*nspinor)) 361 ABI_MALLOC(cprj_direc,(natom,mband*nspinor)) 362 ABI_MALLOC(cprj_band_srt,(natom,nspinor)) 363 ABI_MALLOC(cprj_gat,(natom,mband*nspinor*nproc_distrb)) 364 call pawcprj_alloc(cprj_k,ncpgr,dimlmn) 365 call pawcprj_alloc(cprj_kb,ncpgr,dimlmn) 366 call pawcprj_alloc(cprj_direc,ncpgr,dimlmn) 367 call pawcprj_alloc(cprj_band_srt,ncpgr,dimlmn_srt) 368 call pawcprj_alloc(cprj_gat,ncpgr,dimlmn) 369 if (nkpt /= dtefield%fnkpt) then 370 ABI_MALLOC(cprj_fkn,(natom,mband*nspinor)) 371 ABI_MALLOC(cprj_ikn,(natom,mband*nspinor)) 372 call pawcprj_alloc(cprj_fkn,ncpgr,dimlmn) 373 call pawcprj_alloc(cprj_ikn,ncpgr,dimlmn) 374 end if 375 else 376 ABI_MALLOC(smat_k_paw,(0,0,0)) 377 ABI_MALLOC(dimlmn,(0)) 378 ABI_MALLOC(dimlmn_srt,(0)) 379 ABI_MALLOC(ikptf_recv,(0)) 380 ABI_MALLOC(cprj_k,(0,0)) 381 ABI_MALLOC(cprj_kb,(0,0)) 382 ABI_MALLOC(cprj_direc,(0,0)) 383 ABI_MALLOC(cprj_band_srt,(0,0)) 384 ABI_MALLOC(cprj_gat,(0,0)) 385 end if 386 end if ! finite_field 387 388 ! ====================================================================== 389 ! If generalized eigenproblem: has to know <g|S|c> for all 390 ! bands (for orthogonalization purpose); take benefit of this 391 ! calculation to compute <g|H|c> at the same time, and cprj_k if finite_field 392 ! ====================================================================== 393 if (gen_eigenpb.and.inonsc==1) then 394 do iblock=1,nblock 395 ibandmin=1+(iblock-1)*nbdblock 396 ibandmax=min(iblock*nbdblock,nband) 397 do iband=ibandmin,ibandmax 398 ibdblock=iband-(iblock-1)*nbdblock 399 icg_shift=npw*nspinor*(iband-1)+icg 400 igsc_shift=npw*nspinor*(iband-1)+igsc 401 402 call cg_zcopy(npw*nspinor,cg(1,1+icg_shift),cwavef) 403 404 ! Compute <g|H|c> 405 ! By setting ieigen to iband, Fock contrib. of this iband to the energy will be calculated 406 call fock_set_ieigen(gs_hamk%fockcommon,iband) 407 sij_opt=1 408 if (finite_field .and. gs_hamk%usepaw == 1) then 409 call getghc(0,cwavef,cprj_band_srt,ghc,scwavef,gs_hamk,gvnlxc,& 410 & eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc) 411 call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_k,natom,iband,0,ikpt,& 412 & 1,isppol,mband,1,natom,1,mband,dimlmn,nspinor,nsppol,0,& 413 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 414 else 415 call getghc(cpopt,cwavef,cprj_dum,ghc,scwavef,gs_hamk,gvnlxc,& 416 & eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc) 417 end if 418 419 call cg_zcopy(npw*nspinor,ghc,ghc_all(1,1+icg_shift-icg)) 420 call cg_zcopy(npw*nspinor,scwavef,gsc(1,1+igsc_shift)) 421 422 end do 423 end do 424 end if 425 426 ! Loop over blocks of bands. In the standard band-sequential algorithm, nblock=nband. 427 do iblock=1,nblock 428 counter=100*iblock*nbdblock+inonsc 429 430 ! Loop over bands in a block 431 ! This loop can be MPI-parallelized, over processors attached to the same k point 432 ibandmin=1+(iblock-1)*nbdblock 433 ibandmax=min(iblock*nbdblock,nband) 434 435 ! Big iband loop 436 do iband=ibandmin,ibandmax 437 ibdblock=iband-(iblock-1)*nbdblock 438 counter=100*iband+inonsc 439 icg_shift=npw*nspinor*(iband-1)+icg 440 igsc_shift=npw*nspinor*(iband-1)+igsc 441 442 ! ====================================================================== 443 ! ========== INITIALISATION OF MINIMIZATION ITERATIONS ================= 444 ! ====================================================================== 445 446 if (prtvol>=10) then ! Tell us what is going on: 447 write(message, '(a,i6,2x,a,i3,a)' )' --- cgwf is called for band',iband,'for',nline,' lines' 448 call wrtout(std_out,message,'PERS') 449 end if 450 451 dotgp=one 452 if (finite_field) then 453 detovc(:,:,:) = zero ; detovd(:,:,:) = zero 454 phase_init(:) = zero 455 dphase_aux1(:) = zero 456 phase_end(:) = zero 457 bcut(:,:) = zero 458 hel(:,:) = 0 459 end if 460 461 ! Extraction of the vector that is iteratively updated 462 call cg_zcopy(npw*nspinor,cg(1,1+icg_shift),cwavef) 463 464 ! If generalized eigenproblem: extraction of the overlap information 465 if (gen_eigenpb) then 466 call cg_zcopy(npw*nspinor,gsc(1,1+igsc_shift),scwavef) 467 end if 468 469 ! Normalize incoming wf (and S.wf, if generalized eigenproblem): 470 ! WARNING: It might be interesting to skip the following operation. 471 ! The associated routines should be reexamined to see whether cwavef is not already normalized. 472 if (gen_eigenpb) then 473 call dotprod_g(dotr,doti,istwf_k,npw*nspinor,2,cwavef,scwavef,me_g0,mpi_enreg%comm_spinorfft) 474 dotr=sqrt(dotr**2+doti**2); xnorm=one/sqrt(dotr) 475 call cg_zscal(npw*nspinor,(/xnorm,zero/),cwavef) 476 call cg_zscal(npw*nspinor,(/xnorm,zero/),scwavef) 477 else 478 call sqnorm_g(dotr,istwf_k,npw*nspinor,cwavef,me_g0,mpi_enreg%comm_fft) 479 xnorm=one/sqrt(abs(dotr)) 480 call cg_zscal(npw*nspinor,(/xnorm,zero/),cwavef) 481 end if 482 483 if (prtvol==-level) then 484 write(message,'(a,f14.6)')' cgwf: xnorm = ',xnorm 485 call wrtout(std_out,message,'PERS') 486 end if 487 488 ! Compute (or extract) <g|H|c> 489 if (gen_eigenpb.and.(inonsc==1)) then 490 491 !$OMP PARALLEL DO PRIVATE(ipw) 492 do ipw=1,npw*nspinor 493 ghc(1,ipw)=xnorm*ghc_all(1,ipw+icg_shift-icg) 494 ghc(2,ipw)=xnorm*ghc_all(2,ipw+icg_shift-icg) 495 end do 496 497 else 498 ! By setting ieigen to iband, Fock contrib. of this iband to the energy will be calculated 499 call fock_set_ieigen(gs_hamk%fockcommon,iband) 500 sij_opt=0 501 call getghc(cpopt,cwavef,cprj_dum,ghc,gsc_dummy,gs_hamk,gvnlxc,& 502 & eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc) 503 end if 504 505 506 ! Minimisation of the residual: compute <G|(H-zshift)^2|C iband,k> 507 if(wfopta10==2 .or. wfopta10==3) then 508 ghcws(:,:)=ghc(:,:) 509 if (gen_eigenpb) then 510 sij_opt=1 511 work(:,:)=ghc(:,:)-zshift(iband)*scwavef(:,:) 512 else 513 sij_opt=0 514 work(:,:)=ghc(:,:)-zshift(iband)*cwavef(:,:) 515 end if 516 call getghc(cpopt,work,cprj_dum,ghc,swork,gs_hamk,gvnlx_dummy,& 517 & eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc) 518 if (gen_eigenpb) then 519 ghc(:,:)=ghc(:,:)-zshift(iband)*swork(:,:) 520 else 521 ghc(:,:)=ghc(:,:)-zshift(iband)*work(:,:) 522 end if 523 end if 524 525 ! ====================================================================== 526 ! ====== BEGIN LOOP FOR A GIVEN BAND: MINIMIZATION ITERATIONS ========== 527 ! ====================================================================== 528 if(nline/=0)then 529 do iline=1,nline 530 531 ! === COMPUTE THE RESIDUAL === 532 533 ! Compute lambda = <C|H|C> or <C|(H-zshift)**2|C> 534 call dotprod_g(chc,doti,istwf_k,npw*nspinor,1,cwavef,ghc,me_g0,mpi_enreg%comm_spinorfft) 535 lam0=chc 536 537 ! Check that lam0 is decreasing on succeeding lines: 538 if (.not.finite_field) then 539 if (iline==1) then 540 lamold=lam0 541 else 542 if (lam0 > lamold+tol12) then 543 write(message, '(a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')& 544 & 'New trial energy at line ',iline,' = ',lam0,ch10,& 545 & 'is higher than former =',lamold,ch10 546 ABI_WARNING(message) 547 end if 548 lamold=lam0 549 end if 550 end if 551 552 ! Compute residual vector: 553 ! Note that vresid is precomputed to garantee cancellation of errors 554 ! and allow residuals to reach values as small as 1.0d-24 or better. 555 556 if (wfopta10<=1) then 557 eval=chc 558 if (gen_eigenpb) then 559 560 !$OMP PARALLEL DO 561 do ipw=1,npw*nspinor 562 vresid(1,ipw)=ghc(1,ipw)-chc*scwavef(1,ipw) 563 vresid(2,ipw)=ghc(2,ipw)-chc*scwavef(2,ipw) 564 end do 565 else 566 !$OMP PARALLEL DO 567 do ipw=1,npw*nspinor 568 vresid(1,ipw)=ghc(1,ipw)-chc*cwavef(1,ipw) 569 vresid(2,ipw)=ghc(2,ipw)-chc*cwavef(2,ipw) 570 end do 571 end if 572 else 573 call dotprod_g(eval,doti,istwf_k,npw*nspinor,1,cwavef,ghcws,me_g0,mpi_enreg%comm_spinorfft) 574 if (gen_eigenpb) then 575 !$OMP PARALLEL DO 576 do ipw=1,npw*nspinor 577 vresid(1,ipw)=ghcws(1,ipw)-eval*scwavef(1,ipw) 578 vresid(2,ipw)=ghcws(2,ipw)-eval*scwavef(2,ipw) 579 end do 580 else 581 !$OMP PARALLEL DO 582 do ipw=1,npw*nspinor 583 vresid(1,ipw)=ghcws(1,ipw)-eval*cwavef(1,ipw) 584 vresid(2,ipw)=ghcws(2,ipw)-eval*cwavef(2,ipw) 585 end do 586 end if 587 end if 588 589 ! Compute residual (squared) norm 590 call sqnorm_g(resid(iband),istwf_k,npw*nspinor,vresid,me_g0,mpi_enreg%comm_fft) 591 592 if (prtvol==-level) then 593 write(message,'(a,i0,2f14.6)')' cgwf: iline,eval,resid = ',iline,eval,resid(iband) 594 call wrtout(std_out,message,'PERS') 595 end if 596 597 ! ====================================================================== 598 ! ============== CHECK FOR CONVERGENCE CRITERIA ======================== 599 ! ====================================================================== 600 601 ! If residual sufficiently small stop line minimizations 602 if (resid(iband)<tolwfr) then 603 if (prtvol>=10) then 604 write(message, '(a,i4,a,i2,a,es12.4)' ) & 605 ' cgwf: band ',iband,' converged after ',iline,' line minimizations: resid =',resid(iband) 606 call wrtout(std_out,message,'PERS') 607 end if 608 nskip=nskip+(nline-iline+1) ! Number of two-way 3D ffts skipped 609 exit ! Exit from the loop on iline 610 end if 611 612 ! If user require exiting the job, stop line minimisations 613 if (quit==1) then 614 write(message, '(a,i0)' )' cgwf: user require exiting => skip update of band ',iband 615 call wrtout(std_out,message,'PERS') 616 617 nskip=nskip+(nline-iline+1) ! Number of two-way 3D ffts skipped 618 exit ! Exit from the loop on iline 619 end if 620 621 ! ====================================================================== 622 ! =========== COMPUTE THE STEEPEST DESCENT DIRECTION =================== 623 ! ====================================================================== 624 625 ! Compute the steepest descent direction 626 if (gen_eigenpb) then 627 call cg_zcopy(npw*nspinor,vresid,direc) ! Store <G|H-lambda.S|C> in direc 628 else 629 call cg_zcopy(npw*nspinor,ghc,direc) ! Store <G|H|C> in direc 630 end if 631 632 ! Electric field: compute the gradient of the Berry phase part of the energy functional. 633 ! See PRL 89, 117602 (2002) [[cite:Souza2002]], grad_berry(:,:) is the second term of Eq. (4) 634 if (finite_field) then 635 636 call make_grad_berry(cg,cgq,cprj_k,detovc,dimlmn,dimlmn_srt,direc,dtefield,grad_berry,& 637 & gs_hamk,iband,icg,ikpt,isppol,mband,mcg,mcgq,mkgq,mpi_enreg,mpw,natom,nkpt,npw,npwarr,& 638 & nspinor,nsppol,pwind,pwind_alloc,pwnsfac,pwnsfacq) 639 640 ! Add grad_berry to direc and store original gradient 641 direc(:,:) = direc(:,:) + grad_berry(:,:) 642 grad_total(:,:) = direc(:,:) 643 ! DEBUG: check that grad_berry is orthogonal to the occupied manifold at k 644 ! do jband = 1, dtefield%mband_occ 645 ! dotr = zero ; doti = zero 646 ! do ipw = 1, npw*nspinor 647 ! if(.not.gen_eigenpb) then 648 ! dotr = dotr + cg(1,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(1,ipw) + & 649 ! & cg(2,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(2,ipw) 650 ! doti = doti + cg(1,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(2,ipw) - & 651 ! & cg(2,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(1,ipw) 652 ! end if 653 ! end do 654 ! if ((abs(dotr) > tol12).or.(abs(doti) > tol12)) then 655 ! write(std_out,'(a)')'cgwf-berry : ERROR (orthogonality)' 656 ! write(std_out,'(3(2x,i3),2(5x,e16.9))')ikpt,iband,jband,dotr,doti 657 ! stop 658 ! end if 659 ! end do 660 ! ENDDEBUG 661 end if ! finite_field 662 663 ! =========== PROJECT THE STEEPEST DESCENT DIRECTION =================== 664 ! ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================ 665 666 ! The following projection over the subspace orthogonal to occupied bands 667 ! is optional. It is a bit more accurate, but doubles the number of N^3 ops. 668 ! It is done only if ortalg>=0. 669 670 ! Project the steepest descent direction: 671 ! direc(2,npw)=<G|H|Cnk> - \sum_{(i<=n)} <G|H|Cik> , normalized. 672 673 if(ortalg>=0)then 674 if (gen_eigenpb) then 675 call projbd(cg,direc,iband,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,& 676 & gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft) 677 else 678 call projbd(cg,direc,-1 ,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,& 679 & gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft) 680 end if 681 else 682 ! For negative ortalg must still project current band out of conjugate vector (unneeded if gen_eigenpb) 683 if (.not.gen_eigenpb) then 684 call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,cwavef,direc,me_g0,mpi_enreg%comm_spinorfft) 685 if(istwf_k==1)then 686 call cg_zaxpy(npw*nspinor,-(/dotr,doti/),cwavef,direc) 687 else 688 call cg_zaxpy(npw*nspinor,(/-dotr,zero/),cwavef,direc) 689 end if 690 end if 691 end if 692 693 ! For a generalized eigenpb, store the steepest descent direction 694 if (gen_eigenpb) direc_tmp=direc 695 696 ! ====================================================================== 697 ! ======== PRECONDITION THE STEEPEST DESCENT DIRECTION ================= 698 ! ====================================================================== 699 700 ! If wfoptalg>=10, the precondition matrix is kept constant during iteration ; otherwise it is recomputed 701 if (wfoptalg<10.or.iline==1) then 702 call cg_precon(cwavef,zero,istwf_k,kinpw,npw,nspinor,me_g0,optekin,pcon,direc,mpi_enreg%comm_fft) 703 704 ! Minimisation of the residual: must precondition twice 705 ! (might make only one call, with modified precon routine - might also make a shift !!!) 706 if(wfopta10==2 .or. wfopta10==3)then 707 call cg_precon(cwavef,zero,istwf_k,kinpw,npw,nspinor,me_g0,optekin,pcon,direc,mpi_enreg%comm_fft) 708 if(iline==1)then 709 !$OMP PARALLEL DO 710 do ipw=1,npw 711 pcon(ipw)=pcon(ipw)**2 712 pcon(ipw)=pcon(ipw)**2 713 end do 714 end if 715 end if 716 else 717 do ispinor=1,nspinor 718 igs=(ispinor-1)*npw 719 !$OMP PARALLEL DO 720 do ipw=1+igs,npw+igs 721 direc(1,ipw)=direc(1,ipw)*pcon(ipw-igs) 722 direc(2,ipw)=direc(2,ipw)*pcon(ipw-igs) 723 end do 724 end do 725 end if 726 727 ! ======= PROJECT THE PRECOND. STEEPEST DESCENT DIRECTION ============== 728 ! ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================ 729 ! Projecting again out all bands (not normalized). 730 call projbd(cg,direc,-1,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,& 731 & gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft) 732 733 ! ====================================================================== 734 ! ================= COMPUTE THE CONJUGATE-GRADIENT ===================== 735 ! ====================================================================== 736 737 if (finite_field) then 738 call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,grad_total,me_g0,mpi_enreg%comm_spinorfft) 739 !DEBUG (electric field) 740 !check that the dotproduct is real 741 !if (abs(doti) > tol8) then 742 !write(std_out,*) ' cgwf-berry: ERROR' 743 !write(std_out,*) ' doti = ',doti 744 !stop 745 !end if 746 !ENDDEBUG 747 else 748 if (gen_eigenpb) then 749 call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,direc_tmp,me_g0,mpi_enreg%comm_spinorfft) 750 else 751 call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,ghc,me_g0,mpi_enreg%comm_spinorfft) 752 end if 753 end if 754 755 ! MJV: added 5 Feb 2012 - causes divide by 0 on next iteration of iline 756 if (abs(dotgg) < TINY(0.0_dp)*1.e50_dp) dotgg = TINY(0.0_dp)*1.e50_dp 757 758 ! At first iteration, gamma is set to zero 759 if (iline==1) then 760 gamma=zero 761 dotgp=dotgg 762 call cg_zcopy(npw*nspinor,direc,conjgr) 763 if (prtvol==-level)then 764 write(message,'(a,es21.10e3)')' cgwf: dotgg = ',dotgg 765 call wrtout(std_out,message,'PERS') 766 end if 767 768 else 769 gamma=dotgg/dotgp 770 dotgp=dotgg 771 772 if (prtvol==-level)then 773 write(message,'(a,2es16.6)')' cgwf: dotgg,gamma = ',dotgg,gamma 774 call wrtout(std_out,message,'PERS') 775 end if 776 777 ! Note: another way to compute gamma: Polak, Ribiere no real improvement ; to be more carrefully tested 778 ! call dotprod_g(dotgg,doti,istwf_k,mpi_enreg,npw*nspinor,1,direc,direc_tmp) 779 ! !direcp must be set to zero at the beginning 780 ! direcp=direc-direcp 781 ! call dotprod_g(dotgmg,doti,istwf_k,mpi_enreg,npw*nspinor,1,direcp,direc_tmp) 782 ! direcp=direc;gamma=dotgmg/dotgp;dotgp=dotgmg 783 784 !$OMP PARALLEL DO 785 do ipw=1,npw*nspinor 786 conjgr(1,ipw)=direc(1,ipw)+gamma*conjgr(1,ipw) 787 conjgr(2,ipw)=direc(2,ipw)+gamma*conjgr(2,ipw) 788 end do 789 !call cg_zaxpby(npw*nspinor,cg_one,direc,(/gamma,zero/),conjgr) 790 end if 791 792 ! ====================================================================== 793 ! ============ PROJECTION OF THE CONJUGATED GRADIENT =================== 794 ! ====================================================================== 795 796 if (gen_eigenpb) then 797 call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,scwavef,conjgr,me_g0,mpi_enreg%comm_spinorfft) 798 else 799 call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,cwavef,conjgr,me_g0,mpi_enreg%comm_spinorfft) 800 end if 801 802 ! Project the conjugated gradient onto the current band 803 ! MG: TODO: this is an hot spot that could be rewritten with BLAS! provided 804 ! that direc --> conjgr 805 if(istwf_k==1)then 806 807 !$OMP PARALLEL DO 808 do ipw=1,npw*nspinor 809 direc(1,ipw)=conjgr(1,ipw)-(dotr*cwavef(1,ipw)-doti*cwavef(2,ipw)) 810 direc(2,ipw)=conjgr(2,ipw)-(dotr*cwavef(2,ipw)+doti*cwavef(1,ipw)) 811 end do 812 else 813 !$OMP PARALLEL DO 814 do ipw=1,npw*nspinor 815 direc(1,ipw)=conjgr(1,ipw)-dotr*cwavef(1,ipw) 816 direc(2,ipw)=conjgr(2,ipw)-dotr*cwavef(2,ipw) 817 end do 818 end if 819 820 ! In case of generalized eigenproblem, normalization of direction vector 821 ! cannot be done here (because S|D> is not known here). 822 if (.not.gen_eigenpb) then 823 call sqnorm_g(dotr,istwf_k,npw*nspinor,direc,me_g0,mpi_enreg%comm_fft) 824 xnorm=one/sqrt(abs(dotr)) 825 call cg_zscal(npw*nspinor,(/xnorm,zero/),direc) 826 xnorm=one 827 end if 828 829 ! ====================================================================== 830 ! ===== COMPUTE CONTRIBUTIONS TO 1ST AND 2ND DERIVATIVES OF ENERGY ===== 831 ! ====================================================================== 832 833 ! Compute gh_direc = <G|H|D> and eventually gs_direc = <G|S|D> 834 sij_opt=0;if (gen_eigenpb) sij_opt=1 835 836 call getghc(cpopt,direc,cprj_dum,gh_direc,gs_direc,gs_hamk,gvnlx_direc,& 837 & eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc) 838 839 if(wfopta10==2 .or. wfopta10==3)then 840 ! Minimisation of the residual, so compute <G|(H-zshift)^2|D> 841 gh_direcws(:,:)=gh_direc(:,:) 842 if (gen_eigenpb) then 843 sij_opt=1 844 work(:,:)=gh_direc(:,:)-zshift(iband)*gs_direc(:,:) 845 else 846 sij_opt=0 847 work(:,:)=gh_direc(:,:)-zshift(iband)*direc(:,:) 848 end if 849 850 call getghc(cpopt,work,cprj_dum,gh_direc,swork,gs_hamk,gvnlx_dummy,& 851 & eval,mpi_enreg,1,prtvol,0,tim_getghc,type_calc) 852 853 if (gen_eigenpb) then 854 gh_direc(:,:)=gh_direc(:,:)-zshift(iband)*swork(:,:) 855 else 856 gh_direc(:,:)=gh_direc(:,:)-zshift(iband)*work(:,:) 857 end if 858 end if 859 860 ! In case of generalized eigenproblem, compute now the norm of the conjugated gradient 861 if (gen_eigenpb) then 862 call dotprod_g(dotr,doti,istwf_k,npw*nspinor,1,direc,gs_direc,me_g0,mpi_enreg%comm_spinorfft) 863 xnorm=one/sqrt(abs(dotr)) 864 end if 865 866 ! Compute dhc = Re{<D|H|C>} 867 call dotprod_g(dhc,doti,istwf_k,npw*nspinor,1,direc,ghc,me_g0,mpi_enreg%comm_spinorfft) 868 dhc=dhc*xnorm 869 870 ! Compute <D|H|D> or <D|(H-zshift)^2|D> 871 call dotprod_g(dhd,doti,istwf_k,npw*nspinor,1,direc,gh_direc,me_g0,mpi_enreg%comm_spinorfft) 872 dhd=dhd*xnorm**2 873 874 if(prtvol==-level)then 875 write(message,'(a,3f14.6)') 'cgwf: chc,dhc,dhd=',chc,dhc,dhd 876 call wrtout(std_out,message,'PERS') 877 end if 878 879 ! ====================================================================== 880 ! ======= COMPUTE MIXING FACTORS - CHECK FOR CONVERGENCE =============== 881 ! ====================================================================== 882 883 if (.not.finite_field) then 884 ! Compute tan(2 theta),sin(theta) and cos(theta) 885 tan2th=2.0_dp*dhc/(chc-dhd) 886 887 if (abs(tan2th)<1.d-05) then 888 costh=1.0_dp-0.125_dp*tan2th**2 889 sinth=0.5_dp*tan2th*(1.0_dp-0.375_dp*tan2th**2) 890 891 ! Check that result is above machine precision 892 if (abs(sinth)<epsilon(0._dp)) then 893 write(message, '(a,es16.4)' ) ' cgwf: converged with tan2th=',tan2th 894 call wrtout(std_out,message,'PERS') 895 ! Number of one-way 3D ffts skipped 896 nskip=nskip+2*(nline-iline) 897 exit ! Exit from the loop on iline 898 end if 899 900 else 901 root=sqrt(1.0_dp+tan2th**2) 902 costh=sqrt(0.5_dp+0.5_dp/root) 903 sinth=sign(sqrt(0.5_dp-0.5_dp/root),tan2th) 904 end if 905 906 ! Check for lower of two possible roots (same sign as curvature at theta where slope is zero) 907 diff=(chc-dhd) 908 ! Swap c and d if value of diff is positive 909 if (diff>zero) then 910 swap=costh 911 costh=-sinth 912 sinth=swap 913 if(prtvol<0 .or. prtvol>=10)then 914 write(message,*)' Note: swap roots, iline,diff=',iline,diff 915 call wrtout(std_out,message,'PERS') 916 end if 917 end if 918 919 else 920 ! In case the eletric field is on, the line minimization has to be done numerically 921 922 ! Compute determinant of the overlap matrix where in the band-th line 923 ! the wavefunction is replaced by the search direction 924 job = 10 ; shiftbd = 0 925 do idir = 1, 3 926 ! do not do this for efield_dot(idir)=0 927 if (abs(dtefield%efield_dot(idir)) < tol12) cycle 928 do ifor = 1, 2 929 ikpt2f = dtefield%ikpt_dk(ikptf,ifor,idir) 930 if (dtefield%indkk_f2ibz(ikpt2f,6) == 1) then 931 itrs = 10 932 else 933 itrs = 0 934 end if 935 ikpt2 = dtefield%indkk_f2ibz(ikpt2f,1) 936 npw_k2 = npwarr(ikpt2) 937 pwind_k(1:npw) = pwind(ikgf+1:ikgf+npw,ifor,idir) 938 pwnsfac_k(1:2,1:npw) = pwnsfac(1:2,ikgf+1:ikgf+npw) 939 sflag_k(:) = dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir) 940 smat_k(:,:,:) = dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir) 941 942 if (mpi_enreg%nproc_cell > 1) then 943 icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt) 944 cgq_k(:,1:dtefield%mband_occ*nspinor*npw_k2) = & 945 & cgq(:,icg1+1:icg1+dtefield%mband_occ*nspinor*npw_k2) 946 idum1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt) 947 pwnsfac_k(3:4,1:npw_k2) = pwnsfacq(1:2,idum1+1:idum1+npw_k2) 948 else 949 icg1 = dtefield%cgindex(ikpt2,isppol) 950 cgq_k(:,1:dtefield%mband_occ*nspinor*npw_k2) = & 951 & cg(:,icg1+1:icg1+dtefield%mband_occ*nspinor*npw_k2) 952 idum1=dtefield%fkgindex(ikpt2f) 953 pwnsfac_k(3:4,1:npw_k2) = pwnsfac(1:2,idum1+1:idum1+npw_k2) 954 end if 955 956 icg1 = 0 ; ddkflag = 0 957 if (gen_eigenpb) then 958 !$OMP PARALLEL DO 959 do ipw=1,npw*nspinor 960 direc_tmp(1,ipw)=direc(1,ipw)*xnorm 961 direc_tmp(2,ipw)=direc(2,ipw)*xnorm 962 end do 963 ! need cprj corresponding to direc_tmp in order to make smat_k_paw properly 964 call getcprj(1,0,direc_tmp,cprj_band_srt,& 965 & gs_hamk%ffnl_k,0,gs_hamk%indlmn,gs_hamk%istwf_k,gs_hamk%kg_k,& 966 & gs_hamk%kpg_k,gs_hamk%kpt_k,gs_hamk%lmnmax,gs_hamk%mgfft,& 967 & mpi_enreg,gs_hamk%natom,gs_hamk%nattyp,gs_hamk%ngfft,gs_hamk%nloalg,& 968 & gs_hamk%npw_k,gs_hamk%nspinor,gs_hamk%ntypat,gs_hamk%phkxred,gs_hamk%ph1d,& 969 & gs_hamk%ph3d_k,gs_hamk%ucvol,gs_hamk%useylm) 970 971 call pawcprj_copy(cprj_k,cprj_direc) 972 call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_direc,gs_hamk%natom,iband,0,& 973 & ikpt,1,isppol,mband,1,gs_hamk%natom,1,mband,dimlmn,gs_hamk%nspinor,nsppol,0,& 974 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 975 976 ! icp1=dtefield%mband_occ*(ikptf-1) 977 icp2=mband*nspinor*(ikpt2-1) 978 call pawcprj_get(gs_hamk%atindx,cprj_kb,dtefield%cprj,natom,1,icp2,ikpt,0,isppol,& 979 & mband,dtefield%fnkpt,natom,mband,mband,nspinor,nsppol,0,& 980 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 981 982 if (ikpt2 /= ikpt2f) then ! construct cprj_kb by symmetry 983 call pawcprj_copy(cprj_kb,cprj_ikn) 984 call pawcprj_symkn(cprj_fkn,cprj_ikn,dtefield%atom_indsym,dimlmn,-1,gs_hamk%indlmn,& 985 & dtefield%indkk_f2ibz(ikpt2f,2),dtefield%indkk_f2ibz(ikpt2f,6),& 986 & dtefield%fkptns(:,dtefield%i2fbz(ikpt2)),& 987 & dtefield%lmax,dtefield%lmnmax,mband,natom,dtefield%mband_occ,nspinor,& 988 & dtefield%nsym,gs_hamk%ntypat,gs_hamk%typat,dtefield%zarot) 989 call pawcprj_copy(cprj_fkn,cprj_kb) 990 end if 991 992 call smatrix_k_paw(cprj_direc,cprj_kb,dtefield,idir,ifor,mband,natom,smat_k_paw,gs_hamk%typat) 993 994 call smatrix(direc_tmp,cgq_k,cg1_k,ddkflag,dtm_k,icg1,icg1,& 995 & itrs,job,iband,npw*nspinor,mcg_q,mpw,iband,& 996 & mpw,dtefield%mband_occ,dtefield%nband_occ(isppol),& 997 & npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,& 998 & shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw) 999 else 1000 call smatrix(direc,cgq_k,cg1_k,ddkflag,dtm_k,icg1,icg1,& 1001 & itrs,job,iband,npw*nspinor,mcg_q,mpw,iband,& 1002 & mpw,dtefield%mband_occ,dtefield%nband_occ(isppol),& 1003 & npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,& 1004 & shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw) 1005 end if 1006 detovd(:,ifor,idir) = dtm_k(:) ! Store the determinant of the overlap 1007 ! matrix (required to compute theta_min) 1008 ! DEBUG 1009 ! write(std_out,*)'cgwf-berry: detovc and detovd' 1010 ! write(std_out,*)detovc(:,ifor,idir) 1011 ! write(std_out,*)detovd(:,ifor,idir) 1012 ! write(std_out,*)'smat_k' 1013 ! do jband = 1, 4 1014 ! write(std_out,'(4(2x,e14.6))')smat_k(1,jband,:) 1015 ! write(std_out,'(4(2x,e14.6))')smat_k(2,jband,:) 1016 ! write(std_out,*) 1017 ! end do 1018 ! ENDDEBUG 1019 end do ! ifor 1020 end do ! idir 1021 1022 call linemin(bcut,chc,costh,detovc,detovd,dhc,dhd,& 1023 & dphase_aux1,dtefield%efield_dot,iline,& 1024 & dtefield%fnkpt,dtefield%nstr,hel,phase_end,& 1025 & phase_init,dtefield%sdeg,sinth,thetam) 1026 ! DEBUG 1027 ! if (mpi_enreg%me == 1) then 1028 ! write(std_out,*)'after linemin ' 1029 ! write(std_out,'(a,3(2x,f16.9))')'phase_init = ',phase_init(:) 1030 ! write(std_out,'(a,3(2x,f16.9))')'phase_end = ',phase_end(:) 1031 ! write(std_out,'(a,3(2x,f16.9))')'dphase_aux1 = ',dphase_aux1(:) 1032 ! write(std_out,*) 'thetam',thetam 1033 ! end if 1034 ! ENDDEBUG 1035 end if ! finite_field 1036 1037 ! ====================================================================== 1038 ! =========== GENERATE NEW |wf>, H|wf>, Vnl|Wf>, S|Wf> ... ============= 1039 ! ====================================================================== 1040 1041 sintn=sinth*xnorm 1042 1043 !$OMP PARALLEL DO 1044 do ipw=1,npw*nspinor 1045 cwavef(1,ipw)=cwavef(1,ipw)*costh+direc(1,ipw)*sintn 1046 cwavef(2,ipw)=cwavef(2,ipw)*costh+direc(2,ipw)*sintn 1047 end do 1048 1049 ! call cg_zaxpby(npw*nspinor,(/sintn,zero/),direc,(/costh,zero/),cwavef) 1050 call cg_zcopy(npw*nspinor,cwavef,cg(1,1+icg_shift)) 1051 1052 do ipw=1,npw*nspinor 1053 ghc(1,ipw) =ghc(1,ipw)*costh + gh_direc(1,ipw)*sintn 1054 ghc(2,ipw) =ghc(2,ipw)*costh + gh_direc(2,ipw)*sintn 1055 end do 1056 1057 1058 if (use_subvnlx==1) then 1059 !$OMP PARALLEL DO 1060 do ipw=1,npw*nspinor 1061 gvnlxc(1,ipw)=gvnlxc(1,ipw)*costh + gvnlx_direc(1,ipw)*sintn 1062 gvnlxc(2,ipw)=gvnlxc(2,ipw)*costh + gvnlx_direc(2,ipw)*sintn 1063 end do 1064 ! call cg_zaxpby(npw*nspinor,(/sintn,zero/),gvnlx_direc,(/costh,zero/),gvnlxc) 1065 end if 1066 1067 if (gen_eigenpb) then 1068 !$OMP PARALLEL DO 1069 do ipw=1,npw*nspinor 1070 scwavef(1,ipw)=scwavef(1,ipw)*costh+gs_direc(1,ipw)*sintn 1071 scwavef(2,ipw)=scwavef(2,ipw)*costh+gs_direc(2,ipw)*sintn 1072 ! gsc(1,ipw+igsc_shift)=scwavef(1,ipw) 1073 ! gsc(2,ipw+igsc_shift)=scwavef(2,ipw) 1074 end do 1075 ! call cg_zaxpby(npw*nspinor,(/sintn,zero/),gs_direc,(/costh,zero/),scwavef) 1076 call cg_zcopy(npw*nspinor,scwavef,gsc(1,1+igsc_shift)) 1077 1078 if (finite_field) then ! must update cprj for the new wavefunction 1079 call getcprj(1,0,cwavef,cprj_band_srt,& 1080 & gs_hamk%ffnl_k,0,gs_hamk%indlmn,istwf_k,gs_hamk%kg_k,gs_hamk%kpg_k,gs_hamk%kpt_k,& 1081 & gs_hamk%lmnmax,gs_hamk%mgfft,mpi_enreg,natom,gs_hamk%nattyp,& 1082 & gs_hamk%ngfft,gs_hamk%nloalg,gs_hamk%npw_k,gs_hamk%nspinor,gs_hamk%ntypat,& 1083 & gs_hamk%phkxred,gs_hamk%ph1d,gs_hamk%ph3d_k,gs_hamk%ucvol,gs_hamk%useylm) 1084 call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_k,gs_hamk%natom,iband,0,ikpt,& 1085 & 1,isppol,mband,1,gs_hamk%natom,1,mband,dimlmn,gs_hamk%nspinor,nsppol,0,& 1086 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 1087 end if 1088 end if 1089 1090 if(wfopta10==2 .or. wfopta10==3)then 1091 ! Need to keep track of ghcws, in order to avoid recomputing it 1092 !$OMP PARALLEL DO 1093 do ipw=1,npw*nspinor 1094 ghcws(1,ipw)=ghcws(1,ipw)*costh + gh_direcws(1,ipw)*sintn 1095 ghcws(2,ipw)=ghcws(2,ipw)*costh + gh_direcws(2,ipw)*sintn 1096 end do 1097 ! call cg_zaxpby(npw*nspinor,(/sintn,zero/),gh_direcws,(/costh,zero/),ghcws) 1098 end if 1099 1100 ! ====================================================================== 1101 ! =========== CHECK CONVERGENCE AGAINST TRIAL ENERGY =================== 1102 ! ====================================================================== 1103 1104 ! Compute delta(E) 1105 if (.not.finite_field) then 1106 deltae=chc*(costh**2-1._dp)+dhd*sinth**2+2._dp*costh*sinth*dhc 1107 else 1108 ! Compute deltae 1109 call etheta(bcut,chc,detovc,detovd,dhc,dhd,dtefield%efield_dot,e0,e1,& 1110 & hel,dtefield%fnkpt,dtefield%nstr,dtefield%sdeg,thetam) 1111 theta = zero 1112 1113 call etheta(bcut,chc,detovc,detovd,dhc,dhd,& 1114 & dtefield%efield_dot,e0_old,e1_old,& 1115 & hel,dtefield%fnkpt,dtefield%nstr,dtefield%sdeg,theta) 1116 deltae = e0 - e0_old 1117 ! DEBUG 1118 ! write(std_out,*) 'e0, e0_old, deltae', e0, e0_old, deltae 1119 ! ENDDEBUG 1120 ! Check that e0 is decreasing on succeeding lines: 1121 ! if (deltae > zero) then 1122 if (deltae > tol12) then ! exploring different checks for finit_field 1123 write(message, '(3a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')& 1124 & ' (electric field)',ch10,& 1125 & ' New trial energy at line',iline,' = ',e0,ch10,& 1126 & ' is higher than former:',e0_old,ch10 1127 ABI_WARNING(message) 1128 end if 1129 end if ! finite_field 1130 1131 ! Check convergence and eventually exit 1132 if (iline==1) then 1133 deold=deltae 1134 else if (abs(deltae)<tolrde*abs(deold) .and. iline/=nline .and. wfopta10<2)then 1135 if(prtvol>=10)then 1136 write(message, '(a,i4,1x,a,1p,e12.4,a,e12.4,a)' ) & 1137 & ' cgwf: line',iline,& 1138 & ' deltae=',deltae,' < tolrde*',deold,' =>skip lines' 1139 call wrtout(std_out,message,'PERS') 1140 end if 1141 nskip=nskip+2*(nline-iline) ! Number of one-way 3D ffts skipped 1142 exit ! Exit from the loop on iline 1143 end if 1144 1145 end do ! END LOOP FOR A GIVEN BAND Note that there are three "exit" instructions inside 1146 1147 ! Additional computations in case of electric field 1148 if (finite_field) then 1149 ! Bring present contribution to dphasek(idir) into [-pi,pi] 1150 do idir = 1, 3 1151 dphase_aux2 = mod(phase_end(idir) - phase_init(idir) + 100*two_pi,two_pi) 1152 if (dphase_aux2 > pi) dphase_aux2 = dphase_aux2 - two_pi 1153 ! DEBUG 1154 ! dphase_aux1(idir)=mod(dphase_aux1(idir)+100*two_pi,two_pi) 1155 ! if(dphase_aux1(idir)>pi) dphase_aux1(idir)=dphase_aux1(idir)-two_pi 1156 ! diff = dphase_aux2 - dphase_aux1(idir) 1157 ! if (abs(diff) > tol10) then 1158 ! write(std_out,*)'cgwf-berry: ERROR' 1159 ! write(std_out,'(a,3(2x,i3),f16.9)')'ikpt,iband,idir,diff',ikpt,iband,idir,diff 1160 ! stop 1161 ! end if 1162 ! write(100,*) idir, dphase_aux2 1163 ! ENDDEBUG 1164 dphase_k(idir) = dphase_k(idir) + dphase_aux2 1165 ! DEBUG 1166 ! write(std_out,*) 'idir,phase_init,phase_end,dphase_k' 1167 ! write(std_out,*) idir,phase_init(idir),phase_end(idir),dphase_k(idir) 1168 ! ENDDEBUG 1169 end do 1170 end if ! finite_field 1171 1172 else ! nline==0 , needs to provide a residual 1173 resid(iband)=-one 1174 end if ! End nline==0 case 1175 1176 ! ====================================================================== 1177 ! =============== END OF CURRENT BAND: CLEANING ======================== 1178 ! ====================================================================== 1179 1180 ! It was checked that getghc is NOT needed here : equivalent results with the copy below. 1181 if(wfopta10==2 .or. wfopta10==3) ghc(:,:)=ghcws(:,:) 1182 1183 if (finite_field) dtefield%sflag(:,ikpt + (isppol-1)*nkpt,:,:) = 0 1184 1185 ! At the end of the treatment of a set of bands, write the number of one-way 3D ffts skipped 1186 if (xmpi_paral==0 .and. mpi_enreg%paral_kgb==0 .and. iband==nband .and. prtvol/=0) then 1187 write(message,'(a,i0)')' cgwf: number of one-way 3D ffts skipped in cgwf until now =',nskip 1188 call wrtout(std_out,message,'PERS') 1189 end if 1190 1191 end do ! End big iband loop. iband in a block 1192 1193 ! ====================================================================== 1194 ! ============= COMPUTE HAMILTONIAN IN WFs SUBSPACE ==================== 1195 ! ====================================================================== 1196 call mksubham(cg,ghc,gsc,gvnlxc,iblock,icg,igsc,istwf_k,& 1197 & isubh,isubo,mcg,mgsc,nband,nbdblock,npw,& 1198 & nspinor,subham,subovl,subvnlx,use_subovl,use_subvnlx,me_g0) 1199 1200 end do ! iblock End loop over block of bands 1201 1202 if (allocated(dimlmn_srt)) then 1203 ABI_FREE(dimlmn_srt) 1204 end if 1205 1206 if (finite_field .and. gs_hamk%usepaw == 1) then ! store updated cprjs for this kpt 1207 ! switch from ikptf to ikpt 1208 ikptf = ikpt 1209 call xmpi_allgather(ikptf,ikptf_recv,spaceComm_distrb,ierr) 1210 call pawcprj_mpi_allgather(cprj_k,cprj_gat,natom,nspinor*mband,1,dimlmn,ncpgr,nproc_distrb,& 1211 & spaceComm_distrb,ierr,rank_ordered=.true.) 1212 do iproc = 1, nproc_distrb 1213 icp2=nspinor*mband*(iproc-1) 1214 call pawcprj_get(gs_hamk%atindx1,cprj_k,cprj_gat,natom,1,icp2,ikpt,0,isppol,& 1215 & mband,nproc_distrb,natom,mband,mband,nspinor,nsppol,0,& 1216 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 1217 ! ikptf = ikptf_recv(iproc) 1218 icp1 = nspinor*mband*(ikptf_recv(iproc)-1) 1219 call pawcprj_put(gs_hamk%atindx1,cprj_k,dtefield%cprj,natom,1,icp1,ikpt,0,isppol,& 1220 & mband,nkpt,natom,mband,mband,dimlmn,nspinor,nsppol,0,& 1221 & mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb) 1222 end do 1223 end if 1224 1225 ! Debugging ouputs 1226 if(prtvol==-level)then 1227 isubh=1 1228 if (use_subvnlx==1) write(message,'(a)') ' cgwf : isubh subham(isubh:isubh+1) subvnlx(isubh:isubh+1)' 1229 if (use_subvnlx==0) write(message,'(a)') ' cgwf : isubh subham(isubh:isubh+1)' 1230 do iband=1,nband 1231 do ii=1,iband 1232 if (use_subvnlx==1) then 1233 write(message,'(i5,4es16.6)')isubh,subham(isubh:isubh+1),subvnlx(isubh:isubh+1) 1234 else 1235 write(message,'(i5,2es16.6)')isubh,subham(isubh:isubh+1) 1236 end if 1237 call wrtout(std_out,message,'PERS') 1238 isubh=isubh+2 1239 end do 1240 end do 1241 end if 1242 1243 ! =================== 1244 ! FINAL DEALLOCATIONS 1245 ! =================== 1246 ABI_FREE(conjgr) 1247 ABI_FREE(cwavef) 1248 ABI_FREE(direc) 1249 ABI_FREE(pcon) 1250 ABI_FREE(scprod) 1251 ABI_FREE(ghc) 1252 ABI_FREE(gvnlxc) 1253 ABI_FREE(gh_direc) 1254 ABI_FREE(gvnlx_direc) 1255 ABI_FREE(vresid) 1256 ABI_FREE(gs_direc) 1257 1258 if (gen_eigenpb) then 1259 ABI_FREE(scwavef) 1260 ABI_FREE(direc_tmp) 1261 end if 1262 1263 if (gen_eigenpb.and.(inonsc==1)) then 1264 ABI_FREE(ghc_all) 1265 end if 1266 1267 if(wfopta10==2.or.wfopta10==3) then 1268 ABI_FREE(ghcws) 1269 ABI_FREE(gh_direcws) 1270 end if 1271 ABI_FREE(gvnlx_dummy) 1272 1273 if(wfopta10==2.or.wfopta10==3) then 1274 ABI_FREE(work) 1275 end if 1276 1277 ABI_FREE(swork) 1278 1279 if (finite_field) then 1280 ABI_FREE(cg1_k) 1281 ABI_FREE(cgq_k) 1282 ABI_FREE(detovc) 1283 ABI_FREE(detovd) 1284 ABI_FREE(grad_berry) 1285 ABI_FREE(sflag_k) 1286 ABI_FREE(smat_inv) 1287 ABI_FREE(smat_k) 1288 ABI_FREE(pwind_k) 1289 ABI_FREE(pwnsfac_k) 1290 ABI_FREE(grad_total) 1291 if (gs_hamk%usepaw /= 0) then 1292 call pawcprj_free(cprj_k) 1293 call pawcprj_free(cprj_kb) 1294 call pawcprj_free(cprj_direc) 1295 call pawcprj_free(cprj_band_srt) 1296 call pawcprj_free(cprj_gat) 1297 if (nkpt /= dtefield%fnkpt) then 1298 call pawcprj_free(cprj_fkn) 1299 call pawcprj_free(cprj_ikn) 1300 ABI_FREE(cprj_fkn) 1301 ABI_FREE(cprj_ikn) 1302 end if 1303 end if 1304 ABI_FREE(smat_k_paw) 1305 ABI_FREE(dimlmn) 1306 ABI_FREE(cprj_k) 1307 ABI_FREE(cprj_kb) 1308 ABI_FREE(cprj_direc) 1309 ABI_FREE(cprj_gat) 1310 ABI_FREE(ikptf_recv) 1311 ABI_FREE(cprj_band_srt) 1312 end if 1313 1314 ! Do not delete this line, needed to run with open MP 1315 write(unit=message,fmt=*) resid(1) 1316 1317 call timab(22,2,tsec) 1318 1319 DBG_EXIT("COLL") 1320 1321 end subroutine cgwf
m_cgwf/etheta [ Functions ]
[ Top ] [ m_cgwf ] [ Functions ]
NAME
etheta
FUNCTION
Computes the energy per unit cell and its first derivative for a given angle theta. More precisely, computes only the part of the energy that changes with theta.
INPUTS
bcut(ifor,idir) = branch cut of the ellipse associated with (ifor,idir) chc = <C|H_0|C> where |C> is the wavefunction of the current band detovc = determinant of the overlap matrix S detovd = determinant of the overlap matrix where for the band that is being updated <C| is replaced by <D| (search direction) dhc = Re[<D|H_0|C>] dhd = <D|H_0|D> efield_dot = reciprocal lattice coordinates of the electric field hel(ifor,idir) = helicity of the ellipse associated with (ifor,idir) nkpt = number of k-points nsppol = 1 for unpolarized, 2 for spin-polarized nstr(idir) = number of strings along the idir-th direction sdeg = spin degeneracy theta = value of the angle for which the energy (e0) and its derivative (e1) are computed
OUTPUT
e0 = energy for the given value of theta e1 = derivative of the energy with respect to theta
SOURCE
1680 subroutine etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,& 1681 & hel,nkpt,nstr,sdeg,theta) 1682 1683 !Arguments ------------------------------------ 1684 !scalars 1685 integer,intent(in) :: nkpt 1686 real(dp),intent(in) :: chc,dhc,dhd,sdeg,theta 1687 real(dp),intent(out) :: e0,e1 1688 !arrays 1689 integer,intent(in) :: hel(2,3),nstr(3) 1690 real(dp),intent(in) :: bcut(2,3),detovc(2,2,3),detovd(2,2,3),efield_dot(3) 1691 1692 !Local variables ------------------------- 1693 !scalars 1694 integer :: idir,ifor 1695 real(dp) :: c2theta,ctheta,dphase,gnorm,phase,rho,s2theta,sgn,stheta 1696 !arrays 1697 real(dp) :: dg_theta(2),g_theta(2) 1698 1699 ! *********************************************************************** 1700 1701 e0 = zero ; e1 = zero 1702 1703 ctheta = cos(theta) 1704 stheta = sin(theta) 1705 c2theta = ctheta*ctheta - stheta*stheta ! cos(2*theta) 1706 s2theta = two*ctheta*stheta ! sin(2*theta) 1707 1708 e0 = chc*ctheta*ctheta + dhd*stheta*stheta + dhc*s2theta 1709 e0 = e0*sdeg/nkpt 1710 1711 !DEBUG 1712 !e0 = zero 1713 !ENDDEBUG 1714 1715 e1 = (dhd - chc)*s2theta + two*dhc*c2theta 1716 e1 = e1*sdeg/nkpt 1717 1718 sgn = -1_dp 1719 do idir = 1, 3 1720 1721 if (abs(efield_dot(idir)) < tol12) cycle 1722 1723 do ifor = 1, 2 1724 1725 g_theta(:) = ctheta*detovc(:,ifor,idir) + & 1726 & stheta*detovd(:,ifor,idir) 1727 dg_theta(:) = -1_dp*stheta*detovc(:,ifor,idir) + & 1728 & ctheta*detovd(:,ifor,idir) 1729 1730 ! Compute E(theta) 1731 1732 call rhophi(g_theta,phase,rho) 1733 if (theta >= bcut(ifor,idir)) phase = phase + hel(ifor,idir)*two_pi 1734 1735 ! DEBUG 1736 ! unit = 100 + 10*idir + ifor 1737 ! write(unit,'(4(f16.9))')theta,g_theta(:),phase 1738 ! ENDDEBUG 1739 1740 e0 = e0 + sgn*sdeg*efield_dot(idir)*phase/(two_pi*nstr(idir)) 1741 1742 1743 ! Compute dE/dtheta 1744 1745 ! imaginary part of the derivative of ln(g_theta) 1746 gnorm = g_theta(1)*g_theta(1) + g_theta(2)*g_theta(2) 1747 dphase = (dg_theta(2)*g_theta(1) - dg_theta(1)*g_theta(2))/gnorm 1748 1749 e1 = e1 + sgn*sdeg*efield_dot(idir)*dphase/(two_pi*nstr(idir)) 1750 1751 sgn = -1_dp*sgn 1752 1753 end do 1754 end do 1755 1756 end subroutine etheta
m_cgwf/linemin [ Functions ]
[ Top ] [ m_cgwf ] [ Functions ]
NAME
linemin
FUNCTION
Performs the "line minimization" w.r.t. the angle theta on a unit circle to update the wavefunction associated with the current k-point and band label. This routine is used only when the electric field is on (otherwise it could in principle also be used, but there is a simpler procedure, as originally coded in abinit).
INPUTS
chc = <C|H_0|C> where |C> is the wavefunction of the current band detovc = determinant of the overlap matrix S detovd = determinant of the overlap matrix where for the band that is being updated <C| is replaced by <D| (search direction) dhc = Re[<D|H_0|C>] dhd = <D|H_0|D> efield_dot = reciprocal lattice coordinates of the electric field iline = index of the current line minimization nkpt = number of k-points nstr(idir) = number of strings along the idir-th direction sdeg = spin degeneracy
OUTPUT
bcut(ifor,idir) = branch cut of the ellipse associated with (ifor,idir) costh = cos(thetam) hel(ifor,idir) = helicity of the ellipse associated with (ifor,idir) phase_end = total change in Zak phase, must be equal to dphase_aux1 + n*two_pi sinth = sin(thetam) thetam = optimal angle theta in line_minimization
SIDE EFFECTS
Input/Output dphase_aux1 = change in Zak phase accumulated during the loop over iline (can be used for debugging in cgwf.f) phase_init = initial Zak phase (before doing the first line minimization)
NOTES
We are making the "frozen Hamiltonian approximation", i.e., the Hamiltonian does not change with theta (we are neglecting the dependence of the Hartree and exchange-correlation terms on theta; the original abinit routine does the same)
SOURCE
1372 subroutine linemin(bcut,chc,costh,detovc,detovd,dhc,dhd,dphase_aux1,& 1373 & efield_dot,iline,nkpt,nstr,hel,phase_end,phase_init,sdeg,sinth,thetam) 1374 1375 !Arguments ------------------------------------ 1376 !scalars 1377 integer,intent(in) :: iline,nkpt 1378 real(dp),intent(in) :: chc,dhc,dhd,sdeg 1379 real(dp),intent(out) :: costh,sinth,thetam 1380 !arrays 1381 integer,intent(in) :: nstr(3) 1382 integer,intent(out) :: hel(2,3) 1383 real(dp),intent(in) :: detovc(2,2,3),detovd(2,2,3),efield_dot(3) 1384 real(dp),intent(inout) :: dphase_aux1(3),phase_init(3) 1385 real(dp),intent(out) :: bcut(2,3),phase_end(3) 1386 1387 !Local variables ------------------------- 1388 !scalars 1389 integer :: idir,ifor,igrid,iter,maxiter,ngrid 1390 real(dp) :: aa,angle,bb,big_axis,cc,cphi_0,delta_theta,e0,e1 1391 real(dp) :: excentr,iab,phase0,phase_min,phi_0,rdum,sgn,small_axis,sphi_0 1392 real(dp) :: theta,theta_0,val 1393 logical :: flag_neg 1394 character(len=500) :: message 1395 !arrays 1396 real(dp) :: g_theta(2),theta_min(2),theta_try(2) 1397 real(dp) :: esave(251),e1save(251) !!REC 1398 1399 ! *********************************************************************** 1400 1401 !Compute the helicity and the branch cut of the ellipse in the complex 1402 !plane associated with the overlap between a k-point and one of its neighbours 1403 1404 do idir = 1, 3 1405 1406 if (abs(efield_dot(idir)) < tol12) cycle 1407 1408 do ifor = 1, 2 1409 1410 aa = half*(detovc(1,ifor,idir)*detovc(1,ifor,idir) + & 1411 & detovc(2,ifor,idir)*detovc(2,ifor,idir) + & 1412 & detovd(1,ifor,idir)*detovd(1,ifor,idir) + & 1413 & detovd(2,ifor,idir)*detovd(2,ifor,idir)) 1414 1415 bb = half*(detovc(1,ifor,idir)*detovc(1,ifor,idir) + & 1416 & detovc(2,ifor,idir)*detovc(2,ifor,idir) - & 1417 & detovd(1,ifor,idir)*detovd(1,ifor,idir) - & 1418 & detovd(2,ifor,idir)*detovd(2,ifor,idir)) 1419 1420 cc = detovc(1,ifor,idir)*detovd(1,ifor,idir) + & 1421 & detovc(2,ifor,idir)*detovd(2,ifor,idir) 1422 1423 iab = detovc(1,ifor,idir)*detovd(2,ifor,idir) - & 1424 & detovc(2,ifor,idir)*detovd(1,ifor,idir) 1425 1426 if (iab >= zero) then 1427 hel(ifor,idir) = 1 1428 else 1429 hel(ifor,idir) = -1 1430 end if 1431 1432 if (abs(bb) > tol8) then 1433 theta_0 = half*atan(cc/bb) 1434 else 1435 theta_0 = quarter*pi 1436 end if 1437 1438 if (bb < zero) theta_0 = theta_0 + pi*half 1439 1440 g_theta(:) = cos(theta_0)*detovc(:,ifor,idir) + & 1441 & sin(theta_0)*detovd(:,ifor,idir) 1442 ! DEBUG 1443 ! write(std_out,*)'before rhophi, g_theta =',g_theta 1444 ! ENDDEBUG 1445 call rhophi(g_theta,phi_0,rdum) 1446 ! DEBUG 1447 ! write(std_out,*)'after rhophi, phi_0 = ',phi_0 1448 ! ENDDEBUG 1449 1450 cphi_0 = cos(phi_0) 1451 sphi_0 = sin(phi_0) 1452 1453 rdum = aa - sqrt(bb*bb + cc*cc) 1454 if (rdum < zero) rdum = zero 1455 small_axis = sqrt(rdum) 1456 big_axis = sqrt(aa + sqrt(bb*bb + cc*cc)) 1457 excentr = hel(ifor,idir)*small_axis/big_axis 1458 1459 ! Find angle for which phi = pi 1460 if (abs(excentr) > tol8) then 1461 angle = atan(tan(pi-phi_0)/excentr) 1462 else 1463 if (tan(pi-phi_0)*hel(ifor,idir) > zero) then 1464 angle = half*pi 1465 else 1466 angle = -0.5_dp*pi 1467 end if 1468 end if 1469 bcut(ifor,idir) = angle + theta_0 1470 1471 1472 ! Compute the branch-cut angle 1473 if (hel(ifor,idir) == 1) then 1474 if ((sphi_0 > 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) + pi 1475 if ((sphi_0 < 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) - pi 1476 else 1477 if ((sphi_0 > 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) - pi 1478 if ((sphi_0 < 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) + pi 1479 end if 1480 1481 if (bcut(ifor,idir) > pi) bcut(ifor,idir) = bcut(ifor,idir) - two_pi 1482 if (bcut(ifor,idir) < -1_dp*pi) bcut(ifor,idir) = bcut(ifor,idir) + two_pi 1483 1484 ! DEBUG 1485 ! write(std_out,'(a,2x,i3,2x,i3,5x,f16.9,5x,i2)')'linemin: ifor,idir,bcut,hel',& 1486 ! & ifor,idir,bcut(ifor,idir),hel(ifor,idir) 1487 ! write(std_out,'(a,5x,f16.9,5x,f16.9)')'linemin: big_axis,small_axis ',& 1488 ! & big_axis,small_axis 1489 ! ENDDEBUG 1490 1491 end do ! ifor 1492 end do ! idir 1493 1494 !--------------------------------------------------------------------------- 1495 1496 !Perform the "line minimization" w.r.t. the angle theta on a unit circle 1497 !to update the wavefunction associated with the current k-point and band label. 1498 1499 ngrid = 250 ! initial number of subdivisions in [-pi/2,pi/2] 1500 !for finding extrema 1501 maxiter = 100 1502 delta_theta = pi/ngrid 1503 1504 !DEBUG 1505 !write(std_out,*)'linemin: theta, e0, e1, e1fdiff' 1506 !ENDDEBUG 1507 1508 1509 !Get the interval where the absolute minimum of E(theta) is located 1510 1511 val = huge(one) ! large number 1512 flag_neg=.false. 1513 theta_min(:) = ten 1514 do igrid = 1, ngrid+1 1515 1516 theta = (igrid - 1)*delta_theta - pi*half 1517 call etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,& 1518 & hel,nkpt,nstr,sdeg,theta) 1519 1520 esave(igrid)=e0 !!REC 1521 e1save(igrid)=e1 !!REC 1522 1523 ! It is important to detect when the slope changes from negative to positive 1524 ! Moreover, a slope being extremely close to zero must be ignored 1525 1526 ! DEBUG 1527 ! write(std_out,*)' igrid,e0,e1,val,theta_min(:)=',igrid,theta,e0,e1,val,theta_min(:) 1528 ! ENDDEBUG 1529 1530 ! Store e1 and theta if negative ... 1531 if(e1 < -tol10)then 1532 theta_try(1)=theta 1533 flag_neg=.true. 1534 end if 1535 ! A change of sign is just happening 1536 if(e1 > tol10 .and. flag_neg)then 1537 theta_try(2)=theta 1538 flag_neg=.false. 1539 ! Still, must be better than the previous minimum in order to succeed 1540 if (e0 < val-tol10) then 1541 val=e0 1542 theta_min(:)=theta_try(:) 1543 end if 1544 end if 1545 end do 1546 1547 !In case the minimum was not found 1548 1549 if (abs(theta_min(1) - ten) < tol10) then 1550 ! REC start 1551 write(message,'(a,a)')ch10,& 1552 & ' linemin: ERROR- cannot find theta_min.' 1553 call wrtout(std_out,message,'COLL') 1554 write(message,'(a,a)')ch10,& 1555 & ' igrid theta esave(igrid) e1save(igrid) ' 1556 call wrtout(std_out,message,'COLL') 1557 do igrid = 1, ngrid+1 1558 theta = (igrid - 1)*delta_theta - pi*half 1559 write(std_out,'(i6,3f16.9)')igrid,theta,esave(igrid),e1save(igrid) 1560 !write(101,'(i6,3f16.9)')igrid,theta,esave(igrid),e1save(igrid) 1561 end do 1562 write(message,'(6a)')ch10,& 1563 & ' linemin: ERROR - ',ch10,& 1564 & ' Cannot find theta_min. No minimum exists: the field is too strong ! ',ch10,& 1565 & ' Try decreasing difference between D and 4 Pi P by changing structure or D (only for fixed D calculation)' 1566 call wrtout(std_out,message,'COLL') 1567 ABI_ERROR('linemin cannot find theta_min') 1568 end if 1569 1570 !Compute the mimum of E(theta) 1571 1572 1573 iter = 0 1574 do while ((delta_theta > tol8).and.(iter < maxiter)) 1575 delta_theta = half*(theta_min(2) - theta_min(1)) 1576 theta = theta_min(1) + delta_theta 1577 call etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,& 1578 & hel,nkpt,nstr,sdeg,theta) 1579 if (e1 > zero) then 1580 theta_min(2) = theta 1581 else 1582 theta_min(1) = theta 1583 end if 1584 iter = iter + 1 1585 1586 ! DEBUG 1587 ! write(std_out,'(a,2x,i3,2(2x,f16.9))')'iter,e0,e1 = ',iter,e0,e1 1588 ! ENDDEBUG 1589 1590 end do 1591 1592 costh = cos(theta) 1593 sinth = sin(theta) 1594 1595 thetam = theta 1596 1597 !DEBUG 1598 !write(std_out,*)'linemin : thetam = ',thetam 1599 !ENDDEBUG 1600 1601 !--------------------------------------------------------------------------- 1602 1603 !Compute and store the change in electronic polarization 1604 1605 sgn = one 1606 do idir = 1, 3 1607 1608 if (abs(efield_dot(idir)) < tol12) cycle 1609 1610 phase_end(idir) = zero 1611 do ifor = 1, 2 1612 1613 g_theta(:) = detovc(:,ifor,idir) 1614 ! DEBUG 1615 ! write(std_out,*)'before rhophi (2nd call), g_theta =',g_theta 1616 ! ENDDEBUG 1617 call rhophi(g_theta,phase0,rdum) 1618 ! DEBUG 1619 ! write(std_out,*)'after rhophi, phase0 = ',phase0 1620 ! ENDDEBUG 1621 1622 if(iline == 1) phase_init(idir) = phase_init(idir) + sgn*phase0 1623 1624 g_theta(:) = costh*detovc(:,ifor,idir) + sinth*detovd(:,ifor,idir) 1625 call rhophi(g_theta,phase_min,rdum) 1626 1627 phase_end(idir) = phase_end(idir) + sgn*phase_min 1628 1629 ! Correct for branch cuts (remove jumps) 1630 if (bcut(ifor,idir) <= zero) phase0 = phase0 + hel(ifor,idir)*two_pi 1631 if(thetam >= bcut(ifor,idir)) phase_min = phase_min + hel(ifor,idir)*two_pi 1632 1633 dphase_aux1(idir) = dphase_aux1(idir) + sgn*(phase_min - phase0) 1634 1635 sgn = -1_dp*sgn 1636 1637 end do ! idir 1638 end do ! ifor 1639 1640 !DEBUG 1641 !write(std_out,'(a,3(2x,f16.9))')'dphase_aux1 = ',(dphase_aux1(idir),idir = 1, 3) 1642 !write(std_out,*)' linemin: debug, exit.' 1643 !ENDDEBUG 1644 1645 end subroutine linemin
m_cgwf/mksubham [ Functions ]
[ Top ] [ m_cgwf ] [ Functions ]
NAME
mksubham
FUNCTION
Build the Hamiltonian matrix in the eigenfunctions subspace, for one given band (or for one given block of bands)
INPUTS
cg(2,mcg)=wavefunctions gsc(2,mgsc)=<g|S|c> matrix elements (S=overlap) iblock=index of block of bands icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array cg istwf_k=input parameter that describes the storage of wfs mcg=second dimension of the cg array mgsc=second dimension of the gsc array nband_k=number of bands at this k point for that spin polarization nbdblock=number of bands in a block npw_k=number of plane waves at this k point nspinor=number of spinorial components of the wavefunctions use_subovl=1 if the overlap matrix is not identity in WFs subspace use_subvnlx= 1 if <C band,k|H|C band_prime,k> has to be computed me_g0=1 if this processors has G=0, 0 otherwise
OUTPUT
SIDE EFFECTS
ghc(2,npw_k*nspinor)=<G|H|C band,k> for the current state This is an input in non-blocked algorithm an output in blocked algorithm gvnlxc(2,npw_k*nspinor)=<G|Vnl|C band,k> for the current state This is an input in non-blocked algorithm an output in blocked algorithm isubh=index of current state in array subham isubo=index of current state in array subovl subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace subovl(nband_k*(nband_k+1)*use_subovl)=overlap matrix expressed in the WFs subspace subvnlx(nband_k*(nband_k+1)*use_subvnlx)=non-local Hamiltonian (if NCPP) plus Fock ACE operator (if usefock_ACE) expressed in the WFs subspace
SOURCE
1802 subroutine mksubham(cg,ghc,gsc,gvnlxc,iblock,icg,igsc,istwf_k,& 1803 & isubh,isubo,mcg,mgsc,nband_k,nbdblock,npw_k,& 1804 & nspinor,subham,subovl,subvnlx,use_subovl,use_subvnlx,me_g0) 1805 1806 !Arguments ------------------------------------ 1807 !scalars 1808 integer,intent(in) :: iblock,icg,igsc,istwf_k,mcg,mgsc,nband_k 1809 integer,intent(in) :: nbdblock,npw_k,nspinor,use_subovl,use_subvnlx,me_g0 1810 integer,intent(inout) :: isubh,isubo 1811 !arrays 1812 real(dp),intent(in) :: cg(2,mcg) 1813 real(dp),intent(in) :: gsc(2,mgsc) 1814 real(dp),intent(inout) :: ghc(2,npw_k*nspinor),gvnlxc(2,npw_k*nspinor) 1815 real(dp),intent(inout) :: subham(nband_k*(nband_k+1)) 1816 real(dp),intent(inout) :: subovl(nband_k*(nband_k+1)*use_subovl) 1817 real(dp),intent(inout) :: subvnlx(nband_k*(nband_k+1)*use_subvnlx) 1818 1819 !Local variables------------------------------- 1820 !scalars 1821 integer :: iband,ibdblock,ii,ipw,ipw1,isp,iwavef,jwavef 1822 real(dp) :: cgimipw,cgreipw,chcim,chcre,cscim,cscre,cvcim,cvcre 1823 !real(dp) :: chc(2),cvc(2),csc(2) 1824 1825 ! ********************************************************************* 1826 1827 !Loop over bands in a block This loop can be parallelized 1828 do iband=1+(iblock-1)*nbdblock,min(iblock*nbdblock,nband_k) 1829 ibdblock=iband-(iblock-1)*nbdblock 1830 1831 ! Compute elements of subspace Hamiltonian <C(i)|H|C(n)> and <C(i)|Vnl|C(n)> 1832 if(istwf_k==1)then 1833 1834 do ii=1,iband 1835 iwavef=(ii-1)*npw_k*nspinor+icg 1836 chcre=zero ; chcim=zero 1837 if (use_subvnlx==0) then 1838 do ipw=1,npw_k*nspinor 1839 cgreipw=cg(1,ipw+iwavef) 1840 cgimipw=cg(2,ipw+iwavef) 1841 chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw) 1842 chcim=chcim+cgreipw*ghc(2,ipw)-cgimipw*ghc(1,ipw) 1843 end do 1844 ! chc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),ghc) 1845 else 1846 #if 1 1847 do ipw=1,npw_k*nspinor 1848 cgreipw=cg(1,ipw+iwavef) 1849 cgimipw=cg(2,ipw+iwavef) 1850 chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw) 1851 chcim=chcim+cgreipw*ghc(2,ipw)-cgimipw*ghc(1,ipw) 1852 end do 1853 cvcre=zero ; cvcim=zero 1854 do ipw=1,npw_k*nspinor 1855 cgreipw=cg(1,ipw+iwavef) 1856 cgimipw=cg(2,ipw+iwavef) 1857 cvcre=cvcre+cgreipw*gvnlxc(1,ipw)+cgimipw*gvnlxc(2,ipw) 1858 cvcim=cvcim+cgreipw*gvnlxc(2,ipw)-cgimipw*gvnlxc(1,ipw) 1859 end do 1860 subvnlx(isubh )=cvcre 1861 subvnlx(isubh+1)=cvcim 1862 #else 1863 ! New version with BLAS1, will require some update of the refs. 1864 cvc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),gvnlxc) 1865 subvnlx(isubh )=cvc(1) 1866 subvnlx(isubh+1)=cvc(2) 1867 chc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),ghc) 1868 chcre = chc(1) 1869 chcim = chc(2) 1870 #endif 1871 ! Store real and imag parts in Hermitian storage mode: 1872 end if 1873 subham(isubh )=chcre 1874 subham(isubh+1)=chcim 1875 ! subham(isubh )=chc(1) 1876 ! subham(isubh+1)=chc(2) 1877 isubh=isubh+2 1878 end do 1879 1880 else if(istwf_k>=2)then 1881 do ii=1,iband 1882 iwavef=(ii-1)*npw_k+icg 1883 ! Use the time-reversal symmetry, but should not double-count G=0 1884 if(istwf_k==2 .and. me_g0==1) then 1885 chcre = half*cg(1,1+iwavef)*ghc(1,1) 1886 if (use_subvnlx==1) cvcre=half*cg(1,1+iwavef)*gvnlxc(1,1) 1887 ipw1=2 1888 else 1889 chcre=zero; ipw1=1 1890 if (use_subvnlx==1) cvcre=zero 1891 end if 1892 if (use_subvnlx==0) then 1893 do isp=1,nspinor 1894 do ipw=ipw1+(isp-1)*npw_k,npw_k*isp 1895 cgreipw=cg(1,ipw+iwavef) 1896 cgimipw=cg(2,ipw+iwavef) 1897 chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw) 1898 end do 1899 end do 1900 chcre=two*chcre 1901 else 1902 do isp=1,nspinor 1903 do ipw=ipw1+(isp-1)*npw_k,npw_k*isp 1904 cgreipw=cg(1,ipw+iwavef) 1905 cgimipw=cg(2,ipw+iwavef) 1906 chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw) 1907 cvcre=cvcre+cgreipw*gvnlxc(1,ipw)+cgimipw*gvnlxc(2,ipw) 1908 end do 1909 end do 1910 chcre=two*chcre 1911 cvcre=two*cvcre 1912 ! Store real and imag parts in Hermitian storage mode: 1913 subvnlx(isubh )=cvcre 1914 subvnlx(isubh+1)=zero 1915 end if 1916 subham(isubh )=chcre 1917 subham(isubh+1)=zero 1918 isubh=isubh+2 1919 end do 1920 end if 1921 1922 ! Compute elements of subspace <C(i)|S|C(n)> (S=overlap matrix) 1923 ! <C(i)|S|C(n)> should be closed to Identity. 1924 if (use_subovl==1) then 1925 jwavef=(iband-1)*npw_k*nspinor+igsc 1926 if(istwf_k==1)then 1927 do ii=1,iband 1928 iwavef=(ii-1)*npw_k*nspinor+icg 1929 cscre=zero ; cscim=zero 1930 do ipw=1,npw_k*nspinor 1931 cgreipw=cg(1,ipw+iwavef) 1932 cgimipw=cg(2,ipw+iwavef) 1933 cscre=cscre+cgreipw*gsc(1,ipw+jwavef)+cgimipw*gsc(2,ipw+jwavef) 1934 cscim=cscim+cgreipw*gsc(2,ipw+jwavef)-cgimipw*gsc(1,ipw+jwavef) 1935 end do 1936 ! csc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),gsc) 1937 ! subovl(isubo )=csc(1) 1938 ! subovl(isubo+1)=csc(2) 1939 ! Store real and imag parts in Hermitian storage mode: 1940 subovl(isubo )=cscre 1941 subovl(isubo+1)=cscim 1942 isubo=isubo+2 1943 end do 1944 else if(istwf_k>=2)then 1945 do ii=1,iband 1946 iwavef=(ii-1)*npw_k*nspinor+icg 1947 if(istwf_k==2 .and. me_g0==1)then 1948 cscre=half*cg(1,1+iwavef)*gsc(1,1+jwavef) 1949 ipw1=2 1950 else 1951 cscre=zero; ipw1=1 1952 end if 1953 do isp=1,nspinor 1954 do ipw=ipw1+(isp-1)*npw_k,npw_k*isp 1955 cgreipw=cg(1,ipw+iwavef) 1956 cgimipw=cg(2,ipw+iwavef) 1957 cscre=cscre+cg(1,ipw+iwavef)*gsc(1,ipw+jwavef)+cg(2,ipw+iwavef)*gsc(2,ipw+jwavef) 1958 end do 1959 end do 1960 cscre=two*cscre 1961 ! Store real and imag parts in Hermitian storage mode: 1962 subovl(isubo )=cscre 1963 subovl(isubo+1)=zero 1964 isubo=isubo+2 1965 end do 1966 end if 1967 end if 1968 1969 end do ! iband in a block 1970 1971 end subroutine mksubham