TABLE OF CONTENTS


ABINIT/m_cgwf [ Modules ]

[ Top ] [ Modules ]

NAME

  m_cgwf

FUNCTION

  Conjugate-gradient eigensolver.

COPYRIGHT

  Copyright (C) 2008-2022 ABINIT group (DCA, XG, GMR, MT, MVeithen, ISouza, JIniguez)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_cgwf
23 
24  use defs_basis
25  use m_errors
26  use m_xmpi
27  use m_abicore
28  use m_cgtools
29  use m_efield
30 
31  use defs_abitypes,   only : MPI_type
32  use m_time,          only : timab
33  use m_numeric_tools, only : rhophi
34  use m_pawcprj,       only : pawcprj_type, pawcprj_alloc, pawcprj_put, pawcprj_copy, &
35                              pawcprj_get, pawcprj_mpi_allgather, pawcprj_free, pawcprj_symkn
36  use m_hamiltonian,   only : gs_hamiltonian_type
37  use m_fock,          only : fock_set_ieigen,fock_set_getghc_call
38  use m_getghc,        only : getghc
39  use m_berrytk,       only : smatrix
40  use m_nonlop,      only : nonlop
41  use m_paw_overlap,   only : smatrix_k_paw
42  use m_cgprj,         only : getcprj
43 
44  implicit none
45 
46  private

ABINIT/make_grad_berry [ Functions ]

[ Top ] [ Functions ]

NAME

 make_grad_berry

FUNCTION

 compute gradient contribution from berry phase in finite
 electric field case

INPUTS

  cg(2,mcg)=input wavefunctions
  cgq(2,mcgq) = wavefunctions at neighboring k points
  cprj_k(natom,nband_k*usepaw)=cprj at this k point
  dimlmn(natom)=lmn_size for each atom in input order
  dimlmn_srt(natom)=lmn_size for each atom sorted by type
  direc(2,npw*nspinor)=gradient vector
  gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k
  iband=index of band currently being treated
  icg=shift to be applied on the location of data in the array cg
  ikpt=number of the k-point currently being treated
  isppol=spin polarization currently treated
  natom=number of atoms in cell.
  mband =maximum number of bands
  mpw=maximum dimensioned size of npw
  mcg=second dimension of the cg array
  mcgq=second dimension of the cgq array
  mkgq = second dimension of pwnsfacq
  nkpt=number of k points
  mpi_enreg=information about MPI parallelization
  npw=number of planewaves in basis sphere at given k.
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  nsppol=number of spin polarizations
  pwind(pwind_alloc,2,3) = array used to compute
           the overlap matrix smat between k-points (see initberry.f)
  pwind_alloc = first dimension of pwind
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  pwnsfacq(2,mkgq) = phase factors for the nearest neighbours of the
                     current k-point (electric field, MPI //)

OUTPUT

 grad_berry(2,npw*nspinor) :: contribution to gradient in finite electric field case

SIDE EFFECTS

  dtefield <type(efield_type)> = variables related to Berry phase calculations (see initberry.f)

NOTES

SOURCE

2023 subroutine make_grad_berry(cg,cgq,cprj_k,detovc,dimlmn,dimlmn_srt,direc,dtefield,grad_berry,&
2024 &                          gs_hamk,iband,icg,ikpt,isppol,mband,mcg,mcgq,mkgq,mpi_enreg,mpw,natom,nkpt,&
2025 &                          npw,npwarr,nspinor,nsppol,pwind,pwind_alloc,pwnsfac,pwnsfacq)
2026 
2027   !Arguments ------------------------------------
2028   !scalars
2029   integer,intent(in) :: iband,icg,ikpt,isppol,mband,mcg,mcgq
2030   integer,intent(in) :: mkgq,mpw,natom,nkpt,npw,nspinor,nsppol,pwind_alloc
2031   type(gs_hamiltonian_type),intent(in) :: gs_hamk
2032   type(efield_type),intent(inout) :: dtefield
2033   type(MPI_type),intent(in) :: mpi_enreg
2034 
2035   !arrays
2036   integer,intent(in) :: dimlmn(natom),dimlmn_srt(natom)
2037   integer,intent(in) :: npwarr(nkpt),pwind(pwind_alloc,2,3)
2038   real(dp),intent(in) :: cg(2,mcg),cgq(2,mcgq)
2039   real(dp),intent(inout) :: direc(2,npw*nspinor)
2040   real(dp),intent(in) :: pwnsfac(2,pwind_alloc),pwnsfacq(2,mkgq)
2041   real(dp),intent(out) :: detovc(2,2,3),grad_berry(2,npw*nspinor)
2042   type(pawcprj_type),intent(in) :: cprj_k(natom,dtefield%mband_occ*gs_hamk%usepaw*dtefield%nspinor)
2043 
2044   !Local variables-------------------------------
2045   !scalars
2046   integer :: choice,cpopt,ddkflag,dimenlr1,iatom,icg1,icp2,idum1
2047   integer :: idir,ifor,ikgf,ikptf,ikpt2,ikpt2f,ipw,i_paw_band,ispinor,itrs,itypat,job
2048   integer :: klmn,mcg1_k,mcg_q,nbo,npw_k2,nspinortot,paw_opt,shiftbd,signs
2049   real(dp) :: fac
2050   character(len=500) :: message
2051   !arrays
2052   integer :: pwind_k(npw),sflag_k(dtefield%mband_occ)
2053   real(dp) :: cg1_k(2,npw*nspinor),dtm_k(2),pwnsfac_k(4,mpw)
2054   real(dp) :: smat_k(2,dtefield%mband_occ,dtefield%mband_occ)
2055   real(dp) :: smat_inv(2,dtefield%mband_occ,dtefield%mband_occ),svectout_dum(2,0)
2056   real(dp) :: dummy_enlout(0)
2057   real(dp),allocatable :: cgq_k(:,:),enl_rij(:,:,:,:),grad_berry_ev(:,:)
2058   real(dp),allocatable :: qijbkk(:,:,:,:),smat_k_paw(:,:,:)
2059   ! type(pawcprj_type) :: cprj_dum(1,1) ! was used in on-site dipole, now suppressed
2060   ! 15 June 2012 J Zwanziger
2061   type(pawcprj_type),allocatable :: cprj_kb(:,:),cprj_band_srt(:,:)
2062   type(pawcprj_type),allocatable :: cprj_fkn(:,:),cprj_ikn(:,:)
2063 
2064 
2065   ! *********************************************************************
2066 
2067   !DBG_ENTER("COLL")
2068 
2069   nbo = dtefield%mband_occ
2070 
2071   !allocations
2072 
2073   !Electric field: compute the gradient of the Berry phase part of the energy functional.
2074   !See PRL 89, 117602 (2002) [[cite:Souza2002]], grad_berry(:,:) is the second term of Eq. (4)
2075   grad_berry(:,:) = zero
2076   job = 11 ; shiftbd = 1
2077   mcg_q = mpw*mband*nspinor
2078   mcg1_k = npw*nspinor
2079 
2080   if (gs_hamk%usepaw /= 0) then
2081      dimenlr1 = gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2
2082      ABI_MALLOC(qijbkk,(dimenlr1,natom,nspinor**2,2))
2083      ABI_MALLOC(enl_rij,(nspinor*dimenlr1,natom,nspinor**2,1))
2084      ABI_MALLOC(smat_k_paw,(2,nbo,nbo))
2085      ABI_MALLOC(grad_berry_ev,(2,npw*nspinor))
2086      enl_rij = zero
2087      qijbkk = zero
2088      smat_k_paw = zero
2089      ABI_MALLOC(cprj_kb,(natom,nbo*nspinor))
2090      call pawcprj_alloc(cprj_kb,0,dimlmn)
2091      ABI_MALLOC(cprj_band_srt,(natom,nspinor))
2092      call pawcprj_alloc(cprj_band_srt,0,dimlmn_srt)
2093      if (nkpt /= dtefield%fnkpt) then
2094         ABI_MALLOC(cprj_fkn,(natom,nbo*nspinor))
2095         ABI_MALLOC(cprj_ikn,(natom,nbo*nspinor))
2096         call pawcprj_alloc(cprj_fkn,0,dimlmn)
2097         call pawcprj_alloc(cprj_ikn,0,dimlmn)
2098      else
2099         ABI_MALLOC(cprj_fkn,(0,0))
2100         ABI_MALLOC(cprj_ikn,(0,0))
2101      end if
2102   else
2103      ABI_MALLOC(qijbkk,(0,0,0,0))
2104      ABI_MALLOC(enl_rij,(0,0,0,0))
2105      ABI_MALLOC(smat_k_paw,(0,0,0))
2106      ABI_MALLOC(grad_berry_ev,(0,0))
2107      ABI_MALLOC(cprj_kb,(0,0))
2108      ABI_MALLOC(cprj_band_srt,(0,0))
2109      ABI_MALLOC(cprj_fkn,(0,0))
2110      ABI_MALLOC(cprj_ikn,(0,0))
2111   end if
2112 
2113   ikptf = dtefield%i2fbz(ikpt)
2114   ikgf = dtefield%fkgindex(ikptf)  ! this is the shift for pwind
2115 
2116   do idir = 1, 3
2117      !  skip idir values for which efield_dot(idir)=0
2118      if (abs(dtefield%efield_dot(idir)) < tol12) cycle
2119      !  Implicitly, we use the gradient multiplied by the number of k points in the FBZ
2120      fac = dtefield%efield_dot(idir)*dble(dtefield%fnkpt)/&
2121           &   (dble(dtefield%nstr(idir))*four_pi)
2122      do ifor = 1, 2
2123         !    Handle dtefield%i2fbz properly and ask whether t.r.s. is used
2124         ikpt2f = dtefield%ikpt_dk(ikptf,ifor,idir)
2125         if (dtefield%indkk_f2ibz(ikpt2f,6) == 1) then
2126            itrs = 10
2127         else
2128            itrs = 0
2129         end if
2130         ikpt2 = dtefield%indkk_f2ibz(ikpt2f,1)
2131         npw_k2 = npwarr(ikpt2)
2132         ABI_MALLOC(cgq_k,(2,nbo*nspinor*npw_k2))
2133         pwind_k(1:npw) = pwind(ikgf+1:ikgf+npw,ifor,idir)
2134         pwnsfac_k(1:2,1:npw) = pwnsfac(1:2,ikgf+1:ikgf+npw)
2135         sflag_k(:) = dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir)
2136         smat_k(:,:,:) = dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir)
2137         if (mpi_enreg%nproc_cell > 1) then
2138            icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt)
2139            cgq_k(:,1:nbo*nspinor*npw_k2) = &
2140                 &       cgq(:,icg1+1:icg1+nbo*nspinor*npw_k2)
2141            idum1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt)
2142            pwnsfac_k(3:4,1:npw_k2) = pwnsfacq(1:2,idum1+1:idum1+npw_k2)
2143         else
2144            icg1 = dtefield%cgindex(ikpt2,isppol)
2145            cgq_k(:,1:nbo*nspinor*npw_k2) = &
2146                 &       cg(:,icg1+1:icg1+nbo*nspinor*npw_k2)
2147            idum1 = dtefield%fkgindex(ikpt2f)
2148            pwnsfac_k(3:4,1:npw_k2) = pwnsfac(1:2,idum1+1:idum1+npw_k2)
2149         end if
2150         if (gs_hamk%usepaw == 1) then
2151            icp2=nbo*(ikpt2-1)*nspinor
2152            call pawcprj_get(gs_hamk%atindx1,cprj_kb,dtefield%cprj,natom,1,icp2,ikpt,0,isppol,&
2153                 &       nbo,dtefield%fnkpt,natom,nbo,nbo,nspinor,nsppol,0,&
2154                 &       mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb)
2155            if (ikpt2 /= ikpt2f) then ! construct cprj_kb by symmetry
2156               call pawcprj_copy(cprj_kb,cprj_ikn)
2157               call pawcprj_symkn(cprj_fkn,cprj_ikn,dtefield%atom_indsym,dimlmn,-1,gs_hamk%indlmn,&
2158                    &         dtefield%indkk_f2ibz(ikpt2f,2),dtefield%indkk_f2ibz(ikpt2f,6),&
2159                    &         dtefield%fkptns(:,dtefield%i2fbz(ikpt2)),&
2160                    &         dtefield%lmax,dtefield%lmnmax,mband,natom,nbo,nspinor,&
2161                    &         dtefield%nsym,gs_hamk%ntypat,gs_hamk%typat,dtefield%zarot)
2162               call pawcprj_copy(cprj_fkn,cprj_kb)
2163            end if
2164            call smatrix_k_paw(cprj_k,cprj_kb,dtefield,idir,ifor,mband,natom,smat_k_paw,gs_hamk%typat)
2165         end if
2166 
2167         icg1 = 0 ; ddkflag = 1
2168         call smatrix(cg,cgq_k,cg1_k,ddkflag,dtm_k,icg,icg1,itrs,&
2169              &     job,iband,mcg,mcg_q,mcg1_k,iband,mpw,nbo,dtefield%nband_occ(isppol),&
2170              &     npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,&
2171              &     shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw)
2172         ABI_FREE(cgq_k)
2173         detovc(:,ifor,idir) = dtm_k(:) !store the determinant of the overlap
2174         if (sqrt(dtm_k(1)*dtm_k(1) + dtm_k(2)*dtm_k(2)) < tol12) then
2175            write(message,'(3a,i5,a,i3,a,a,a)') &
2176                 &       '  (electric field)',ch10,&
2177                 &       '  For k-point #',ikpt,' and band # ',iband,',',ch10,&
2178                 &       '  the determinant of the overlap matrix is found to be 0. Fixing...'
2179            !      REC try this:
2180            write(std_out,*)message,dtm_k(1:2)
2181            if(abs(dtm_k(1))<=1d-12)dtm_k(1)=1d-12
2182            if(abs(dtm_k(2))<=1d-12)dtm_k(2)=1d-12
2183            write(std_out,*)' Changing to:',dtm_k(1:2)
2184            !      REC       ABI_BUG(message)
2185         end if
2186 
2187         if (gs_hamk%usepaw == 1) then
2188            !      this loop applies discretized derivative of projectors
2189            !      note that qijb_kk is sorted by input atom order, but nonlop wants it sorted by type
2190            do iatom = 1, natom
2191               itypat = gs_hamk%typat(gs_hamk%atindx1(iatom))
2192               do klmn = 1, dtefield%lmn2_size(itypat)
2193                  !          note: D_ij-like terms have 4 spinor components: 11, 22, 12, and 21. Here the qijb is diagonal
2194                  !          in spin space so only the first two are nonzero and they are equal
2195                  do ispinor = 1, nspinor
2196                     qijbkk(klmn,iatom,ispinor,1) = dtefield%qijb_kk(1,klmn,gs_hamk%atindx1(iatom),idir)
2197                     qijbkk(klmn,  iatom,ispinor,2) = dtefield%qijb_kk(2,klmn,gs_hamk%atindx1(iatom),idir)
2198                     if (ifor > 1) qijbkk(klmn,iatom,ispinor,2) = -qijbkk(klmn,iatom,ispinor,2)
2199                  end do
2200               end do ! end loop over lmn2_size
2201            end do ! end loop over natom
2202 
2203            choice = 1
2204            signs = 2
2205            paw_opt = 1
2206            cpopt = 2 ! use cprj_kb in memory
2207            nspinortot=min(2,nspinor*(1+mpi_enreg%paral_spinor))
2208            do i_paw_band = 1, nbo
2209 
2210               call pawcprj_get(gs_hamk%atindx,cprj_band_srt,cprj_kb,natom,i_paw_band,0,ikpt,1,&
2211                    &         isppol,nbo,1,natom,1,nbo,nspinor,nsppol,0,&
2212                    &         mpicomm=mpi_enreg%comm_kpt,proc_distrb=mpi_enreg%proc_distrb)
2213 
2214               ! Pass dummy_enlout to avoid aliasing (enl, enlout)
2215               call nonlop(choice,cpopt,cprj_band_srt,dummy_enlout,gs_hamk,idir,(/zero/),mpi_enreg,1,0,&
2216                    &         paw_opt,signs,svectout_dum,0,direc,grad_berry_ev,enl=qijbkk)
2217 
2218               !        Add i*fac*smat_inv(i_paw_band,iband)*grad_berry_ev to the gradient
2219               do ipw = 1, npw*nspinor
2220 
2221                  grad_berry(1,ipw) = grad_berry(1,ipw) - &
2222                       &           fac*(smat_inv(2,i_paw_band,iband)*grad_berry_ev(1,ipw) + &
2223                       &           smat_inv(1,i_paw_band,iband)*grad_berry_ev(2,ipw))
2224 
2225                  grad_berry(2,ipw) = grad_berry(2,ipw) + &
2226                       &           fac*(smat_inv(1,i_paw_band,iband)*grad_berry_ev(1,ipw) - &
2227                       &           smat_inv(2,i_paw_band,iband)*grad_berry_ev(2,ipw))
2228 
2229               end do
2230            end do
2231         end if ! end if PAW
2232 
2233         !    Add i*fac*cg1_k to the gradient
2234         do ipw = 1, npw*nspinor
2235            grad_berry(1,ipw) = grad_berry(1,ipw) - fac*cg1_k(2,ipw)
2236            grad_berry(2,ipw) = grad_berry(2,ipw) + fac*cg1_k(1,ipw)
2237         end do
2238         fac = -1._dp*fac
2239         dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir) = sflag_k(:)
2240         dtefield%sflag(iband,ikpt+(isppol-1)*nkpt,ifor,idir) = 0
2241         dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir) = smat_k(:,:,:)
2242      end do  ! ifor
2243 
2244      !  if (gs_hamk%usepaw == 1) then
2245      !  !    call nonlop to apply on-site dipole <EV> part to direc
2246      !  !    note that rij is sorted by input atom order, but nonlop wants it sorted by type
2247      !  do iatom = 1, natom
2248      !  itypat = gs_hamk%typat(gs_hamk%atindx1(iatom))
2249      !  do klmn = 1, dtefield%lmn2_size(itypat)
2250      !  !        note: D_ij-like terms have 4 spinor components: 11, 22, 12, and 21. Here the enl_rij is diagonal
2251      !  !        in spin space so only the first two are nonzero and they are equal
2252      !  do ispinor = 1, nspinor
2253      !  if (nspinor == 1) then
2254      !  enl_rij(klmn,iatom,ispinor) = dtefield%rij(klmn,itypat,idir)
2255      !  else
2256      !  enl_rij(2*klmn-1,iatom,ispinor) = dtefield%rij(klmn,itypat,idir)
2257      !  end if
2258      !  end do
2259      !  end do ! end loop over lmn2_size
2260      !  end do ! end loop over natom
2261      !  cpopt = -1 ! compute cprj inside nonlop because we do not have them for direc
2262      !  call nonlop(choice,cpopt,cprj_dum,dummy_enlout,gs_hamk,idir,zero,mpi_enreg,1,0,&
2263      !  &           paw_opt,signs,svectout_dum,0,direc,grad_berry_ev,enl=enl_rij)
2264      !  grad_berry(:,:) = grad_berry(:,:) - dtefield%efield_dot(idir)*grad_berry_ev(:,:)/two_pi
2265      !  end if
2266 
2267   end do    ! idir
2268 
2269   !deallocations
2270   if(gs_hamk%usepaw /= 0) then
2271      call pawcprj_free(cprj_kb)
2272      call pawcprj_free(cprj_band_srt)
2273      if (nkpt /= dtefield%fnkpt) then
2274         call pawcprj_free(cprj_fkn)
2275         call pawcprj_free(cprj_ikn)
2276      end if
2277   end if
2278   ABI_FREE(grad_berry_ev)
2279   ABI_FREE(qijbkk)
2280   ABI_FREE(enl_rij)
2281   ABI_FREE(smat_k_paw)
2282   ABI_FREE(cprj_kb)
2283   ABI_FREE(cprj_band_srt)
2284   ABI_FREE(cprj_fkn)
2285   ABI_FREE(cprj_ikn)
2286 
2287   !DBG_EXIT("COLL")
2288 
2289 end subroutine make_grad_berry

m_cgwf/cgwf [ Functions ]

[ Top ] [ m_cgwf ] [ Functions ]

NAME

 cgwf

FUNCTION

 Update all wavefunction |C>, non self-consistently.
 also compute the corresponding H|C> and Vnl|C> (and S|C> if paw).
 Uses a conjugate-gradient algorithm.
 In case of paw, resolves a generalized eigenproblem using an
  overlap matrix (not used for norm conserving psps).

INPUTS

  berryopt == 4/14: electric field is on;
              all other values, no field is present
  chkexit= if non-zero, check whether the user wishes to exit
  cpus = CPU time limit
  filnam_ds1=name of input file (used for exit checking)
  gs_hamk <type(gs_hamiltonian_type)>=all data for the Hamiltonian at k
  icg=shift to be applied on the location of data in the array cg
  igsc=shift to be applied on the location of data in the array gsc
  ikpt=number of the k-point
  inonsc=index of non self-consistent loop
  isppol=spin polarization currently treated
  mband =maximum number of bands
  mcg=second dimension of the cg array
  mcgq=second dimension of the cgq array
  mgsc=second dimension of the gsc array
  mkgq = second dimension of pwnsfacq
  mpi_enreg=information about MPI parallelization
  mpw=maximum dimensioned size of npw
  nband=number of bands.
  nbdblock=number of bands in a block
  nkpt=number of k points
  nline=number of line minimizations per band.
  npw=number of planewaves in basis sphere at given k.
  npwarr(nkpt)=number of planewaves in basis at this k point
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  nsppol=number of spin polarizations
  ortalg=governs the choice of the algorithm for orthogonalisation.
  prtvol=control print volume and debugging output
  pwind(pwind_alloc,2,3) = array used to compute
           the overlap matrix smat between k-points (see initberry.f)
  pwind_alloc = first dimension of pwind
  pwnsfac(2,pwind_alloc) = phase factors for non-symmorphic translations
                           (see initberry.f)
  pwnsfacq(2,mkgq) = phase factors for the nearest neighbours of the
                     current k-point (electric field, MPI //)
  tolrde=tolerance on the ratio of differences of energies (for the line minimisation)
  tolwfr=tolerance on largest wf residual
  use_subovl=1 if the overlap matrix is not identity in WFs subspace
  use_subvnlx=1 if subvnlx has to be computed
  wfoptalg=govern the choice of algorithm for wf optimisation
   (0, 1, 10 and 11 : in the present routine, usual CG algorithm ;
   (2 and 3 : use shifted square Hamiltonian)
  zshift(nband)=in case wfoptalg is 2 or 3, shift of the Hamiltonian

OUTPUT

  dphase_k(3) = change in Zak phase for the current k-point in case berryopt = 4/14,6/16,7/17 (electric (displacement) field)
  resid(nband)=wf residual for new states=|(H-e)|C>|^2 (hartree^2)
  subham(nband*(nband+1))=Hamiltonian expressed in the WFs subspace
  subovl(nband*(nband+1)*use_subovl)=overlap matrix expressed in the WFs subspace
  subvnlx(nband*(nband+1)*use_subvnlx))=non-local Hamiltonian (if NCPP)  plus Fock ACE operator (if usefock_ACE)
   expressed in the WFs subspace

SIDE EFFECTS

  cg(2,mcg)
    at input =wavefunction <G|C band,k> coefficients for ALL bands
    at output same as input except that
      the current band, with number 'band' has been updated
  dtefield <type(efield_type)> = variables related to Berry phase
      calculations (see initberry.f)
  quit= if 1, proceeds to smooth ending of the job.
  if(gs_hamk%usepaw==1)
   gsc(2,mgsc)=<G|S|C band,k> coefficients for ALL bands
               where S is the overlap matrix (used only for paw)

NOTES

  1) cg should not be filtered and normalized: it should already be OK at input !
  2) Not sure that that the generalized eigenproblem (when gs_hamk%usepaw=1)
     is compatible with wfoptalg=2 or 3 (use of shifted square Hamiltonian) - to be verified

SOURCE

 140 subroutine cgwf(berryopt,cg,cgq,chkexit,cpus,dphase_k,dtefield,&
 141 &                filnam_ds1,gsc,gs_hamk,icg,igsc,ikpt,inonsc,&
 142 &                isppol,mband,mcg,mcgq,mgsc,mkgq,mpi_enreg,&
 143 &                mpw,nband,nbdblock,nkpt,nline,npw,npwarr,&
 144 &                nspinor,nsppol,ortalg,prtvol,pwind,&
 145 &                pwind_alloc,pwnsfac,pwnsfacq,quit,resid,subham,subovl,&
 146 &                subvnlx,tolrde,tolwfr,use_subovl,use_subvnlx,wfoptalg,zshift)
 147 
 148 !Arguments ------------------------------------
 149  integer,intent(in) :: berryopt,chkexit,icg,igsc,ikpt,inonsc,isppol
 150  integer,intent(in) :: mband,mcg,mcgq,mgsc,mkgq,mpw,nband,nbdblock,nkpt,nline
 151  integer,intent(in) :: npw,nspinor,nsppol,ortalg,prtvol,pwind_alloc
 152  integer,intent(in) :: use_subovl,use_subvnlx,wfoptalg
 153  integer,intent(in) :: quit
 154  real(dp),intent(in) :: cpus,tolrde,tolwfr
 155  character(len=*),intent(in) :: filnam_ds1
 156  type(MPI_type),intent(in) :: mpi_enreg
 157  type(efield_type),intent(inout) :: dtefield
 158  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
 159 !arrays
 160  integer,intent(in) :: npwarr(nkpt),pwind(pwind_alloc,2,3)
 161  real(dp),intent(in) :: cgq(2,mcgq)
 162  real(dp),intent(in) :: pwnsfac(2,pwind_alloc),pwnsfacq(2,mkgq),zshift(nband)
 163  real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc)
 164  real(dp),intent(inout) :: dphase_k(3)
 165  real(dp),intent(out) :: subham(nband*(nband+1)),subovl(nband*(nband+1)*use_subovl)
 166  real(dp),intent(out) :: subvnlx(nband*(nband+1)*use_subvnlx)
 167  real(dp),intent(out) :: resid(nband)
 168 
 169 !Local variables-------------------------------
 170  integer,parameter :: level=113,tim_getghc=1,tim_projbd=1,type_calc=0
 171  integer,save :: nskip=0
 172  integer :: choice,counter,cpopt,ddkflag,dimenlc1,dimenlr1,dimenl2,iat,iatom,itypat
 173  integer :: iband,ibandmin,ibandmax,me_g0
 174  integer :: ibdblock,iblock,icg1,icg_shift,icp1,icp2,idir,idum1,ierr,ifor,igs,igsc_shift,ii,ikgf
 175  integer :: ikpt2,ikpt2f,ikptf,iline,iproc,ipw,ispinor,istwf_k,isubh,isubo,itrs
 176  integer :: job,mcg_q,me_distrb,natom,ncpgr,nblock,nproc_distrb,npw_k2
 177  integer :: optekin,paw_opt,signs,shiftbd,sij_opt,spaceComm_distrb
 178  integer :: useoverlap,wfopta10
 179  real(dp) :: chc,costh,deltae,deold,dhc,dhd,diff,dotgg,dotgp,doti,dotr
 180  real(dp) :: dphase_aux2,e0,e0_old,e1,e1_old,eval,gamma
 181  real(dp) :: lam0,lamold,root,sinth,sintn,swap,tan2th,theta,thetam
 182  real(dp) :: xnorm
 183  logical :: gen_eigenpb, finite_field
 184  character(len=500) :: message
 185  integer :: hel(2,3)
 186  integer,allocatable :: dimlmn(:),dimlmn_srt(:),ikptf_recv(:),pwind_k(:),sflag_k(:)
 187  real(dp) :: bcut(2,3),dphase_aux1(3),dtm_k(2),phase_end(3)
 188  real(dp) :: phase_init(3),tsec(2)
 189  real(dp),allocatable :: cg1_k(:,:),cgq_k(:,:),conjgr(:,:),cwavef(:,:)
 190  real(dp),allocatable :: detovc(:,:,:),detovd(:,:,:),direc(:,:),direc_tmp(:,:)
 191  real(dp),allocatable :: gh_direc(:,:),gh_direcws(:,:),ghc(:,:),ghc_all(:,:),ghcws(:,:)
 192  real(dp),allocatable :: grad_berry(:,:),grad_total(:,:),gs_direc(:,:)
 193  real(dp) :: gsc_dummy(0,0)
 194  real(dp),allocatable :: gvnlxc(:,:),gvnlx_direc(:,:),gvnlx_dummy(:,:)
 195  real(dp),allocatable :: pcon(:),pwnsfac_k(:,:),scprod(:,:),scwavef(:,:)
 196  real(dp),allocatable :: smat_inv(:,:,:),smat_k(:,:,:),smat_k_paw(:,:,:),swork(:,:),vresid(:,:),work(:,:)
 197  real(dp),pointer :: kinpw(:)
 198  type(pawcprj_type) :: cprj_dum(1,1)
 199  type(pawcprj_type),allocatable :: cprj_k(:,:),cprj_kb(:,:)
 200  type(pawcprj_type),allocatable :: cprj_direc(:,:),cprj_band_srt(:,:),cprj_gat(:,:)
 201  type(pawcprj_type),allocatable :: cprj_fkn(:,:),cprj_ikn(:,:)
 202 
 203 ! *********************************************************************
 204 
 205  DBG_ENTER("COLL")
 206 
 207 !Starting the routine
 208  call timab(22,1,tsec)
 209 
 210 !Touching chkexit, cpus,filnam_ds to avoid warning for abirules. This is dirty...
 211  if(chkexit<0)then
 212    ABI_BUG('chkexit should be positive!')
 213  end if
 214 
 215  if(cpus<0 .and. filnam_ds1=='a')then
 216    ABI_BUG('cpus should be positive!')
 217  end if
 218 
 219 !======================================================================
 220 !========= LOCAL VARIABLES DEFINITIONS AND ALLOCATIONS ================
 221 !======================================================================
 222 
 223 !MPI data
 224  spaceComm_distrb=mpi_enreg%comm_cell
 225  nproc_distrb=xmpi_comm_size(spaceComm_distrb)
 226  me_distrb=mpi_enreg%me_kpt
 227  me_g0 = mpi_enreg%me_g0
 228 
 229 !if PAW, one has to solve a generalized eigenproblem (H|Psi>=Lambda.S|Psi>)
 230 !else,   one has to solve a classical eigenproblem   (H|Psi>=Lambda.|Psi>)
 231  gen_eigenpb=(gs_hamk%usepaw==1)
 232  useoverlap=0;if (gen_eigenpb) useoverlap=1
 233 
 234 !Initializations and allocations
 235  isubh=1;isubo=1
 236  nblock=(nband-1)/nbdblock+1
 237  istwf_k=gs_hamk%istwf_k
 238  wfopta10=mod(wfoptalg,10)
 239  optekin=0;if (wfoptalg>=10) optekin=1
 240  natom=gs_hamk%natom
 241  cpopt=-1
 242  kinpw => gs_hamk%kinpw_k
 243 
 244  ABI_MALLOC(pcon,(npw))
 245  ABI_MALLOC(ghc,(2,npw*nspinor))
 246  ABI_MALLOC(gvnlxc,(2,npw*nspinor))
 247  ABI_MALLOC(conjgr,(2,npw*nspinor))
 248  ABI_MALLOC(cwavef,(2,npw*nspinor))
 249  ABI_MALLOC(direc,(2,npw*nspinor))
 250  ABI_MALLOC(scprod,(2,nband))
 251 
 252  ABI_MALLOC(gh_direc,(2,npw*nspinor))
 253  ABI_MALLOC(gvnlx_direc,(2,npw*nspinor))
 254  ABI_MALLOC(vresid,(2,npw*nspinor))
 255 
 256  if (gen_eigenpb)  then
 257    ABI_MALLOC(gs_direc,(2,npw*nspinor))
 258  else
 259    ABI_MALLOC(gs_direc,(0,0))
 260  end if
 261 
 262  if (gen_eigenpb) then
 263    ABI_MALLOC(scwavef,(2,npw*nspinor))
 264    ABI_MALLOC(direc_tmp,(2,npw*nspinor))
 265  end if
 266 
 267  if (gen_eigenpb.and.(inonsc==1))  then
 268    ABI_MALLOC_OR_DIE(ghc_all,(2,nband*npw*nspinor), ierr)
 269  end if
 270 
 271  if (wfopta10==2.or.wfopta10==3)  then
 272    ABI_MALLOC(work,(2,npw*nspinor))
 273  end if
 274 
 275  if (gen_eigenpb.and.(wfopta10==2.or.wfopta10==3))  then
 276    ABI_MALLOC(swork,(2,npw*nspinor))
 277  else
 278    ABI_MALLOC(swork,(0,0))
 279  end if
 280 
 281  if (wfopta10==2 .or. wfopta10==3) then
 282    ABI_MALLOC(ghcws,(2,npw*nspinor))
 283    ABI_MALLOC(gh_direcws,(2,npw*nspinor))
 284    ABI_MALLOC(gvnlx_dummy,(2,npw*nspinor))
 285  else
 286    ABI_MALLOC(gvnlx_dummy,(0,0))
 287  end if
 288 
 289 !if "generalized eigenproblem", not sure of wfoptalg=2,3 algorithms
 290  if ((gen_eigenpb).and.(wfopta10==2.or.wfopta10==3)) then
 291    write(message, '(a,a,a,a,a)' ) &
 292 &   'Conjugate gradient algorithm not tested with',ch10,&
 293 &   'wfoptalg=2 or 3 and usepaw==1 !',ch10,&
 294 &   'Program will continue at your own risk...'
 295    ABI_WARNING(message)
 296  end if
 297 
 298 !Electric field: definition of local variables:
 299 !detovc(1:2,ifor,idir) determinant of the overlap matrix
 300 !S_{nm}(k,k+dk)=<u_{n,k}|u_{m,k+dk}>, with the states at
 301 !k as bras (neighbor is specified by ifor and idir)
 302 !detovd                same as detovc but with <u_{n,k}| replaced by
 303 !<D| (search direction) in the band-th line
 304 !grad_berry(1:2,ipw)   Berry phase term contribution to the gradient vector
 305 !hel(ifor,idir)        helicity of the ellipse associated w/ (ifor,idir)
 306 !bcut(ifor,idir)       branch cut of the ellipse associated w/ (ifor,idir)
 307 !theta_min             optimal angle theta in line_minimization when electric
 308 !field is on
 309 !grad_total(1:2,ipw)   total gradient (zero field term plus Berry phase term)
 310 
 311  finite_field = ( (berryopt == 4) .or. (berryopt == 6) .or. (berryopt == 7) .or.   &
 312 & (berryopt == 14) .or. (berryopt == 16) .or. (berryopt == 17) )
 313  ncpgr = 0 ! do not think the cprj's here need gradients (no force computation in cgwf)
 314 
 315  if (finite_field) then
 316 
 317 !  ji : These could be a couple of new input variables (but it is OK to define them here)
 318    ikptf = dtefield%i2fbz(ikpt)
 319    ikgf = dtefield%fkgindex(ikptf)  ! this is the shift for pwind
 320    mcg_q = mpw*mband*nspinor
 321    ABI_MALLOC(detovc,(2,2,3))
 322    ABI_MALLOC(detovd,(2,2,3))
 323    ABI_MALLOC(grad_berry,(2,npw*nspinor))
 324    ABI_MALLOC(cg1_k,(2,mpw))
 325    ABI_MALLOC(cgq_k,(2,mcg_q))
 326    ABI_MALLOC(grad_total,(2,npw*nspinor))
 327    ABI_MALLOC(sflag_k,(mband))
 328    ABI_MALLOC(pwind_k,(mpw))
 329    ABI_MALLOC(pwnsfac_k,(4,mpw))
 330    ABI_MALLOC(smat_k,(2,mband,mband))
 331    ABI_MALLOC(smat_inv,(2,mband,mband))
 332 !  now the special features if using PAW
 333    if (gs_hamk%usepaw /= 0) then
 334      ABI_MALLOC(smat_k_paw,(2,gs_hamk%usepaw*mband,gs_hamk%usepaw*mband))
 335      smat_k_paw(:,:,:) = zero
 336 !    the following are arguments to nonlop used to apply the on-site dipole to direc vector
 337      choice = 1 ! only apply projectors
 338      paw_opt = 1 ! only apply Dij
 339      signs = 2 ! apply nonlop to vector in k space
 340 !    following two items are the nonlocal potential strength dij due to the on-site dipoles
 341      dimenlc1 = 2*gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2
 342      dimenlr1 = gs_hamk%lmnmax*(gs_hamk%lmnmax+1)/2
 343      dimenl2 = natom
 344 !    cprj structures for finite_field case
 345      ABI_MALLOC(dimlmn,(natom))
 346      do iatom = 1, natom
 347        itypat = gs_hamk%typat(iatom)
 348        dimlmn(iatom)=dtefield%lmn_size(itypat)
 349      end do
 350      ABI_MALLOC(dimlmn_srt,(natom))
 351      iatom = 0
 352      do itypat = 1, gs_hamk%ntypat
 353        do iat = 1, gs_hamk%nattyp(itypat)
 354          iatom = iatom + 1
 355          dimlmn_srt(iatom) = dtefield%lmn_size(itypat)
 356        end do
 357      end do
 358      ABI_MALLOC(ikptf_recv,(nproc_distrb))
 359      ABI_MALLOC(cprj_k,(natom,mband*nspinor))
 360      ABI_MALLOC(cprj_kb,(natom,mband*nspinor))
 361      ABI_MALLOC(cprj_direc,(natom,mband*nspinor))
 362      ABI_MALLOC(cprj_band_srt,(natom,nspinor))
 363      ABI_MALLOC(cprj_gat,(natom,mband*nspinor*nproc_distrb))
 364      call pawcprj_alloc(cprj_k,ncpgr,dimlmn)
 365      call pawcprj_alloc(cprj_kb,ncpgr,dimlmn)
 366      call pawcprj_alloc(cprj_direc,ncpgr,dimlmn)
 367      call pawcprj_alloc(cprj_band_srt,ncpgr,dimlmn_srt)
 368      call pawcprj_alloc(cprj_gat,ncpgr,dimlmn)
 369      if (nkpt /= dtefield%fnkpt) then
 370        ABI_MALLOC(cprj_fkn,(natom,mband*nspinor))
 371        ABI_MALLOC(cprj_ikn,(natom,mband*nspinor))
 372        call pawcprj_alloc(cprj_fkn,ncpgr,dimlmn)
 373        call pawcprj_alloc(cprj_ikn,ncpgr,dimlmn)
 374      end if
 375    else
 376      ABI_MALLOC(smat_k_paw,(0,0,0))
 377      ABI_MALLOC(dimlmn,(0))
 378      ABI_MALLOC(dimlmn_srt,(0))
 379      ABI_MALLOC(ikptf_recv,(0))
 380      ABI_MALLOC(cprj_k,(0,0))
 381      ABI_MALLOC(cprj_kb,(0,0))
 382      ABI_MALLOC(cprj_direc,(0,0))
 383      ABI_MALLOC(cprj_band_srt,(0,0))
 384      ABI_MALLOC(cprj_gat,(0,0))
 385    end if
 386  end if ! finite_field
 387 
 388  ! ======================================================================
 389  ! If generalized eigenproblem: has to know <g|S|c> for all
 390  ! bands (for orthogonalization purpose); take benefit of this
 391  ! calculation to compute <g|H|c> at the same time, and cprj_k if finite_field
 392  ! ======================================================================
 393  if (gen_eigenpb.and.inonsc==1) then
 394    do iblock=1,nblock
 395      ibandmin=1+(iblock-1)*nbdblock
 396      ibandmax=min(iblock*nbdblock,nband)
 397      do iband=ibandmin,ibandmax
 398        ibdblock=iband-(iblock-1)*nbdblock
 399        icg_shift=npw*nspinor*(iband-1)+icg
 400        igsc_shift=npw*nspinor*(iband-1)+igsc
 401 
 402        call cg_zcopy(npw*nspinor,cg(1,1+icg_shift),cwavef)
 403 
 404 !      Compute <g|H|c>
 405 !      By setting ieigen to iband, Fock contrib. of this iband to the energy will be calculated
 406        call fock_set_ieigen(gs_hamk%fockcommon,iband)
 407        sij_opt=1
 408        if (finite_field .and. gs_hamk%usepaw == 1) then
 409          call getghc(0,cwavef,cprj_band_srt,ghc,scwavef,gs_hamk,gvnlxc,&
 410            &         eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc)
 411          call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_k,natom,iband,0,ikpt,&
 412 &         1,isppol,mband,1,natom,1,mband,dimlmn,nspinor,nsppol,0,&
 413 &         mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
 414        else
 415          call getghc(cpopt,cwavef,cprj_dum,ghc,scwavef,gs_hamk,gvnlxc,&
 416            &         eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc)
 417        end if
 418 
 419        call cg_zcopy(npw*nspinor,ghc,ghc_all(1,1+icg_shift-icg))
 420        call cg_zcopy(npw*nspinor,scwavef,gsc(1,1+igsc_shift))
 421 
 422      end do
 423    end do
 424  end if
 425 
 426  ! Loop over blocks of bands. In the standard band-sequential algorithm, nblock=nband.
 427  do iblock=1,nblock
 428    counter=100*iblock*nbdblock+inonsc
 429 
 430    ! Loop over bands in a block
 431    ! This loop can be MPI-parallelized, over processors attached to the same k point
 432    ibandmin=1+(iblock-1)*nbdblock
 433    ibandmax=min(iblock*nbdblock,nband)
 434 
 435    ! Big iband loop
 436    do iband=ibandmin,ibandmax
 437      ibdblock=iband-(iblock-1)*nbdblock
 438      counter=100*iband+inonsc
 439      icg_shift=npw*nspinor*(iband-1)+icg
 440      igsc_shift=npw*nspinor*(iband-1)+igsc
 441 
 442      ! ======================================================================
 443      ! ========== INITIALISATION OF MINIMIZATION ITERATIONS =================
 444      ! ======================================================================
 445 
 446      if (prtvol>=10) then ! Tell us what is going on:
 447        write(message, '(a,i6,2x,a,i3,a)' )' --- cgwf is called for band',iband,'for',nline,' lines'
 448        call wrtout(std_out,message,'PERS')
 449      end if
 450 
 451      dotgp=one
 452      if (finite_field) then
 453        detovc(:,:,:) = zero ; detovd(:,:,:) = zero
 454        phase_init(:) = zero
 455        dphase_aux1(:) = zero
 456        phase_end(:) = zero
 457        bcut(:,:) = zero
 458        hel(:,:) = 0
 459      end if
 460 
 461      ! Extraction of the vector that is iteratively updated
 462      call cg_zcopy(npw*nspinor,cg(1,1+icg_shift),cwavef)
 463 
 464      ! If generalized eigenproblem: extraction of the overlap information
 465      if (gen_eigenpb) then
 466        call cg_zcopy(npw*nspinor,gsc(1,1+igsc_shift),scwavef)
 467      end if
 468 
 469      ! Normalize incoming wf (and S.wf, if generalized eigenproblem):
 470      ! WARNING: It might be interesting to skip the following operation.
 471      ! The associated routines should be reexamined to see whether cwavef is not already normalized.
 472      if (gen_eigenpb) then
 473        call dotprod_g(dotr,doti,istwf_k,npw*nspinor,2,cwavef,scwavef,me_g0,mpi_enreg%comm_spinorfft)
 474        dotr=sqrt(dotr**2+doti**2); xnorm=one/sqrt(dotr)
 475        call cg_zscal(npw*nspinor,(/xnorm,zero/),cwavef)
 476        call cg_zscal(npw*nspinor,(/xnorm,zero/),scwavef)
 477      else
 478        call sqnorm_g(dotr,istwf_k,npw*nspinor,cwavef,me_g0,mpi_enreg%comm_fft)
 479        xnorm=one/sqrt(abs(dotr))
 480        call cg_zscal(npw*nspinor,(/xnorm,zero/),cwavef)
 481      end if
 482 
 483      if (prtvol==-level) then
 484        write(message,'(a,f14.6)')' cgwf: xnorm = ',xnorm
 485        call wrtout(std_out,message,'PERS')
 486      end if
 487 
 488      ! Compute (or extract) <g|H|c>
 489      if (gen_eigenpb.and.(inonsc==1)) then
 490 
 491 !$OMP PARALLEL DO PRIVATE(ipw)
 492        do ipw=1,npw*nspinor
 493          ghc(1,ipw)=xnorm*ghc_all(1,ipw+icg_shift-icg)
 494          ghc(2,ipw)=xnorm*ghc_all(2,ipw+icg_shift-icg)
 495        end do
 496 
 497      else
 498 !      By setting ieigen to iband, Fock contrib. of this iband to the energy will be calculated
 499        call fock_set_ieigen(gs_hamk%fockcommon,iband)
 500        sij_opt=0
 501        call getghc(cpopt,cwavef,cprj_dum,ghc,gsc_dummy,gs_hamk,gvnlxc,&
 502          &       eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc)
 503      end if
 504 
 505 
 506      ! Minimisation of the residual: compute <G|(H-zshift)^2|C iband,k>
 507      if(wfopta10==2 .or. wfopta10==3) then
 508        ghcws(:,:)=ghc(:,:)
 509        if (gen_eigenpb) then
 510          sij_opt=1
 511          work(:,:)=ghc(:,:)-zshift(iband)*scwavef(:,:)
 512        else
 513          sij_opt=0
 514          work(:,:)=ghc(:,:)-zshift(iband)*cwavef(:,:)
 515        end if
 516        call getghc(cpopt,work,cprj_dum,ghc,swork,gs_hamk,gvnlx_dummy,&
 517          &       eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc)
 518        if (gen_eigenpb) then
 519          ghc(:,:)=ghc(:,:)-zshift(iband)*swork(:,:)
 520        else
 521          ghc(:,:)=ghc(:,:)-zshift(iband)*work(:,:)
 522        end if
 523      end if
 524 
 525      ! ======================================================================
 526      ! ====== BEGIN LOOP FOR A GIVEN BAND: MINIMIZATION ITERATIONS ==========
 527      ! ======================================================================
 528      if(nline/=0)then
 529        do iline=1,nline
 530 
 531          ! === COMPUTE THE RESIDUAL ===
 532 
 533          ! Compute lambda = <C|H|C> or <C|(H-zshift)**2|C>
 534          call dotprod_g(chc,doti,istwf_k,npw*nspinor,1,cwavef,ghc,me_g0,mpi_enreg%comm_spinorfft)
 535          lam0=chc
 536 
 537          ! Check that lam0 is decreasing on succeeding lines:
 538          if (.not.finite_field) then
 539            if (iline==1) then
 540              lamold=lam0
 541            else
 542              if (lam0 > lamold+tol12) then
 543                write(message, '(a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')&
 544 &               'New trial energy at line ',iline,' = ',lam0,ch10,&
 545 &               'is higher than former =',lamold,ch10
 546                ABI_WARNING(message)
 547              end if
 548              lamold=lam0
 549            end if
 550          end if
 551 
 552          ! Compute residual vector:
 553          ! Note that vresid is precomputed to garantee cancellation of errors
 554          ! and allow residuals to reach values as small as 1.0d-24 or better.
 555 
 556          if (wfopta10<=1) then
 557            eval=chc
 558            if (gen_eigenpb) then
 559 
 560 !$OMP PARALLEL DO
 561              do ipw=1,npw*nspinor
 562                vresid(1,ipw)=ghc(1,ipw)-chc*scwavef(1,ipw)
 563                vresid(2,ipw)=ghc(2,ipw)-chc*scwavef(2,ipw)
 564              end do
 565            else
 566 !$OMP PARALLEL DO
 567              do ipw=1,npw*nspinor
 568                vresid(1,ipw)=ghc(1,ipw)-chc*cwavef(1,ipw)
 569                vresid(2,ipw)=ghc(2,ipw)-chc*cwavef(2,ipw)
 570              end do
 571            end if
 572          else
 573            call dotprod_g(eval,doti,istwf_k,npw*nspinor,1,cwavef,ghcws,me_g0,mpi_enreg%comm_spinorfft)
 574            if (gen_eigenpb) then
 575 !$OMP PARALLEL DO
 576              do ipw=1,npw*nspinor
 577                vresid(1,ipw)=ghcws(1,ipw)-eval*scwavef(1,ipw)
 578                vresid(2,ipw)=ghcws(2,ipw)-eval*scwavef(2,ipw)
 579              end do
 580            else
 581 !$OMP PARALLEL DO
 582              do ipw=1,npw*nspinor
 583                vresid(1,ipw)=ghcws(1,ipw)-eval*cwavef(1,ipw)
 584                vresid(2,ipw)=ghcws(2,ipw)-eval*cwavef(2,ipw)
 585              end do
 586            end if
 587          end if
 588 
 589          ! Compute residual (squared) norm
 590          call sqnorm_g(resid(iband),istwf_k,npw*nspinor,vresid,me_g0,mpi_enreg%comm_fft)
 591 
 592          if (prtvol==-level) then
 593            write(message,'(a,i0,2f14.6)')' cgwf: iline,eval,resid = ',iline,eval,resid(iband)
 594            call wrtout(std_out,message,'PERS')
 595          end if
 596 
 597          ! ======================================================================
 598          ! ============== CHECK FOR CONVERGENCE CRITERIA ========================
 599          ! ======================================================================
 600 
 601          ! If residual sufficiently small stop line minimizations
 602          if (resid(iband)<tolwfr) then
 603            if (prtvol>=10) then
 604              write(message, '(a,i4,a,i2,a,es12.4)' ) &
 605               ' cgwf: band ',iband,' converged after ',iline,' line minimizations: resid =',resid(iband)
 606              call wrtout(std_out,message,'PERS')
 607            end if
 608            nskip=nskip+(nline-iline+1)  ! Number of two-way 3D ffts skipped
 609            exit                         ! Exit from the loop on iline
 610          end if
 611 
 612          ! If user require exiting the job, stop line minimisations
 613          if (quit==1) then
 614            write(message, '(a,i0)' )' cgwf: user require exiting => skip update of band ',iband
 615            call wrtout(std_out,message,'PERS')
 616 
 617            nskip=nskip+(nline-iline+1)  ! Number of two-way 3D ffts skipped
 618            exit                         ! Exit from the loop on iline
 619          end if
 620 
 621          ! ======================================================================
 622          ! =========== COMPUTE THE STEEPEST DESCENT DIRECTION ===================
 623          ! ======================================================================
 624 
 625          ! Compute the steepest descent direction
 626          if (gen_eigenpb) then
 627            call cg_zcopy(npw*nspinor,vresid,direc)  ! Store <G|H-lambda.S|C> in direc
 628          else
 629            call cg_zcopy(npw*nspinor,ghc,direc)     ! Store <G|H|C> in direc
 630          end if
 631 
 632          ! Electric field: compute the gradient of the Berry phase part of the energy functional.
 633          ! See PRL 89, 117602 (2002) [[cite:Souza2002]], grad_berry(:,:) is the second term of Eq. (4)
 634          if (finite_field) then
 635 
 636            call make_grad_berry(cg,cgq,cprj_k,detovc,dimlmn,dimlmn_srt,direc,dtefield,grad_berry,&
 637 &           gs_hamk,iband,icg,ikpt,isppol,mband,mcg,mcgq,mkgq,mpi_enreg,mpw,natom,nkpt,npw,npwarr,&
 638 &           nspinor,nsppol,pwind,pwind_alloc,pwnsfac,pwnsfacq)
 639 
 640 !          Add grad_berry to direc and store original gradient
 641            direc(:,:) = direc(:,:) + grad_berry(:,:)
 642            grad_total(:,:) = direc(:,:)
 643 !          DEBUG: check that grad_berry is orthogonal to the occupied manifold at k
 644 !          do jband = 1, dtefield%mband_occ
 645 !          dotr = zero  ;  doti = zero
 646 !          do ipw = 1, npw*nspinor
 647 !          if(.not.gen_eigenpb) then
 648 !          dotr = dotr + cg(1,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(1,ipw) + &
 649 !          &                 cg(2,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(2,ipw)
 650 !          doti = doti + cg(1,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(2,ipw) - &
 651 !          &                 cg(2,icg + (jband-1)*npw*nspinor + ipw)*grad_berry(1,ipw)
 652 !          end if
 653 !          end do
 654 !          if ((abs(dotr) > tol12).or.(abs(doti) > tol12)) then
 655 !          write(std_out,'(a)')'cgwf-berry : ERROR (orthogonality)'
 656 !          write(std_out,'(3(2x,i3),2(5x,e16.9))')ikpt,iband,jband,dotr,doti
 657 !          stop
 658 !          end if
 659 !          end do
 660 !          ENDDEBUG
 661          end if   ! finite_field
 662 
 663          ! =========== PROJECT THE STEEPEST DESCENT DIRECTION ===================
 664          ! ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================
 665 
 666          ! The following projection over the subspace orthogonal to occupied bands
 667          ! is optional. It is a bit more accurate, but doubles the number of N^3 ops.
 668          ! It is done only if ortalg>=0.
 669 
 670          ! Project the steepest descent direction:
 671          ! direc(2,npw)=<G|H|Cnk> - \sum_{(i<=n)} <G|H|Cik> , normalized.
 672 
 673          if(ortalg>=0)then
 674            if (gen_eigenpb) then
 675              call projbd(cg,direc,iband,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,&
 676 &             gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft)
 677            else
 678              call projbd(cg,direc,-1   ,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,&
 679 &             gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft)
 680            end if
 681          else
 682 !          For negative ortalg must still project current band out of conjugate vector (unneeded if gen_eigenpb)
 683            if (.not.gen_eigenpb) then
 684              call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,cwavef,direc,me_g0,mpi_enreg%comm_spinorfft)
 685              if(istwf_k==1)then
 686                call cg_zaxpy(npw*nspinor,-(/dotr,doti/),cwavef,direc)
 687              else
 688                call cg_zaxpy(npw*nspinor,(/-dotr,zero/),cwavef,direc)
 689              end if
 690            end if
 691          end if
 692 
 693          ! For a generalized eigenpb, store the steepest descent direction
 694          if (gen_eigenpb) direc_tmp=direc
 695 
 696          ! ======================================================================
 697          ! ======== PRECONDITION THE STEEPEST DESCENT DIRECTION =================
 698          ! ======================================================================
 699 
 700          ! If wfoptalg>=10, the precondition matrix is kept constant during iteration ; otherwise it is recomputed
 701          if (wfoptalg<10.or.iline==1) then
 702            call cg_precon(cwavef,zero,istwf_k,kinpw,npw,nspinor,me_g0,optekin,pcon,direc,mpi_enreg%comm_fft)
 703 
 704            ! Minimisation of the residual: must precondition twice
 705            ! (might make only one call, with modified precon routine - might also make a shift !!!)
 706            if(wfopta10==2 .or. wfopta10==3)then
 707              call cg_precon(cwavef,zero,istwf_k,kinpw,npw,nspinor,me_g0,optekin,pcon,direc,mpi_enreg%comm_fft)
 708              if(iline==1)then
 709 !$OMP PARALLEL DO
 710                do ipw=1,npw
 711                  pcon(ipw)=pcon(ipw)**2
 712                  pcon(ipw)=pcon(ipw)**2
 713                end do
 714              end if
 715            end if
 716          else
 717            do ispinor=1,nspinor
 718              igs=(ispinor-1)*npw
 719 !$OMP PARALLEL DO
 720              do ipw=1+igs,npw+igs
 721                direc(1,ipw)=direc(1,ipw)*pcon(ipw-igs)
 722                direc(2,ipw)=direc(2,ipw)*pcon(ipw-igs)
 723              end do
 724            end do
 725          end if
 726 
 727          ! ======= PROJECT THE PRECOND. STEEPEST DESCENT DIRECTION ==============
 728          ! ========= OVER THE SUBSPACE ORTHOGONAL TO OTHER BANDS ================
 729          ! Projecting again out all bands (not normalized).
 730          call projbd(cg,direc,-1,icg,igsc,istwf_k,mcg,mgsc,nband,npw,nspinor,&
 731 &         gsc,scprod,0,tim_projbd,useoverlap,me_g0,mpi_enreg%comm_fft)
 732 
 733          ! ======================================================================
 734          ! ================= COMPUTE THE CONJUGATE-GRADIENT =====================
 735          ! ======================================================================
 736 
 737          if (finite_field) then
 738            call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,grad_total,me_g0,mpi_enreg%comm_spinorfft)
 739            !DEBUG (electric field)
 740            !check that the dotproduct is real
 741            !if (abs(doti) > tol8) then
 742            !write(std_out,*) ' cgwf-berry: ERROR'
 743            !write(std_out,*) ' doti = ',doti
 744            !stop
 745            !end if
 746            !ENDDEBUG
 747          else
 748            if (gen_eigenpb) then
 749              call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,direc_tmp,me_g0,mpi_enreg%comm_spinorfft)
 750            else
 751              call dotprod_g(dotgg,doti,istwf_k,npw*nspinor,1,direc,ghc,me_g0,mpi_enreg%comm_spinorfft)
 752            end if
 753          end if
 754 
 755          ! MJV: added 5 Feb 2012 - causes divide by 0 on next iteration of iline
 756          if (abs(dotgg) < TINY(0.0_dp)*1.e50_dp) dotgg = TINY(0.0_dp)*1.e50_dp
 757 
 758          ! At first iteration, gamma is set to zero
 759          if (iline==1) then
 760            gamma=zero
 761            dotgp=dotgg
 762            call cg_zcopy(npw*nspinor,direc,conjgr)
 763            if (prtvol==-level)then
 764              write(message,'(a,es21.10e3)')' cgwf: dotgg = ',dotgg
 765              call wrtout(std_out,message,'PERS')
 766            end if
 767 
 768          else
 769            gamma=dotgg/dotgp
 770            dotgp=dotgg
 771 
 772            if (prtvol==-level)then
 773              write(message,'(a,2es16.6)')' cgwf: dotgg,gamma = ',dotgg,gamma
 774              call wrtout(std_out,message,'PERS')
 775            end if
 776 
 777            ! Note: another way to compute gamma: Polak, Ribiere no real improvement ; to be more carrefully tested
 778            ! call dotprod_g(dotgg,doti,istwf_k,mpi_enreg,npw*nspinor,1,direc,direc_tmp)
 779            ! !direcp must be set to zero at the beginning
 780            ! direcp=direc-direcp
 781            ! call dotprod_g(dotgmg,doti,istwf_k,mpi_enreg,npw*nspinor,1,direcp,direc_tmp)
 782            ! direcp=direc;gamma=dotgmg/dotgp;dotgp=dotgmg
 783 
 784 !$OMP PARALLEL DO
 785            do ipw=1,npw*nspinor
 786              conjgr(1,ipw)=direc(1,ipw)+gamma*conjgr(1,ipw)
 787              conjgr(2,ipw)=direc(2,ipw)+gamma*conjgr(2,ipw)
 788            end do
 789            !call cg_zaxpby(npw*nspinor,cg_one,direc,(/gamma,zero/),conjgr)
 790          end if
 791 
 792          ! ======================================================================
 793          ! ============ PROJECTION OF THE CONJUGATED GRADIENT ===================
 794          ! ======================================================================
 795 
 796          if (gen_eigenpb) then
 797            call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,scwavef,conjgr,me_g0,mpi_enreg%comm_spinorfft)
 798          else
 799            call dotprod_g(dotr,doti,istwf_k,npw*nspinor,3,cwavef,conjgr,me_g0,mpi_enreg%comm_spinorfft)
 800          end if
 801 
 802          ! Project the conjugated gradient onto the current band
 803          ! MG: TODO: this is an hot spot that could be rewritten with BLAS! provided
 804          ! that direc --> conjgr
 805          if(istwf_k==1)then
 806 
 807 !$OMP PARALLEL DO
 808            do ipw=1,npw*nspinor
 809              direc(1,ipw)=conjgr(1,ipw)-(dotr*cwavef(1,ipw)-doti*cwavef(2,ipw))
 810              direc(2,ipw)=conjgr(2,ipw)-(dotr*cwavef(2,ipw)+doti*cwavef(1,ipw))
 811            end do
 812          else
 813 !$OMP PARALLEL DO
 814            do ipw=1,npw*nspinor
 815              direc(1,ipw)=conjgr(1,ipw)-dotr*cwavef(1,ipw)
 816              direc(2,ipw)=conjgr(2,ipw)-dotr*cwavef(2,ipw)
 817            end do
 818          end if
 819 
 820          ! In case of generalized eigenproblem, normalization of direction vector
 821          ! cannot be done here (because S|D> is not known here).
 822          if (.not.gen_eigenpb) then
 823            call sqnorm_g(dotr,istwf_k,npw*nspinor,direc,me_g0,mpi_enreg%comm_fft)
 824            xnorm=one/sqrt(abs(dotr))
 825            call cg_zscal(npw*nspinor,(/xnorm,zero/),direc)
 826            xnorm=one
 827          end if
 828 
 829          ! ======================================================================
 830          ! ===== COMPUTE CONTRIBUTIONS TO 1ST AND 2ND DERIVATIVES OF ENERGY =====
 831          ! ======================================================================
 832 
 833          ! Compute gh_direc = <G|H|D> and eventually gs_direc = <G|S|D>
 834          sij_opt=0;if (gen_eigenpb) sij_opt=1
 835 
 836          call getghc(cpopt,direc,cprj_dum,gh_direc,gs_direc,gs_hamk,gvnlx_direc,&
 837            &         eval,mpi_enreg,1,prtvol,sij_opt,tim_getghc,type_calc)
 838 
 839          if(wfopta10==2 .or. wfopta10==3)then
 840            ! Minimisation of the residual, so compute <G|(H-zshift)^2|D>
 841            gh_direcws(:,:)=gh_direc(:,:)
 842            if (gen_eigenpb) then
 843              sij_opt=1
 844              work(:,:)=gh_direc(:,:)-zshift(iband)*gs_direc(:,:)
 845            else
 846              sij_opt=0
 847              work(:,:)=gh_direc(:,:)-zshift(iband)*direc(:,:)
 848            end if
 849 
 850            call getghc(cpopt,work,cprj_dum,gh_direc,swork,gs_hamk,gvnlx_dummy,&
 851              &           eval,mpi_enreg,1,prtvol,0,tim_getghc,type_calc)
 852 
 853            if (gen_eigenpb) then
 854              gh_direc(:,:)=gh_direc(:,:)-zshift(iband)*swork(:,:)
 855            else
 856              gh_direc(:,:)=gh_direc(:,:)-zshift(iband)*work(:,:)
 857            end if
 858          end if
 859 
 860          ! In case of generalized eigenproblem, compute now the norm of the conjugated gradient
 861          if (gen_eigenpb) then
 862            call dotprod_g(dotr,doti,istwf_k,npw*nspinor,1,direc,gs_direc,me_g0,mpi_enreg%comm_spinorfft)
 863            xnorm=one/sqrt(abs(dotr))
 864          end if
 865 
 866          ! Compute dhc = Re{<D|H|C>}
 867          call dotprod_g(dhc,doti,istwf_k,npw*nspinor,1,direc,ghc,me_g0,mpi_enreg%comm_spinorfft)
 868          dhc=dhc*xnorm
 869 
 870          ! Compute <D|H|D> or <D|(H-zshift)^2|D>
 871          call dotprod_g(dhd,doti,istwf_k,npw*nspinor,1,direc,gh_direc,me_g0,mpi_enreg%comm_spinorfft)
 872          dhd=dhd*xnorm**2
 873 
 874          if(prtvol==-level)then
 875            write(message,'(a,3f14.6)') 'cgwf: chc,dhc,dhd=',chc,dhc,dhd
 876            call wrtout(std_out,message,'PERS')
 877          end if
 878 
 879          ! ======================================================================
 880          ! ======= COMPUTE MIXING FACTORS - CHECK FOR CONVERGENCE ===============
 881          ! ======================================================================
 882 
 883          if (.not.finite_field) then
 884            ! Compute tan(2 theta),sin(theta) and cos(theta)
 885            tan2th=2.0_dp*dhc/(chc-dhd)
 886 
 887            if (abs(tan2th)<1.d-05) then
 888              costh=1.0_dp-0.125_dp*tan2th**2
 889              sinth=0.5_dp*tan2th*(1.0_dp-0.375_dp*tan2th**2)
 890 
 891              ! Check that result is above machine precision
 892              if (abs(sinth)<epsilon(0._dp)) then
 893                write(message, '(a,es16.4)' ) ' cgwf: converged with tan2th=',tan2th
 894                call wrtout(std_out,message,'PERS')
 895                ! Number of one-way 3D ffts skipped
 896                nskip=nskip+2*(nline-iline)
 897                exit ! Exit from the loop on iline
 898              end if
 899 
 900            else
 901              root=sqrt(1.0_dp+tan2th**2)
 902              costh=sqrt(0.5_dp+0.5_dp/root)
 903              sinth=sign(sqrt(0.5_dp-0.5_dp/root),tan2th)
 904            end if
 905 
 906            ! Check for lower of two possible roots (same sign as curvature at theta where slope is zero)
 907            diff=(chc-dhd)
 908            ! Swap c and d if value of diff is positive
 909            if (diff>zero) then
 910              swap=costh
 911              costh=-sinth
 912              sinth=swap
 913              if(prtvol<0 .or. prtvol>=10)then
 914                write(message,*)'   Note: swap roots, iline,diff=',iline,diff
 915                call wrtout(std_out,message,'PERS')
 916              end if
 917            end if
 918 
 919          else
 920            ! In case the eletric field is on, the line minimization has to be done numerically
 921 
 922            ! Compute determinant of the overlap matrix where in the band-th line
 923            ! the wavefunction is replaced by the search direction
 924            job = 10 ; shiftbd = 0
 925            do idir = 1, 3
 926              ! do not do this for efield_dot(idir)=0
 927              if (abs(dtefield%efield_dot(idir)) < tol12) cycle
 928              do ifor = 1, 2
 929                ikpt2f = dtefield%ikpt_dk(ikptf,ifor,idir)
 930                if (dtefield%indkk_f2ibz(ikpt2f,6) == 1) then
 931                  itrs = 10
 932                else
 933                  itrs = 0
 934                end if
 935                ikpt2 = dtefield%indkk_f2ibz(ikpt2f,1)
 936                npw_k2 = npwarr(ikpt2)
 937                pwind_k(1:npw) = pwind(ikgf+1:ikgf+npw,ifor,idir)
 938                pwnsfac_k(1:2,1:npw) = pwnsfac(1:2,ikgf+1:ikgf+npw)
 939                sflag_k(:) = dtefield%sflag(:,ikpt+(isppol-1)*nkpt,ifor,idir)
 940                smat_k(:,:,:) = dtefield%smat(:,:,:,ikpt+(isppol-1)*nkpt,ifor,idir)
 941 
 942                if (mpi_enreg%nproc_cell > 1) then
 943                  icg1 = dtefield%cgqindex(2,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt)
 944                  cgq_k(:,1:dtefield%mband_occ*nspinor*npw_k2) = &
 945 &                 cgq(:,icg1+1:icg1+dtefield%mband_occ*nspinor*npw_k2)
 946                  idum1 = dtefield%cgqindex(3,ifor+2*(idir-1),ikpt+(isppol-1)*nkpt)
 947                  pwnsfac_k(3:4,1:npw_k2) = pwnsfacq(1:2,idum1+1:idum1+npw_k2)
 948                else
 949                  icg1 = dtefield%cgindex(ikpt2,isppol)
 950                  cgq_k(:,1:dtefield%mband_occ*nspinor*npw_k2) = &
 951 &                 cg(:,icg1+1:icg1+dtefield%mband_occ*nspinor*npw_k2)
 952                  idum1=dtefield%fkgindex(ikpt2f)
 953                  pwnsfac_k(3:4,1:npw_k2) = pwnsfac(1:2,idum1+1:idum1+npw_k2)
 954                end if
 955 
 956                icg1 = 0 ; ddkflag = 0
 957                if (gen_eigenpb) then
 958 !$OMP PARALLEL DO
 959                  do ipw=1,npw*nspinor
 960                    direc_tmp(1,ipw)=direc(1,ipw)*xnorm
 961                    direc_tmp(2,ipw)=direc(2,ipw)*xnorm
 962                  end do
 963                  ! need cprj corresponding to direc_tmp in order to make smat_k_paw properly
 964                  call getcprj(1,0,direc_tmp,cprj_band_srt,&
 965 &                 gs_hamk%ffnl_k,0,gs_hamk%indlmn,gs_hamk%istwf_k,gs_hamk%kg_k,&
 966 &                 gs_hamk%kpg_k,gs_hamk%kpt_k,gs_hamk%lmnmax,gs_hamk%mgfft,&
 967 &                 mpi_enreg,gs_hamk%natom,gs_hamk%nattyp,gs_hamk%ngfft,gs_hamk%nloalg,&
 968 &                 gs_hamk%npw_k,gs_hamk%nspinor,gs_hamk%ntypat,gs_hamk%phkxred,gs_hamk%ph1d,&
 969 &                 gs_hamk%ph3d_k,gs_hamk%ucvol,gs_hamk%useylm)
 970 
 971                  call pawcprj_copy(cprj_k,cprj_direc)
 972                  call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_direc,gs_hamk%natom,iband,0,&
 973 &                 ikpt,1,isppol,mband,1,gs_hamk%natom,1,mband,dimlmn,gs_hamk%nspinor,nsppol,0,&
 974 &                 mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
 975 
 976 !                icp1=dtefield%mband_occ*(ikptf-1)
 977                  icp2=mband*nspinor*(ikpt2-1)
 978                  call pawcprj_get(gs_hamk%atindx,cprj_kb,dtefield%cprj,natom,1,icp2,ikpt,0,isppol,&
 979 &                 mband,dtefield%fnkpt,natom,mband,mband,nspinor,nsppol,0,&
 980 &                 mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
 981 
 982                  if (ikpt2 /= ikpt2f) then ! construct cprj_kb by symmetry
 983                    call pawcprj_copy(cprj_kb,cprj_ikn)
 984                    call pawcprj_symkn(cprj_fkn,cprj_ikn,dtefield%atom_indsym,dimlmn,-1,gs_hamk%indlmn,&
 985 &                   dtefield%indkk_f2ibz(ikpt2f,2),dtefield%indkk_f2ibz(ikpt2f,6),&
 986 &                   dtefield%fkptns(:,dtefield%i2fbz(ikpt2)),&
 987 &                   dtefield%lmax,dtefield%lmnmax,mband,natom,dtefield%mband_occ,nspinor,&
 988 &                   dtefield%nsym,gs_hamk%ntypat,gs_hamk%typat,dtefield%zarot)
 989                    call pawcprj_copy(cprj_fkn,cprj_kb)
 990                  end if
 991 
 992                  call smatrix_k_paw(cprj_direc,cprj_kb,dtefield,idir,ifor,mband,natom,smat_k_paw,gs_hamk%typat)
 993 
 994                  call smatrix(direc_tmp,cgq_k,cg1_k,ddkflag,dtm_k,icg1,icg1,&
 995 &                 itrs,job,iband,npw*nspinor,mcg_q,mpw,iband,&
 996 &                 mpw,dtefield%mband_occ,dtefield%nband_occ(isppol),&
 997 &                 npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,&
 998 &                 shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw)
 999                else
1000                  call smatrix(direc,cgq_k,cg1_k,ddkflag,dtm_k,icg1,icg1,&
1001 &                 itrs,job,iband,npw*nspinor,mcg_q,mpw,iband,&
1002 &                 mpw,dtefield%mband_occ,dtefield%nband_occ(isppol),&
1003 &                 npw,npw_k2,nspinor,pwind_k,pwnsfac_k,sflag_k,&
1004 &                 shiftbd,smat_inv,smat_k,smat_k_paw,gs_hamk%usepaw)
1005                end if
1006                detovd(:,ifor,idir) = dtm_k(:) ! Store the determinant of the overlap
1007 !              matrix (required to compute theta_min)
1008 !              DEBUG
1009 !              write(std_out,*)'cgwf-berry: detovc and detovd'
1010 !              write(std_out,*)detovc(:,ifor,idir)
1011 !              write(std_out,*)detovd(:,ifor,idir)
1012 !              write(std_out,*)'smat_k'
1013 !              do jband = 1, 4
1014 !              write(std_out,'(4(2x,e14.6))')smat_k(1,jband,:)
1015 !              write(std_out,'(4(2x,e14.6))')smat_k(2,jband,:)
1016 !              write(std_out,*)
1017 !              end do
1018 !              ENDDEBUG
1019              end do  ! ifor
1020            end do    ! idir
1021 
1022            call linemin(bcut,chc,costh,detovc,detovd,dhc,dhd,&
1023 &           dphase_aux1,dtefield%efield_dot,iline,&
1024 &           dtefield%fnkpt,dtefield%nstr,hel,phase_end,&
1025 &           phase_init,dtefield%sdeg,sinth,thetam)
1026 !          DEBUG
1027 !          if (mpi_enreg%me == 1) then
1028 !          write(std_out,*)'after linemin '
1029 !          write(std_out,'(a,3(2x,f16.9))')'phase_init  = ',phase_init(:)
1030 !          write(std_out,'(a,3(2x,f16.9))')'phase_end   = ',phase_end(:)
1031 !          write(std_out,'(a,3(2x,f16.9))')'dphase_aux1 = ',dphase_aux1(:)
1032 !          write(std_out,*) 'thetam',thetam
1033 !          end if
1034 !          ENDDEBUG
1035          end if  ! finite_field
1036 
1037          ! ======================================================================
1038          ! =========== GENERATE NEW |wf>, H|wf>, Vnl|Wf>, S|Wf> ... =============
1039          ! ======================================================================
1040 
1041          sintn=sinth*xnorm
1042 
1043 !$OMP PARALLEL DO
1044          do ipw=1,npw*nspinor
1045            cwavef(1,ipw)=cwavef(1,ipw)*costh+direc(1,ipw)*sintn
1046            cwavef(2,ipw)=cwavef(2,ipw)*costh+direc(2,ipw)*sintn
1047          end do
1048 
1049 !        call cg_zaxpby(npw*nspinor,(/sintn,zero/),direc,(/costh,zero/),cwavef)
1050          call cg_zcopy(npw*nspinor,cwavef,cg(1,1+icg_shift))
1051 
1052          do ipw=1,npw*nspinor
1053            ghc(1,ipw)  =ghc(1,ipw)*costh + gh_direc(1,ipw)*sintn
1054            ghc(2,ipw)  =ghc(2,ipw)*costh + gh_direc(2,ipw)*sintn
1055          end do
1056 
1057 
1058          if (use_subvnlx==1) then
1059 !$OMP PARALLEL DO
1060            do ipw=1,npw*nspinor
1061              gvnlxc(1,ipw)=gvnlxc(1,ipw)*costh + gvnlx_direc(1,ipw)*sintn
1062              gvnlxc(2,ipw)=gvnlxc(2,ipw)*costh + gvnlx_direc(2,ipw)*sintn
1063            end do
1064 !          call cg_zaxpby(npw*nspinor,(/sintn,zero/),gvnlx_direc,(/costh,zero/),gvnlxc)
1065          end if
1066 
1067          if (gen_eigenpb) then
1068 !$OMP PARALLEL DO
1069            do ipw=1,npw*nspinor
1070              scwavef(1,ipw)=scwavef(1,ipw)*costh+gs_direc(1,ipw)*sintn
1071              scwavef(2,ipw)=scwavef(2,ipw)*costh+gs_direc(2,ipw)*sintn
1072 !            gsc(1,ipw+igsc_shift)=scwavef(1,ipw)
1073 !            gsc(2,ipw+igsc_shift)=scwavef(2,ipw)
1074            end do
1075 !          call cg_zaxpby(npw*nspinor,(/sintn,zero/),gs_direc,(/costh,zero/),scwavef)
1076            call cg_zcopy(npw*nspinor,scwavef,gsc(1,1+igsc_shift))
1077 
1078            if (finite_field) then  ! must update cprj for the new wavefunction
1079              call getcprj(1,0,cwavef,cprj_band_srt,&
1080 &             gs_hamk%ffnl_k,0,gs_hamk%indlmn,istwf_k,gs_hamk%kg_k,gs_hamk%kpg_k,gs_hamk%kpt_k,&
1081 &             gs_hamk%lmnmax,gs_hamk%mgfft,mpi_enreg,natom,gs_hamk%nattyp,&
1082 &             gs_hamk%ngfft,gs_hamk%nloalg,gs_hamk%npw_k,gs_hamk%nspinor,gs_hamk%ntypat,&
1083 &             gs_hamk%phkxred,gs_hamk%ph1d,gs_hamk%ph3d_k,gs_hamk%ucvol,gs_hamk%useylm)
1084              call pawcprj_put(gs_hamk%atindx,cprj_band_srt,cprj_k,gs_hamk%natom,iband,0,ikpt,&
1085 &             1,isppol,mband,1,gs_hamk%natom,1,mband,dimlmn,gs_hamk%nspinor,nsppol,0,&
1086 &             mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
1087            end if
1088          end if
1089 
1090          if(wfopta10==2 .or. wfopta10==3)then
1091            ! Need to keep track of ghcws, in order to avoid recomputing it
1092 !$OMP PARALLEL DO
1093            do ipw=1,npw*nspinor
1094              ghcws(1,ipw)=ghcws(1,ipw)*costh + gh_direcws(1,ipw)*sintn
1095              ghcws(2,ipw)=ghcws(2,ipw)*costh + gh_direcws(2,ipw)*sintn
1096            end do
1097 !          call cg_zaxpby(npw*nspinor,(/sintn,zero/),gh_direcws,(/costh,zero/),ghcws)
1098          end if
1099 
1100          ! ======================================================================
1101          ! =========== CHECK CONVERGENCE AGAINST TRIAL ENERGY ===================
1102          ! ======================================================================
1103 
1104          ! Compute delta(E)
1105          if (.not.finite_field) then
1106            deltae=chc*(costh**2-1._dp)+dhd*sinth**2+2._dp*costh*sinth*dhc
1107          else
1108            ! Compute deltae
1109            call etheta(bcut,chc,detovc,detovd,dhc,dhd,dtefield%efield_dot,e0,e1,&
1110 &           hel,dtefield%fnkpt,dtefield%nstr,dtefield%sdeg,thetam)
1111            theta = zero
1112 
1113            call etheta(bcut,chc,detovc,detovd,dhc,dhd,&
1114 &           dtefield%efield_dot,e0_old,e1_old,&
1115 &           hel,dtefield%fnkpt,dtefield%nstr,dtefield%sdeg,theta)
1116            deltae = e0 - e0_old
1117 !          DEBUG
1118 !          write(std_out,*) 'e0, e0_old, deltae', e0, e0_old, deltae
1119 !          ENDDEBUG
1120 !          Check that e0 is decreasing on succeeding lines:
1121 !          if (deltae > zero) then
1122            if (deltae > tol12) then ! exploring different checks for finit_field
1123              write(message, '(3a,i8,a,1p,e14.6,a1,3x,a,1p,e14.6,a1)')&
1124 &             '  (electric field)',ch10,&
1125 &             '  New trial energy at line',iline,' = ',e0,ch10,&
1126 &             '  is higher than former:',e0_old,ch10
1127              ABI_WARNING(message)
1128            end if
1129          end if         ! finite_field
1130 
1131 !        Check convergence and eventually exit
1132          if (iline==1) then
1133            deold=deltae
1134          else if (abs(deltae)<tolrde*abs(deold) .and. iline/=nline .and. wfopta10<2)then
1135            if(prtvol>=10)then
1136              write(message, '(a,i4,1x,a,1p,e12.4,a,e12.4,a)' ) &
1137 &             ' cgwf: line',iline,&
1138 &             ' deltae=',deltae,' < tolrde*',deold,' =>skip lines'
1139              call wrtout(std_out,message,'PERS')
1140            end if
1141            nskip=nskip+2*(nline-iline)  ! Number of one-way 3D ffts skipped
1142            exit                         ! Exit from the loop on iline
1143          end if
1144 
1145        end do ! END LOOP FOR A GIVEN BAND Note that there are three "exit" instructions inside
1146 
1147        ! Additional computations in case of electric field
1148        if (finite_field) then
1149          ! Bring present contribution to dphasek(idir) into [-pi,pi]
1150          do idir = 1, 3
1151            dphase_aux2 = mod(phase_end(idir) - phase_init(idir) + 100*two_pi,two_pi)
1152            if (dphase_aux2 > pi) dphase_aux2 = dphase_aux2 - two_pi
1153            ! DEBUG
1154            ! dphase_aux1(idir)=mod(dphase_aux1(idir)+100*two_pi,two_pi)
1155            ! if(dphase_aux1(idir)>pi) dphase_aux1(idir)=dphase_aux1(idir)-two_pi
1156            ! diff = dphase_aux2 - dphase_aux1(idir)
1157            ! if (abs(diff) > tol10) then
1158            ! write(std_out,*)'cgwf-berry: ERROR'
1159            ! write(std_out,'(a,3(2x,i3),f16.9)')'ikpt,iband,idir,diff',ikpt,iband,idir,diff
1160            ! stop
1161            ! end if
1162            ! write(100,*) idir, dphase_aux2
1163            ! ENDDEBUG
1164            dphase_k(idir) = dphase_k(idir) + dphase_aux2
1165            ! DEBUG
1166            ! write(std_out,*) 'idir,phase_init,phase_end,dphase_k'
1167            ! write(std_out,*) idir,phase_init(idir),phase_end(idir),dphase_k(idir)
1168            ! ENDDEBUG
1169          end do
1170        end if   ! finite_field
1171 
1172      else ! nline==0 , needs to provide a residual
1173        resid(iband)=-one
1174      end if ! End nline==0 case
1175 
1176      ! ======================================================================
1177      ! =============== END OF CURRENT BAND: CLEANING ========================
1178      ! ======================================================================
1179 
1180      ! It was checked that getghc is NOT needed here : equivalent results with the copy below.
1181      if(wfopta10==2 .or. wfopta10==3) ghc(:,:)=ghcws(:,:)
1182 
1183      if (finite_field) dtefield%sflag(:,ikpt + (isppol-1)*nkpt,:,:) = 0
1184 
1185      ! At the end of the treatment of a set of bands, write the number of one-way 3D ffts skipped
1186      if (xmpi_paral==0 .and. mpi_enreg%paral_kgb==0 .and. iband==nband .and. prtvol/=0) then
1187        write(message,'(a,i0)')' cgwf: number of one-way 3D ffts skipped in cgwf until now =',nskip
1188        call wrtout(std_out,message,'PERS')
1189      end if
1190 
1191    end do !  End big iband loop. iband in a block
1192 
1193    !  ======================================================================
1194    !  ============= COMPUTE HAMILTONIAN IN WFs SUBSPACE ====================
1195    !  ======================================================================
1196    call mksubham(cg,ghc,gsc,gvnlxc,iblock,icg,igsc,istwf_k,&
1197 &   isubh,isubo,mcg,mgsc,nband,nbdblock,npw,&
1198 &   nspinor,subham,subovl,subvnlx,use_subovl,use_subvnlx,me_g0)
1199 
1200  end do ! iblock End loop over block of bands
1201 
1202  if (allocated(dimlmn_srt)) then
1203    ABI_FREE(dimlmn_srt)
1204  end if
1205 
1206  if (finite_field .and. gs_hamk%usepaw == 1) then ! store updated cprjs for this kpt
1207    ! switch from ikptf to ikpt
1208    ikptf = ikpt
1209    call xmpi_allgather(ikptf,ikptf_recv,spaceComm_distrb,ierr)
1210    call pawcprj_mpi_allgather(cprj_k,cprj_gat,natom,nspinor*mband,1,dimlmn,ncpgr,nproc_distrb,&
1211 &   spaceComm_distrb,ierr,rank_ordered=.true.)
1212    do iproc = 1, nproc_distrb
1213      icp2=nspinor*mband*(iproc-1)
1214      call pawcprj_get(gs_hamk%atindx1,cprj_k,cprj_gat,natom,1,icp2,ikpt,0,isppol,&
1215 &     mband,nproc_distrb,natom,mband,mband,nspinor,nsppol,0,&
1216 &     mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
1217 !    ikptf = ikptf_recv(iproc)
1218      icp1 = nspinor*mband*(ikptf_recv(iproc)-1)
1219      call pawcprj_put(gs_hamk%atindx1,cprj_k,dtefield%cprj,natom,1,icp1,ikpt,0,isppol,&
1220 &     mband,nkpt,natom,mband,mband,dimlmn,nspinor,nsppol,0,&
1221 &     mpicomm=spaceComm_distrb,proc_distrb=mpi_enreg%proc_distrb)
1222    end do
1223  end if
1224 
1225  ! Debugging ouputs
1226  if(prtvol==-level)then
1227    isubh=1
1228    if (use_subvnlx==1) write(message,'(a)') ' cgwf : isubh  subham(isubh:isubh+1)  subvnlx(isubh:isubh+1)'
1229    if (use_subvnlx==0) write(message,'(a)') ' cgwf : isubh  subham(isubh:isubh+1)'
1230    do iband=1,nband
1231      do ii=1,iband
1232        if (use_subvnlx==1) then
1233          write(message,'(i5,4es16.6)')isubh,subham(isubh:isubh+1),subvnlx(isubh:isubh+1)
1234        else
1235          write(message,'(i5,2es16.6)')isubh,subham(isubh:isubh+1)
1236        end if
1237        call wrtout(std_out,message,'PERS')
1238        isubh=isubh+2
1239      end do
1240    end do
1241  end if
1242 
1243  ! ===================
1244  ! FINAL DEALLOCATIONS
1245  ! ===================
1246  ABI_FREE(conjgr)
1247  ABI_FREE(cwavef)
1248  ABI_FREE(direc)
1249  ABI_FREE(pcon)
1250  ABI_FREE(scprod)
1251  ABI_FREE(ghc)
1252  ABI_FREE(gvnlxc)
1253  ABI_FREE(gh_direc)
1254  ABI_FREE(gvnlx_direc)
1255  ABI_FREE(vresid)
1256  ABI_FREE(gs_direc)
1257 
1258  if (gen_eigenpb)  then
1259    ABI_FREE(scwavef)
1260    ABI_FREE(direc_tmp)
1261  end if
1262 
1263  if (gen_eigenpb.and.(inonsc==1))  then
1264    ABI_FREE(ghc_all)
1265  end if
1266 
1267  if(wfopta10==2.or.wfopta10==3)  then
1268    ABI_FREE(ghcws)
1269    ABI_FREE(gh_direcws)
1270  end if
1271  ABI_FREE(gvnlx_dummy)
1272 
1273  if(wfopta10==2.or.wfopta10==3)  then
1274    ABI_FREE(work)
1275  end if
1276 
1277  ABI_FREE(swork)
1278 
1279  if (finite_field) then
1280    ABI_FREE(cg1_k)
1281    ABI_FREE(cgq_k)
1282    ABI_FREE(detovc)
1283    ABI_FREE(detovd)
1284    ABI_FREE(grad_berry)
1285    ABI_FREE(sflag_k)
1286    ABI_FREE(smat_inv)
1287    ABI_FREE(smat_k)
1288    ABI_FREE(pwind_k)
1289    ABI_FREE(pwnsfac_k)
1290    ABI_FREE(grad_total)
1291    if (gs_hamk%usepaw /= 0) then
1292      call pawcprj_free(cprj_k)
1293      call pawcprj_free(cprj_kb)
1294      call pawcprj_free(cprj_direc)
1295      call pawcprj_free(cprj_band_srt)
1296      call pawcprj_free(cprj_gat)
1297      if (nkpt /= dtefield%fnkpt) then
1298        call pawcprj_free(cprj_fkn)
1299        call pawcprj_free(cprj_ikn)
1300        ABI_FREE(cprj_fkn)
1301        ABI_FREE(cprj_ikn)
1302      end if
1303    end if
1304    ABI_FREE(smat_k_paw)
1305    ABI_FREE(dimlmn)
1306    ABI_FREE(cprj_k)
1307    ABI_FREE(cprj_kb)
1308    ABI_FREE(cprj_direc)
1309    ABI_FREE(cprj_gat)
1310    ABI_FREE(ikptf_recv)
1311    ABI_FREE(cprj_band_srt)
1312  end if
1313 
1314 ! Do not delete this line, needed to run with open MP
1315  write(unit=message,fmt=*) resid(1)
1316 
1317  call timab(22,2,tsec)
1318 
1319  DBG_EXIT("COLL")
1320 
1321 end subroutine cgwf

m_cgwf/etheta [ Functions ]

[ Top ] [ m_cgwf ] [ Functions ]

NAME

 etheta

FUNCTION

 Computes the energy per unit cell and its first derivative
 for a given angle theta. More precisely, computes only the part of
 the energy that changes with theta.

INPUTS

 bcut(ifor,idir) = branch cut of the ellipse associated with (ifor,idir)
 chc = <C|H_0|C> where |C> is the wavefunction of the current band
 detovc = determinant of the overlap matrix S
 detovd = determinant of the overlap matrix where for the band
          that is being updated <C| is replaced by <D| (search direction)
 dhc = Re[<D|H_0|C>]
 dhd = <D|H_0|D>
 efield_dot = reciprocal lattice coordinates of the electric field
 hel(ifor,idir) = helicity of the ellipse associated with (ifor,idir)
 nkpt = number of k-points
 nsppol = 1 for unpolarized, 2 for spin-polarized
 nstr(idir) = number of strings along the idir-th direction
 sdeg = spin degeneracy
 theta = value of the angle for which the energy (e0) and its
         derivative (e1) are computed

OUTPUT

 e0 = energy for the given value of theta
 e1 = derivative of the energy with respect to theta

SOURCE

1680 subroutine etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,&
1681 &    hel,nkpt,nstr,sdeg,theta)
1682 
1683 !Arguments ------------------------------------
1684 !scalars
1685  integer,intent(in) :: nkpt
1686  real(dp),intent(in) :: chc,dhc,dhd,sdeg,theta
1687  real(dp),intent(out) :: e0,e1
1688 !arrays
1689  integer,intent(in) :: hel(2,3),nstr(3)
1690  real(dp),intent(in) :: bcut(2,3),detovc(2,2,3),detovd(2,2,3),efield_dot(3)
1691 
1692 !Local variables -------------------------
1693 !scalars
1694  integer :: idir,ifor
1695  real(dp) :: c2theta,ctheta,dphase,gnorm,phase,rho,s2theta,sgn,stheta
1696 !arrays
1697  real(dp) :: dg_theta(2),g_theta(2)
1698 
1699 ! ***********************************************************************
1700 
1701  e0 = zero ; e1 = zero
1702 
1703  ctheta = cos(theta)
1704  stheta = sin(theta)
1705  c2theta = ctheta*ctheta - stheta*stheta   ! cos(2*theta)
1706  s2theta = two*ctheta*stheta               ! sin(2*theta)
1707 
1708  e0 = chc*ctheta*ctheta + dhd*stheta*stheta + dhc*s2theta
1709  e0 = e0*sdeg/nkpt
1710 
1711 !DEBUG
1712 !e0 = zero
1713 !ENDDEBUG
1714 
1715  e1 = (dhd - chc)*s2theta + two*dhc*c2theta
1716  e1 = e1*sdeg/nkpt
1717 
1718  sgn = -1_dp
1719  do idir = 1, 3
1720 
1721    if (abs(efield_dot(idir)) < tol12) cycle
1722 
1723    do ifor = 1, 2
1724 
1725      g_theta(:)  = ctheta*detovc(:,ifor,idir) + &
1726 &     stheta*detovd(:,ifor,idir)
1727      dg_theta(:) = -1_dp*stheta*detovc(:,ifor,idir) + &
1728 &     ctheta*detovd(:,ifor,idir)
1729 
1730 !    Compute E(theta)
1731 
1732      call rhophi(g_theta,phase,rho)
1733      if (theta >= bcut(ifor,idir)) phase = phase + hel(ifor,idir)*two_pi
1734 
1735 !    DEBUG
1736 !    unit = 100 + 10*idir + ifor
1737 !    write(unit,'(4(f16.9))')theta,g_theta(:),phase
1738 !    ENDDEBUG
1739 
1740      e0 = e0 + sgn*sdeg*efield_dot(idir)*phase/(two_pi*nstr(idir))
1741 
1742 
1743 !    Compute dE/dtheta
1744 
1745 !    imaginary part of the derivative of ln(g_theta)
1746      gnorm = g_theta(1)*g_theta(1) + g_theta(2)*g_theta(2)
1747      dphase = (dg_theta(2)*g_theta(1) - dg_theta(1)*g_theta(2))/gnorm
1748 
1749      e1 = e1 + sgn*sdeg*efield_dot(idir)*dphase/(two_pi*nstr(idir))
1750 
1751      sgn = -1_dp*sgn
1752 
1753    end do
1754  end do
1755 
1756 end subroutine etheta

m_cgwf/linemin [ Functions ]

[ Top ] [ m_cgwf ] [ Functions ]

NAME

 linemin

FUNCTION

 Performs the "line minimization" w.r.t. the angle theta on a unit circle
 to update the wavefunction associated with the current k-point and
 band label.
 This routine is used only when the electric field is on (otherwise it could
 in principle also be used, but there is a simpler procedure, as originally
 coded in abinit).

INPUTS

 chc = <C|H_0|C> where |C> is the wavefunction of the current band
 detovc = determinant of the overlap matrix S
 detovd = determinant of the overlap matrix where for the band
          that is being updated <C| is replaced by <D| (search direction)
 dhc = Re[<D|H_0|C>]
 dhd = <D|H_0|D>
 efield_dot = reciprocal lattice coordinates of the electric field
 iline = index of the current line minimization
 nkpt = number of k-points
 nstr(idir) = number of strings along the idir-th direction
 sdeg = spin degeneracy

OUTPUT

 bcut(ifor,idir) = branch cut of the ellipse associated with (ifor,idir)
 costh = cos(thetam)
 hel(ifor,idir) = helicity of the ellipse associated with (ifor,idir)
 phase_end = total change in Zak phase, must be equal to
             dphase_aux1 + n*two_pi
 sinth = sin(thetam)
 thetam = optimal angle theta in line_minimization

SIDE EFFECTS

 Input/Output
 dphase_aux1 = change in Zak phase accumulated during the loop over iline
               (can be used for debugging in cgwf.f)
 phase_init = initial Zak phase (before doing the first line minimization)

NOTES

 We are making the "frozen Hamiltonian approximation", i.e., the
 Hamiltonian does not change with theta (we are neglecting the dependence
 of the Hartree and exchange-correlation terms on theta; the original
 abinit routine does the same)

SOURCE

1372 subroutine linemin(bcut,chc,costh,detovc,detovd,dhc,dhd,dphase_aux1,&
1373 &  efield_dot,iline,nkpt,nstr,hel,phase_end,phase_init,sdeg,sinth,thetam)
1374 
1375 !Arguments ------------------------------------
1376 !scalars
1377  integer,intent(in) :: iline,nkpt
1378  real(dp),intent(in) :: chc,dhc,dhd,sdeg
1379  real(dp),intent(out) :: costh,sinth,thetam
1380 !arrays
1381  integer,intent(in) :: nstr(3)
1382  integer,intent(out) :: hel(2,3)
1383  real(dp),intent(in) :: detovc(2,2,3),detovd(2,2,3),efield_dot(3)
1384  real(dp),intent(inout) :: dphase_aux1(3),phase_init(3)
1385  real(dp),intent(out) :: bcut(2,3),phase_end(3)
1386 
1387 !Local variables -------------------------
1388 !scalars
1389  integer :: idir,ifor,igrid,iter,maxiter,ngrid
1390  real(dp) :: aa,angle,bb,big_axis,cc,cphi_0,delta_theta,e0,e1
1391  real(dp) :: excentr,iab,phase0,phase_min,phi_0,rdum,sgn,small_axis,sphi_0
1392  real(dp) :: theta,theta_0,val
1393  logical :: flag_neg
1394  character(len=500) :: message
1395 !arrays
1396  real(dp) :: g_theta(2),theta_min(2),theta_try(2)
1397  real(dp) :: esave(251),e1save(251)   !!REC
1398 
1399 ! ***********************************************************************
1400 
1401 !Compute the helicity and the branch cut of the ellipse in the complex
1402 !plane associated with the overlap between a k-point and one of its neighbours
1403 
1404  do idir = 1, 3
1405 
1406    if (abs(efield_dot(idir)) < tol12) cycle
1407 
1408    do ifor = 1, 2
1409 
1410      aa = half*(detovc(1,ifor,idir)*detovc(1,ifor,idir) + &
1411 &     detovc(2,ifor,idir)*detovc(2,ifor,idir) + &
1412 &     detovd(1,ifor,idir)*detovd(1,ifor,idir) + &
1413 &     detovd(2,ifor,idir)*detovd(2,ifor,idir))
1414 
1415      bb = half*(detovc(1,ifor,idir)*detovc(1,ifor,idir) + &
1416 &     detovc(2,ifor,idir)*detovc(2,ifor,idir) - &
1417 &     detovd(1,ifor,idir)*detovd(1,ifor,idir) - &
1418 &     detovd(2,ifor,idir)*detovd(2,ifor,idir))
1419 
1420      cc = detovc(1,ifor,idir)*detovd(1,ifor,idir) + &
1421 &     detovc(2,ifor,idir)*detovd(2,ifor,idir)
1422 
1423      iab = detovc(1,ifor,idir)*detovd(2,ifor,idir) - &
1424 &     detovc(2,ifor,idir)*detovd(1,ifor,idir)
1425 
1426      if (iab >= zero) then
1427        hel(ifor,idir) = 1
1428      else
1429        hel(ifor,idir) = -1
1430      end if
1431 
1432      if (abs(bb) > tol8) then
1433        theta_0 = half*atan(cc/bb)
1434      else
1435        theta_0 = quarter*pi
1436      end if
1437 
1438      if (bb < zero) theta_0 = theta_0 + pi*half
1439 
1440      g_theta(:) = cos(theta_0)*detovc(:,ifor,idir) + &
1441 &     sin(theta_0)*detovd(:,ifor,idir)
1442 !    DEBUG
1443 !    write(std_out,*)'before rhophi, g_theta =',g_theta
1444 !    ENDDEBUG
1445      call rhophi(g_theta,phi_0,rdum)
1446 !    DEBUG
1447 !    write(std_out,*)'after rhophi, phi_0 = ',phi_0
1448 !    ENDDEBUG
1449 
1450      cphi_0 = cos(phi_0)
1451      sphi_0 = sin(phi_0)
1452 
1453      rdum = aa - sqrt(bb*bb + cc*cc)
1454      if (rdum < zero) rdum = zero
1455      small_axis = sqrt(rdum)
1456      big_axis = sqrt(aa + sqrt(bb*bb + cc*cc))
1457      excentr = hel(ifor,idir)*small_axis/big_axis
1458 
1459 !    Find angle for which phi = pi
1460      if (abs(excentr) > tol8) then
1461        angle = atan(tan(pi-phi_0)/excentr)
1462      else
1463        if (tan(pi-phi_0)*hel(ifor,idir) > zero) then
1464          angle = half*pi
1465        else
1466          angle = -0.5_dp*pi
1467        end if
1468      end if
1469      bcut(ifor,idir) = angle + theta_0
1470 
1471 
1472 !    Compute the branch-cut angle
1473      if (hel(ifor,idir) == 1) then
1474        if ((sphi_0 > 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) + pi
1475        if ((sphi_0 < 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) - pi
1476      else
1477        if ((sphi_0 > 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) - pi
1478        if ((sphi_0 < 0).and.(cphi_0 > 0)) bcut(ifor,idir) = bcut(ifor,idir) + pi
1479      end if
1480 
1481      if (bcut(ifor,idir) > pi) bcut(ifor,idir) = bcut(ifor,idir) - two_pi
1482      if (bcut(ifor,idir) < -1_dp*pi) bcut(ifor,idir) = bcut(ifor,idir) + two_pi
1483 
1484 !    DEBUG
1485 !    write(std_out,'(a,2x,i3,2x,i3,5x,f16.9,5x,i2)')'linemin: ifor,idir,bcut,hel',&
1486 !    &   ifor,idir,bcut(ifor,idir),hel(ifor,idir)
1487 !    write(std_out,'(a,5x,f16.9,5x,f16.9)')'linemin: big_axis,small_axis ',&
1488 !    &     big_axis,small_axis
1489 !    ENDDEBUG
1490 
1491    end do   ! ifor
1492  end do   ! idir
1493 
1494 !---------------------------------------------------------------------------
1495 
1496 !Perform the "line minimization" w.r.t. the angle theta on a unit circle
1497 !to update the wavefunction associated with the current k-point and band label.
1498 
1499  ngrid = 250   ! initial number of subdivisions in [-pi/2,pi/2]
1500 !for finding extrema
1501  maxiter = 100
1502  delta_theta = pi/ngrid
1503 
1504 !DEBUG
1505 !write(std_out,*)'linemin: theta, e0, e1, e1fdiff'
1506 !ENDDEBUG
1507 
1508 
1509 !Get the interval where the absolute minimum of E(theta) is located
1510 
1511  val = huge(one)             ! large number
1512  flag_neg=.false.
1513  theta_min(:) = ten
1514  do igrid = 1, ngrid+1
1515 
1516    theta = (igrid - 1)*delta_theta - pi*half
1517    call etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,&
1518 &   hel,nkpt,nstr,sdeg,theta)
1519 
1520    esave(igrid)=e0      !!REC
1521    e1save(igrid)=e1     !!REC
1522 
1523 !  It is important to detect when the slope changes from negative to positive
1524 !  Moreover, a slope being extremely close to zero must be ignored
1525 
1526 !  DEBUG
1527 !  write(std_out,*)' igrid,e0,e1,val,theta_min(:)=',igrid,theta,e0,e1,val,theta_min(:)
1528 !  ENDDEBUG
1529 
1530 !  Store e1 and theta if negative ...
1531    if(e1 < -tol10)then
1532      theta_try(1)=theta
1533      flag_neg=.true.
1534    end if
1535 !  A change of sign is just happening
1536    if(e1 > tol10 .and. flag_neg)then
1537      theta_try(2)=theta
1538      flag_neg=.false.
1539 !    Still, must be better than the previous minimum in order to succeed
1540      if (e0 < val-tol10) then
1541        val=e0
1542        theta_min(:)=theta_try(:)
1543      end if
1544    end if
1545  end do
1546 
1547 !In case the minimum was not found
1548 
1549  if (abs(theta_min(1) - ten) < tol10) then
1550 !  REC start
1551    write(message,'(a,a)')ch10,&
1552 &   ' linemin: ERROR- cannot find theta_min.'
1553    call wrtout(std_out,message,'COLL')
1554    write(message,'(a,a)')ch10,&
1555 &   ' igrid      theta          esave(igrid)    e1save(igrid) '
1556    call wrtout(std_out,message,'COLL')
1557    do igrid = 1, ngrid+1
1558      theta = (igrid - 1)*delta_theta - pi*half
1559      write(std_out,'(i6,3f16.9)')igrid,theta,esave(igrid),e1save(igrid)
1560      !write(101,'(i6,3f16.9)')igrid,theta,esave(igrid),e1save(igrid)
1561    end do
1562    write(message,'(6a)')ch10,&
1563 &   ' linemin: ERROR - ',ch10,&
1564 &   '  Cannot find theta_min. No minimum exists: the field is too strong ! ',ch10,&
1565 &   '  Try decreasing difference between D and 4 Pi P by changing structure or D (only for fixed D calculation)'
1566    call wrtout(std_out,message,'COLL')
1567    ABI_ERROR('linemin cannot find theta_min')
1568  end if
1569 
1570 !Compute the mimum of E(theta)
1571 
1572 
1573  iter = 0
1574  do while ((delta_theta > tol8).and.(iter < maxiter))
1575    delta_theta = half*(theta_min(2) - theta_min(1))
1576    theta = theta_min(1) + delta_theta
1577    call etheta(bcut,chc,detovc,detovd,dhc,dhd,efield_dot,e0,e1,&
1578 &   hel,nkpt,nstr,sdeg,theta)
1579    if (e1 > zero) then
1580      theta_min(2) = theta
1581    else
1582      theta_min(1) = theta
1583    end if
1584    iter = iter + 1
1585 
1586 !  DEBUG
1587 !  write(std_out,'(a,2x,i3,2(2x,f16.9))')'iter,e0,e1 = ',iter,e0,e1
1588 !  ENDDEBUG
1589 
1590  end do
1591 
1592  costh = cos(theta)
1593  sinth = sin(theta)
1594 
1595  thetam = theta
1596 
1597 !DEBUG
1598 !write(std_out,*)'linemin : thetam = ',thetam
1599 !ENDDEBUG
1600 
1601 !---------------------------------------------------------------------------
1602 
1603 !Compute and store the change in electronic polarization
1604 
1605  sgn = one
1606  do idir = 1, 3
1607 
1608    if (abs(efield_dot(idir)) < tol12) cycle
1609 
1610    phase_end(idir) = zero
1611    do ifor = 1, 2
1612 
1613      g_theta(:) = detovc(:,ifor,idir)
1614 !    DEBUG
1615 !    write(std_out,*)'before rhophi (2nd call), g_theta =',g_theta
1616 !    ENDDEBUG
1617      call rhophi(g_theta,phase0,rdum)
1618 !    DEBUG
1619 !    write(std_out,*)'after rhophi, phase0 = ',phase0
1620 !    ENDDEBUG
1621 
1622      if(iline == 1) phase_init(idir) = phase_init(idir) + sgn*phase0
1623 
1624      g_theta(:) = costh*detovc(:,ifor,idir) + sinth*detovd(:,ifor,idir)
1625      call rhophi(g_theta,phase_min,rdum)
1626 
1627      phase_end(idir) = phase_end(idir) + sgn*phase_min
1628 
1629 !    Correct for branch cuts (remove jumps)
1630      if (bcut(ifor,idir) <= zero) phase0 = phase0 + hel(ifor,idir)*two_pi
1631      if(thetam >= bcut(ifor,idir)) phase_min = phase_min + hel(ifor,idir)*two_pi
1632 
1633      dphase_aux1(idir) = dphase_aux1(idir) + sgn*(phase_min - phase0)
1634 
1635      sgn = -1_dp*sgn
1636 
1637    end do   ! idir
1638  end do    ! ifor
1639 
1640 !DEBUG
1641 !write(std_out,'(a,3(2x,f16.9))')'dphase_aux1 = ',(dphase_aux1(idir),idir = 1, 3)
1642 !write(std_out,*)' linemin: debug, exit.'
1643 !ENDDEBUG
1644 
1645 end subroutine linemin

m_cgwf/mksubham [ Functions ]

[ Top ] [ m_cgwf ] [ Functions ]

NAME

 mksubham

FUNCTION

 Build the Hamiltonian matrix in the eigenfunctions subspace,
 for one given band (or for one given block of bands)

INPUTS

  cg(2,mcg)=wavefunctions
  gsc(2,mgsc)=<g|S|c> matrix elements (S=overlap)
  iblock=index of block of bands
  icg=shift to be applied on the location of data in the array cg
  igsc=shift to be applied on the location of data in the array cg
  istwf_k=input parameter that describes the storage of wfs
  mcg=second dimension of the cg array
  mgsc=second dimension of the gsc array
  nband_k=number of bands at this k point for that spin polarization
  nbdblock=number of bands in a block
  npw_k=number of plane waves at this k point
  nspinor=number of spinorial components of the wavefunctions
  use_subovl=1 if the overlap matrix is not identity in WFs subspace
  use_subvnlx= 1 if <C band,k|H|C band_prime,k> has to be computed
  me_g0=1 if this processors has G=0, 0 otherwise

OUTPUT

SIDE EFFECTS

  ghc(2,npw_k*nspinor)=<G|H|C band,k> for the current state
                       This is an input in non-blocked algorithm
                               an output in blocked algorithm
  gvnlxc(2,npw_k*nspinor)=<G|Vnl|C band,k> for the current state
                       This is an input in non-blocked algorithm
                               an output in blocked algorithm
  isubh=index of current state in array subham
  isubo=index of current state in array subovl
  subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace
  subovl(nband_k*(nband_k+1)*use_subovl)=overlap matrix expressed in the WFs subspace
  subvnlx(nband_k*(nband_k+1)*use_subvnlx)=non-local Hamiltonian (if NCPP)  plus Fock ACE operator (if usefock_ACE)
   expressed in the WFs subspace

SOURCE

1802 subroutine mksubham(cg,ghc,gsc,gvnlxc,iblock,icg,igsc,istwf_k,&
1803 &                    isubh,isubo,mcg,mgsc,nband_k,nbdblock,npw_k,&
1804 &                    nspinor,subham,subovl,subvnlx,use_subovl,use_subvnlx,me_g0)
1805 
1806 !Arguments ------------------------------------
1807 !scalars
1808  integer,intent(in) :: iblock,icg,igsc,istwf_k,mcg,mgsc,nband_k
1809  integer,intent(in) :: nbdblock,npw_k,nspinor,use_subovl,use_subvnlx,me_g0
1810  integer,intent(inout) :: isubh,isubo
1811 !arrays
1812  real(dp),intent(in) :: cg(2,mcg)
1813  real(dp),intent(in) :: gsc(2,mgsc)
1814  real(dp),intent(inout) :: ghc(2,npw_k*nspinor),gvnlxc(2,npw_k*nspinor)
1815  real(dp),intent(inout) :: subham(nband_k*(nband_k+1))
1816  real(dp),intent(inout) :: subovl(nband_k*(nband_k+1)*use_subovl)
1817  real(dp),intent(inout) :: subvnlx(nband_k*(nband_k+1)*use_subvnlx)
1818 
1819 !Local variables-------------------------------
1820 !scalars
1821  integer :: iband,ibdblock,ii,ipw,ipw1,isp,iwavef,jwavef
1822  real(dp) :: cgimipw,cgreipw,chcim,chcre,cscim,cscre,cvcim,cvcre
1823 !real(dp) :: chc(2),cvc(2),csc(2)
1824 
1825 ! *********************************************************************
1826 
1827 !Loop over bands in a block This loop can be parallelized
1828  do iband=1+(iblock-1)*nbdblock,min(iblock*nbdblock,nband_k)
1829    ibdblock=iband-(iblock-1)*nbdblock
1830 
1831 !  Compute elements of subspace Hamiltonian <C(i)|H|C(n)> and <C(i)|Vnl|C(n)>
1832    if(istwf_k==1)then
1833 
1834      do ii=1,iband
1835        iwavef=(ii-1)*npw_k*nspinor+icg
1836        chcre=zero ; chcim=zero
1837        if (use_subvnlx==0) then
1838          do ipw=1,npw_k*nspinor
1839            cgreipw=cg(1,ipw+iwavef)
1840            cgimipw=cg(2,ipw+iwavef)
1841            chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw)
1842            chcim=chcim+cgreipw*ghc(2,ipw)-cgimipw*ghc(1,ipw)
1843          end do
1844 !        chc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),ghc)
1845        else
1846 #if 1
1847          do ipw=1,npw_k*nspinor
1848            cgreipw=cg(1,ipw+iwavef)
1849            cgimipw=cg(2,ipw+iwavef)
1850            chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw)
1851            chcim=chcim+cgreipw*ghc(2,ipw)-cgimipw*ghc(1,ipw)
1852          end do
1853          cvcre=zero ; cvcim=zero
1854          do ipw=1,npw_k*nspinor
1855            cgreipw=cg(1,ipw+iwavef)
1856            cgimipw=cg(2,ipw+iwavef)
1857            cvcre=cvcre+cgreipw*gvnlxc(1,ipw)+cgimipw*gvnlxc(2,ipw)
1858            cvcim=cvcim+cgreipw*gvnlxc(2,ipw)-cgimipw*gvnlxc(1,ipw)
1859          end do
1860          subvnlx(isubh  )=cvcre
1861          subvnlx(isubh+1)=cvcim
1862 #else
1863 !        New version with BLAS1, will require some update of the refs.
1864          cvc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),gvnlxc)
1865          subvnlx(isubh  )=cvc(1)
1866          subvnlx(isubh+1)=cvc(2)
1867          chc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),ghc)
1868          chcre = chc(1)
1869          chcim = chc(2)
1870 #endif
1871 !        Store real and imag parts in Hermitian storage mode:
1872        end if
1873        subham(isubh  )=chcre
1874        subham(isubh+1)=chcim
1875 !      subham(isubh  )=chc(1)
1876 !      subham(isubh+1)=chc(2)
1877        isubh=isubh+2
1878      end do
1879 
1880    else if(istwf_k>=2)then
1881      do ii=1,iband
1882        iwavef=(ii-1)*npw_k+icg
1883 !      Use the time-reversal symmetry, but should not double-count G=0
1884        if(istwf_k==2 .and. me_g0==1) then
1885          chcre = half*cg(1,1+iwavef)*ghc(1,1)
1886          if (use_subvnlx==1) cvcre=half*cg(1,1+iwavef)*gvnlxc(1,1)
1887          ipw1=2
1888        else
1889          chcre=zero; ipw1=1
1890          if (use_subvnlx==1) cvcre=zero
1891        end if
1892        if (use_subvnlx==0) then
1893          do isp=1,nspinor
1894            do ipw=ipw1+(isp-1)*npw_k,npw_k*isp
1895              cgreipw=cg(1,ipw+iwavef)
1896              cgimipw=cg(2,ipw+iwavef)
1897              chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw)
1898            end do
1899          end do
1900          chcre=two*chcre
1901        else
1902          do isp=1,nspinor
1903            do ipw=ipw1+(isp-1)*npw_k,npw_k*isp
1904              cgreipw=cg(1,ipw+iwavef)
1905              cgimipw=cg(2,ipw+iwavef)
1906              chcre=chcre+cgreipw*ghc(1,ipw)+cgimipw*ghc(2,ipw)
1907              cvcre=cvcre+cgreipw*gvnlxc(1,ipw)+cgimipw*gvnlxc(2,ipw)
1908            end do
1909          end do
1910          chcre=two*chcre
1911          cvcre=two*cvcre
1912 !        Store real and imag parts in Hermitian storage mode:
1913          subvnlx(isubh  )=cvcre
1914          subvnlx(isubh+1)=zero
1915        end if
1916        subham(isubh  )=chcre
1917        subham(isubh+1)=zero
1918        isubh=isubh+2
1919      end do
1920    end if
1921 
1922 !  Compute elements of subspace <C(i)|S|C(n)> (S=overlap matrix)
1923 !  <C(i)|S|C(n)> should be closed to Identity.
1924    if (use_subovl==1) then
1925      jwavef=(iband-1)*npw_k*nspinor+igsc
1926      if(istwf_k==1)then
1927        do ii=1,iband
1928          iwavef=(ii-1)*npw_k*nspinor+icg
1929          cscre=zero ; cscim=zero
1930          do ipw=1,npw_k*nspinor
1931            cgreipw=cg(1,ipw+iwavef)
1932            cgimipw=cg(2,ipw+iwavef)
1933            cscre=cscre+cgreipw*gsc(1,ipw+jwavef)+cgimipw*gsc(2,ipw+jwavef)
1934            cscim=cscim+cgreipw*gsc(2,ipw+jwavef)-cgimipw*gsc(1,ipw+jwavef)
1935          end do
1936 !        csc = cg_zdotc(npw_k*nspinor,cg(1,1+iwavef),gsc)
1937 !        subovl(isubo  )=csc(1)
1938 !        subovl(isubo+1)=csc(2)
1939 !        Store real and imag parts in Hermitian storage mode:
1940          subovl(isubo  )=cscre
1941          subovl(isubo+1)=cscim
1942          isubo=isubo+2
1943        end do
1944      else if(istwf_k>=2)then
1945        do ii=1,iband
1946          iwavef=(ii-1)*npw_k*nspinor+icg
1947          if(istwf_k==2 .and. me_g0==1)then
1948            cscre=half*cg(1,1+iwavef)*gsc(1,1+jwavef)
1949            ipw1=2
1950          else
1951            cscre=zero; ipw1=1
1952          end if
1953          do isp=1,nspinor
1954            do ipw=ipw1+(isp-1)*npw_k,npw_k*isp
1955              cgreipw=cg(1,ipw+iwavef)
1956              cgimipw=cg(2,ipw+iwavef)
1957              cscre=cscre+cg(1,ipw+iwavef)*gsc(1,ipw+jwavef)+cg(2,ipw+iwavef)*gsc(2,ipw+jwavef)
1958            end do
1959          end do
1960          cscre=two*cscre
1961 !        Store real and imag parts in Hermitian storage mode:
1962          subovl(isubo  )=cscre
1963          subovl(isubo+1)=zero
1964          isubo=isubo+2
1965        end do
1966      end if
1967    end if
1968 
1969  end do ! iband in a block
1970 
1971 end subroutine mksubham