TABLE OF CONTENTS


ABINIT/m_cgtools [ Modules ]

[ Top ] [ Modules ]

NAME

  m_cgtools

FUNCTION

 This module defines wrappers for BLAS routines. The arguments are stored
 using the "cg" convention, namely real array of shape cg(2,...)

COPYRIGHT

 Copyright (C) 1992-2024 ABINIT group (MG, MT, XG, DCA, GZ, FB, MVer, DCA, GMR, FF)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt .

NOTES

 1) The convention about names of interfaced routine is: cg_<name>,
    where <name> is equal to the name of the standard BLAS routine

 2) Blas routines are called without an explicit interface on purpose since

    a) The compiler should pass the base address of the array to the F77 BLAS

    b) Any compiler would complain about type mismatch (REAL,COMPLEX)
       if an explicit interface is given.

 3) The use of mpi_type is not allowed here. MPI parallelism should be handled in a generic
    way by passing the MPI communicator so that the caller can decide how to handle MPI.


ABINIT/subdiago [ Functions ]

[ Top ] [ Functions ]

NAME

 subdiago

FUNCTION

 This routine diagonalizes the Hamiltonian in the trial subspace.

INPUTS

  icg=shift to be applied on the location of data in the array cg
  igsc=shift to be applied on the location of data in the array gsc
  istwf_k=input parameter that describes the storage of wfs
  mcg=second dimension of the cg array
  mgsc=second dimension of the gsc array
  nband_k=number of bands at this k point for that spin polarization
  npw_k=number of plane waves at this k point
  my_nspinor=number of spinorial components of the wavefunctions (on current proc)
  use_subovl=1 if the overlap matrix is not identity in WFs subspace
  usepaw= 0 for non paw calculation; =1 for paw calculation
  me_g0=1 if this processor has G=0, 0 otherwise.

OUTPUT

  eig_k(nband_k)=array for holding eigenvalues (hartree)
  evec(2*nband_k,nband_k)=array for holding eigenvectors

SIDE EFFECTS

  subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace. Hermitianized in output.
  subovl(nband_k*(nband_k+1)*use_subovl)=overlap matrix expressed in the WFs subspace. Hermitianized in output.
  cg(2,mcg)=wavefunctions
  gsc(2,mgsc)=<g|S|c> matrix elements (S=overlap)

SOURCE

4304 subroutine subdiago(cg, eig_k, evec, gsc, icg, igsc, istwf_k, mcg, mgsc, nband_k, npw_k, my_nspinor, paral_kgb, &
4305                     subham, subovl, use_subovl, usepaw, me_g0)
4306 
4307  use m_linalg_interfaces
4308  use m_abi_linalg
4309 
4310 !Arguments ------------------------------------
4311  integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nband_k,npw_k,me_g0
4312  integer,intent(in) :: my_nspinor,paral_kgb,use_subovl,usepaw
4313  real(dp),intent(inout) :: subham(nband_k*(nband_k+1)),subovl(nband_k*(nband_k+1)*use_subovl)
4314  real(dp),intent(out) :: eig_k(nband_k),evec(2*nband_k,nband_k)
4315  real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc)
4316 
4317 !Local variables-------------------------------
4318  integer :: iband,ii,ierr,rvectsize,vectsize,use_slk
4319  !real(dp) :: cpu, wall, gflops
4320  character(len=500) :: msg
4321  ! real(dp) :: tsec(2)
4322  real(dp),allocatable :: evec_re(:,:),subovl_re(:),subham_tmp(:), work(:,:)
4323  real(dp),allocatable :: blockvectora(:,:),blockvectorb(:,:),blockvectorc(:,:)
4324 
4325 ! *********************************************************************
4326 
4327  if (paral_kgb<0) then
4328    ABI_BUG('paral_kgb should be positive ')
4329  end if
4330 
4331  ! 1 if Scalapack version is used.
4332  ! MG TODO: This should not be bound to paral_kgb
4333  use_slk = paral_kgb
4334 
4335  rvectsize=npw_k*my_nspinor
4336  vectsize=2*rvectsize;if (me_g0==1) vectsize=vectsize-1
4337  !call cwtime(cpu, wall, gflops, "start")
4338 
4339  !Impose Hermiticity on diagonal elements of subham (and subovl, if needed)
4340  ! MG FIXME: In these two calls we are aliasing the args
4341  call hermit(subham, subham, ierr, nband_k)
4342  if (use_subovl==1) call hermit(subovl, subovl, ierr, nband_k)
4343  !call cwtime_report(" hermit", cpu, wall, gflops)
4344 
4345  ! Diagonalize the Hamitonian matrix
4346  if (istwf_k==2) then
4347    ABI_CALLOC(evec_re, (nband_k,nband_k))
4348    ABI_MALLOC(subham_tmp, (nband_k*(nband_k+1)/2))
4349    subham_tmp=subham(1:nband_k*(nband_k+1):2)
4350    if (use_subovl==1) then
4351      ABI_MALLOC(subovl_re, (nband_k*(nband_k+1)/2))
4352      subovl_re=subovl(1:nband_k*(nband_k+1):2)
4353      ! TODO: Not sure this one has been fully tested
4354      call abi_xhpgv(1,'V','U',nband_k,subham_tmp,subovl_re,eig_k,evec_re,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4355      ABI_FREE(subovl_re)
4356    else
4357      call abi_xhpev('V','U',nband_k,subham_tmp,eig_k,evec_re,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4358    end if
4359    evec(:,:)=zero; evec(1:2*nband_k:2,:) = evec_re
4360    ABI_FREE(evec_re)
4361    ABI_FREE(subham_tmp)
4362  else
4363    if (use_subovl==1) then
4364      call abi_xhpgv(1,'V','U',nband_k,subham,subovl,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4365    else
4366      call abi_xhpev('V','U',nband_k,subham,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4367    end if
4368  end if
4369  !call cwtime_report(" hdiago", cpu, wall, gflops)
4370 
4371  ! Normalize each eigenvector and set phase:
4372  ! this is because of the simultaneous diagonalisation of this
4373  ! matrix by different processors, allowing to get different unitary transforms, thus breaking the
4374  ! coherency of parts of cg stored on different processors).
4375  !
4376  ! The problem with minus/plus signs might be present also if .not. use_subovl
4377  !
4378  !if(use_subovl == 0) then
4379  call cg_normev(evec, nband_k, nband_k)
4380  !end if
4381 
4382  if(istwf_k==2)then
4383    do iband=1,nband_k
4384      do ii=1,nband_k
4385        if(abs(evec(2*ii,iband))>1.0d-10)then
4386          write(msg,'(3a,2i0,2es16.6,a,a)')ch10,&
4387          ' For istwf_k=2, observed the following element of evec:',ch10,&
4388          iband,ii,evec(2*ii-1,iband),evec(2*ii,iband),ch10,' with a non-negligible imaginary part.'
4389          ABI_BUG(msg)
4390        end if
4391      end do
4392    end do
4393  end if
4394  !call cwtime_report(" normev", cpu, wall, gflops)
4395 
4396  !=====================================================
4397  ! Carry out rotation of bands C(G,n) according to evecs
4398  ! ZGEMM if istwfk==1, DGEMM if istwfk==2
4399  !=====================================================
4400  if (istwf_k==2) then
4401 
4402    ABI_MALLOC_OR_DIE(blockvectora, (vectsize, nband_k), ierr)
4403    ABI_MALLOC_OR_DIE(blockvectorb, (nband_k, nband_k), ierr)
4404    ABI_MALLOC_OR_DIE(blockvectorc, (vectsize, nband_k), ierr)
4405 
4406    do iband=1,nband_k
4407      if (me_g0 == 1) then
4408        call abi_xcopy(1,cg(1,cgindex_subd(iband)),1,blockvectora(1,iband),1)
4409        call abi_xcopy(rvectsize-1,cg(1,cgindex_subd(iband)+1),2,blockvectora(2,iband),1)
4410        call abi_xcopy(rvectsize-1,cg(2,cgindex_subd(iband)+1),2,blockvectora(rvectsize+1,iband),1)
4411      else
4412        call abi_xcopy(rvectsize,cg(1,cgindex_subd(iband)),2,blockvectora(1,iband),1)
4413        call abi_xcopy(rvectsize,cg(2,cgindex_subd(iband)),2,blockvectora(rvectsize+1,iband),1)
4414      end if
4415      call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k)
4416    end do
4417 
4418    !MG TODO: This one is a DGEMM.
4419    call abi_xgemm('N','N',vectsize,nband_k,nband_k,&
4420      cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize)
4421 
4422    do iband=1,nband_k
4423      if (me_g0 == 1) then
4424        call abi_xcopy(1,blockvectorc(1,iband),1,cg(1,cgindex_subd(iband)),1)
4425        call abi_xcopy(rvectsize-1,blockvectorc(2,iband),1,cg(1,cgindex_subd(iband)+1),2)
4426        call abi_xcopy(rvectsize-1,blockvectorc(rvectsize+1,iband),1,cg(2,cgindex_subd(iband)+1),2)
4427      else
4428        call abi_xcopy(rvectsize,blockvectorc(1,iband),1,cg(1,cgindex_subd(iband)),2)
4429        call abi_xcopy(rvectsize,blockvectorc(rvectsize+1,iband),1,cg(2,cgindex_subd(iband)),2)
4430      end if
4431    end do
4432 
4433    if (usepaw==1) then
4434     ! If paw, must also rotate S.C(G,n):
4435 
4436      do iband=1,nband_k
4437        if (me_g0 == 1) then
4438          call abi_xcopy(1,gsc(1,gscindex_subd(iband)),1,blockvectora(1,iband),1)
4439          call abi_xcopy(rvectsize-1,gsc(1,gscindex_subd(iband)+1),2,blockvectora(2,iband),1)
4440          call abi_xcopy(rvectsize-1,gsc(2,gscindex_subd(iband)+1),2,blockvectora(rvectsize+1,iband),1)
4441        else
4442          call abi_xcopy(rvectsize  ,gsc(1,gscindex_subd(iband)),2,blockvectora(1,iband),1)
4443          call abi_xcopy(rvectsize  ,gsc(2,gscindex_subd(iband)),2,blockvectora(rvectsize+1,iband),1)
4444        end if
4445        call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k)
4446      end do
4447 
4448      call abi_xgemm('N','N',vectsize,nband_k,nband_k,&
4449                     cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize)
4450 
4451      do iband=1,nband_k
4452        if (me_g0 == 1) then
4453          call abi_xcopy(1,blockvectorc(1,iband),1,gsc(1,gscindex_subd(iband)),1)
4454          call abi_xcopy(rvectsize-1,blockvectorc(2,iband),1,gsc(1,gscindex_subd(iband)+1),2)
4455          call abi_xcopy(rvectsize-1,blockvectorc(rvectsize+1,iband),1,gsc(2,gscindex_subd(iband)+1),2)
4456        else
4457          call abi_xcopy(rvectsize,blockvectorc(1,iband),1,gsc(1,gscindex_subd(iband)),2)
4458          call abi_xcopy(rvectsize,blockvectorc(rvectsize+1,iband),1,gsc(2,gscindex_subd(iband)),2)
4459        end if
4460      end do
4461 
4462    end if
4463 
4464    ABI_FREE(blockvectora)
4465    ABI_FREE(blockvectorb)
4466    ABI_FREE(blockvectorc)
4467 
4468  else
4469    ! istwf_k /= 2
4470    ABI_MALLOC_OR_DIE(work, (2,npw_k*my_nspinor*nband_k), ierr)
4471 
4472    ! MG: Do not remove this initialization.
4473    ! telast_06 stops in fxphase on inca_debug and little_buda (very very strange, due to atlas?)
4474    !work=zero
4475 
4476    call abi_xgemm('N','N',npw_k*my_nspinor,nband_k,nband_k,cone, &
4477      cg(:,icg+1:npw_k*my_nspinor*nband_k+icg),npw_k*my_nspinor, &
4478      evec,nband_k,czero,work,npw_k*my_nspinor,x_cplx=2)
4479 
4480    call abi_xcopy(npw_k*my_nspinor*nband_k,work(1,1),1,cg(1,1+icg),1,x_cplx=2)
4481 
4482    if (usepaw==1) then
4483      ! If paw, must also rotate S.C(G,n):
4484      call abi_xgemm('N','N',npw_k*my_nspinor,nband_k,nband_k,cone, &
4485        gsc(:,1+igsc:npw_k*my_nspinor*nband_k+igsc),npw_k*my_nspinor, &
4486        evec,nband_k,czero,work,npw_k*my_nspinor,x_cplx=2)
4487      call abi_xcopy(npw_k*my_nspinor*nband_k, work(1,1),1,gsc(1,1+igsc),1,x_cplx=2)
4488    end if
4489 
4490    ABI_FREE(work)
4491  end if
4492  !call cwtime_report(" rotation", cpu, wall, gflops)
4493 
4494  contains
4495 
4496    function cgindex_subd(iband)
4497      integer :: iband,cgindex_subd
4498      cgindex_subd=npw_k*my_nspinor*(iband-1)+icg+1
4499    end function cgindex_subd
4500 
4501    function gscindex_subd(iband)
4502      integer :: iband,gscindex_subd
4503      gscindex_subd=npw_k*my_nspinor*(iband-1)+igsc+1
4504  end function gscindex_subd
4505 
4506 end subroutine subdiago

ABINIT/subdiago_low_memory [ Functions ]

[ Top ] [ Functions ]

NAME

 subdiago_low_memory

FUNCTION

 This routine diagonalizes the Hamiltonian in the eigenfunction subspace
 Separate the computation in blocks of plane waves to save memory

INPUTS

  icg=shift to be applied on the location of data in the array cg
  istwf_k=input parameter that describes the storage of wfs
  mcg=second dimension of the cg array
  nband_k=number of bands at this k point for that spin polarization
  npw_k=number of plane waves at this k point
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  subham(nband_k*(nband_k+1))=Hamiltonian expressed in the WFs subspace

OUTPUT

  eig_k(nband_k)=array for holding eigenvalues (hartree)
  evec(2*nband_k,nband_k)=array for holding eigenvectors

SIDE EFFECTS

  cg(2,mcg)=wavefunctions

SOURCE

4535 subroutine subdiago_low_memory(cg,eig_k,evec,icg,istwf_k,&
4536 &                   mcg,nband_k,npw_k,nspinor,paral_kgb,&
4537 &                   subham)
4538 
4539  use m_linalg_interfaces
4540  use m_abi_linalg
4541 
4542 !Arguments ------------------------------------
4543  integer,intent(in) :: icg,istwf_k,mcg,nband_k,npw_k
4544  integer,intent(in) :: nspinor,paral_kgb
4545  real(dp),intent(inout) :: subham(nband_k*(nband_k+1))
4546  real(dp),intent(out) :: eig_k(nband_k),evec(2*nband_k,nband_k)
4547  real(dp),intent(inout),target :: cg(2,mcg)
4548 
4549 !Local variables-------------------------------
4550  integer :: ig,igfirst,block_size,iblock,nblock,block_size_tmp,wfsize
4551  integer :: iband,ii,ierr,vectsize,use_slk
4552  character(len=500) :: message
4553  ! real(dp) :: tsec(2)
4554  real(dp),allocatable :: evec_tmp(:,:),subham_tmp(:)
4555  real(dp),allocatable :: work(:,:)
4556  real(dp),allocatable :: blockvectora(:,:),blockvectorb(:,:),blockvectorc(:,:)
4557  real(dp),pointer :: cg_block(:,:)
4558 
4559 ! *********************************************************************
4560 
4561  if (paral_kgb<0) then
4562    ABI_BUG('paral_kgb should be positive ')
4563  end if
4564 
4565  ! 1 if Scalapack version is used.
4566  use_slk = paral_kgb
4567 
4568 !Impose Hermiticity on diagonal elements of subham (and subovl, if needed)
4569 ! MG FIXME: In these two calls we are aliasing the args
4570  call hermit(subham,subham,ierr,nband_k)
4571 
4572 !Diagonalize the Hamitonian matrix
4573  if(istwf_k==2) then
4574    ABI_MALLOC(evec_tmp,(nband_k,nband_k))
4575    ABI_MALLOC(subham_tmp,(nband_k*(nband_k+1)/2))
4576    subham_tmp=subham(1:nband_k*(nband_k+1):2)
4577    evec_tmp=zero
4578    call abi_xhpev('V','U',nband_k,subham_tmp,eig_k,evec_tmp,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4579    evec(:,:)=zero;evec(1:2*nband_k:2,:) =evec_tmp
4580    ABI_FREE(evec_tmp)
4581    ABI_FREE(subham_tmp)
4582  else
4583    call abi_xhpev('V','U',nband_k,subham,eig_k,evec,nband_k,istwf_k=istwf_k,use_slk=use_slk)
4584  end if
4585 
4586 !Normalize each eigenvector and set phase:
4587 !The problem with minus/plus signs might be present also if .not. use_subovl
4588 !if(use_subovl == 0) then
4589  call cg_normev(evec,nband_k,nband_k)
4590 !end if
4591 
4592  if(istwf_k==2)then
4593    do iband=1,nband_k
4594      do ii=1,nband_k
4595        if(abs(evec(2*ii,iband))>1.0d-10)then
4596          write(message,'(3a,2i0,2es16.6,a,a)')ch10,&
4597 &         ' subdiago: For istwf_k=2, observed the following element of evec :',ch10,&
4598 &         iband,ii,evec(2*ii-1,iband),evec(2*ii,iband),ch10,'  with a non-negligible imaginary part.'
4599          ABI_BUG(message)
4600        end if
4601      end do
4602    end do
4603  end if
4604 
4605 !=====================================================
4606 !Carry out rotation of bands C(G,n) according to evecs
4607 ! ZGEMM if istwfk==1, DGEMM if istwfk==2
4608 !=====================================================
4609  wfsize=npw_k*nspinor
4610 
4611  block_size=100
4612 
4613  if (wfsize<block_size) block_size=wfsize
4614 
4615  nblock=wfsize/block_size
4616  if (mod(wfsize,block_size)/=0) nblock=nblock+1
4617 
4618  if (istwf_k>1) then ! evec is real
4619 
4620    vectsize=2*block_size
4621 
4622    ABI_MALLOC_OR_DIE(blockvectora,(vectsize,nband_k), ierr)
4623    ABI_MALLOC_OR_DIE(blockvectorb,(nband_k,nband_k), ierr)
4624    ABI_MALLOC_OR_DIE(blockvectorc,(vectsize,nband_k), ierr)
4625 
4626    do iband=1,nband_k
4627      call abi_xcopy(nband_k,evec(2*iband-1,1),2*nband_k,blockvectorb(iband,1),nband_k)
4628    end do
4629 
4630    do iblock=1,nblock
4631 
4632      igfirst=(iblock-1)*block_size
4633      block_size_tmp=block_size
4634      if (igfirst+block_size>wfsize) then
4635        block_size_tmp=wfsize-igfirst
4636      end if
4637 
4638      do iband=1,nband_k
4639        call abi_xcopy(block_size_tmp,cg(1,1+cgindex_subd(iblock,iband)),2,blockvectora(1,iband),1)
4640        call abi_xcopy(block_size_tmp,cg(2,1+cgindex_subd(iblock,iband)),2,blockvectora(block_size+1,iband),1)
4641        if (block_size_tmp<block_size) then
4642          blockvectora(block_size_tmp+1:block_size,iband) = zero
4643          blockvectora(block_size+block_size_tmp+1:2*block_size,iband) = zero
4644        end if
4645      end do
4646 
4647      call abi_xgemm('N','N',vectsize,nband_k,nband_k,&
4648 &     cone,blockvectora,vectsize,blockvectorb,nband_k,czero,blockvectorc,vectsize)
4649 
4650      do iband=1,nband_k
4651        call abi_xcopy(block_size_tmp,blockvectorc(1,iband),1,cg(1,1+cgindex_subd(iblock,iband)),2)
4652        call abi_xcopy(block_size_tmp,blockvectorc(block_size+1,iband),1,cg(2,1+cgindex_subd(iblock,iband)),2)
4653      end do
4654 
4655    end do
4656 
4657    ABI_FREE(blockvectora)
4658    ABI_FREE(blockvectorb)
4659    ABI_FREE(blockvectorc)
4660 
4661  else ! evec is complex
4662 
4663    ABI_MALLOC_OR_DIE(work,(2,block_size*nband_k), ierr)
4664    if (nblock==1) then
4665      cg_block => cg(:,icg+1:icg+nband_k*wfsize)
4666    else
4667      ABI_MALLOC_OR_DIE(cg_block,(2,block_size*nband_k), ierr)
4668    end if
4669 
4670    do iblock=1,nblock
4671      igfirst=(iblock-1)*block_size
4672      block_size_tmp=block_size
4673      if (igfirst+block_size>wfsize) then
4674        block_size_tmp=wfsize-igfirst
4675      end if
4676      if (nblock/=1) then
4677        do iband=1,nband_k
4678          do ig=1,block_size_tmp
4679            cg_block(:,ig+(iband-1)*block_size) = cg(:,ig+cgindex_subd(iblock,iband))
4680          end do
4681          if (block_size_tmp<block_size) then
4682            do ig=block_size_tmp+1,block_size
4683              cg_block(:,ig+(iband-1)*block_size) = zero
4684            end do
4685          end if
4686        end do
4687      end if
4688      call abi_xgemm('N','N',block_size,nband_k,nband_k,cone,cg_block,block_size,evec,nband_k,czero,work,&
4689        &     block_size,x_cplx=2)
4690      do iband=1,nband_k
4691        do ig=1,block_size_tmp
4692          cg(:,ig+cgindex_subd(iblock,iband)) = work(:,ig+(iband-1)*block_size)
4693        end do
4694      end do
4695    end do
4696 
4697    ABI_FREE(work)
4698    if (nblock/=1) then
4699      ABI_FREE(cg_block)
4700    end if
4701 
4702  end if
4703 
4704  contains
4705 
4706    function cgindex_subd(iblock,iband)
4707 
4708    integer :: iband,iblock,cgindex_subd
4709    cgindex_subd=(iblock-1)*block_size+(iband-1)*wfsize+icg
4710  end function cgindex_subd
4711 
4712 end subroutine subdiago_low_memory

m_cgtools/cg_addtorho [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_addtorho

FUNCTION

  Add |ur|**2 to the ground-states density rho.
    rho = rho + weight_r * Re[ur]**2 + weight_i * Im[ur]**2

INPUTS

  nx,ny,nz=physical dimension of the FFT box.
  ldx,ldy,ldz=leading dimensions of the arrays.
  ndat=number of contributions to accumulate.
  weight_r=weight used for the accumulation of the density in real space
  weight_i=weight used for the accumulation of the density in real space
  ur(2,ldx,ldy,ldz*ndat)=wavefunctions in real space

SIDE EFFECTS

  rho(ldx,ldy,ldz) = contains the input density at input,
                  modified in input with the contribution gived by ur.

SOURCE

2072 subroutine cg_addtorho(nx,ny,nz,ldx,ldy,ldz,ndat,weight_r,weight_i,ur,rho)
2073 
2074 !Arguments ------------------------------------
2075 !scalars
2076  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat
2077  real(dp),intent(in) :: weight_i,weight_r
2078 !arrays
2079  real(dp),intent(in) :: ur(2,ldx,ldy,ldz*ndat)
2080  real(dp),intent(inout) :: rho(ldx,ldy,ldz)
2081 
2082 !Local variables-------------------------------
2083 !scalars
2084  integer :: ix,iy,iz,idat,izdat
2085 
2086 ! *************************************************************************
2087 
2088  if (ndat==1) then
2089 !$OMP PARALLEL DO
2090    do iz=1,nz
2091      do iy=1,ny
2092        do ix=1,nx
2093          rho(ix,iy,iz) = rho(ix,iy,iz) + weight_r * ur(1,ix,iy,iz)**2 &
2094 &                                      + weight_i * ur(2,ix,iy,iz)**2
2095        end do
2096      end do
2097    end do
2098 
2099  else
2100 ! It would be nice to use $OMP PARALLEL DO PRIVATE(izdat) REDUCTION(+:rho)
2101 ! but it's risky as the private rho is allocated on the stack of the thread.
2102 !$OMP PARALLEL PRIVATE(izdat)
2103    do idat=1,ndat
2104 !$OMP DO
2105      do iz=1,nz
2106        izdat = iz + (idat-1)*ldz
2107        do iy=1,ny
2108          do ix=1,nx
2109            rho(ix,iy,iz) = rho(ix,iy,iz) + weight_r * ur(1,ix,iy,izdat)**2 &
2110 &                                        + weight_i * ur(2,ix,iy,izdat)**2
2111          end do
2112        end do
2113      end do
2114 !$OMP END DO NOWAIT
2115    end do
2116 !$OMP END PARALLEL
2117  end if
2118 
2119 end subroutine cg_addtorho

m_cgtools/cg_box2gsph [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_box2gsph

FUNCTION

INPUTS

  nx,ny,nz=physical dimension of the FFT box.
  ldx,ldy,ldz=Logical dimensions of the arrays.
  ndat=number of data in iarrbox
  npw_k=Number of planewaves in the G-sphere.
  kg_k(3,npw_k)=Reduced coordinates of the G-vectoes.
  iarrbox(2,ldx,ldy,ldz*ndat)=Input arrays on the FFT box.
  [rscal] = Scaling factor

OUTPUT

  oarrsph(2,npw_k*ndat)=Data defined on the G-sphere.

SOURCE

1960 subroutine cg_box2gsph(nx,ny,nz,ldx,ldy,ldz,ndat,npw_k,kg_k,iarrbox,oarrsph,rscal)
1961 
1962 !Arguments ------------------------------------
1963 !scalars
1964  integer,intent(in) :: npw_k,nx,ny,nz,ldx,ldy,ldz,ndat
1965  real(dp),optional,intent(in) :: rscal
1966 !arrays
1967  integer,intent(in) :: kg_k(3,npw_k)
1968  real(dp),intent(in) :: iarrbox(2,ldx*ldy*ldz*ndat)
1969  real(dp),intent(out) :: oarrsph(2,npw_k*ndat)
1970 
1971 !Local variables-------------------------------
1972 !scalars
1973  integer :: ig,ix,iy,iz,idat,sph_pad,box_pad,ifft
1974 
1975 ! *************************************************************************
1976 
1977  if (.not. PRESENT(rscal)) then
1978    !
1979    if (ndat==1) then
1980 !$OMP PARALLEL DO PRIVATE(ix,iy,iz,ifft)
1981      do ig=1,npw_k
1982        ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1
1983        iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1
1984        iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1
1985        ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
1986 #if defined FC_NVHPC
1987 if (ifft<0) stop "prevent from miscompiling this section"
1988 #endif
1989        oarrsph(1,ig) = iarrbox(1,ifft)
1990        oarrsph(2,ig) = iarrbox(2,ifft)
1991      end do
1992    else
1993 !$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,ix,iy,iz,ifft)
1994      do idat=1,ndat
1995        sph_pad = (idat-1)*npw_k
1996        box_pad = (idat-1)*ldx*ldy*ldz
1997        do ig=1,npw_k
1998          ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1
1999          iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1
2000          iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1
2001          ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
2002 #if defined FC_NVHPC
2003 if (ifft<0) stop "prevent from miscompiling this section"
2004 #endif
2005          oarrsph(1,ig+sph_pad) = iarrbox(1,ifft+box_pad)
2006          oarrsph(2,ig+sph_pad) = iarrbox(2,ifft+box_pad)
2007        end do
2008      end do
2009    end if
2010    !
2011  else
2012    if (ndat==1) then
2013 !$OMP PARALLEL DO PRIVATE(ix,iy,iz,ifft)
2014      do ig=1,npw_k
2015        ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1
2016        iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1
2017        iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1
2018        ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
2019 #if defined FC_NVHPC
2020 if (ifft<0) stop "prevent from miscompiling this section"
2021 #endif
2022        oarrsph(1,ig) = iarrbox(1,ifft) * rscal
2023        oarrsph(2,ig) = iarrbox(2,ifft) * rscal
2024      end do
2025    else
2026 !$OMP PARALLEL DO PRIVATE(sph_pad,box_pad,ix,iy,iz,ifft)
2027      do idat=1,ndat
2028        sph_pad = (idat-1)*npw_k
2029        box_pad = (idat-1)*ldx*ldy*ldz
2030        do ig=1,npw_k
2031          ix=kg_k(1,ig); if (ix<0) ix=ix+nx; ix=ix+1
2032          iy=kg_k(2,ig); if (iy<0) iy=iy+ny; iy=iy+1
2033          iz=kg_k(3,ig); if (iz<0) iz=iz+nz; iz=iz+1
2034          ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
2035 #if defined FC_NVHPC
2036 if (ifft<0) stop "prevent from miscompiling this section"
2037 #endif
2038          oarrsph(1,ig+sph_pad) = iarrbox(1,ifft+box_pad) * rscal
2039          oarrsph(2,ig+sph_pad) = iarrbox(2,ifft+box_pad) * rscal
2040        end do
2041      end do
2042    end if
2043  end if
2044 
2045 end subroutine cg_box2gsph

m_cgtools/cg_dznrm2 [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_dznrm2

FUNCTION

   returns the euclidean norm of a vector via the function name, so that
   DZNRM2 := sqrt( x**H*x )

INPUTS

  n = Specifies the number of elements in vector x.
  x(2*x) = Input array.

OUTPUT

SOURCE

490 function cg_dznrm2(n, x) result(res)
491 
492 !Arguments ------------------------------------
493 !scalars
494  integer,intent(in) :: n
495  real(dp) :: res
496 !arrays
497  real(dp),intent(in) :: x(2*n)
498  real(dp),external :: dznrm2
499 
500 ! *************************************************************************
501 
502  res = dznrm2(n, x, 1)
503 
504 end function cg_dznrm2

m_cgtools/cg_envlop [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_envlop

FUNCTION

 Multiply random number values in cg by envelope function to lower initial kinetic energy.
 Envelope  $\left( 1-\left( G/G_{\max }\right) ^2\right) ^{power}$ for |G|<= Gmax.
 Near G=0, little scaling, and goes to zero flatly near Gmax.

INPUTS

 cg(2,mcg)=initial random number wavefunctions
 ecut=kinetic energy cutoff in Ha
 gmet(3,3)=reciprocal space metric (bohr^-2)
 icgmod=shift to be given to the location of data in cg
 kg(3,npw)=reduced coordinates of G vectors in basis sphere
 kpoint(3)=reduced coordinates of k point
 mcg=maximum second dimension of cg (at least npw*nband*nspinor)
 nband=number of bands being considered
 npw=number of planewaves in basis sphere
 nspinor=number of spinorial components of the wavefunctions

OUTPUT

  cg(2,mcg)=revised values (not orthonormalized)

SOURCE

3159 subroutine cg_envlop(cg, ecut, gmet, icgmod, kg, kpoint, mcg, nband, npw, nspinor)
3160 
3161 !Arguments ------------------------------------
3162 !scalars
3163  integer,intent(in) :: icgmod,mcg,nband,npw,nspinor
3164  real(dp),intent(in) :: ecut
3165 !arrays
3166  integer,intent(in) :: kg(3,npw)
3167  real(dp),intent(in) :: gmet(3,3),kpoint(3)
3168  real(dp),intent(inout) :: cg(2,mcg)
3169 
3170 !Local variables-------------------------------
3171 !scalars
3172  integer,parameter :: re=1,im=2,power=12
3173  integer :: i1,i2,i3,ig,igs,ispinor,nn,spad
3174  real(dp) :: cutoff,gs,kpgsqc
3175  !character(len=500) :: msg
3176 !arrays
3177  real(dp),allocatable :: cut_pws(:)
3178 
3179 ! *************************************************************************
3180 
3181 !$(k+G)^2$ cutoff from $(1/2)(2 Pi (k+G))^2 = ecut$
3182  kpgsqc=ecut/(2.0_dp*pi**2)
3183  cutoff = kpgsqc
3184 
3185  ABI_MALLOC(cut_pws,(npw))
3186 
3187 !Run through G vectors in basis
3188 !$OMP PARALLEL DO PRIVATE(gs)
3189  do ig=1,npw
3190    i1=kg(1,ig) ; i2=kg(2,ig) ; i3=kg(3,ig)
3191 !(k+G)^2 evaluated using metric and kpoint
3192    gs = gmet(1,1)*(kpoint(1)+dble(i1))**2+&
3193 &    gmet(2,2)*(kpoint(2)+dble(i2))**2+&
3194 &    gmet(3,3)*(kpoint(3)+dble(i3))**2+&
3195 &    2.0_dp*(gmet(2,1)*(kpoint(2)+dble(i2))*(kpoint(1)+dble(i1))+&
3196 &    gmet(3,2)*(kpoint(3)+dble(i3))*(kpoint(2)+dble(i2))+&
3197 &    gmet(1,3)*(kpoint(1)+dble(i1))*(kpoint(3)+dble(i3)))
3198    if (gs>cutoff) then
3199      cut_pws(ig) = zero
3200    else
3201      cut_pws(ig) = (1.0_dp-(gs/cutoff))**power
3202    end if
3203  end do
3204 
3205 !Run through bands (real and imaginary components)
3206 !$OMP PARALLEL DO PRIVATE(spad,igs)
3207  do nn=1,nband
3208    spad = (nn-1)*npw*nspinor+icgmod
3209    do ispinor=1,nspinor
3210      do ig=1,npw
3211        igs=ig+(ispinor-1)*npw
3212        cg(1,igs+spad) = cg(1,igs+spad) * cut_pws(ig)
3213        cg(2,igs+spad) = cg(2,igs+spad) * cut_pws(ig)
3214      end do
3215    end do
3216  end do
3217 
3218  ABI_FREE(cut_pws)
3219 
3220 end subroutine cg_envlop

m_cgtools/cg_from_reim [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_from_reim

FUNCTION

INPUTS

SOURCE

375 subroutine cg_from_reim(npw, ndat, reim, factor, cg)
376 
377 !Arguments ------------------------------------
378 !scalars
379  integer,intent(in) :: npw,ndat
380  real(dp),intent(in) :: factor
381 !arrays
382  real(dp),intent(in) :: reim(npw*2, ndat)
383  real(dp),intent(out) :: cg(2*npw, ndat)
384 
385 !Local variables-------------------------------
386  integer :: idat
387 
388 ! *************************************************************************
389 
390  ! UnPack real and imaginary part and multiply by scale factor if /= one.
391  do idat=1,ndat
392    call dcopy(npw, reim(1, idat), 1, cg(1, idat), 2)
393    call dcopy(npw, reim(npw+1, idat), 1, cg(2, idat), 2)
394    if (factor /= one) call dscal(2*npw, factor, cg(1, idat), 1)
395  end do
396 
397 end subroutine cg_from_reim

m_cgtools/cg_fromcplx [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_fromcplx

FUNCTION

  Convert a complex array to a real array with (real,imag) part

INPUTS

  n = Specifies the number of elements in icplx and ocg.
  icplx(n)=Input complex array.

OUTPUT

  ocg(2*n)=Output array with real and imaginary part.

SOURCE

197 subroutine cg_fromcplx(n, icplx, ocg)
198 
199 !Arguments ------------------------------------
200 !scalars
201  integer,intent(in) :: n
202 !arrays
203  real(dp),intent(out) :: ocg(2*n)
204  complex(dpc),intent(in) :: icplx(n)
205 
206 !Local variables ------------------------------
207 !scalars
208  integer :: ii,idx
209 
210 ! *************************************************************************
211 
212 !$OMP PARALLEL DO PRIVATE(idx)
213  do ii=1,n
214    idx = 2*ii-1
215    ocg(idx  ) = DBLE (icplx(ii))
216    ocg(idx+1) = AIMAG(icplx(ii))
217  end do
218 
219 end subroutine cg_fromcplx

m_cgtools/cg_get_eigens [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_get_eigens

FUNCTION

  Helper functions to compute <i|H|i> / <i|S|i> for ndat states.
  Assume normalized input wavefunctions.

INPUTS

OUTPUT

SOURCE

5583 subroutine cg_get_eigens(usepaw, istwf_k, npwsp, ndat, cg, ghc, gsc, eig, me_g0, comm)
5584 
5585  integer,intent(in) :: usepaw, istwf_k, npwsp, ndat, me_g0, comm
5586  real(dp),intent(in) :: ghc(2*npwsp, ndat), cg(2*npwsp, ndat), gsc(2*npwsp, ndat*usepaw)
5587  real(dp),intent(out) :: eig(ndat)
5588 
5589 !Local variables-------------------------------
5590  integer,parameter :: option1 = 1
5591  integer :: idat, ierr
5592  real(dp) :: doti, dots_r(ndat)
5593 ! *************************************************************************
5594 
5595  ! <psi|H|psi> / <psi|S|psi>
5596 !$OMP PARALLEL DO
5597  do idat=1,ndat
5598    call dotprod_g(eig(idat), doti, istwf_k, npwsp, option1, ghc(:,idat), cg(:,idat), me_g0, xmpi_comm_self)
5599    if (usepaw == 1) then
5600      call dotprod_g(dots_r(idat), doti, istwf_k, npwsp, option1, gsc(:,idat), cg(:,idat), me_g0, xmpi_comm_self)
5601    end if
5602  end do
5603 
5604  if (xmpi_comm_size(comm) > 1) then
5605    call xmpi_sum(eig, comm, ierr)
5606    if (usepaw == 1) call xmpi_sum(dots_r, comm, ierr)
5607  end if
5608 
5609  if (usepaw == 1) eig(:) = eig(:) / dots_r(:)
5610 
5611 end subroutine cg_get_eigens

m_cgtools/cg_get_residvecs [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_get_residvecs

FUNCTION

  Compute redidual vectors (H - eS) |psi> for ndat states.

INPUTS

OUTPUT

SOURCE

5627 subroutine cg_get_residvecs(usepaw, npwsp, ndat, eig, cg, ghc, gsc, residvecs)
5628 
5629  integer,intent(in) :: usepaw, npwsp, ndat
5630  real(dp),intent(in) :: eig(ndat)
5631  real(dp),intent(in) :: ghc(2*npwsp, ndat), cg(2*npwsp, ndat), gsc(2*npwsp, ndat*usepaw)
5632  real(dp),intent(out) :: residvecs(2*npwsp, ndat)
5633 
5634 !Local variables-------------------------------
5635  integer :: idat
5636 ! *************************************************************************
5637 
5638  if (usepaw == 1) then
5639    ! (H - e) |psi>
5640    !$OMP PARALLEL DO IF (ndat > 1)
5641    do idat=1,ndat
5642      residvecs(:,idat) = ghc(:,idat) - eig(idat) * gsc(:,idat)
5643    end do
5644  else
5645    ! (H - eS) |psi>
5646    !$OMP PARALLEL DO IF (ndat > 1)
5647    do idat=1,ndat
5648      residvecs(:,idat) = ghc(:,idat) - eig(idat) * cg(:,idat)
5649    end do
5650  end if
5651 
5652 end subroutine cg_get_residvecs

m_cgtools/cg_getspin [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_getspin

FUNCTION

  Sandwich a single wave function on the Pauli matrices

INPUTS

  npw_k = number of plane waves
  cgcband = coefficients of spinorial wave function

OUTPUT

  spin = 3-vector of spin components for this state
  cgcmat = outer spin product of spinorial wf with itself

SOURCE

1737 subroutine cg_getspin(cgcband, npw_k, spin, cgcmat)
1738 
1739 !Arguments ------------------------------------
1740 !scalars
1741  integer, intent(in) :: npw_k
1742  real(dp), intent(in) :: cgcband(2,2*npw_k)
1743  complex(dpc), intent(out),optional :: cgcmat(2,2)
1744  real(dp), intent(out) :: spin(3)
1745 
1746 !Local variables-------------------------------
1747 !scalars
1748  complex(dpc) :: cspin(0:3), cgcmat_(2,2)
1749 ! ***********************************************************************
1750 
1751 ! cgcmat_ = cgcband * cgcband^T*  i.e. 2x2 matrix of spin components (dpcomplex)
1752  cgcmat_ = czero
1753  call zgemm('n','c',2,2,npw_k,cone,cgcband,2,cgcband,2,czero,cgcmat_,2)
1754 
1755 ! spin(*)  = sum_{si sj pi} cgcband(si,pi)^* pauli_mat*(si,sj) cgcband(sj,pi)
1756  cspin(0) = cgcmat_(1,1)*pauli_mat(1,1,0) + cgcmat_(2,1)*pauli_mat(2,1,0) &
1757 &         + cgcmat_(1,2)*pauli_mat(1,2,0) + cgcmat_(2,2)*pauli_mat(2,2,0)
1758  cspin(1) = cgcmat_(1,1)*pauli_mat(1,1,1) + cgcmat_(2,1)*pauli_mat(2,1,1) &
1759 &         + cgcmat_(1,2)*pauli_mat(1,2,1) + cgcmat_(2,2)*pauli_mat(2,2,1)
1760  cspin(2) = cgcmat_(1,1)*pauli_mat(1,1,2) + cgcmat_(2,1)*pauli_mat(2,1,2) &
1761 &         + cgcmat_(1,2)*pauli_mat(1,2,2) + cgcmat_(2,2)*pauli_mat(2,2,2)
1762  cspin(3) = cgcmat_(1,1)*pauli_mat(1,1,3) + cgcmat_(2,1)*pauli_mat(2,1,3) &
1763 &         + cgcmat_(1,2)*pauli_mat(1,2,3) + cgcmat_(2,2)*pauli_mat(2,2,3)
1764 !write(std_out,*) 'cgmat: ', cgcmat_
1765 !write(std_out,*) 'real(spin): ', real(cspin)
1766 !write(std_out,*) 'aimag(spin): ', aimag(cspin)
1767 
1768  spin = real(cspin(1:3))
1769  if (present(cgcmat)) cgcmat = cgcmat_
1770 
1771 end subroutine cg_getspin

m_cgtools/cg_gsph2box [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_gsph2box

FUNCTION

 Array iarrsph is defined in sphere with npw_k points. Insert iarrsph inside box
 of nx*ny*nz points to define array oarrbox for fft box. rest of oarrbox is filled with 0 s.

INPUTS

 iarrsph(2,npw_k*ndat)= contains values for npw_k G vectors in basis sphere
 ndat=number of FFT to perform.
 npw_k=number of G vectors in basis at this k point
 oarrbox(2,ldx*ldy*ldz*ndat) = fft box
 nx,ny,nz=physical dimension of the box (oarrbox)
 ldx,ldy,ldz=memory dimension of oarrbox
 kg_k(3,npw_k)=integer coordinates of G vectors in basis sphere
 istwf_k=option parameter that describes the storage of wfs

OUTPUT

   oarrbox(ldx*ldy*ldz*ndat)

NOTES

 If istwf_k differs from 1, then special storage modes must be taken
 into account, for symmetric wavefunctions coming from k=(0 0 0) or other
 special k points.

SOURCE

1804 subroutine cg_gsph2box(nx,ny,nz,ldx,ldy,ldz,ndat,npw_k,istwf_k,kg_k,iarrsph,oarrbox)
1805 
1806 !Arguments ------------------------------------
1807 !scalars
1808  integer,intent(in) :: istwf_k,nx,ny,nz,ldx,ldy,ldz,ndat,npw_k
1809 !arrays
1810  integer,intent(in) :: kg_k(3,npw_k)
1811  real(dp),intent(in) :: iarrsph(2,npw_k*ndat)
1812  real(dp),intent(out) :: oarrbox(2,ldx*ldy*ldz*ndat)
1813 
1814 !Local variables-------------------------------
1815 !scalars
1816  integer,parameter :: me_g0=1
1817  integer :: ix,ixinv,iy,iyinv,iz,izinv,dat,ipw,npwmin,pad_box,pad_sph,ifft,ifft_inv,ldxyz
1818  character(len=500) :: msg
1819 !arrays
1820  integer,allocatable :: ixinver(:),iyinver(:),izinver(:)
1821 
1822 ! *************************************************************************
1823 
1824 !In the case of special k-points, invariant under time-reversal,
1825 !but not Gamma, initialize the inverse coordinates
1826 !Remember indeed that
1827 !u_k(G) = u_{k+G0}(G-G0); u_{-k}(-G) = u_k(G)^*
1828 !and therefore:
1829 !u_{G0/2}(G) = u_{G0/2}(-G-G0)^*.
1830  if (istwf_k>=2) then
1831    ABI_MALLOC(ixinver,(nx))
1832    ABI_MALLOC(iyinver,(ny))
1833    ABI_MALLOC(izinver,(nz))
1834    if ( ANY(istwf_k==(/2,4,6,8/)) ) then
1835      ixinver(1)=1
1836      do ix=2,nx
1837        ixinver(ix)=nx+2-ix
1838      end do
1839    else
1840      do ix=1,nx
1841        ixinver(ix)=nx+1-ix
1842      end do
1843    end if
1844    if (istwf_k>=2 .and. istwf_k<=5) then
1845      iyinver(1)=1
1846      do iy=2,ny
1847        iyinver(iy)=ny+2-iy
1848      end do
1849    else
1850      do iy=1,ny
1851        iyinver(iy)=ny+1-iy
1852      end do
1853    end if
1854    if ( ANY(istwf_k==(/2,3,6,7/)) ) then
1855      izinver(1)=1
1856      do iz=2,nz
1857        izinver(iz)=nz+2-iz
1858      end do
1859    else
1860      do iz=1,nz
1861        izinver(iz)=nz+1-iz
1862      end do
1863    end if
1864  end if
1865 
1866  ldxyz = ldx*ldy*ldz
1867 
1868  if (istwf_k==1) then
1869 
1870 !$OMP PARALLEL DO PRIVATE(pad_sph,pad_box,ix,iy,iz,ifft)
1871    do dat=1,ndat
1872      pad_sph = (dat-1)*npw_k
1873      pad_box = (dat-1)*ldxyz
1874      oarrbox(:,1+pad_box:ldxyz+pad_box) = zero ! zero the sub-array
1875      do ipw=1,npw_k
1876        ix=kg_k(1,ipw); if (ix<0) ix=ix+nx; ix=ix+1
1877        iy=kg_k(2,ipw); if (iy<0) iy=iy+ny; iy=iy+1
1878        iz=kg_k(3,ipw); if (iz<0) iz=iz+nz; iz=iz+1
1879        ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
1880 #if (defined FC_NVHPC) || (defined __INTEL_COMPILER && defined HAVE_OPENMP)
1881 if (ifft<0) stop "prevent from miscompiling this section"
1882 #endif
1883        oarrbox(1,ifft+pad_box) = iarrsph(1,ipw+pad_sph)
1884        oarrbox(2,ifft+pad_box) = iarrsph(2,ipw+pad_sph)
1885      end do
1886    end do
1887 
1888  else if (istwf_k>=2) then
1889    !
1890    npwmin=1
1891    if(istwf_k==2 .and. me_g0==1) then ! If gamma point, then oarrbox must be completed
1892      do dat=1,ndat
1893        pad_sph = (dat-1)*npw_k
1894        pad_box = (dat-1)*ldxyz
1895        oarrbox(1,1+pad_box) = iarrsph(1,1+pad_sph)
1896        oarrbox(2,1+pad_box) = zero
1897      end do
1898      npwmin=2
1899    end if
1900 
1901 !$OMP PARALLEL DO PRIVATE(pad_sph,pad_box,ix,iy,iz,ixinv,iyinv,izinv,ifft,ifft_inv)
1902    do dat=1,ndat
1903      pad_sph = (dat-1)*npw_k
1904      pad_box = (dat-1)*ldxyz
1905      oarrbox(:,npwmin+pad_box:ldxyz+pad_box) = zero
1906      do ipw=npwmin,npw_k
1907        ix=kg_k(1,ipw); if(ix<0)ix=ix+nx; ix=ix+1
1908        iy=kg_k(2,ipw); if(iy<0)iy=iy+ny; iy=iy+1
1909        iz=kg_k(3,ipw); if(iz<0)iz=iz+nz; iz=iz+1
1910        ifft = ix + (iy-1)*ldx + (iz-1)*ldx*ldy
1911 #if defined FC_NVHPC
1912 if (ifft<0) stop "prevent from miscompiling this section"
1913 #endif
1914        ! Construct the coordinates of -k-G
1915        ixinv=ixinver(ix); iyinv=iyinver(iy); izinv=izinver(iz)
1916        ifft_inv = ixinv + (iyinv-1)*ldx + (izinv-1)*ldx*ldy
1917 
1918        oarrbox(:,ifft    +pad_box) =  iarrsph(:,ipw+pad_sph)
1919        oarrbox(1,ifft_inv+pad_box) =  iarrsph(1,ipw+pad_sph)
1920        oarrbox(2,ifft_inv+pad_box) = -iarrsph(2,ipw+pad_sph)
1921      end do
1922    end do
1923    !
1924  else
1925    write(msg,'(a,i0)')"Wrong istwfk ",istwf_k
1926    ABI_ERROR(msg)
1927  end if
1928 
1929  if (istwf_k>=2) then
1930    ABI_FREE(ixinver)
1931    ABI_FREE(iyinver)
1932    ABI_FREE(izinver)
1933  end if
1934 
1935 end subroutine cg_gsph2box

m_cgtools/cg_hprotate_and_get_diag [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

   cg_hprotate_and_get_diag

FUNCTION

  Compute the diagonal elements of E^H VNLX E
  where VNLX is an Hermitean matrixin packed form and E is the matrix with eigenvectors as column vectors.
  Mainly used to rotate the matrix elements of an operator after the subspace diagonalization.

INPUTS

OUTPUT

SOURCE

5459 subroutine cg_hprotate_and_get_diag(nband_k, subvnlx, evec, enlx_k)
5460 
5461 !Arguments ------------------------------------
5462 !scalars
5463  integer,intent(in) :: nband_k
5464 !arrays
5465  real(dp),intent(in) :: subvnlx(nband_k*(nband_k+1))
5466  real(dp),intent(in) :: evec(2*nband_k,nband_k)
5467  real(dp), intent(out) :: enlx_k(nband_k)
5468 
5469 !Local variables ------------------------------
5470 !scalars
5471  integer :: ii,jj,pidx,iband
5472  real(dp),allocatable :: mat1(:,:,:),matvnl(:,:,:)
5473 
5474 ! *************************************************************************
5475 
5476  ABI_MALLOC(matvnl,(2,nband_k,nband_k))
5477  ABI_MALLOC(mat1,(2,nband_k,nband_k))
5478 
5479  ! Construct upper triangle of matvnl from subvnlx using full storage mode.
5480  pidx=0
5481  do jj=1,nband_k
5482    do ii=1,jj
5483      pidx=pidx+1
5484      matvnl(1,ii,jj)=subvnlx(2*pidx-1)
5485      matvnl(2,ii,jj)=subvnlx(2*pidx  )
5486    end do
5487  end do
5488 
5489  call zhemm('L','U',nband_k,nband_k,cone,matvnl,nband_k,evec,nband_k,czero,mat1,nband_k)
5490 
5491 !$OMP PARALLEL DO
5492  do iband=1,nband_k
5493    enlx_k(iband) = cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband))
5494  end do
5495 
5496  ABI_FREE(matvnl)
5497  ABI_FREE(mat1)
5498 
5499 end subroutine cg_hprotate_and_get_diag

m_cgtools/cg_hrotate_and_get_diag [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

   cg_hrotate_and_get_diag

FUNCTION

  Compute the diagonal elements of E^H VNLX E
  where VNLX is an Hermitean matrix.
  Mainly used to rotate the matrix elements of an operator after the subspace diagonalization.

INPUTS

OUTPUT

SOURCE

5517 subroutine cg_hrotate_and_get_diag(istwf_k, nband_k, totvnlx, evec, enlx_k)
5518 
5519 !Arguments ------------------------------------
5520 !scalars
5521  integer,intent(in) :: istwf_k, nband_k
5522 !arrays
5523  real(dp),intent(in) :: totvnlx(2*nband_k,nband_k)
5524  real(dp),intent(in) :: evec(2*nband_k,nband_k)
5525  real(dp),intent(out) :: enlx_k(nband_k)
5526 
5527 !Local variables ------------------------------
5528 !scalars
5529  real(dp),external :: ddot
5530  integer :: jj,iband
5531  real(dp),allocatable :: mat_loc(:,:),mat1(:,:,:),matvnl(:,:,:), evec_loc(:,:)
5532 
5533 ! *************************************************************************
5534 
5535  ABI_MALLOC(matvnl, (2,nband_k, nband_k))
5536  ABI_MALLOC(mat1, (2, nband_k, nband_k))
5537  mat1=zero
5538 
5539  enlx_k(1:nband_k)=zero
5540 
5541  if (istwf_k==1) then
5542    call zhemm('l','l',nband_k,nband_k,cone,totvnlx,nband_k,evec,nband_k,czero,mat1,nband_k)
5543    do iband=1,nband_k
5544      enlx_k(iband)= cg_real_zdotc(nband_k,evec(:,iband),mat1(:,:,iband))
5545    end do
5546 
5547  else if (istwf_k==2) then
5548    ABI_MALLOC(evec_loc,(nband_k,nband_k))
5549    ABI_MALLOC(mat_loc,(nband_k,nband_k))
5550    do iband=1,nband_k
5551      do jj=1,nband_k
5552        evec_loc(iband,jj)=evec(2*iband-1,jj)
5553      end do
5554    end do
5555    call dsymm('l','l',nband_k,nband_k,one,totvnlx,nband_k,evec_loc,nband_k,zero,mat_loc,nband_k)
5556    do iband=1,nband_k
5557      enlx_k(iband)=ddot(nband_k,evec_loc(:,iband),1,mat_loc(:,iband),1)
5558    end do
5559    ABI_FREE(evec_loc)
5560    ABI_FREE(mat_loc)
5561  end if
5562 
5563  ABI_FREE(matvnl)
5564  ABI_FREE(mat1)
5565 
5566 end subroutine cg_hrotate_and_get_diag

m_cgtools/cg_kfilter [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_kfilter

FUNCTION

INPUTS

  nband=Number of vectors in icg1

SOURCE

235 pure subroutine cg_kfilter(npw_k, my_nspinor, nband_k, kinpw, cg)
236 
237 !Arguments ------------------------------------
238 !scalars
239  integer,intent(in) :: npw_k, my_nspinor, nband_k
240 !arrays
241  real(dp), intent(in) :: kinpw(npw_k)
242  real(dp),intent(inout) :: cg(2,npw_k*my_nspinor*nband_k)
243 
244 !Local variables-------------------------------
245  integer :: ispinor, iband, igs, iwavef, ipw
246 
247 ! *************************************************************************
248 
249 ! Filter the WFs when modified kinetic energy is too large (see routine mkkin.f)
250 ! !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(igs, iwavef)
251  do ispinor=1,my_nspinor
252    igs=(ispinor-1)*npw_k
253    do iband=1,nband_k
254      iwavef=(iband-1)*npw_k*my_nspinor
255      do ipw=1+igs,npw_k+igs
256        if (kinpw(ipw-igs)>huge(zero)*1.d-11)then
257          cg(1,ipw+iwavef)=zero
258          cg(2,ipw+iwavef)=zero
259        end if
260      end do
261    end do
262  end do
263 
264 end subroutine cg_kfilter

m_cgtools/cg_norm2g [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_norm2g

FUNCTION

  Compute <psi|psi> for ndat states distributed inside communicator comm.

INPUTS

OUTPUT

SOURCE

5668 subroutine cg_norm2g(istwf_k, npwsp, ndat, cg, norms, me_g0, comm)
5669 
5670  integer,intent(in) :: istwf_k, npwsp, ndat, me_g0, comm
5671  real(dp),intent(in) :: cg(2*npwsp, ndat)
5672  real(dp),intent(out) :: norms(ndat)
5673 
5674 !Local variables-------------------------------
5675  integer :: idat, ierr
5676 ! *************************************************************************
5677 
5678 !$OMP PARALLEL DO IF (ndat > 1)
5679  do idat=1,ndat
5680    call sqnorm_g(norms(idat), istwf_k, npwsp, cg(:,idat), me_g0, xmpi_comm_self)
5681  end do
5682  if (xmpi_comm_size(comm) > 1) call xmpi_sum(norms, comm, ierr)
5683 
5684 end subroutine cg_norm2g

m_cgtools/cg_normev [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_normev

FUNCTION

 Normalize a set of nband eigenvectors of complex length npw
 (real length 2*npw) and set phases to make cg(i,i) real and positive.
 Near convergence, cg(i,j) approaches delta(i,j).

INPUTS

  cg(2*npw,nband)=unnormalized eigenvectors
  npw=dimension of cg as shown
  nband=number of eigenvectors and complex length thereof.

OUTPUT

  cg(2*npw,nband)=nband normalized eigenvectors

SOURCE

3244 subroutine cg_normev(cg, npw, nband)
3245 
3246 !Arguments ------------------------------------
3247 !scalars
3248  integer,intent(in) :: npw,nband
3249 !arrays
3250  real(dp),intent(inout) :: cg(2*npw,nband)
3251 
3252 !Local variables-------------------------------
3253 !scalars
3254  integer :: ii,jj
3255  real(dp) :: den,evim,evre,phim,phre,xnorm
3256  character(len=500) :: msg
3257 
3258 ! *************************************************************************
3259 
3260 !Loop over vectors
3261  do ii=1,nband
3262    ! find norm
3263    xnorm=0.0d0
3264    do jj=1,2*npw
3265      xnorm=xnorm+cg(jj,ii)**2
3266    end do
3267 
3268    if((xnorm-one)**2>tol6)then
3269      write(msg,'(6a,i6,a,es16.6,3a)' )ch10,&
3270      'normev: ',ch10,&
3271      'Starting xnorm should be close to one (tol is tol6).',ch10,&
3272      'However, for state number',ii,', xnorm=',xnorm,ch10,&
3273      'It might be that your LAPACK library has not been correctly installed.'
3274      ABI_BUG(msg)
3275    end if
3276 
3277    xnorm=1.0d0/sqrt(xnorm)
3278 !  Set up phase
3279    phre=cg(2*ii-1,ii)
3280    phim=cg(2*ii,ii)
3281    if (phim/=0.0d0) then
3282      den=1.0d0/sqrt(phre**2+phim**2)
3283      phre=phre*xnorm*den
3284      phim=phim*xnorm*den
3285    else
3286 !    give xnorm the same sign as phre (negate if negative)
3287      phre=sign(xnorm,phre)
3288      phim=0.0d0
3289    end if
3290 !  normalize with phase change
3291    do jj=1,2*npw,2
3292      evre=cg(jj,ii)
3293      evim=cg(jj+1,ii)
3294      cg(jj,ii)=phre*evre+phim*evim
3295      cg(jj+1,ii)=phre*evim-phim*evre
3296    end do
3297  end do
3298 
3299 end subroutine cg_normev

m_cgtools/cg_precon [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_precon

FUNCTION

 precondition <G|(H-e)|C>

INPUTS

  cg(2,npw)=<G|C>.
  eval=current band eigenvalue = <C|H|C>.
  istwf_k=option parameter that describes the storage of wfs
  kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree)
  nspinor=number of spinorial components of the wavefunctions
  vect(2,npw)=<G|H|C>.
  npw=number of planewaves at this k point.
  optekin= 1 if the kinetic energy used in preconditionning is modified
             according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]]
           0 otherwise
  mg_g0=1 if the node treats G=0.
  comm=MPI communicator

OUTPUT

  pcon(npw)=preconditioning matrix
  vect(2,npw*nspinor)=<G|(H-eval)|C_{n,k}>*(polynomial ratio)

SOURCE

3332 subroutine cg_precon(cg, eval, istwf_k, kinpw, npw, nspinor, me_g0, optekin, pcon, vect, comm)
3333 
3334 !Arguments ------------------------------------
3335 !scalars
3336  integer,intent(in) :: istwf_k,npw,nspinor,optekin,me_g0,comm
3337  real(dp),intent(in) :: eval
3338 !arrays
3339  real(dp),intent(in) :: cg(2,npw*nspinor),kinpw(npw)
3340  real(dp),intent(inout) :: vect(2,npw*nspinor)
3341  real(dp),intent(out) :: pcon(npw)
3342 
3343 !Local variables-------------------------------
3344 !scalars
3345  integer :: ierr,ig,igs,ipw1,ispinor
3346  real(dp) :: ek0,ek0_inv,fac,poly,xx
3347  character(len=500) :: msg
3348 !arrays
3349  real(dp) :: tsec(2)
3350 
3351 ! *************************************************************************
3352 
3353 !Compute mean kinetic energy of band
3354  if(istwf_k==1)then
3355    ek0=zero
3356    do ispinor=1,nspinor
3357      igs=(ispinor-1)*npw
3358      do ig=1+igs,npw+igs
3359        if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3360          ek0=ek0+kinpw(ig-igs)*(cg(1,ig)**2+cg(2,ig)**2)
3361        end if
3362      end do
3363    end do
3364 
3365  else if (istwf_k>=2)then
3366    if (istwf_k==2 .and. me_g0 == 1)then
3367      ek0=zero ; ipw1=2
3368      if(kinpw(1)<huge(zero)*1.d-11)ek0=0.5_dp*kinpw(1)*cg(1,1)**2
3369    else
3370      ek0=zero ; ipw1=1
3371    end if
3372    do ispinor=1,nspinor
3373      igs=(ispinor-1)*npw
3374      do ig=ipw1+igs,npw+igs
3375        if(kinpw(ig)<huge(zero)*1.d-11)then
3376          ek0=ek0+kinpw(ig)*(cg(1,ig)**2+cg(2,ig)**2)
3377        end if
3378      end do
3379    end do
3380    ek0=2.0_dp*ek0
3381  end if
3382 
3383  call timab(48,1,tsec)
3384  call xmpi_sum(ek0,comm,ierr)
3385  call timab(48,2,tsec)
3386 
3387  if(ek0<1.0d-10)then
3388    write(msg,'(3a)')'The mean kinetic energy of a wavefunction vanishes.',ch10,'It is reset to 0.1 Ha.'
3389    ABI_WARNING(msg)
3390    ek0=0.1_dp
3391  end if
3392 
3393  if (optekin==1) then
3394    ek0_inv=2.0_dp/(3._dp*ek0)
3395  else
3396    ek0_inv=1.0_dp/ek0
3397  end if
3398 
3399 !Carry out preconditioning
3400  do ispinor=1,nspinor
3401    igs=(ispinor-1)*npw
3402 !$OMP PARALLEL DO PRIVATE(fac,ig,poly,xx) SHARED(cg,ek0_inv,eval,kinpw,igs,npw,vect,pcon)
3403    do ig=1+igs,npw+igs
3404      if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3405        xx=kinpw(ig-igs)*ek0_inv
3406 !      Teter polynomial ratio
3407        poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3408        fac=poly/(poly+16._dp*xx**4)
3409        if (optekin==1) fac=two*fac
3410        pcon(ig-igs)=fac
3411        vect(1,ig)=( vect(1,ig)-eval*cg(1,ig) )*fac
3412        vect(2,ig)=( vect(2,ig)-eval*cg(2,ig) )*fac
3413      else
3414        pcon(ig-igs)=zero
3415        vect(1,ig)=zero
3416        vect(2,ig)=zero
3417      end if
3418    end do
3419  end do
3420 
3421 end subroutine cg_precon

m_cgtools/cg_precon_block [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_precon_block

FUNCTION

 precondition $<G|(H-e_{n,k})|C_{n,k}>$ for a block of band (band-FFT parallelisation)
 in the case of real WFs (istwfk/=1)

INPUTS

  blocksize= size of blocks of bands
  cg(vectsize,blocksize)=<G|C_{n,k}> for a block of bands.
  eval(blocksize,blocksize)=current block of bands eigenvalues=<C_{n,k}|H|C_{n,k}>.
  ghc(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands.
  iterationnumber=number of iterative minimizations in LOBPCG
  kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree)
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  $vect(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$.
  npw=number of planewaves at this k point.
  optekin= 1 if the kinetic energy used in preconditionning is modified
             according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]]
           0 otherwise
  optpcon= 0 the TPA preconditionning matrix does not depend on band
           1 the TPA preconditionning matrix (not modified)
           2 the TPA preconditionning matrix is independant of iteration number
  vectsize= size of vectors
  mg_g0=1 if this node has Gamma, 0 otherwise.

OUTPUT

  vect(2,npw)=<g|(h-eval)|c_{n,k}>*(polynomial ratio)

SIDE EFFECTS

  pcon(npw,blocksize)=preconditionning matrix
            input  if optpcon=0,2 and iterationnumber/=1
            output if optpcon=0,2 and iterationnumber==1

SOURCE

3462 subroutine cg_precon_block(cg,eval,blocksize,iterationnumber,kinpw,&
3463 & npw,nspinor,me_g0,optekin,optpcon,pcon,ghc,vect,vectsize,comm)
3464 
3465 !Arguments ------------------------------------
3466 !scalars
3467  integer,intent(in) :: blocksize,iterationnumber,npw,nspinor,optekin
3468  integer,intent(in) :: optpcon,vectsize,me_g0,comm
3469 !arrays
3470  real(dp),intent(in) :: cg(vectsize,blocksize),eval(blocksize,blocksize)
3471  real(dp),intent(in) :: ghc(vectsize,blocksize),kinpw(npw)
3472  real(dp),intent(inout) :: pcon(npw,blocksize),vect(vectsize,blocksize)
3473 
3474 !Local variables-------------------------------
3475 !scalars
3476  integer :: iblocksize,ierr,ig,igs,ipw1,ispinor
3477  real(dp) :: fac,poly,xx
3478  character(len=500) :: msg
3479 !arrays
3480  real(dp) :: tsec(2)
3481  real(dp),allocatable :: ek0(:),ek0_inv(:)
3482 
3483 ! *************************************************************************
3484 
3485  call timab(536,1,tsec)
3486 
3487 !In this case, the Teter, Allan and Payne preconditioner is approximated:
3488 !the factor xx=Ekin(G) and no more Ekin(G)/Ekin(iband)
3489  if (optpcon==0) then
3490    do ispinor=1,nspinor
3491      igs=(ispinor-1)*npw
3492      if (me_g0 == 1) then
3493        do ig=1+igs,1+igs !g=0
3494          if (iterationnumber==1) then
3495            if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3496              xx=kinpw(ig-igs)
3497 !            teter polynomial ratio
3498              poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3499              fac=poly/(poly+16._dp*xx**4)
3500              if (optekin==1) fac=two*fac
3501              pcon(ig-igs,1)=fac
3502              do iblocksize=1,blocksize
3503                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3504 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3505              end do
3506            else
3507              pcon(ig-igs,1)=zero
3508              vect(ig,:)=0.0_dp
3509            end if
3510          else
3511            do iblocksize=1,blocksize
3512              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3513 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3514            end do
3515          end if
3516        end do
3517        do ig=2+igs,npw+igs
3518          if (iterationnumber==1) then
3519            if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3520              xx=kinpw(ig-igs)
3521 !            teter polynomial ratio
3522              poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3523              fac=poly/(poly+16._dp*xx**4)
3524              if (optekin==1) fac=two*fac
3525              pcon(ig-igs,1)=fac
3526              do iblocksize=1,blocksize
3527                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3528 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3529                vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-&
3530 &               eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,1)
3531              end do
3532            else
3533              pcon(ig-igs,1)=zero
3534              vect(ig,:)=zero
3535              vect(ig+npw-1,:)=zero
3536            end if
3537          else
3538            do iblocksize=1,blocksize
3539              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3540 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3541              vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-&
3542 &             eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,1)
3543            end do
3544          end if
3545        end do
3546      else
3547        do ig=1+igs,npw+igs
3548          if (iterationnumber==1) then
3549            if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3550              xx=kinpw(ig-igs)
3551 !            teter polynomial ratio
3552              poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3553              fac=poly/(poly+16._dp*xx**4)
3554              if (optekin==1) fac=two*fac
3555              pcon(ig-igs,1)=fac
3556              do iblocksize=1,blocksize
3557                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3558 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3559                vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-&
3560 &               eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,1)
3561              end do
3562            else
3563              pcon(ig-igs,:)=zero
3564              vect(ig,:)=zero
3565              vect(ig+npw,:)=zero
3566            end if
3567          else
3568            do iblocksize=1,blocksize
3569              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3570 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3571              vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-&
3572 &             eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,1)
3573            end do
3574          end if
3575        end do
3576      end if
3577    end do
3578 
3579  else if (optpcon>0) then
3580 !  Compute mean kinetic energy of all bands
3581    ABI_MALLOC(ek0,(blocksize))
3582    ABI_MALLOC(ek0_inv,(blocksize))
3583    if (iterationnumber==1.or.optpcon==1) then
3584      do iblocksize=1,blocksize
3585        if (me_g0 == 1)then
3586          ek0(iblocksize)=0.0_dp ; ipw1=2
3587          if(kinpw(1)<huge(zero)*1.d-11)ek0(iblocksize)=0.5_dp*kinpw(1)*cg(1,iblocksize)**2
3588          do ig=ipw1,npw
3589            if(kinpw(ig)<huge(zero)*1.d-11)then
3590              ek0(iblocksize)=ek0(iblocksize)+&
3591 &             kinpw(ig)*(cg(ig,iblocksize)**2+cg(ig+npw-1,iblocksize)**2)
3592            end if
3593          end do
3594        else
3595          ek0(iblocksize)=0.0_dp ; ipw1=1
3596          do ig=ipw1,npw
3597            if(kinpw(ig)<huge(zero)*1.d-11)then
3598              ek0(iblocksize)=ek0(iblocksize)+&
3599 &             kinpw(ig)*(cg(ig,iblocksize)**2+cg(ig+npw,iblocksize)**2)
3600            end if
3601          end do
3602        end if
3603      end do
3604 
3605      call xmpi_sum(ek0,comm,ierr)
3606 
3607      do iblocksize=1,blocksize
3608        if(ek0(iblocksize)<1.0d-10)then
3609          write(msg, '(4a)' )ch10,&
3610          'cg_precon_block: the mean kinetic energy of a wavefunction vanishes.',ch10,&
3611          'it is reset to 0.1ha.'
3612          ABI_WARNING(msg)
3613          ek0(iblocksize)=0.1_dp
3614        end if
3615      end do
3616      if (optekin==1) then
3617        ek0_inv(:)=2.0_dp/(3._dp*ek0(:))
3618      else
3619        ek0_inv(:)=1.0_dp/ek0(:)
3620      end if
3621    end if !iterationnumber==1.or.optpcon==1
3622 
3623 !  Carry out preconditioning
3624    do iblocksize=1,blocksize
3625      do ispinor=1,nspinor
3626        igs=(ispinor-1)*npw
3627        if (me_g0 == 1) then
3628          do ig=1+igs,1+igs !g=0
3629            if (iterationnumber==1.or.optpcon==1) then
3630              if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3631                xx=kinpw(ig-igs)*ek0_inv(iblocksize)
3632 !              teter polynomial ratio
3633                poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3634                fac=poly/(poly+16._dp*xx**4)
3635                if (optekin==1) fac=two*fac
3636                pcon(ig-igs,iblocksize)=fac
3637                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3638 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac
3639              else
3640                pcon(ig-igs,iblocksize)=zero
3641                vect(ig,iblocksize)=0.0_dp
3642              end if
3643            else
3644              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3645 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize)
3646            end if
3647          end do
3648          do ig=2+igs,npw+igs
3649            if (iterationnumber==1.or.optpcon==1) then
3650              if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3651                xx=kinpw(ig-igs)*ek0_inv(iblocksize)
3652 !              teter polynomial ratio
3653                poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3654                fac=poly/(poly+16._dp*xx**4)
3655                if (optekin==1) fac=two*fac
3656                pcon(ig-igs,iblocksize)=fac
3657                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3658 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac
3659                vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-&
3660 &               eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*fac
3661              else
3662                pcon(ig-igs,iblocksize)=zero
3663                vect(ig,iblocksize)=zero
3664                vect(ig+npw-1,iblocksize)=zero
3665              end if
3666            else
3667              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3668 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize)
3669              vect(ig+npw-1,iblocksize)=(ghc(ig+npw-1,iblocksize)-&
3670 &             eval(iblocksize,iblocksize)*cg(ig+npw-1,iblocksize))*pcon(ig-igs,iblocksize)
3671            end if
3672          end do
3673        else
3674          do ig=1+igs,npw+igs
3675            if (iterationnumber==1.or.optpcon==1) then
3676              if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3677                xx=kinpw(ig-igs)*ek0_inv(iblocksize)
3678 !              teter polynomial ratio
3679                poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3680                fac=poly/(poly+16._dp*xx**4)
3681                if (optekin==1) fac=two*fac
3682                pcon(ig-igs,iblocksize)=fac
3683                vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3684 &               eval(iblocksize,iblocksize)*cg(ig,iblocksize))*fac
3685                vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-&
3686 &               eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*fac
3687              else
3688                pcon(ig-igs,iblocksize)=zero
3689                vect(ig,iblocksize)=zero
3690                vect(ig+npw,iblocksize)=zero
3691              end if
3692            else
3693              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3694 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize)
3695              vect(ig+npw,iblocksize)=(ghc(ig+npw,iblocksize)-&
3696 &             eval(iblocksize,iblocksize)*cg(ig+npw,iblocksize))*pcon(ig-igs,iblocksize)
3697            end if
3698          end do
3699        end if
3700      end do
3701    end do
3702    ABI_FREE(ek0)
3703    ABI_FREE(ek0_inv)
3704  end if !optpcon
3705 
3706  call timab(536,2,tsec)
3707 
3708 end subroutine cg_precon_block

m_cgtools/cg_precon_many [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_precon_many

FUNCTION

INPUTS

OUTPUT

SOURCE

5751 subroutine cg_precon_many(istwf_k, npw, nspinor, ndat, cg, optekin, kinpw, vect, me_g0, comm)
5752 
5753  integer,intent(in) :: istwf_k, npw, nspinor, optekin, ndat, me_g0, comm
5754  real(dp),intent(in) :: cg(2*npw*nspinor,ndat), kinpw(npw)
5755  real(dp),intent(inout) :: vect(2*npw*nspinor,ndat)
5756 
5757 !Local variables-------------------------------
5758  integer :: idat
5759  real(dp),allocatable :: pcon(:)
5760 ! *************************************************************************
5761 
5762  ! TODO: Optimized version for MPI with ndat > 1
5763  ABI_MALLOC(pcon, (npw))
5764  do idat=1,ndat
5765    call cg_precon(cg(:,idat), zero, istwf_k, kinpw, npw, nspinor, me_g0, optekin, pcon, vect(:,idat), comm)
5766  end do
5767  ABI_FREE(pcon)
5768 
5769  !call cg_kinene(istwf_k, npw, nspinor, ndat, cg, me_g0, comm)
5770  !call cg_zprecon_block(cg,eval,blocksize,iterationnumber,kinpw, npw,nspinor,optekin,optpcon,pcon,ghc,vect,vectsize,comm)
5771 
5772 end subroutine cg_precon_many

m_cgtools/cg_real_zdotc [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_real_zdotc

FUNCTION

   Perform a vector-vector operation defined as res = REAL (\Sigma (conjg(x)*y)) where x and y are n-element vectors.

INPUTS

  n = Specifies the number of elements in vector x and y
  x,y = Input arrays.

OUTPUT

  res=Real part of the scalar product.

SOURCE

580 function cg_real_zdotc(n,x,y) result(res)
581 
582 !Arguments ------------------------------------
583 !scalars
584  integer,intent(in) :: n
585 !arrays
586  real(dp),intent(in) :: x(2,n)
587  real(dp),intent(in) :: y(2,n)
588  real(dp) :: res
589 
590 !Local variables-------------------------------
591  real(dp),external :: ddot
592 
593 ! *************************************************************************
594 
595  res = ddot(2*n, x, 1, y, 1)
596 
597 end function cg_real_zdotc

m_cgtools/cg_set_imag0_to_zero [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_set_imag0_to_zero

FUNCTION

  Set the imaginary part at G=0 to zero if istwfk == 2 and this proc has the gamma point

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.

SIDE EFFECTS

  cg(2*npwsp*nband)
    input: Input set of vectors.
    output: Orthonormalized set.

SOURCE

5838 pure subroutine cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag)
5839 
5840 !Arguments ------------------------------------
5841 !scalars
5842  integer,intent(in) :: istwfk, me_g0, npwsp, nband
5843 !arrays
5844  real(dp),intent(inout) :: cg(2,npwsp*nband)
5845  real(dp),intent(out) :: max_absimag
5846 
5847 !Local variables ------------------------------
5848  integer :: ib, ii
5849 
5850 ! *************************************************************************
5851 
5852  max_absimag = zero
5853  if (istwfk == 2 .and. me_g0 == 1) then
5854    do ib=1,nband
5855      ii = 1 + (ib - 1) * npwsp
5856      max_absimag = max(max_absimag, abs(cg(2, ii)))
5857      cg(2, ii) = zero
5858    end do
5859  end if
5860 
5861 end subroutine cg_set_imag0_to_zero

m_cgtools/cg_setaug_zero [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_setaug_zero

FUNCTION

  Set to zero all elements of the array that are not in the FFT box.

INPUTS

 nx,ny,nz=physical dimensions of the FFT box
 ldx,ldy,ldx=memory dimension of arr
 ndat=number of FFTs

 SIDE EFFECT
  arr(2,ldx,ldy,ldz*ndat)= all entries in the augmented region are set to zero

SOURCE

286 pure subroutine cg_setaug_zero(cplex,nx,ny,nz,ldx,ldy,ldz,ndat,arr)
287 
288 !Arguments ------------------------------------
289 !scalars
290  integer,intent(in) :: cplex,nx,ny,nz,ldx,ldy,ldz,ndat
291 !arrays
292  real(dp),intent(inout) :: arr(cplex,ldx,ldy,ldz*ndat)
293 
294 !Local variables-------------------------------
295  integer :: iy,iz,dat,padat
296 
297 ! *************************************************************************
298 
299  if (nx /= ldx) then
300    do iz=1,ldz*ndat
301      do iy=1,ldy
302        arr(:,nx+1:ldx,iy,iz) = zero
303      end do
304    end do
305  end if
306 
307  if (ny /= ldy) then
308    do iz=1,ldz*ndat
309      arr(:,:,ny+1:ldy,iz) = zero
310    end do
311  end if
312 
313  if (nz /= ldz) then
314    do dat=1,ndat
315      padat = ldz*(dat-1)
316      do iz=nz+1,ldz
317        arr(:,:,:,iz+padat) = zero
318      end do
319    end do
320  end if
321 
322 end subroutine cg_setaug_zero

m_cgtools/cg_to_reim [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_to_reim

FUNCTION

INPUTS

SOURCE

337 subroutine cg_to_reim(npw, ndat, cg, factor, reim)
338 
339 !Arguments ------------------------------------
340 !scalars
341  integer,intent(in) :: npw,ndat
342  real(dp),intent(in) :: factor
343 !arrays
344  real(dp),intent(in) :: cg(2*npw,ndat)
345  real(dp),intent(out) :: reim(npw*2,ndat)
346 
347 !Local variables-------------------------------
348  integer :: idat
349 
350 ! *************************************************************************
351 
352  ! Pack real and imaginary part of the wavefunctions.
353  ! and multiply by scale factor if factor /= one.
354  do idat=1,ndat
355    call dcopy(npw, cg(1, idat), 2, reim(1, idat), 1)
356    call dcopy(npw, cg(2, idat), 2, reim(npw+1, idat), 1)
357    if (factor /= one) call dscal(2*npw, factor, reim(1, idat), 1)
358  end do
359 
360 end subroutine cg_to_reim

m_cgtools/cg_tocplx [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_tocplx

FUNCTION

  Convert a real array with (real,imag) part to complex.

INPUTS

  n = Specifies the number of elements in cg and ocplx
  cg(2*n)=Input array with real and imaginary part.

OUTPUT

  ocplx(n)=Output complex array.

SOURCE

155 subroutine cg_tocplx(n, cg, ocplx)
156 
157 !Arguments ------------------------------------
158 !scalars
159  integer,intent(in) :: n
160 !arrays
161  real(dp),intent(in) :: cg(2*n)
162  complex(dpc),intent(out) :: ocplx(n)
163 
164 !Local variables ------------------------------
165 !scalars
166  integer :: ii,idx
167 
168 ! *************************************************************************
169 
170 !$OMP PARALLEL DO PRIVATE(idx)
171  do ii=1,n
172    idx = 2*ii-1
173    ocplx(ii) = DCMPLX(cg(idx),cg(idx+1))
174  end do
175 
176 end subroutine cg_tocplx

m_cgtools/cg_vlocpsi [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_vlocpsi

FUNCTION

  Apply the local part of the potentatil to the wavefunction in real space.

INPUTS

  nx,ny,nz=physical dimension of the FFT box.
  ldx,ldy,ldz=leading dimensions of the arrays.
  ndat=number of wavefunctions.
  cplex=  1 if vloc is real, 2 for complex
  vloc(cplex*ldx,ldy,ldz)=Local potential on the FFT box.

SIDE EFFECTS

  ur(2,ldx,ldy,ldz*ndat)=
    Input = wavefunctions in real space.
    Output= vloc |ur>

SOURCE

2145 subroutine cg_vlocpsi(nx,ny,nz,ldx,ldy,ldz,ndat,cplex,vloc,ur)
2146 
2147 !Arguments ------------------------------------
2148 !scalars
2149  integer,intent(in) :: nx,ny,nz,ldx,ldy,ldz,ndat,cplex
2150 !arrays
2151  real(dp),intent(in) :: vloc(cplex*ldx,ldy,ldz)
2152  real(dp),intent(inout) :: ur(2,ldx,ldy,ldz*ndat)
2153 
2154 !Local variables-------------------------------
2155 !scalars
2156  integer :: idat,ix,iy,iz,padat
2157  real(dp) :: fim,fre
2158 
2159 ! *************************************************************************
2160 
2161  if (cplex==1) then
2162    !
2163    if (ndat==1) then
2164 !$OMP PARALLEL DO
2165      do iz=1,nz
2166        do iy=1,ny
2167          do ix=1,nx
2168            ur(1,ix,iy,iz) = vloc(ix,iy,iz) * ur(1,ix,iy,iz)
2169            ur(2,ix,iy,iz) = vloc(ix,iy,iz) * ur(2,ix,iy,iz)
2170          end do
2171        end do
2172      end do
2173      !
2174    else
2175      !
2176 !$OMP PARALLEL DO PRIVATE(padat)
2177      do idat=1,ndat
2178        padat = ldz*(idat-1)
2179        do iz=1,nz
2180          do iy=1,ny
2181            do ix=1,nx
2182              ur(1,ix,iy,iz+padat) = vloc(ix,iy,iz) * ur(1,ix,iy,iz+padat)
2183              ur(2,ix,iy,iz+padat) = vloc(ix,iy,iz) * ur(2,ix,iy,iz+padat)
2184            end do
2185          end do
2186        end do
2187      end do
2188      !
2189    end if
2190    !
2191  else if (cplex==2)then
2192    !
2193    if (ndat==1) then
2194 !$OMP PARALLEL DO PRIVATE(fre,fim)
2195      do iz=1,nz
2196        do iy=1,ny
2197          do ix=1,nx
2198            fre = ur(1,ix,iy,iz)
2199            fim = ur(2,ix,iy,iz)
2200            ur(1,ix,iy,iz) = vloc(2*ix-1,iy,iz)*fre - vloc(2*ix,iy,iz)*fim
2201            ur(2,ix,iy,iz) = vloc(2*ix-1,iy,iz)*fim + vloc(2*ix,iy,iz)*fre
2202          end do
2203        end do
2204      end do
2205    else
2206 !$OMP PARALLEL DO PRIVATE(padat,fre,fim)
2207      do idat=1,ndat
2208        padat = ldz*(idat-1)
2209        do iz=1,nz
2210          do iy=1,ny
2211            do ix=1,nx
2212              fre = ur(1,ix,iy,iz+padat)
2213              fim = ur(2,ix,iy,iz+padat)
2214              ur(1,ix,iy,iz+padat) = vloc(2*ix-1,iy,iz)*fre - vloc(2*ix,iy,iz)*fim
2215              ur(2,ix,iy,iz+padat) = vloc(2*ix-1,iy,iz)*fim + vloc(2*ix,iy,iz)*fre
2216            end do
2217          end do
2218        end do
2219      end do
2220    end if
2221    !
2222  else
2223    ur = huge(one)
2224    !ABI_BUG("Wrong cplex")
2225  end if
2226 
2227 end subroutine cg_vlocpsi

m_cgtools/cg_zaxpby [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zaxpby

FUNCTION

  Scales two vectors, adds them to one another and stores result in the vector.
  y := a*x + b*y

INPUTS

 n = the number of elements in vectors x and y.
 a = Specifies the scalar a.
 x = Array.
 b = Specifies the scalar b.
 y = Array

OUTPUT

 y Contains the updated vector y.

SOURCE

718 subroutine cg_zaxpby(n, a, x, b, y)
719 
720 !Arguments ------------------------------------
721 !scalars
722  integer,intent(in) :: n
723  real(dp),intent(in) :: a(2),b(2)
724 !arrays
725  real(dp),intent(in) :: x(2*n)
726  real(dp),intent(inout) :: y(2*n)
727 
728 ! *************************************************************************
729 
730 #ifdef HAVE_LINALG_AXPBY
731  call zaxpby(n, a, x, 1, b, y, 1)
732 #else
733  call zscal(n, b, y, 1)
734  call zaxpy(n, a, x, 1, y,1)
735 #endif
736 
737 end subroutine cg_zaxpby

m_cgtools/cg_zaxpy [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zaxpy

FUNCTION

  Computes y = alpha*x + y

INPUTS

  n = Specifies the number of elements in vectors x and y.
  alpha = Specifies the scalar alpha.
  x = Array

SIDE EFFECTS

  y = Array. In output, y contains the updated vector.

SOURCE

675 subroutine cg_zaxpy(n, alpha, x, y)
676 
677 !Arguments ------------------------------------
678 !scalars
679  integer,intent(in) :: n
680  real(dp),intent(in) :: alpha(2)
681 !arrays
682  real(dp),intent(in) :: x(2*n)
683  real(dp),intent(inout) :: y(2*n)
684 
685 ! *************************************************************************
686 
687  if (alpha(2) == zero) then
688    call daxpy(2*n, alpha(1), x, 1, y, 1)
689  else
690    call zaxpy(n, alpha, x, 1, y, 1)
691  end if
692 
693 end subroutine cg_zaxpy

m_cgtools/cg_zaxpy_many_areal [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zaxpy_many_areal

FUNCTION

  Computes y = alpha*x + y

INPUTS

  n = Specifies the number of elements in vectors x and y.
  ndat
  alpha(ndat) = Specifies the scalar alpha.
  x = Array

SIDE EFFECTS

  y = Array. In output, y contains the updated vector.

SOURCE

5795 subroutine cg_zaxpy_many_areal(npwsp, ndat, alphas, x, y)
5796 
5797 !Arguments ------------------------------------
5798 !scalars
5799  integer,intent(in) :: npwsp, ndat
5800  real(dp),intent(in) :: alphas(ndat)
5801 !arrays
5802  real(dp),intent(in) :: x(2*npwsp, ndat)
5803  real(dp),intent(inout) :: y(2*npwsp, ndat)
5804 
5805 !Local variables-------------------------------
5806  integer :: idat
5807 ! *************************************************************************
5808 
5809 !$OMP PARALLEL DO IF (ndat > 1)
5810  do idat=1,ndat
5811    call daxpy(2*npwsp, alphas(idat), x(1,idat), 1, y(1,idat), 1)
5812  end do
5813 
5814 end subroutine cg_zaxpy_many_areal

m_cgtools/cg_zcopy [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zcopy

FUNCTION

  Perform y = x, where x and y are vectors.

INPUTS

  n = Specifies the number of elements in vectors x and y.
  x = Input Array

OUTPUT

  y = In output, y contains a copy of the values of x.

SOURCE

418 subroutine cg_zcopy(n, x, y)
419 
420 !Arguments ------------------------------------
421 !scalars
422  integer,intent(in) :: n
423 !arrays
424  real(dp),intent(in) :: x(2*n)
425  real(dp),intent(out) :: y(2*n)
426 
427 ! *************************************************************************
428 
429  call zcopy(n, x, 1, y, 1)
430 
431 end subroutine cg_zcopy

m_cgtools/cg_zdotc [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zdotc

FUNCTION

   Perform a vector-vector operation defined as res = \Sigma (conjg(x)*y) where x and y are n-element vectors.

INPUTS

  n = Specifies the number of elements in vector x and y
  x,y = Input arrays.

OUTPUT

  res(2)=Real and Imaginary part of the scalar product.

SOURCE

524 function cg_zdotc(n, x, y) result(res)
525 
526 !Arguments ------------------------------------
527 !scalars
528  integer,intent(in) :: n
529 !arrays
530  real(dp),intent(in) :: x(2,n)
531  real(dp),intent(in) :: y(2,n)
532  real(dp) :: res(2)
533 
534 !Local variables-------------------------------
535 #ifdef HAVE_LINALG_ZDOTC_BUG
536  integer :: ii
537 #else
538  complex(dpc) :: cres
539  complex(dpc),external :: zdotc
540 #endif
541 
542 ! *************************************************************************
543 
544 #ifdef HAVE_LINALG_ZDOTC_BUG
545  ! Workaround for veclib on MacOSx
546  res = zero
547 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res)
548  do ii=1,n
549    res(1) = res(1) + x(1,ii)*y(1,ii) + x(2,ii)*y(2,ii)
550    res(2) = res(2) + x(1,ii)*y(2,ii) - x(2,ii)*y(1,ii)
551  end do
552 
553 #else
554  cres = zdotc(n, x, 1, y, 1)
555  res(1) = REAL(cres)
556  res(2) = AIMAG(cres)
557 #endif
558 
559 end function cg_zdotc

m_cgtools/cg_zdotg_zip [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zdotg_zip

FUNCTION

  Compute <cg1|cg2> for ndat states

INPUTS

OUTPUT

SOURCE

5700 subroutine cg_zdotg_zip(istwf_k, npwsp, ndat, option, cg1, cg2, dots, me_g0, comm)
5701 
5702  integer,intent(in) :: istwf_k, npwsp, ndat, option, me_g0, comm
5703  real(dp),intent(in) :: cg1(2*npwsp,ndat), cg2(2*npwsp,ndat)
5704  real(dp),intent(out) :: dots(2,ndat)
5705 
5706 !Local variables-------------------------------
5707  integer :: idat, ierr
5708  real(dp) :: dotr, doti, re_dots(ndat)
5709 ! *************************************************************************
5710 
5711 !$OMP PARALLEL DO IF (ndat > 1) PRIVATE(dotr, doti)
5712  do idat=1,ndat
5713    call dotprod_g(dotr, doti, istwf_k, npwsp, option, cg1(:,idat), cg2(:,idat), me_g0, xmpi_comm_self)
5714    if (istwf_k == 2) then
5715      re_dots(idat) = dotr
5716    else
5717      dots(:, idat) = [dotr, doti]
5718    end if
5719  end do
5720 
5721  if (xmpi_comm_size(comm) > 1) then
5722    if (istwf_k == 2) then
5723      call xmpi_sum(re_dots, comm, ierr)
5724    else
5725      call xmpi_sum(dots, comm, ierr)
5726    end if
5727  end if
5728 
5729  if (istwf_k == 2) then
5730    do idat=1,ndat
5731      dots(1,idat) = re_dots(idat)
5732      dots(2,idat) = zero
5733    end do
5734  end if
5735 
5736 end subroutine cg_zdotg_zip

m_cgtools/cg_zdotu [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zdotu

FUNCTION

   Perform a vector-vector operation defined as res = \Sigma (x*y) where x and y are n-element vectors.
   Note that x is unconjugated.

INPUTS

  n = Specifies the number of elements in vector x and y
  x,y = Input arrays.

OUTPUT

  res(2)=Real and Imaginary part of the scalar product.

SOURCE

619 function cg_zdotu(n, x, y) result(res)
620 
621 !Arguments ------------------------------------
622 !scalars
623  integer,intent(in) :: n
624 !arrays
625  real(dp),intent(in) :: x(2,n)
626  real(dp),intent(in) :: y(2,n)
627  real(dp) :: res(2)
628 
629 !Local variables-------------------------------
630 #ifdef HAVE_LINALG_ZDOTU_BUG
631  integer :: ii
632 #else
633  complex(dpc) :: cres
634  complex(dpc),external :: zdotu
635 #endif
636 
637 ! *************************************************************************
638 
639 #ifdef HAVE_LINALG_ZDOTU_BUG
640  ! Workaround for veclib on MacOSx
641  res = zero
642 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:res)
643  do ii=1,n
644    res(1) = res(1) + x(1,ii)*y(1,ii) - x(2,ii)*y(2,ii)
645    res(2) = res(2) + x(1,ii)*y(2,ii) + x(2,ii)*y(1,ii)
646  end do
647 #else
648  cres = zdotu(n, x, 1, y, 1)
649  res(1) = REAL(cres)
650  res(2) = AIMAG(cres)
651 #endif
652 
653 end function cg_zdotu

m_cgtools/cg_zgemm [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zgemm

FUNCTION

  The cg_zgemm routines perform a matrix-matrix operation with general matrices.
  The operation is defined as C := alpha*op(A)*op(B) + beta*C, where:

  op(x) is one of op(x) = x, or op(x) = x', or op(x) = conjg(x'),

  alpha and beta are scalars,
  A, B and C are matrices:
  op(A) is an m-by-k matrix,
  op(B) is a k-by-n matrix,
  C is an m-by-n matrix.

INPUTS

OUTPUT

SOURCE

823 subroutine cg_zgemm(transa, transb, npwsp, ncola, ncolb, cg_a, cg_b, cg_c, alpha, beta)
824 
825 !Arguments ------------------------------------
826 !scalars
827  integer,intent(in) :: npwsp,ncola,ncolb
828  real(dp),optional,intent(in) :: alpha(2), beta(2)
829  character(len=1),intent(in) :: transa, transb
830 !arrays
831  real(dp),intent(in) :: cg_a(2,npwsp*ncola), cg_b(2,npwsp*ncolb)
832  real(dp),intent(inout) :: cg_c(2,*)
833 
834 !Local variables-------------------------------
835 !scalars
836  integer :: mm,nn,kk,lda,ldb,ldc
837  !real(dp) :: my_alpha(2),my_beta(2)
838  complex(dpc) :: my_calpha, my_cbeta
839 
840 ! *************************************************************************
841 
842  lda = npwsp
843  ldb = npwsp
844 
845  mm  = npwsp
846  nn  = ncolb
847  kk  = ncola
848 
849  if (toupper(transa) /= 'N') then
850    mm = ncola
851    kk = npwsp
852  end if
853  if (toupper(transb) /= 'N') nn = npwsp
854 
855  ldc = mm
856 
857  !my_alpha = cg_cone;  if (PRESENT(alpha)) my_alpha = alpha
858  !my_beta  = cg_czero; if (PRESENT(beta))  my_beta  = beta
859  !call ZGEMM(transa, transb, mm, nn, kk, my_alpha, cg_a, lda, cg_b, ldb, my_beta, cg_c, ldc)
860  !call ZGEMM3M(transa, transb, mm, nn, kk, my_alpha, cg_a, lda, cg_b, ldb, my_beta, cg_c, ldc)
861 
862  my_calpha = cone;  if (PRESENT(alpha)) my_calpha = DCMPLX(alpha(1), alpha(2))
863  my_cbeta  = czero; if (PRESENT(beta))  my_cbeta  = DCMPLX(beta(1), beta(2))
864 
865  call abi_zgemm_2r(transa, transb, mm, nn, kk, my_calpha, cg_a, lda, cg_b, ldb, my_cbeta, cg_c, ldc)
866 
867 end subroutine cg_zgemm

m_cgtools/cg_zgemv [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zgemv

FUNCTION

 The cg_zgemv routines perform a **complex** matrix-vector operation defined as:

      y := alpha*A*x + beta*y,
 or
      y := alpha*A'*x + beta*y,
 or
      y := alpha*conjg(A')*x + beta*y,

 where: alpha and beta are COMPLEX scalars, x and y are COMPLEX vectors, A is a m-by-n COMPLEX matrix.
 Default is: alpha = 1 and beta = 0.

INPUTS

OUTPUT

SOURCE

764 subroutine cg_zgemv(trans, nrows, ncols, cgmat, vec, matvec, alpha, beta)
765 
766 !Arguments ------------------------------------
767 !scalars
768  integer,intent(in) :: nrows, ncols
769  real(dp),optional,intent(in) :: alpha(2), beta(2)
770  character(len=1),intent(in) :: trans
771 !arrays
772  real(dp),intent(in) :: cgmat(2,nrows*ncols), vec(2,*)
773  real(dp),intent(inout) :: matvec(2,*)
774 
775 !Local variables-------------------------------
776 !scalars
777  integer :: mm, nn, kk, lda, ldb, ldc
778  real(dp) :: my_alpha(2), my_beta(2)
779 
780 ! *************************************************************************
781 
782  my_alpha = cg_cone;  if (present(alpha)) my_alpha = alpha
783  my_beta  = cg_czero; if (present(beta))  my_beta  = beta
784 
785  lda = nrows; mm = nrows; nn = 1; kk = ncols
786  if (toupper(trans) /= 'N') then
787    mm = ncols; kk = nrows
788  end if
789  ldb = kk; ldc = mm
790 
791  call ZGEMM(trans, "N", mm, nn, kk, my_alpha, cgmat, lda, vec, ldb, my_beta, matvec, ldc)
792  ! ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
793 
794  !call ZGEMV(trans,mm,nn,my_alpha,cgmat,lda,vec,1,my_beta,matvec,1)
795 
796 end subroutine cg_zgemv

m_cgtools/cg_zprecon_block [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 cg_zprecon_block

FUNCTION

 precondition $<G|(H-e_{n,k})|C_{n,k}>$ for a block of band (band-FFT parallelisation)

INPUTS

  blocksize= size of blocks of bands
  $cg(vectsize,blocksize)=<G|C_{n,k}> for a block of bands$.
  $eval(blocksize,blocksize)=current block of bands eigenvalues=<C_{n,k}|H|C_{n,k}>$.
  $ghc(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$.
  iterationnumber=number of iterative minimizations in LOBPCG
  kinpw(npw)=(modified) kinetic energy for each plane wave (Hartree)
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  $vect(vectsize,blocksize)=<G|H|C_{n,k}> for a block of bands$.
  npw=number of planewaves at this k point.
  optekin= 1 if the kinetic energy used in preconditionning is modified
             according to Kresse, Furthmuller, PRB 54, 11169 (1996) [[cite:Kresse1996]]
           0 otherwise
  optpcon= 0 the TPA preconditionning matrix does not depend on band
           1 the TPA preconditionning matrix (not modified)
           2 the TPA preconditionning matrix is independant of iteration number
  vectsize= size of vectors
  comm=MPI communicator.

OUTPUT

  vect(2,npw)=<g|(h-eval)|c_{n,k}>*(polynomial ratio)

SIDE EFFECTS

  pcon(npw,blocksize)=preconditionning matrix
            input  if optpcon=0,2 and iterationnumber/=1
            output if optpcon=0,2 and iterationnumber==1

SOURCE

3748 subroutine cg_zprecon_block(cg,eval,blocksize,iterationnumber,kinpw,&
3749 &  npw,nspinor,optekin,optpcon,pcon,ghc,vect,vectsize,comm)
3750 
3751 !Arguments ------------------------------------
3752 !scalars
3753  integer,intent(in) :: blocksize,iterationnumber,npw,nspinor,optekin
3754  integer,intent(in) :: optpcon,vectsize,comm
3755 !arrays
3756  real(dp),intent(in) :: kinpw(npw)
3757  real(dp),intent(inout) :: pcon(npw,blocksize)
3758  complex(dpc),intent(in) :: cg(vectsize,blocksize),eval(blocksize,blocksize)
3759  complex(dpc),intent(in) :: ghc(vectsize,blocksize)
3760  complex(dpc),intent(inout) :: vect(vectsize,blocksize)
3761 
3762 !Local variables-------------------------------
3763 !scalars
3764  integer :: iblocksize,ierr,ig,igs,ispinor
3765  real(dp) :: fac,poly,xx
3766  !character(len=500) :: msg
3767 !arrays
3768  real(dp) :: tsec(2)
3769  real(dp),allocatable :: ek0(:),ek0_inv(:)
3770 
3771 ! *************************************************************************
3772 
3773  call timab(536,1,tsec)
3774 
3775 !In this case, the Teter, Allan and Payne preconditioner is approximated:
3776 !the factor xx=Ekin(G) and no more Ekin(G)/Ekin(iband)
3777  if (optpcon==0) then
3778    do ispinor=1,nspinor
3779      igs=(ispinor-1)*npw
3780      do ig=1+igs,npw+igs
3781        if (iterationnumber==1) then
3782          if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3783            xx=kinpw(ig-igs)
3784 !          teter polynomial ratio
3785            poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3786            fac=poly/(poly+16._dp*xx**4)
3787            if (optekin==1) fac=two*fac
3788            pcon(ig-igs,1)=fac
3789            do iblocksize=1,blocksize
3790              vect(ig,iblocksize)=(ghc(ig,iblocksize)-eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3791            end do
3792          else
3793            pcon(ig-igs,1)=zero
3794            vect(ig,:)=dcmplx(0.0_dp,0.0_dp)
3795          end if
3796        else
3797          do iblocksize=1,blocksize
3798            vect(ig,iblocksize)=(ghc(ig,iblocksize)-eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,1)
3799          end do
3800        end if
3801      end do
3802    end do
3803 
3804  else if (optpcon>0) then
3805 !  Compute mean kinetic energy of all bands
3806    ABI_MALLOC(ek0,(blocksize))
3807    ABI_MALLOC(ek0_inv,(blocksize))
3808    if (iterationnumber==1.or.optpcon==1) then
3809      do iblocksize=1,blocksize
3810        ek0(iblocksize)=0.0_dp
3811        do ispinor=1,nspinor
3812          igs=(ispinor-1)*npw
3813          do ig=1+igs,npw+igs
3814            if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3815              ek0(iblocksize)=ek0(iblocksize)+kinpw(ig-igs)*&
3816 &             (real(cg(ig,iblocksize))**2+aimag(cg(ig,iblocksize))**2)
3817            end if
3818          end do
3819        end do
3820      end do
3821 
3822      call xmpi_sum(ek0,comm,ierr)
3823 
3824      do iblocksize=1,blocksize
3825        if(ek0(iblocksize)<1.0d-10)then
3826          ABI_WARNING('the mean kinetic energy of a wavefunction vanishes. it is reset to 0.1ha.')
3827          ek0(iblocksize)=0.1_dp
3828        end if
3829      end do
3830      if (optekin==1) then
3831        ek0_inv(:)=2.0_dp/(3._dp*ek0(:))
3832      else
3833        ek0_inv(:)=1.0_dp/ek0(:)
3834      end if
3835    end if !iterationnumber==1.or.optpcon==1
3836 
3837 !  Carry out preconditioning
3838    do iblocksize=1,blocksize
3839      do ispinor=1,nspinor
3840        igs=(ispinor-1)*npw
3841        do ig=1+igs,npw+igs
3842          if (iterationnumber==1.or.optpcon==1) then
3843            if(kinpw(ig-igs)<huge(zero)*1.d-11)then
3844              xx=kinpw(ig-igs)*ek0_inv(iblocksize)
3845 !            teter polynomial ratio
3846              poly=27._dp+xx*(18._dp+xx*(12._dp+xx*8._dp))
3847              fac=poly/(poly+16._dp*xx**4)
3848              if (optekin==1) fac=two*fac
3849              pcon(ig-igs,iblocksize)=fac
3850              vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3851 &             eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize)
3852            else
3853              pcon(ig-igs,iblocksize)=zero
3854              vect(ig,iblocksize)=dcmplx(0.0_dp,0.0_dp)
3855            end if
3856          else
3857            vect(ig,iblocksize)=(ghc(ig,iblocksize)-&
3858 &           eval(iblocksize,iblocksize)*cg(ig,iblocksize))*pcon(ig-igs,iblocksize)
3859          end if
3860        end do
3861      end do
3862    end do
3863    ABI_FREE(ek0)
3864    ABI_FREE(ek0_inv)
3865  end if !optpcon
3866 
3867  call timab(536,2,tsec)
3868 
3869 end subroutine cg_zprecon_block

m_cgtools/cg_zscal [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cg_zscal

FUNCTION

  Perform x = a*x

INPUTS

  n = Specifies the number of elements in vector x.
  a(2)= The scalar a. If a(2) is zero, x = a*x is computed via zdscal

OUTPUT

  x = Updated vector.

SOURCE

452 subroutine cg_zscal(n, a, x)
453 
454 !Arguments ------------------------------------
455 !scalars
456  integer,intent(in) :: n
457  real(dp),intent(in) :: a(2)
458 !arrays
459  real(dp),intent(inout) :: x(2*n)
460 
461 ! *************************************************************************
462 
463  if (a(2) == zero) then
464    call dscal(2*n, a(1), x, 1)
465  else
466    call zscal(n, a, x, 1)
467  end if
468 
469 end subroutine cg_zscal

m_cgtools/cgnc_cholesky [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgnc_cholesky

FUNCTION

  Cholesky orthonormalization of the vectors stored in cg (version optimized for NC wavefunctions).

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of band in cg
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  cg(2*npwsp*nband)
    input: Input set of vectors.
    output: Orthonormalized set.

OUTPUT

  [umat]=Cholesky upper triangle matrix.

SOURCE

2256 subroutine cgnc_cholesky(npwsp, nband, cg, istwfk, me_g0, comm_pw, use_gemm, umat)
2257 
2258 !Arguments ------------------------------------
2259 !scalars
2260  integer,intent(in) :: npwsp, nband, istwfk, comm_pw, me_g0
2261  logical,optional,intent(in) :: use_gemm
2262 !arrays
2263  real(dp),intent(inout) :: cg(2*npwsp*nband)
2264  real(dp),optional,allocatable,intent(out) :: umat(:,:,:)
2265 
2266 !Local variables ------------------------------
2267 !scalars
2268  integer :: ierr,b1,b2
2269 #ifdef DEBUG_MODE
2270  integer :: ptr
2271  character(len=500) :: msg
2272 #endif
2273  !real(dp) :: max_absimag
2274  logical :: my_usegemm
2275 !arrays
2276  real(dp) :: rcg0(nband)
2277  real(dp),allocatable :: r_ovlp(:,:), c_ovlp(:,:,:)
2278 
2279 ! *************************************************************************
2280 
2281 #ifdef DEBUG_MODE
2282  if (istwfk == 2 .and. me_g0 == 1) then
2283    ierr = 0
2284    do b1=1,nband
2285      ptr = 2 + 2*(b1-1)*npwsp
2286      if (abs(cg(ptr)) > zero) then
2287        ierr = ierr + 1
2288        write(msg,'(a,i0,es13.6)')" Input b1, Im u(g=0) should be zero ",b1,cg(ptr)
2289        call wrtout(std_out, msg)
2290        !cg(ptr) = zero
2291      end if
2292    end do
2293    ABI_CHECK(ierr == 0, "Non zero imag part")
2294  end if
2295 #endif
2296 
2297  ! In matrix notation O = PSI^H PSI = U^H U  where PSI is a (ng,nb) matrix with the input wavefunctions
2298  ! The new orthogonalized states PHI is given by: PHI = PSI U^{-1}
2299 
2300  my_usegemm = .FALSE.; if (PRESENT(use_gemm)) my_usegemm = use_gemm
2301 
2302  if (istwfk /= 1) then
2303    ! Version optimized for real wavefunctions.
2304    ABI_MALLOC(r_ovlp, (nband, nband))
2305 
2306    !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag)
2307 
2308    ! 1) Calculate O_ij = <phi_i|phi_j> (real symmetric matrix)
2309    if (my_usegemm) then
2310      call DGEMM("T", "N", nband, nband, 2*npwsp, one, cg, 2*npwsp, cg, 2*npwsp, zero, r_ovlp, nband)
2311    else
2312      call DSYRK("U", "T", nband, 2*npwsp, one, cg, 2*npwsp, zero, r_ovlp, nband)
2313    end if
2314 
2315    r_ovlp = two * r_ovlp
2316    if (istwfk == 2 .and. me_g0 == 1) then
2317      ! Extract the real part at G=0 and subtract its contribution to the overlap.
2318      call dcopy(nband, cg, 2*npwsp, rcg0, 1)
2319      do b2=1,nband
2320        do b1=1,b2
2321          r_ovlp(b1, b2) = r_ovlp(b1, b2) - rcg0(b1) * rcg0(b2)
2322        end do
2323      end do
2324    end if
2325 
2326    ! Sum the overlap if PW are distributed.
2327    if (comm_pw /= xmpi_comm_self) call xmpi_sum(r_ovlp, comm_pw, ierr)
2328 
2329    ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
2330    call DPOTRF('U', nband, r_ovlp, nband, ierr)
2331    ABI_CHECK(ierr == 0, sjoin('DPOTRF returned info:', itoa(ierr)))
2332 
2333    ! 3) Solve X U = cg. On exit cg is orthonormalized.
2334    call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, cg, 2*npwsp)
2335 
2336    if (present(umat)) then
2337      ABI_REMALLOC(umat, (1, nband, nband))
2338      umat(1,:,:) = r_ovlp
2339    end if
2340 
2341    ABI_FREE(r_ovlp)
2342 
2343  else
2344    ! Version for complex wavefunctions.
2345    ABI_MALLOC(c_ovlp, (2, nband, nband))
2346 
2347    ! 1) Calculate O_ij = <phi_i|phi_j> (complex Hermitean)
2348    if (my_usegemm) then
2349      call abi_zgemm_2r("C", "N", nband, nband, npwsp, cone, cg, npwsp, cg, npwsp, czero, c_ovlp, nband)
2350    else
2351      call ZHERK("U", "C", nband, npwsp, cone, cg, npwsp, czero, c_ovlp, nband)
2352    end if
2353 
2354    ! Sum the overlap if PW are distributed.
2355    if (comm_pw /= xmpi_comm_self) call xmpi_sum(c_ovlp, comm_pw, ierr)
2356 
2357    ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
2358    call ZPOTRF('U', nband, c_ovlp, nband, ierr)
2359    ABI_CHECK(ierr == 0, sjoin('ZPOTRF returned info:', itoa(ierr)))
2360 
2361    ! 3) Solve X U = cg. On exit cg is orthonormalized.
2362    call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, cg, npwsp)
2363 
2364    if (present(umat)) then
2365      ABI_REMALLOC(umat, (2, nband, nband))
2366      umat = c_ovlp
2367    end if
2368 
2369    ABI_FREE(c_ovlp)
2370  end if
2371 
2372 #ifdef DEBUG_MODE
2373  if (istwfk == 2) then
2374    ierr = 0
2375    do b1=1,nband
2376      ptr = 2 + 2*(b1-1)*npwsp
2377      if (ABS(cg(ptr)) > zero) then
2378        ierr = ierr + 1
2379        write(msg,'(a,i0,es13.6)')" Output b1, Im u(g=0) should be zero ",b1,cg(ptr)
2380      end if
2381    end do
2382    ABI_CHECK(ierr == 0, "Non zero imag part")
2383  end if
2384 #endif
2385 
2386 end subroutine cgnc_cholesky

m_cgtools/cgnc_gramschmidt [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgnc_grortho

FUNCTION

  Gram-Schmidt orthonormalization of the vectors stored in cg

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of band in cg
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  cg(2*npwsp*nband)
    input: Input set of vectors.
    output: Orthonormalized set.

SOURCE

2704 subroutine cgnc_gramschmidt(npwsp, nband, cg, istwfk, me_g0, comm_pw)
2705 
2706 !Arguments ------------------------------------
2707 !scalars
2708  integer,intent(in) :: npwsp, nband, istwfk, comm_pw, me_g0
2709 !arrays
2710  real(dp),intent(inout) :: cg(2*npwsp*nband)
2711 
2712 !Local variables ------------------------------
2713 !scalars
2714  integer :: b1,nb2,opt
2715  logical :: normalize
2716 
2717 ! *************************************************************************
2718 
2719  ! Normalize the first vector.
2720  call cgnc_normalize(npwsp,1,cg(1),istwfk,me_g0,comm_pw)
2721  if (nband == 1) RETURN
2722 
2723  ! Orthogonaluze b1 wrt to the bands in [1,b1-1].
2724  normalize = .TRUE.
2725  do b1=2,nband
2726    opt = 1 + 2*npwsp*(b1-1)
2727    nb2=b1-1
2728    call cgnc_gsortho(npwsp,nb2,cg(1),1,cg(opt),istwfk,normalize,me_g0,comm_pw)
2729  end do
2730 
2731 end subroutine cgnc_gramschmidt

m_cgtools/cgnc_gsortho [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgnc_gsortho

FUNCTION

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband1=Number of vectors in icg1
  nband1=Number of vectors in cg2
  comm_pw=MPI communicator.

SIDE EFFECTS

  cg2(2*npwsp*nband2)
  icg1(2*npwsp*nband1)
    input: Input set of vectors.
    output: Orthonormalized set.

SOURCE

2622 subroutine cgnc_gsortho(npwsp, nband1, icg1, nband2, iocg2, istwfk, normalize, me_g0, comm_pw)
2623 
2624 !Arguments ------------------------------------
2625 !scalars
2626  integer,intent(in) :: npwsp,nband1,nband2,istwfk,me_g0
2627  integer,optional,intent(in) :: comm_pw
2628  logical,intent(in) :: normalize
2629 !arrays
2630  real(dp),intent(in) :: icg1(2*npwsp*nband1)
2631  real(dp),intent(inout) :: iocg2(2*npwsp*nband2)
2632 
2633 !Local variables ------------------------------
2634 !scalars
2635  integer :: ierr,b1,b2
2636 !arrays
2637  real(dp) :: r_icg1(nband1),r_iocg2(nband2)
2638  real(dp),allocatable :: proj(:,:,:)
2639 
2640 ! *************************************************************************
2641 
2642  ABI_MALLOC(proj, (2, nband1, nband2))
2643  !proj = zero
2644 
2645  ! 1) Calculate <cg1|cg2>
2646  call cg_zgemm("C", "N", npwsp, nband1, nband2, icg1, iocg2, proj)
2647 
2648  if (istwfk>1) then
2649    ! nspinor is always 1 in this case.
2650    ! Account for the missing G and set the imaginary part to zero since wavefunctions are real.
2651    proj(1,:,:) = two * proj(1,:,:)
2652    proj(2,:,:) = zero
2653    !
2654    if (istwfk==2 .and. me_g0==1) then
2655      ! Extract the real part at G=0 and subtract its contribution.
2656      call dcopy(nband1,icg1, 2*npwsp,r_icg1, 1)
2657      call dcopy(nband2,iocg2,2*npwsp,r_iocg2,1)
2658      do b2=1,nband2
2659        do b1=1,nband1
2660          proj(1,b1,b2) = proj(1,b1,b2) - r_icg1(b1) * r_iocg2(b2)
2661        end do
2662      end do
2663    end if
2664    !
2665  end if
2666  !
2667  ! This is for the MPI version
2668  if (comm_pw /= xmpi_comm_self) call xmpi_sum(proj,comm_pw,ierr)
2669 
2670  ! 2) cg2 = cg2 - <cg1|cg2> cg1
2671  call cg_zgemm("N","N",npwsp,nband1,nband2,icg1,proj,iocg2,alpha=-cg_cone,beta=cg_cone)
2672 
2673  ABI_FREE(proj)
2674 
2675  ! 3) Normalize iocg2 if required.
2676  if (normalize) call cgnc_normalize(npwsp,nband2,iocg2,istwfk,me_g0,comm_pw)
2677 
2678 end subroutine cgnc_gsortho

m_cgtools/cgnc_normalize [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgnc_normalize

FUNCTION

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of vectors in icg1

SIDE EFFECTS

SOURCE

2539 subroutine cgnc_normalize(npwsp, nband, cg, istwfk, me_g0, comm_pw)
2540 
2541 !Arguments ------------------------------------
2542 !scalars
2543  integer,intent(in) :: npwsp,nband,istwfk,me_g0,comm_pw
2544 !arrays
2545  real(dp),intent(inout) :: cg(2*npwsp*nband)
2546 
2547 !Local variables ------------------------------
2548 !scalars
2549  integer :: ptr,ierr,band
2550  !character(len=500) :: msg
2551 !arrays
2552  real(dp) :: norm(nband),alpha(2)
2553 
2554 ! *************************************************************************
2555 
2556 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1)
2557  do band=1,nband
2558    ptr = 1 + 2*npwsp*(band-1)
2559    norm(band) = cg_dznrm2(npwsp, cg(ptr))
2560    norm(band) = norm(band) ** 2
2561    !norm(band) = cg_real_zdotc(npwsp, cg(ptr), cg(ptr))
2562  end do
2563 
2564  if (istwfk > 1) then
2565    norm = two * norm
2566    if (istwfk == 2 .and. me_g0 == 1) then
2567 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband >1)
2568      do band=1,nband
2569        ptr = 1 + 2*npwsp*(band-1)
2570        norm(band) = norm(band) - cg(ptr)**2
2571      end do
2572    end if
2573  end if
2574 
2575  if (comm_pw /= xmpi_comm_self) call xmpi_sum(norm, comm_pw, ierr)
2576 
2577  ierr = 0
2578  do band=1,nband
2579    if (norm(band) > zero) then
2580      norm(band) = SQRT(norm(band))
2581    else
2582      ierr = ierr + 1
2583    end if
2584  end do
2585 
2586  if (ierr /= 0) then
2587    ABI_ERROR(sjoin("Found ", itoa(ierr)," vectors with norm <= zero!"))
2588  end if
2589 
2590 !$OMP PARALLEL DO PRIVATE(ptr,alpha) IF (nband > 1)
2591  do band=1,nband
2592    ptr = 1 + 2*npwsp*(band-1)
2593    alpha = [one / norm(band), zero]
2594    call cg_zscal(npwsp, alpha, cg(ptr))
2595  end do
2596 
2597 end subroutine cgnc_normalize

m_cgtools/cgpaw_cholesky [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgpaw_cholesky

FUNCTION

  Cholesky orthonormalization of the vectors stored in cg. (version for PAW wavefunctions).

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of band in cg and gsc
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  cg(2*npwsp*nband)
    input: Input set of vectors |C>, S|C>
    output: Orthonormalized set such as  <C|S|C> = 1
  gsc(2*npwsp*nband): destroyed in output.

OUTPUT

  [umat]=Cholesky upper triangle matrix.

SOURCE

2416 subroutine cgpaw_cholesky(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw, umat)
2417 
2418 !Arguments ------------------------------------
2419 !scalars
2420  integer,intent(in) :: npwsp, nband, istwfk, me_g0, comm_pw
2421 !arrays
2422  real(dp),intent(inout) :: cg(2*npwsp*nband), gsc(2*npwsp*nband)
2423  real(dp),optional,allocatable,intent(out) :: umat(:,:,:)
2424 
2425 !Local variables ------------------------------
2426 !scalars
2427  integer :: ierr, b1, b2
2428  !real(dp) :: max_absimag
2429  !character(len=500) :: msg
2430 !arrays
2431  real(dp) :: rcg0(nband), rg0sc(nband)
2432  real(dp),allocatable :: r_ovlp(:,:), c_ovlp(:,:,:)
2433 
2434 ! *************************************************************************
2435 
2436  if (istwfk /= 1) then
2437    ! Version optimized for real wavefunctions.
2438    ABI_MALLOC(r_ovlp, (nband, nband))
2439 
2440    !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag)
2441    !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, gsc, max_absimag)
2442 
2443 #ifdef HAVE_LINALG_GEMMT
2444    ! Use zgemmt extension BLAS3 provided by e.g. MKL
2445    r_ovlp = zero
2446    call DGEMMT("U", "T", "N", nband, 2*npwsp, one, cg, 2*npwsp, gsc, 2*npwsp, zero, r_ovlp, nband)
2447 #else
2448    call DGEMM("T", "N", nband, nband, 2*npwsp, one, cg, 2*npwsp, gsc, 2*npwsp, zero, r_ovlp, nband)
2449 #endif
2450    r_ovlp = two * r_ovlp
2451 
2452    if (istwfk == 2 .and. me_g0 == 1) then
2453      ! Extract the real part at G=0 and subtract its contribution to the overlap.
2454      call dcopy(nband, cg, 2*npwsp, rcg0, 1)
2455      call dcopy(nband, gsc, 2*npwsp, rg0sc, 1)
2456      do b2=1,nband
2457        do b1=1,b2
2458          r_ovlp(b1,b2) = r_ovlp(b1,b2) - rcg0(b1) * rg0sc(b2)
2459        end do
2460      end do
2461    end if
2462 
2463    ! Sum the overlap if PW are distributed.
2464    if (comm_pw /= xmpi_comm_self) call xmpi_sum(r_ovlp, comm_pw, ierr)
2465 
2466    ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
2467    call DPOTRF('U', nband, r_ovlp, nband, ierr)
2468    ABI_CHECK(ierr == 0, sjoin('DPOTRF returned info:', itoa(ierr)))
2469 
2470    ! 3) Solve X U = cg.
2471    call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, cg, 2*npwsp)
2472 
2473    ! 4) Solve Y U = gsc. On exit <cg|gsc> = 1
2474    call DTRSM('R', 'U', 'N', 'N', 2*npwsp, nband, one, r_ovlp, nband, gsc, 2*npwsp)
2475 
2476    !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, cg, max_absimag)
2477    !call cg_set_imag0_to_zero(istwfk, me_g0, npwsp, nband, gsc, max_absimag)
2478 
2479    if (present(umat)) then
2480      ABI_REMALLOC(umat, (1, nband, nband))
2481      umat(1,:,:) = r_ovlp
2482    end if
2483 
2484    ABI_FREE(r_ovlp)
2485 
2486  else
2487    ! 1) Calculate O_ij =  <phi_i|S|phi_j> (complex Hermitean)
2488    ABI_MALLOC(c_ovlp, (2, nband, nband))
2489 
2490 #ifdef HAVE_LINALG_GEMMT
2491    c_ovlp = zero
2492    call ZGEMMT("U", "C", "N", nband, npwsp, cone, cg, npwsp, gsc, npwsp, czero, c_ovlp, nband)
2493 #else
2494    call abi_zgemm_2r("C", "N", nband, nband, npwsp, cone, cg, npwsp, gsc, npwsp, czero, c_ovlp, nband)
2495 #endif
2496 
2497    ! Sum the overlap if PW are distributed.
2498    if (comm_pw /= xmpi_comm_self) call xmpi_sum(c_ovlp, comm_pw, ierr)
2499    !
2500    ! 2) Cholesky factorization: O = U^H U with U upper triangle matrix.
2501    call ZPOTRF('U', nband, c_ovlp, nband, ierr)
2502    ABI_CHECK(ierr == 0, sjoin('ZPOTRF returned info:', itoa(ierr)))
2503 
2504    ! 3) Solve X U = cg.
2505    call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, cg, npwsp)
2506 
2507    ! 4) Solve Y U = gsc. On exit <cg|gsc> = 1
2508    call ZTRSM('R', 'U', 'N', 'N', npwsp, nband, cone, c_ovlp, nband, gsc, npwsp)
2509 
2510    if (present(umat)) then
2511      ABI_REMALLOC(umat, (2, nband, nband))
2512      umat = c_ovlp
2513    end if
2514 
2515    ABI_FREE(c_ovlp)
2516  end if
2517 
2518  !call cgpaw_normalize(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw)
2519 
2520 end subroutine cgpaw_cholesky

m_cgtools/cgpaw_gramschmidt [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgpaw_gramschmidt

FUNCTION

  Gram-Schmidt orthonormalization of the vectors stored in cg

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of bands in cg
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  cg(2*npwsp*nband), gsc(2*npwsp*nband)
    input: Input set of vectors.
    output: Orthonormalized set.

SOURCE

2934 subroutine cgpaw_gramschmidt(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw)
2935 
2936 !Arguments ------------------------------------
2937 !scalars
2938  integer,intent(in) :: npwsp,nband,istwfk,comm_pw,me_g0
2939 !arrays
2940  real(dp),intent(inout) :: cg(2*npwsp*nband),gsc(2*npwsp*nband)
2941 
2942 !Local variables ------------------------------
2943 !scalars
2944  integer :: b1,nb2,opt
2945  logical :: normalize
2946 
2947 ! *************************************************************************
2948 
2949  ! Normalize the first vector.
2950  call cgpaw_normalize(npwsp,1,cg(1),gsc(1),istwfk,me_g0,comm_pw)
2951  if (nband == 1) RETURN
2952 
2953  ! Orthogonalize b1 wrt to the bands in [1,b1-1].
2954  normalize = .TRUE.
2955  do b1=2,nband
2956    opt = 1 + 2*npwsp*(b1-1)
2957    nb2=b1-1
2958    call cgpaw_gsortho(npwsp,nb2,cg(1),gsc(1),1,cg(opt),gsc(opt),istwfk,normalize,me_g0,comm_pw)
2959  end do
2960 
2961 end subroutine cgpaw_gramschmidt

m_cgtools/cgpaw_gsortho [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgpaw_gsortho

FUNCTION

  This routine uses the Gram-Schmidt method to orthogonalize a set of PAW wavefunctions.
  with respect to an input block of states.

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband1=Number of vectors in the input block icg1
  icg1(2*npwsp*nband1)=Input block of vectors.
  igsc1(2*npwsp*nband1)= S|C> for C in icg1.
  nband2=Number of vectors to orthogonalize
  normalize=True if output wavefunction must be normalized.
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  iocg2(2*npwsp*nband2), iogsc2(2*npwsp*nband1)
    input: set of |C> and S|C> wher |C> is the set of states to orthogonalize
    output: Orthonormalized set.

SOURCE

2850 subroutine cgpaw_gsortho(npwsp, nband1, icg1, igsc1, nband2, iocg2, iogsc2, istwfk, normalize, me_g0, comm_pw)
2851 
2852 !Arguments ------------------------------------
2853 !scalars
2854  integer,intent(in) :: npwsp, nband1, nband2, istwfk, me_g0
2855  integer,optional,intent(in) :: comm_pw
2856  logical,intent(in) :: normalize
2857 !arrays
2858  real(dp),intent(in) :: icg1(2*npwsp*nband1),igsc1(2*npwsp*nband1)
2859  real(dp),intent(inout) :: iocg2(2*npwsp*nband2),iogsc2(2*npwsp*nband2)
2860 
2861 !Local variables ------------------------------
2862 !scalars
2863  integer :: ierr,b1,b2
2864 !arrays
2865  real(dp) :: r_icg1(nband1),r_iocg2(nband2)
2866  real(dp),allocatable :: proj(:,:,:)
2867 
2868 ! *************************************************************************
2869 
2870  ABI_MALLOC(proj,(2,nband1,nband2))
2871 
2872  ! 1) Calculate <cg1|cg2>
2873  call cg_zgemm("C","N",npwsp,nband1,nband2,igsc1,iocg2,proj)
2874 
2875  if (istwfk>1) then
2876    ! nspinor is always 1 in this case.
2877    ! Account for the missing G and set the imaginary part to zero since wavefunctions are real.
2878    proj(1,:,:) = two * proj(1,:,:)
2879    proj(2,:,:) = zero
2880    !
2881    if (istwfk==2 .and. me_g0==1) then
2882      ! Extract the real part at G=0 and subtract its contribution.
2883      call dcopy(nband1,igsc1,2*npwsp,r_icg1, 1)
2884      call dcopy(nband2,iocg2,2*npwsp,r_iocg2,1)
2885      do b2=1,nband2
2886        do b1=1,nband1
2887          proj(1,b1,b2) = proj(1,b1,b2) - r_icg1(b1) * r_iocg2(b2)
2888        end do
2889      end do
2890    end if
2891 
2892  end if
2893 
2894  ! This is for the MPI version
2895  if (comm_pw /= xmpi_comm_self) call xmpi_sum(proj,comm_pw,ierr)
2896 
2897  ! 2)
2898  !   cg2 = cg2 - <Scg1|cg2> cg1
2899  ! S cg2 = S cg2 - <Scg1|cg2> S cg1
2900  call cg_zgemm("N","N",npwsp,nband1,nband2,icg1,proj,iocg2,alpha=-cg_cone,beta=cg_cone)
2901  call cg_zgemm("N","N",npwsp,nband1,nband2,igsc1,proj,iogsc2,alpha=-cg_cone,beta=cg_cone)
2902 
2903  ABI_FREE(proj)
2904 
2905  ! 3) Normalize iocg2 and iogsc2 if required.
2906  if (normalize) call cgpaw_normalize(npwsp, nband2, iocg2, iogsc2, istwfk, me_g0, comm_pw)
2907 
2908 end subroutine cgpaw_gsortho

m_cgtools/cgpaw_normalize [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  cgpaw_normalize

FUNCTION

  Normalize a set of PAW pseudo wavefunctions.

INPUTS

  npwsp=Size of each vector (usually npw*nspinor)
  nband=Number of band in cg and gsc
  istwfk=Storage mode for the wavefunctions. 1 for standard full mode
  me_g0=1 if this node has G=0.
  comm_pw=MPI communicator for the planewave group. Set to xmpi_comm_self for sequential mode.

SIDE EFFECTS

  cg(2*npwsp*nband)
    input: Input set of vectors |C>
    output: Normalized set such as  <C|S|C> = 1
  gsc(2*npwsp*nband)
    input: Input set of vectors S|C>
    output: New S|C> compute with the new |C>

SOURCE

2760 subroutine cgpaw_normalize(npwsp, nband, cg, gsc, istwfk, me_g0, comm_pw)
2761 
2762 !Arguments ------------------------------------
2763 !scalars
2764  integer,intent(in) :: npwsp, nband, istwfk, me_g0, comm_pw
2765 !arrays
2766  real(dp),intent(inout) :: cg(2*npwsp*nband), gsc(2*npwsp*nband)
2767 
2768 !Local variables ------------------------------
2769 !scalars
2770  integer :: ptr,ierr,band
2771  character(len=500) :: msg
2772 !arrays
2773  real(dp) :: norm(nband),alpha(2)
2774 
2775 ! *************************************************************************
2776 
2777 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1)
2778  do band=1,nband
2779    ptr = 1 + 2*npwsp*(band-1)
2780    norm(band) = cg_real_zdotc(npwsp, gsc(ptr), cg(ptr))
2781  end do
2782 
2783  if (istwfk>1) then
2784    norm = two * norm
2785    if (istwfk==2 .and. me_g0==1) then
2786 !$OMP PARALLEL DO PRIVATE(ptr) IF (nband > 1)
2787      do band=1,nband
2788        ptr = 1 + 2*npwsp*(band-1)
2789        norm(band) = norm(band) - gsc(ptr) * cg(ptr)
2790      end do
2791    end if
2792  end if
2793 
2794  if (comm_pw /= xmpi_comm_self) call xmpi_sum(norm, comm_pw, ierr)
2795 
2796  ierr = 0
2797  do band=1,nband
2798    if (norm(band) > zero) then
2799      norm(band) = SQRT(norm(band))
2800    else
2801      ierr = ierr + 1
2802    end if
2803  end do
2804 
2805  if (ierr/=0) then
2806    write(msg,'(a,i0,a)')" Found ",ierr," vectors with norm <= zero!"
2807    ABI_ERROR(msg)
2808  end if
2809 
2810  ! Scale |C> and S|C>.
2811 !$OMP PARALLEL DO PRIVATE(ptr,alpha) IF (nband > 1)
2812  do band=1,nband
2813    ptr = 1 + 2*npwsp*(band-1)
2814    alpha = [one / norm(band), zero]
2815    call cg_zscal(npwsp, alpha, cg(ptr))
2816    call cg_zscal(npwsp, alpha, gsc(ptr))
2817  end do
2818 
2819 end subroutine cgpaw_normalize

m_cgtools/dotprod_g [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 dotprod_g

FUNCTION

 Compute scalar product <vec1|vect2> of complex vectors vect1 and vect2 (can be the same)
 Take into account the storage mode of the vectors (istwf_k)
 If option=1, compute only real part, if option=2 compute also imaginary part.
 If the number of calls to the dot product scales quadratically
 with the volume of the system, it is preferable not to
 call the present routine, but but to write a specially
 optimized routine, that will avoid many branches related to
 the existence of 'option' and 'istwf_k'.

INPUTS

  istwf_k=option parameter that describes the storage of wfs
  vect1(2,npw)=first vector (one should take its complex conjugate)
  vect2(2,npw)=second vector
  npw= (effective) number of planewaves at this k point (including spinorial level)
  option= 1 if only real part to be computed,
          2 if both real and imaginary.
          3 if in case istwf_k==1 must compute real and imaginary parts,
               but if  istwf_k >1 must compute only real part
  me_g0=1 if this processor treats G=0, 0 otherwise
  comm=MPI communicator used to reduce the results.

OUTPUT

  $doti=\Im ( <vect1|vect2> )$ , output only if option=2 and eventually option=3.
  $dotr=\Re ( <vect1|vect2> )$

SOURCE

1027 subroutine dotprod_g(dotr, doti, istwf_k, npw, option, vect1, vect2, me_g0, comm)
1028 
1029 !Arguments ------------------------------------
1030 !scalars
1031  integer,intent(in) :: istwf_k,npw,option,me_g0,comm
1032  real(dp),intent(out) :: doti,dotr
1033 !arrays
1034  real(dp),intent(in) :: vect1(2,npw),vect2(2,npw)
1035 
1036 !Local variables-------------------------------
1037  integer :: ierr
1038  real(dp) :: dotarr(2)
1039 
1040 ! *************************************************************************
1041 
1042  ! Init results indipendently of option.
1043  dotr = zero
1044  doti = zero
1045 
1046  if (istwf_k==1) then
1047    ! General k-point
1048 
1049    if(option==1)then
1050      dotr = cg_real_zdotc(npw,vect1,vect2)
1051    else
1052      dotarr = cg_zdotc(npw,vect1,vect2)
1053      dotr = dotarr(1)
1054      doti = dotarr(2)
1055    end if
1056 
1057  else if (istwf_k==2 .and. me_g0==1) then
1058    ! Gamma k-point and I have G=0
1059    dotr=half*vect1(1,1)*vect2(1,1)
1060    dotr = dotr + cg_real_zdotc(npw-1,vect1(1,2),vect2(1,2))
1061    dotr = two*dotr
1062    if (option==2) doti=zero
1063 
1064  else
1065    ! Other TR k-points
1066    dotr = cg_real_zdotc(npw,vect1,vect2)
1067    dotr=two*dotr
1068    if (option==2) doti=zero
1069  end if
1070 
1071  !Reduction in case of parallelism
1072  if (xmpi_comm_size(comm) > 1) then
1073    if (option==1.or.istwf_k/=1) then
1074      call xmpi_sum(dotr,comm,ierr)
1075    else
1076      dotarr(1)=dotr ; dotarr(2)=doti
1077      call xmpi_sum(dotarr,comm,ierr)
1078      dotr=dotarr(1) ; doti=dotarr(2)
1079    end if
1080  end if
1081 
1082 end subroutine dotprod_g

m_cgtools/dotprod_v [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 dotprod_v

FUNCTION

 Compute dot product of two potentials (integral over FFT grid), to obtain
 a square residual-like quantity (so the sum of product of values
 is NOT divided by the number of FFT points, and NOT multiplied by the primitive cell volume).
 Take into account the spin components of the potentials (nspden), and sum over them.

INPUTS

  cplex=if 1, real space functions on FFT grid are REAL, if 2, COMPLEX
  nfft= (effective) number of FFT grid points (for this processor)
  nspden=number of spin-density components
  opt_storage: 0, if potentials are stored as V^up-up, V^dn-dn, Re[V^up-dn], Im[V^up-dn]
               1, if potentials are stored as V, B_x, B_y, Bz  (B=magn. field)
  pot1(cplex*nfft,nspden)=first real space potential on FFT grid
  pot2(cplex*nfft,nspden)=second real space potential on FFT grid
  comm=MPI communicator in which results will be reduced.

OUTPUT

  dotr= value of the dot product

SOURCE

1264 subroutine dotprod_v(cplex,dotr,nfft,nspden,opt_storage,pot1,pot2,comm)
1265 
1266 !Arguments ------------------------------------
1267 !scalars
1268  integer,intent(in) :: cplex,nfft,nspden,opt_storage,comm
1269  real(dp),intent(out) :: dotr
1270 !arrays
1271  real(dp),intent(in) :: pot1(cplex*nfft,nspden),pot2(cplex*nfft,nspden)
1272 
1273 !Local variables-------------------------------
1274 !scalars
1275  integer :: ierr,ifft,ispden
1276  real(dp) :: ar
1277 !arrays
1278 
1279 ! *************************************************************************
1280 
1281 !Real or complex inputs are coded
1282 
1283  dotr=zero
1284 !$OMP PARALLEL DO COLLAPSE(2) REDUCTION(+:dotr)
1285  do ispden=1,min(nspden,2)
1286    do ifft=1,cplex*nfft
1287      dotr =dotr + pot1(ifft,ispden)*pot2(ifft,ispden)
1288    end do
1289  end do
1290 
1291  if (nspden==4) then
1292    ar=zero
1293 !$OMP PARALLEL DO COLLAPSE(2) REDUCTION(+:ar)
1294    do ispden=3,4
1295      do ifft=1,cplex*nfft
1296        ar = ar + pot1(ifft,ispden)*pot2(ifft,ispden)
1297      end do
1298    end do
1299 
1300    if (opt_storage==0) then
1301      if (cplex==1) then
1302        dotr = dotr+two*ar
1303      else
1304        dotr = dotr+ar
1305      end if
1306    else
1307      dotr = half*(dotr+ar)
1308    end if
1309  end if
1310 
1311 !MPIWF reduction (addition) on dotr is needed here
1312  if (xmpi_comm_size(comm)>1) then
1313    call xmpi_sum(dotr,comm,ierr)
1314  end if
1315 
1316 end subroutine dotprod_v

m_cgtools/dotprod_vn [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 dotprod_vn

FUNCTION

 Compute dot product of potential and density (integral over FFT grid), to obtain
 an energy-like quantity (so the usual dotproduct is divided
 by the number of FFT points, and multiplied by the primitive cell volume).
 Take into account the spin components of the density and potentials (nspden),
 and sum correctly over them. Note that the storage of densities and
 potentials is different : for potential, one stores the matrix components,
 while for the density, one stores the trace, and then, either the
 up component (if nspden=2), or the magnetization vector (if nspden=4).

INPUTS

  cplex=if 1, real space functions on FFT grid are REAL, if 2, COMPLEX
  dens(cplex*nfft,nspden)=real space density on FFT grid
  mpi_enreg=information about MPI parallelization
  nfft= (effective) number of FFT grid points (for this processor)
  nfftot= total number of FFT grid points
  nspden=number of spin-density components
  option= if 1, only the real part is computed
          if 2, both real and imaginary parts are computed  (not yet coded)
  pot(cplex*nfft,nspden)=real space potential on FFT grid
                 (will be complex conjugated if cplex=2 and option=2)
  ucvol=unit cell volume (Bohr**3)

OUTPUT

  doti= imaginary part of the dot product, output only if option=2 (and cplex=2).
  dotr= real part

NOTES

  Concerning storage when nspden=4:
   cplex=1:
     V is stored as : V^11, V^22, Re[V^12], Im[V^12] (complex, hermitian)
     N is stored as : n, m_x, m_y, m_z               (real)
   cplex=2:
     V is stored as : V^11, V^22, V^12, i.V^21 (complex)
     N is stored as : n, m_x, m_y, mz          (complex)

SOURCE

1361 subroutine dotprod_vn(cplex,dens,dotr,doti,nfft,nfftot,nspden,option,pot,ucvol, &
1362     mpi_comm_sphgrid)  ! Optional
1363 
1364 !Arguments ------------------------------------
1365 !scalars
1366  integer,intent(in) :: cplex,nfft,nfftot,nspden,option
1367  integer,intent(in),optional :: mpi_comm_sphgrid
1368  real(dp),intent(in) :: ucvol
1369  real(dp),intent(out) :: doti,dotr
1370 !arrays
1371  real(dp),intent(in) :: dens(cplex*nfft,nspden),pot(cplex*nfft,nspden)
1372 
1373 !Local variables-------------------------------
1374 !scalars
1375  integer  :: ierr,ifft,jfft
1376  real(dp) :: dim11,dim12,dim21,dim22,dim_dn,dim_up,dre11,dre12,dre21,dre22
1377  real(dp) :: dre_dn,dre_up,factor,nproc_sphgrid,pim11,pim12,pim21,pim22,pim_dn,pim_up,pre11
1378  real(dp) :: pre12,pre21,pre22,pre_dn,pre_up
1379  real(dp) :: bx_re,bx_im,by_re,by_im,bz_re,bz_im,v0_re,v0_im
1380 !arrays
1381  real(dp) :: buffer2(2)
1382 
1383 ! *************************************************************************
1384 
1385 !Real or complex inputs are coded
1386  DBG_CHECK(ANY(cplex==(/1,2/)),"Wrong cplex")
1387  DBG_CHECK(ANY(nspden==(/1,2,4/)),"Wrong nspden")
1388 
1389 !Real or complex output are coded
1390  DBG_CHECK(ANY(option==(/1,2/)),"Wrong option")
1391 
1392  dotr=zero; doti=zero
1393 
1394  if(nspden==1)then
1395 
1396    if(option==1 .or. cplex==1 )then
1397 !$OMP PARALLEL DO REDUCTION(+:dotr)
1398      do ifft=1,cplex*nfft
1399        dotr=dotr + pot(ifft,1)*dens(ifft,1)
1400      end do
1401 !    dotr = ddot(cplex*nfft,pot,1,dens,1)
1402 
1403    else  ! option==2 and cplex==2 : one builds the imaginary part, from complex den/pot
1404 
1405 !$OMP PARALLEL DO PRIVATE(jfft) REDUCTION(+:dotr,doti)
1406      do ifft=1,nfft
1407        jfft=2*ifft
1408        dotr=dotr + pot(jfft-1,1)*dens(jfft-1,1) + pot(jfft,1)*dens(jfft  ,1)
1409        doti=doti + pot(jfft-1,1)*dens(jfft  ,1) - pot(jfft,1)*dens(jfft-1,1)
1410      end do
1411 
1412    end if
1413 
1414  else if(nspden==2)then
1415 
1416    if(option==1 .or. cplex==1 )then
1417 !$OMP PARALLEL DO REDUCTION(+:dotr)
1418      do ifft=1,cplex*nfft
1419        dotr=dotr + pot(ifft,1)* dens(ifft,2)     &    ! This is the spin up contribution
1420 &      + pot(ifft,2)*(dens(ifft,1)-dens(ifft,2))      ! This is the spin down contribution
1421      end do
1422 
1423    else ! option==2 and cplex==2 : one builds the imaginary part, from complex den/pot
1424 
1425 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr,doti)
1426      do ifft=1,nfft
1427 
1428        jfft=2*ifft
1429        dre_up=dens(jfft-1,2)
1430        dim_up=dens(jfft  ,2)
1431        dre_dn=dens(jfft-1,1)-dre_up
1432        dim_dn=dens(jfft  ,1)-dim_up
1433        pre_up=pot(jfft-1,1)
1434        pim_up=pot(jfft  ,1)
1435        pre_dn=pot(jfft-1,2)
1436        pim_dn=pot(jfft  ,2)
1437 
1438        dotr=dotr + pre_up * dre_up &
1439 &       + pim_up * dim_up &
1440 &       + pre_dn * dre_dn &
1441 &       + pim_dn * dim_dn
1442        doti=doti + pre_up * dim_up &
1443 &       - pim_up * dre_up &
1444 &       + pre_dn * dim_dn &
1445 &       - pim_dn * dre_dn
1446 
1447      end do
1448    end if
1449 
1450  else if(nspden==4)then
1451 !  \rho{\alpha,\beta} V^{\alpha,\beta} =
1452 !  rho*(V^{11}+V^{22})/2$
1453 !  + m_x Re(V^{12})- m_y Im{V^{12}}+ m_z(V^{11}-V^{22})/2
1454    if (cplex==1) then
1455 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(nfft,dens,pot) REDUCTION(+:dotr)
1456      do ifft=1,nfft
1457        dotr=dotr + &
1458 &       (pot(ifft,1) + pot(ifft,2))*half*dens(ifft,1) &   ! This is the density contrib
1459 &      + pot(ifft,3)      *dens(ifft,2) &   ! This is the m_x contrib
1460 &      - pot(ifft,4)      *dens(ifft,3) &   ! This is the m_y contrib
1461 &      +(pot(ifft,1) - pot(ifft,2))*half*dens(ifft,4)     ! This is the m_z contrib
1462      end do
1463    else ! cplex=2
1464 !    Note concerning storage when cplex=2:
1465 !    V is stored as : v^11, v^22, V^12, i.V^21 (each are complex)
1466 !    N is stored as : n, m_x, m_y, mZ          (each are complex)
1467      if (option==1) then
1468 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr)
1469        do ifft=1,nfft
1470          jfft=2*ifft
1471          dre11=half*(dens(jfft-1,1)+dens(jfft-1,4))
1472          dim11=half*(dens(jfft  ,1)+dens(jfft-1,4))
1473          dre22=half*(dens(jfft-1,1)-dens(jfft-1,4))
1474          dim22=half*(dens(jfft  ,1)-dens(jfft-1,4))
1475          dre12=half*(dens(jfft-1,2)+dens(jfft  ,3))
1476          dim12=half*(dens(jfft  ,2)-dens(jfft-1,3))
1477          dre21=half*(dens(jfft-1,2)-dens(jfft  ,3))
1478          dim21=half*(dens(jfft  ,2)+dens(jfft-1,3))
1479          pre11= pot(jfft-1,1)
1480          pim11= pot(jfft  ,1)
1481          pre22= pot(jfft-1,2)
1482          pim22= pot(jfft  ,2)
1483          pre12= pot(jfft-1,3)
1484          pim12= pot(jfft  ,3)
1485          pre21= pot(jfft  ,4)
1486          pim21=-pot(jfft-1,4)
1487 
1488          v0_re=half*(pre11+pre22)
1489          v0_im=half*(pim11+pim22)
1490          bx_re=half*(pre12+pre21)
1491          bx_im=half*(pim12+pim21)
1492          by_re=half*(-pim12+pim21)
1493          by_im=half*(pre12-pre21)
1494          bz_re=half*(pre11-pre22)
1495          bz_im=half*(pim11-pim22)
1496          dotr=dotr+v0_re * dens(jfft-1,1)&
1497 &         + v0_im * dens(jfft  ,1) &
1498 &         + bx_re * dens(jfft-1,2) &
1499 &         + bx_im * dens(jfft  ,2) &
1500 &         + by_re * dens(jfft-1,3) &
1501 &         + by_im * dens(jfft  ,3) &
1502 &         + bz_re * dens(jfft-1,4) &
1503 &         + bz_im * dens(jfft  ,4)
1504 !         dotr=dotr + pre11 * dre11 &
1505 !&         + pim11 * dim11 &
1506 !&         + pre22 * dre22 &
1507 !&         + pim22 * dim22 &
1508 !&         + pre12 * dre12 &
1509 !&         + pim12 * dim12 &
1510 !&         + pre21 * dre21 &
1511 !&         + pim21 * dim21
1512        end do
1513      else ! option=2
1514 !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(nfft,dens,pot) REDUCTION(+:dotr,doti)
1515        do ifft=1,nfft
1516          jfft=2*ifft
1517          dre11=half*(dens(jfft-1,1)+dens(jfft-1,4))
1518          dim11=half*(dens(jfft  ,1)+dens(jfft-1,4))
1519          dre22=half*(dens(jfft-1,1)-dens(jfft-1,4))
1520          dim22=half*(dens(jfft  ,1)-dens(jfft-1,4))
1521          dre12=half*(dens(jfft-1,2)+dens(jfft  ,3))
1522          dim12=half*(dens(jfft  ,2)-dens(jfft-1,3))
1523          dre21=half*(dens(jfft-1,2)-dens(jfft  ,3))
1524          dim21=half*(dens(jfft  ,2)+dens(jfft-1,3))
1525          pre11= pot(jfft-1,1)
1526          pim11= pot(jfft  ,1)
1527          pre22= pot(jfft-1,2)
1528          pim22= pot(jfft  ,2)
1529          pre12= pot(jfft-1,3)
1530          pim12= pot(jfft  ,3)
1531          pre21= pot(jfft  ,4)
1532          pim21=-pot(jfft-1,4)
1533          dotr=dotr + pre11 * dre11 &
1534 &         + pim11 * dim11 &
1535 &         + pre22 * dre22 &
1536 &         + pim22 * dim22 &
1537 &         + pre12 * dre12 &
1538 &         + pim12 * dim12 &
1539 &         + pre21 * dre21 &
1540 &         + pim21 * dim21
1541          doti=doti + pre11 * dim11 &
1542 &         - pim11 * dre11 &
1543 &         + pre22 * dim22 &
1544 &         - pim22 * dre22 &
1545 &         + pre12 * dim12 &
1546 &         - pim12 * dre12 &
1547 &         + pre21 * dim21 &
1548 &         - pim21 * dre21
1549        end do
1550      end if ! option
1551    end if ! cplex
1552  end if ! nspden
1553 
1554  factor=ucvol/dble(nfftot)
1555  dotr=factor*dotr
1556  doti=factor*doti
1557 
1558 !MPIWF reduction (addition) on dotr, doti is needed here
1559  if(present(mpi_comm_sphgrid)) then
1560    nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid)
1561    if(nproc_sphgrid>1) then
1562      buffer2(1)=dotr
1563      buffer2(2)=doti
1564      call xmpi_sum(buffer2,mpi_comm_sphgrid,ierr)
1565      dotr=buffer2(1)
1566      doti=buffer2(2)
1567    end if
1568  end if
1569 
1570 end subroutine dotprod_vn

m_cgtools/fxphas_and_cmp [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 fxphas_and_com

FUNCTION

 Fix phase and compare two set of wavefunctions

OUTPUT

SOURCE

4111 logical function fxphas_and_cmp(npw_k, nspinor, nband_k, istwfk, cg1, cg2, eig_k, msg, atol_rho, atol_dphi) result(ok)
4112 
4113 !Arguments ------------------------------------
4114  integer,intent(in) :: npw_k, nspinor, nband_k, istwfk
4115  real(dp),intent(inout) :: cg1(2, npw_k*nspinor, nband_k), cg2(2, npw_k*nspinor, nband_k)
4116  real(dp),intent(in) :: eig_k(nband_k)
4117  character(len=*),intent(out) :: msg
4118  real(dp),optional,intent(in) :: atol_rho, atol_dphi
4119 
4120 !Local variables-------------------------------
4121  integer, parameter :: useoverlap0 = 0, mgsc = 0
4122  integer :: ipw, ipwsp, isp, mcg, band
4123  real(dp) :: phi1, rho1, phi2, rho2, max_rho_adiff, atol_rho__, phi_diff_ref, max_dphi_adiff, atol_dphi__, gsc(0,0)
4124  character(len=500) :: btype
4125 
4126 ! ***********************************************************************
4127 
4128  atol_rho__ = tol6; if (present(atol_rho)) atol_rho__ = atol_rho
4129  atol_dphi__ = tol3; if (present(atol_dphi)) atol_dphi__ = atol_dphi
4130  max_rho_adiff = zero; max_dphi_adiff = zero; phi_diff_ref = huge(one)
4131 
4132  mcg = npw_k * nspinor * nband_k
4133  call fxphas_seq(cg1, gsc, 1, 1, istwfk, mcg, mgsc, nband_k, npw_k * nspinor, useoverlap0)
4134  call fxphas_seq(cg2, gsc, 1, 1, istwfk, mcg, mgsc, nband_k, npw_k * nspinor, useoverlap0)
4135 
4136  do band=1,nband_k
4137    call band_type(band, btype)
4138    if (btype == "degenerate") cycle
4139    write(234, *)"band: ", band, "istwfk: ", istwfk, trim(btype)
4140    write(235, *)"band: ", band, "istwfk:", istwfk, trim(btype)
4141    write(234, *)"cg1:"; write(235, *)"cg2:"
4142    !write(234, *)"cg1 rho:"; write(235, *)"cg2 rho phi:"
4143    do isp=1,nspinor
4144      do ipw=1,npw_k
4145        ipwsp = ipw + (isp - 1) * npw_k
4146        if (npw_k > 15 .and. ipw > 15 .and. ipw < npw_k - 15) cycle
4147        !write(234, *)ipwsp, cg1(1, ipwsp, band); write(234, *)ipwsp, cg1(2, ipwsp, band)
4148        !write(235, *)ipwsp, cg2(1, ipwsp, band); write(235, *)ipwsp, cg2(2, ipwsp, band)
4149        call rhophi(cg1(:, ipwsp, band), phi1, rho1)
4150        call rhophi(cg2(:, ipwsp, band), phi2, rho2)
4151        write(234, *)ipwsp, rho1!; write(234, *)ipwsp, phi1
4152        write(235, *)ipwsp, rho2!; write(235, *)ipwsp, phi2
4153      end do
4154    end do
4155  end do
4156 
4157  do band=1,nband_k
4158    do ipw=1,npw_k * nspinor
4159      call rhophi(cg1(:, ipw, band), phi1, rho1)
4160      call rhophi(cg2(:, ipw, band), phi2, rho2)
4161      max_rho_adiff = max(max_rho_adiff, abs(rho1 - rho2))
4162      if (rho1 > atol_rho__ ** 2) then
4163        if (phi_diff_ref /= huge(one)) phi_diff_ref = phi1 - phi2
4164        max_dphi_adiff = max(max_dphi_adiff, abs(phi_diff_ref - (phi1 - phi2)))
4165      end if
4166    end do
4167  end do
4168 
4169  write(msg, "(2(a,es12.4))")"max_rho_adiff: ", max_rho_adiff, ", max_dphi_adiff: ", max_dphi_adiff
4170  ok = (max_rho_adiff < atol_rho__ .and. max_dphi_adiff < atol_dphi__)
4171 
4172 contains
4173 subroutine band_type(band, btype)
4174   integer,intent(in) :: band
4175   character(len=*),intent(out) :: btype
4176   real(dp) :: e0
4177 
4178   e0 = eig_k(band)
4179 
4180   if (band == 1) then
4181     btype = "last_state"
4182     if (nband_k > 1) then
4183       btype = "non-degenerate"
4184       if (abs(e0 - eig_k(band + 1)) < tol6) btype = "degenerate"
4185     end if
4186 
4187   else if (band == nband_k) then
4188     btype = "last_state"
4189     if (band - 1 > 0) then
4190       if (abs(e0 - eig_k(band - 1)) < tol6) btype = "degenerate"
4191     end if
4192 
4193   else
4194     btype = "non-degenerate"
4195     if (abs(e0 - eig_k(band - 1)) < tol6 .or. abs(e0 - eig_k(band + 1)) < tol6) btype = "degenerate"
4196   end if
4197 
4198 end subroutine band_type
4199 
4200 end function fxphas_and_cmp

m_cgtools/fxphas_seq [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 fxphas_seq

FUNCTION

 Fix phase of all bands. Keep normalization but maximize real part (minimize imag part).
 Also fix the sign of real part by setting the first non-zero element to be positive.

 This version has been stripped of all the mpi_enreg junk by MJV
 Use cgtk_fixphase if you need a routine that works with mpi_enreg and paral_kgb

INPUTS

  cg(2,mcg)= contains the wavefunction |c> coefficients.
  gsc(2,mgsc)= if useoverlap==1, contains the S|c> coefficients,
               where S is an overlap matrix.
  icg=shift to be applied on the location of data in the array cg
  igsc=shift to be applied on the location of data in the array gsc
  istwfk=input option parameter that describes the storage of wfs
    (set to 1 if usual complex vectors)
  mcg=size of second dimension of cg
  mgsc=size of second dimension of gsc
  nband_k=number of bands
  npw_k=number of planewaves
  useoverlap=describe the overlap of wavefunctions:
               0: no overlap (S=Identity_matrix)
               1: PAW wavefunctions

OUTPUT

  cg(2,mcg)=same array with altered phase.
  gsc(2,mgsc)= same array with altered phase.

SOURCE

3906 subroutine fxphas_seq(cg, gsc, icg, igsc, istwfk, mcg, mgsc, nband_k, npw_k, useoverlap)
3907 
3908 !Arguments ------------------------------------
3909 !scalars
3910  integer,intent(in) :: icg,igsc,istwfk,mcg,mgsc,nband_k,npw_k,useoverlap
3911 !arrays
3912  real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc*useoverlap)
3913 
3914 !Local variables-------------------------------
3915 !scalars
3916  integer :: iband,ii,indx
3917  real(dp) :: cim,cre,gscim,gscre,quotient,root1,root2,saa,sab,sbb,theta
3918  real(dp) :: thppi,xx,yy
3919  character(len=500) :: msg
3920 !arrays
3921  real(dp),allocatable :: cimb(:),creb(:),saab(:),sabb(:),sbbb(:) !,sarr(:,:)
3922 
3923 ! *************************************************************************
3924 
3925 !The general case, where a complex phase indeterminacy is present
3926  if(istwfk==1)then
3927 
3928    ABI_MALLOC(cimb,(nband_k))
3929    ABI_MALLOC(creb,(nband_k))
3930    ABI_MALLOC(saab,(nband_k))
3931    ABI_MALLOC(sabb,(nband_k))
3932    ABI_MALLOC(sbbb,(nband_k))
3933    cimb(:)=zero ; creb(:)=zero
3934 
3935 !  Loop over bands
3936 !  TODO: MG store saa arrays in sarr(3,nband_k) to reduce false sharing.
3937    do iband=1,nband_k
3938      indx=icg+(iband-1)*npw_k
3939 
3940 !    Compute several sums over Re, Im parts of c
3941      saa=0.0_dp ; sbb=0.0_dp ; sab=0.0_dp
3942      do ii=1+indx,npw_k+indx
3943        saa=saa+cg(1,ii)*cg(1,ii)
3944        sbb=sbb+cg(2,ii)*cg(2,ii)
3945        sab=sab+cg(1,ii)*cg(2,ii)
3946      end do
3947      saab(iband)=saa
3948      sbbb(iband)=sbb
3949      sabb(iband)=sab
3950    end do ! iband
3951 
3952 
3953    do iband=1,nband_k
3954 
3955      indx=icg+(iband-1)*npw_k
3956 
3957      saa=saab(iband)
3958      sbb=sbbb(iband)
3959      sab=sabb(iband)
3960 
3961 !    Get phase angle theta
3962      if (sbb+saa>tol8)then
3963        if(abs(sbb-saa)>tol8*(sbb+saa) .or. 2*abs(sab)>tol8*(sbb+saa))then
3964          if (abs(sbb-saa)>tol8*abs(sab)) then
3965            quotient=sab/(sbb-saa)
3966            theta=0.5_dp*atan(2.0_dp*quotient)
3967          else
3968 !          Taylor expansion of the atan in terms of inverse of its argument. Correct up to 1/x2, included.
3969            theta=0.25_dp*(pi-(sbb-saa)/sab)
3970          end if
3971 !        Check roots to get theta for max Re part
3972          root1=cos(theta)**2*saa+sin(theta)**2*sbb-2.0_dp*cos(theta)*sin(theta)*sab
3973          thppi=theta+0.5_dp*pi
3974          root2=cos(thppi)**2*saa+sin(thppi)**2*sbb-2.0_dp*cos(thppi)*sin(thppi)*sab
3975          if (root2>root1) theta=thppi
3976        else
3977 !        The real part vector and the imaginary part vector are orthogonal, and of same norm. Strong indeterminacy.
3978 !        Will determine the first non-zero coefficient, and fix its phase
3979          do ii=1+indx,npw_k+indx
3980            cre=cg(1,ii)
3981            cim=cg(2,ii)
3982            if(cre**2+cim**2>tol8**2*(saa+sbb))then
3983              if(cre**2>tol8**2**cim**2)then
3984                theta=atan(cim/cre)
3985              else
3986 !              Taylor expansion of the atan in terms of inverse of its argument. Correct up to 1/x2, included.
3987                theta=pi/2-cre/cim
3988              end if
3989              exit
3990            end if
3991          end do
3992        end if
3993      else
3994        write(msg,'(a,i0,5a)')&
3995        'The eigenvector with band ',iband,' has zero norm.',ch10,&
3996        'This usually happens when the number of bands (nband) is comparable to the number of planewaves (mpw)',ch10,&
3997        'Action: Check the parameters of the calculation. If nband ~ mpw, then decrease nband or, alternatively, increase ecut'
3998        ABI_ERROR(msg)
3999      end if
4000 
4001      xx=cos(theta)
4002      yy=sin(theta)
4003 
4004 !    Here, set the first non-zero element to be positive
4005      do ii=1+indx,npw_k+indx
4006        cre=cg(1,ii)
4007        cim=cg(2,ii)
4008        cre=xx*cre-yy*cim
4009        if(abs(cre)>tol8)exit
4010      end do
4011      if(cre<zero)then
4012        xx=-xx ; yy=-yy
4013      end if
4014 
4015      creb(iband)=xx
4016      cimb(iband)=yy
4017 
4018    end do
4019 
4020    do iband=1,nband_k
4021 
4022      indx=icg+(iband-1)*npw_k
4023 
4024      xx=creb(iband)
4025      yy=cimb(iband)
4026      do ii=1+indx,npw_k+indx
4027        cre=cg(1,ii)
4028        cim=cg(2,ii)
4029        cg(1,ii)=xx*cre-yy*cim
4030        cg(2,ii)=xx*cim+yy*cre
4031      end do
4032 
4033 !    Alter phase of array S|cg>
4034      if (useoverlap==1) then
4035        indx=igsc+(iband-1)*npw_k
4036        do ii=1+indx,npw_k+indx
4037          gscre=gsc(1,ii)
4038          gscim=gsc(2,ii)
4039          gsc(1,ii)=xx*gscre-yy*gscim
4040          gsc(2,ii)=xx*gscim+yy*gscre
4041        end do
4042      end if
4043 
4044    end do ! iband
4045 
4046    ABI_FREE(cimb)
4047    ABI_FREE(creb)
4048    ABI_FREE(saab)
4049    ABI_FREE(sabb)
4050    ABI_FREE(sbbb)
4051 
4052  else  ! if istwfk/=1
4053    !  Storages that take into account the time-reversal symmetry: the freedom is only a sign freedom
4054 
4055    ABI_MALLOC(creb,(nband_k))
4056    creb(:)=zero
4057 !  Loop over bands
4058    do iband=1,nband_k
4059 
4060      indx=icg+(iband-1)*npw_k
4061 
4062 !    Here, set the first non-zero real element to be positive
4063      do ii=1+indx,npw_k+indx
4064        cre=cg(1,ii)
4065        if(abs(cre)>tol8)exit
4066      end do
4067      creb(iband)=cre
4068 
4069    end do ! iband
4070 
4071    do iband=1,nband_k
4072 
4073      cre=creb(iband)
4074      if(cre<zero)then
4075        indx=icg+(iband-1)*npw_k
4076        do ii=1+indx,npw_k+indx
4077          cg(1,ii)=-cg(1,ii)
4078          cg(2,ii)=-cg(2,ii)
4079        end do
4080        if(useoverlap==1)then
4081          indx=igsc+(iband-1)*npw_k
4082          do ii=1+indx,npw_k+indx
4083            gsc(1,ii)=-gsc(1,ii)
4084            gsc(2,ii)=-gsc(2,ii)
4085          end do
4086        end if
4087      end if
4088 
4089    end do ! iband
4090 
4091    ABI_FREE(creb)
4092 
4093  end if ! istwfk
4094 
4095 end subroutine fxphas_seq

m_cgtools/matrixelmt_g [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 matrixelmt_g

FUNCTION

  Compute a matrix element of two wavefunctions, in reciprocal space,
  for an operator that is diagonal in reciprocal space: <wf1|op|wf2>
  For the time being, only spin-independent operators are treated.

INPUTS

  diag(npw)=diagonal operator (real, spin-independent !)
  istwf_k=storage mode of the vectors
  needimag=0 if the imaginary part is not needed ; 1 if the imaginary part is needed
  npw=number of planewaves of the first vector
  nspinor=number of spinor components
  vect1(2,npw*nspinor)=first vector
  vect2(2,npw*nspinor)=second vector
  comm_fft=MPI communicator for the FFT
  me_g0=1 if this processors treats the G=0 component.

OUTPUT

  ai=imaginary part of the matrix element
  ar=real part of the matrix element

SOURCE

1113 subroutine matrixelmt_g(ai,ar,diag,istwf_k,needimag,npw,nspinor,vect1,vect2,me_g0,comm_fft)
1114 
1115 !Arguments ------------------------------------
1116 !scalars
1117  integer,intent(in) :: istwf_k,needimag,npw,nspinor,me_g0,comm_fft
1118  real(dp),intent(out) :: ai,ar
1119 !arrays
1120  real(dp),intent(in) :: diag(npw),vect1(2,npw*nspinor),vect2(2,npw*nspinor)
1121 
1122 !Local variables-------------------------------
1123 !scalars
1124  integer :: i1,ierr,ipw
1125  character(len=500) :: msg
1126 !arrays
1127  real(dp) :: buffer2(2)
1128  !real(dp),allocatable :: re_prod(:), im_prod(:)
1129 
1130 ! *************************************************************************
1131 
1132  if (nspinor==2 .and. istwf_k/=1) then
1133    write(msg,'(a,a,a,i6,a,i6)')&
1134    'When istwf_k/=1, nspinor must be 1,',ch10,&
1135    'however, nspinor=',nspinor,', and istwf_k=',istwf_k
1136    ABI_BUG(msg)
1137  end if
1138 
1139 #if 0
1140  !TODO
1141  ABI_MALLOC(re_prod,(npw*nspinor))
1142  do ipw=1,npw*nspinor
1143   re_prod(ipw) = vect1(1,ipw)*vect2(1,ipw) + vect1(2,ipw)*vect2(2,ipw)
1144  end do
1145 
1146  if (needimag == 1) then
1147    ABI_MALLOC(im_prod,(npw*nspinor))
1148    do ipw=1,npw*nspinor
1149      im_prod(ipw) = vect1(1,ipw)*vect2(2,ipw) - vect1(2,ipw)*vect2(1,ipw)
1150    end do
1151  end if
1152 #endif
1153 
1154  ar=zero
1155  if(needimag==1)ai=zero
1156 
1157 !Normal storage mode
1158  if(istwf_k==1)then
1159 
1160 !  Need only real part
1161    if(needimag==0)then
1162 
1163      do ipw=1,npw
1164        ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1165      end do
1166 
1167      if(nspinor==2)then
1168        do ipw=1+npw,2*npw
1169          ar=ar+diag(ipw-npw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1170        end do
1171      end if
1172 
1173    else ! Need also the imaginary part
1174 
1175      do ipw=1,npw
1176        ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1177        ai=ai+diag(ipw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw))
1178      end do
1179 
1180      if(nspinor==2)then
1181        do ipw=1+npw,2*npw
1182          ar=ar+diag(ipw-npw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1183          ai=ai+diag(ipw-npw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw))
1184        end do
1185      end if
1186 
1187    end if ! needimag
1188 
1189  else if(istwf_k>=2)then
1190 
1191 !  XG030513 : MPIWF need to know which proc has G=0
1192 
1193    i1=1
1194    if(istwf_k==2 .and. me_g0==1)then
1195      ar=half*diag(1)*vect1(1,1)*vect2(1,1) ; i1=2
1196    end if
1197 
1198 !  Need only real part
1199    if(needimag==0)then
1200 
1201      do ipw=i1,npw
1202        ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1203      end do
1204      ar=two*ar
1205 
1206    else ! Need also the imaginary part
1207 
1208      do ipw=i1,npw
1209        ar=ar+diag(ipw)*(vect1(1,ipw)*vect2(1,ipw)+vect1(2,ipw)*vect2(2,ipw))
1210        ai=ai+diag(ipw)*(vect1(1,ipw)*vect2(2,ipw)-vect1(2,ipw)*vect2(1,ipw))
1211      end do
1212      ar=two*ar ; ai=two*ai
1213 
1214    end if
1215 
1216  end if ! istwf_k
1217 
1218 #if 0
1219  ABI_FREE(re_prod)
1220  if (needimag == 1) then
1221    ABI_FREE(im_prod)
1222  end if
1223 #endif
1224 
1225 !MPIWF need to make reduction on ar and ai .
1226  if (xmpi_comm_size(comm_fft)>1) then
1227    buffer2(1)=ai
1228    buffer2(2)=ar
1229    call xmpi_sum(buffer2,comm_fft ,ierr)
1230    ai=buffer2(1)
1231    ar=buffer2(2)
1232  end if
1233 
1234 end subroutine matrixelmt_g

m_cgtools/mean_fftr [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 mean_fftr

FUNCTION

  Compute the mean of an arraysp(nfft,nspden), over the FFT grid, for each component nspden,
  and return it in meansp(nspden).
  Take into account the spread of the array due to parallelism: the actual number of fft
  points is nfftot, but the number of points on this proc is nfft only.
  So : for ispden from 1 to nspden
       meansp(ispden) = sum(ifft=1,nfftot) arraysp(ifft,ispden) / nfftot

INPUTS

  arraysp(nfft,nspden)=the array whose average has to be computed
  nfft=number of FFT points stored by one proc
  nfftot=total number of FFT points
  nspden=number of spin-density components

OUTPUT

  meansp(nspden)=mean value for each nspden component

SOURCE

1680 subroutine mean_fftr(arraysp,meansp,nfft,nfftot,nspden,mpi_comm_sphgrid)
1681 
1682 !Arguments ------------------------------------
1683 !scalars
1684  integer,intent(in) :: nfft,nfftot,nspden
1685  integer,intent(in),optional:: mpi_comm_sphgrid
1686 !arrays
1687  real(dp),intent(in) :: arraysp(nfft,nspden)
1688  real(dp),intent(out) :: meansp(nspden)
1689 
1690 !Local variables-------------------------------
1691 !scalars
1692  integer :: ierr,ifft,ispden,nproc_sphgrid
1693  real(dp) :: invnfftot,tmean
1694 
1695 ! *************************************************************************
1696 
1697  invnfftot=one/(dble(nfftot))
1698 
1699  do ispden=1,nspden
1700    tmean=zero
1701 !$OMP PARALLEL DO REDUCTION(+:tmean)
1702    do ifft=1,nfft
1703      tmean=tmean+arraysp(ifft,ispden)
1704    end do
1705    meansp(ispden)=tmean*invnfftot
1706  end do
1707 
1708 !XG030514 : MPIWF The values of meansp(ispden) should
1709 !now be summed accross processors in the same WF group, and spread on all procs.
1710  if(present(mpi_comm_sphgrid)) then
1711    nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid)
1712    if(nproc_sphgrid>1) then
1713      call xmpi_sum(meansp,nspden,mpi_comm_sphgrid,ierr)
1714    end if
1715  end if
1716 
1717 end subroutine mean_fftr

m_cgtools/overlap_g [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 overlap_g

FUNCTION

 Compute the scalar product between WF at two different k-points
 < u_{n,k1} | u_{n,k2}>

INPUTS

 mpw = maximum dimensioned size of npw
 npw_k1 = number of plane waves at k1
 npw_k2 = number of plane waves at k2
 nspinor = 1 for scalar, 2 for spinor wavefunctions
 pwind_k = array required to compute the scalar product (see initberry.f)
 vect1 = wavefunction at k1: | u_{n,k1} >
 vect2 = wavefunction at k1: | u_{n,k2} >

OUTPUT

 doti = imaginary part of the scalarproduct
 dotr = real part of the scalarproduct

NOTES

 In case a G-vector of the basis sphere of plane waves at k1
 does not belong to the basis sphere of plane waves at k2,
 pwind = 0. Therefore, the dimensions of vect1 &
 vect2 are (1:2,0:mpw) and the element (1:2,0) MUST be set to zero.

 The current implementation if not compatible with TR-symmetry (i.e. istwfk/=1) !

SOURCE

4234 subroutine overlap_g(doti,dotr,mpw,npw_k1,npw_k2,nspinor,pwind_k,vect1,vect2)
4235 
4236 !Arguments ------------------------------------
4237 !scalars
4238  integer,intent(in) :: mpw,npw_k1,npw_k2,nspinor
4239  real(dp),intent(out) :: doti,dotr
4240 !arrays
4241  integer,intent(in) :: pwind_k(mpw)
4242  real(dp),intent(in) :: vect1(1:2,0:mpw*nspinor),vect2(1:2,0:mpw*nspinor)
4243 
4244 !Local variables-------------------------------
4245 !scalars
4246  integer :: ipw,ispinor,jpw,spnshft1,spnshft2
4247 
4248 ! *************************************************************************
4249 
4250 !Check if vect1(:,0) = 0 and vect2(:,0) = 0
4251  if ((abs(vect1(1,0)) > tol12).or.(abs(vect1(2,0)) > tol12).or. &
4252 & (abs(vect2(1,0)) > tol12).or.(abs(vect2(2,0)) > tol12)) then
4253    ABI_BUG('vect1(:,0) and/or vect2(:,0) are not equal to zero')
4254  end if
4255 
4256 !Compute the scalar product
4257  dotr = zero; doti = zero
4258  do ispinor = 1, nspinor
4259    spnshft1 = (ispinor-1)*npw_k1
4260    spnshft2 = (ispinor-1)*npw_k2
4261 !$OMP PARALLEL DO PRIVATE(jpw) REDUCTION(+:doti,dotr)
4262    do ipw = 1, npw_k1
4263      jpw = pwind_k(ipw)
4264      dotr = dotr + vect1(1,spnshft1+ipw)*vect2(1,spnshft2+jpw) + vect1(2,spnshft1+ipw)*vect2(2,spnshft2+jpw)
4265      doti = doti + vect1(1,spnshft1+ipw)*vect2(2,spnshft2+jpw) - vect1(2,spnshft1+ipw)*vect2(1,spnshft2+jpw)
4266    end do
4267  end do
4268 
4269 end subroutine overlap_g

m_cgtools/projbd [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 projbd

FUNCTION

 Project out vector "direc" onto the bands contained in "cg".
 if useoverlap==0
  New direc=direc-$sum_{j/=i} { <cg_{j}|direc>.|cg_{j}> }$
 if useoverlap==1 (use of overlap matrix S)
  New direc=direc-$sum_{j/=i} { <cg_{j}|S|direc>.|cg_{j}> }$
 (index i can be set to -1 to sum over all bands)

INPUTS

  cg(2,mcg)=wavefunction coefficients for ALL bands
  iband0=which particular band we are interested in ("i" in the above formula)
         Can be set to -1 to sum over all bands...
  icg=shift to be given to the location of the data in cg
  iscg=shift to be given to the location of the data in scg
  istwf_k=option parameter that describes the storage of wfs
  mcg=maximum size of second dimension of cg
  mscg=maximum size of second dimension of scg
  nband=number of bands
  npw=number of planewaves
  nspinor=number of spinorial components (on current proc)
  scg(2,mscg*useoverlap)=<G|S|band> for ALL bands,
                        where S is an overlap matrix
  scprod_io=0 if scprod array has to be computed; 1 if it is input (already in memory)
  tim_projbd=timing code of the calling subroutine(can be set to 0 if not attributed)
  useoverlap=describe the overlap of wavefunctions:
               0: no overlap (S=Identity_matrix)
               1: wavefunctions are overlapping
 me_g0=1 if this processors treats G=0, 0 otherwise.
 comm=MPI communicator (used if G vectors are distributed.

SIDE EFFECTS

  direc(2,npw)= input: vector to be orthogonalised with respect to cg (and S)
                output: vector that has been orthogonalized wrt cg (and S)

  scprod(2,nband)=scalar_product
        if useoverlap==0: scalar_product_i=$<cg_{j}|direc_{i}>$
        if useoverlap==1: scalar_product_i=$<cg_{j}|S|direc_{i}>$
    if scprod_io=0, scprod is output
    if scprod_io=1, scprod is input

NOTES

  1) MPIWF Might have to be recoded for efficient paralellism

  2) The new version employs BLAS2 routine so that the OMP parallelism is delegated to BLAS library.
     May use BLAS3 if multiple wavefunctions are optimized at the same time.

  3) Note for PAW: ref.= PRB 73, 235101 (2006) [[cite:Audouze2006]], equations (71) and (72):
     in normal use, projbd applies P_c projector
     if cg and scg are inverted, projbd applies P_c+ projector

  4) cg_zgemv wraps ZGEMM whose implementation is more efficient, especially in the threaded case.

SOURCE

3025 subroutine projbd(cg,direc,iband0,icg,iscg,istwf_k,mcg,mscg,nband,&
3026                   npw,nspinor,scg,scprod,scprod_io,tim_projbd,useoverlap,me_g0,comm)
3027 
3028 !Arguments ------------------------------------
3029 !scalars
3030  integer,intent(in) :: iband0,icg,iscg,istwf_k,mcg,mscg,nband,npw,nspinor
3031  integer,intent(in) :: scprod_io,tim_projbd,useoverlap,me_g0,comm
3032 !arrays
3033  real(dp),intent(in) :: cg(2,mcg),scg(2,mscg*useoverlap)
3034  real(dp),intent(inout) :: direc(2,npw*nspinor)
3035  real(dp),intent(inout) :: scprod(2,nband)
3036 
3037 !Local variables-------------------------------
3038 !scalars
3039  integer :: nbandm,npw_sp,ierr
3040 !arrays
3041  real(dp) :: tsec(2),bkp_scprod(2),bkp_dirg0(2)
3042 
3043 ! *************************************************************************
3044 
3045  call timab(210+tim_projbd,1,tsec)
3046 
3047  npw_sp=npw*nspinor
3048 
3049  nbandm=nband
3050 
3051  if (istwf_k==1) then
3052 
3053    if (scprod_io==0) then
3054      if (useoverlap==1) then
3055        call cg_zgemv("C",npw_sp,nbandm,scg(1,iscg+1),direc,scprod)
3056      else
3057        call cg_zgemv("C",npw_sp,nbandm,cg(1,icg+1),  direc,scprod)
3058      end if
3059      call xmpi_sum(scprod,comm,ierr)
3060    end if
3061 
3062    if (iband0>0.and.iband0<=nbandm) then
3063      bkp_scprod = scprod(:,iband0)
3064      scprod(:,iband0) = zero
3065    end if
3066 
3067    call cg_zgemv("N",npw_sp,nbandm,cg(1,icg+1),scprod,direc,alpha=-cg_cone,beta=cg_cone)
3068 
3069    if (iband0>0.and.iband0<=nbandm) scprod(:,iband0) = bkp_scprod ! Restore previous value as scprod is output.
3070 
3071  else if (istwf_k>=2) then
3072    !
3073    !  u_{G0/2}(G) = u_{G0/2}(-G-G0)^*; k = G0/2
3074    !  hence:
3075    !  sum_G f*(G) g(G) = 2 REAL sum_G^{IZONE} w(G) f*(G)g(G)
3076    !  where
3077    !  w(G) = 1 except for k=0 and G=0 where w(G=0) = 1/2.
3078    !
3079    if (scprod_io==0) then
3080 
3081      if (useoverlap==1) then
3082 
3083        if (istwf_k==2 .and. me_g0==1) then
3084          bkp_dirg0 = direc(:,1)
3085          direc(1,1) = half * direc(1,1)
3086          direc(2,1) = zero
3087        end if
3088 
3089        call cg_zgemv("C",npw_sp,nbandm,scg(1,iscg+1),direc,scprod)
3090        scprod = two * scprod
3091        scprod(2,:) = zero
3092 
3093        if(istwf_k==2 .and. me_g0==1) direc(:,1) = bkp_dirg0
3094 
3095      else
3096 
3097        if (istwf_k==2 .and. me_g0==1) then
3098          bkp_dirg0 = direc(:,1)
3099          direc(1,1) = half * direc(1,1)
3100          direc(2,1) = zero
3101        end if
3102 
3103        call cg_zgemv("C",npw_sp,nbandm,cg(1,icg+1),direc,scprod)
3104        scprod = two * scprod
3105        scprod(2,:) = zero
3106 
3107        if (istwf_k==2 .and. me_g0==1) direc(:,1) = bkp_dirg0
3108      end if ! useoverlap
3109 
3110      call xmpi_sum(scprod,comm,ierr)
3111    end if
3112 
3113    if (iband0>0.and.iband0<=nbandm) then
3114      bkp_scprod = scprod(:,iband0)
3115      scprod(:,iband0) = zero
3116    end if
3117 
3118    call cg_zgemv("N",npw_sp,nbandm,cg(1,icg+1),scprod,direc,alpha=-cg_cone,beta=cg_cone)
3119 
3120    if (iband0>0.and.iband0<=nbandm) scprod(:,iband0) = bkp_scprod ! Restore previous value as scprod is output.
3121 
3122  end if ! Test on istwf_k
3123 
3124  call timab(210+tim_projbd,2,tsec)
3125 
3126 end subroutine projbd

m_cgtools/pw_orthon [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 pw_orthon

FUNCTION

 Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt.
 Two orthogonality conditions are available:
  Simple orthogonality: ${<Vec_{i}|Vec_{j}>=Delta_ij}$
  Orthogonality with overlap S: ${<Vec_{i}|S|Vec_{j}>=Delta_ij}$

INPUTS

  icg=shift to be given to the location of the data in cg(=vecnm)
  igsc=shift to be given to the location of the data in gsc(=ovl_vecnm)
  istwf_k=option parameter that describes the storage of wfs
  mcg=maximum size of second dimension of cg(=vecnm)
  mgsc=maximum size of second dimension of gsc(=ovl_vecnm)
  nelem=number of complex elements in each vector
  nvec=number of vectors to be orthonormalized
  ortalgo= option for the choice of the algorithm
         -1: no orthogonalization (direct return)
          0 or 2: old algorithm (use of buffers)
          1: new algorithm (use of blas)
          3: new new algorithm (use of lapack without copy)
  useoverlap=select the orthogonality condition
               0: no overlap between vectors
               1: vectors are overlapping
  me_g0=1 if this processor has G=0, 0 otherwise
  comm=MPI communicator

SIDE EFFECTS

  vecnm= input: vectors to be orthonormalized; array of nvec column
                vectors,each of length nelem,shifted by icg
                This array is complex or else real(dp) of twice length
         output: orthonormalized set of vectors
  if (useoverlap==1) only:
    ovl_vecnm= input: product of overlap and input vectors:
                      S|vecnm>,where S is the overlap operator
               output: updated S|vecnm> according to vecnm

NOTES

 Note that each vector has an arbitrary phase which is not fixed in this routine.

 WARNING: not yet suited for nspinor=2 with istwfk/=1

SOURCE

4761 subroutine pw_orthon(icg, igsc, istwf_k, mcg, mgsc, nelem, nvec, ortalgo, ovl_vecnm, useoverlap, vecnm, me_g0, comm)
4762 
4763  use m_abi_linalg
4764 
4765 !Arguments ------------------------------------
4766 !scalars
4767  integer,intent(in) :: icg,igsc,istwf_k,mcg,mgsc,nelem,nvec,ortalgo,useoverlap,me_g0,comm
4768 !arrays
4769  real(dp),intent(inout) :: ovl_vecnm(2,mgsc*useoverlap),vecnm(2,mcg)
4770 
4771 !Local variables-------------------------------
4772 !scalars
4773  integer :: ierr,ii,ii0,ii1,ii2,ivec,ivec2
4774  integer :: rvectsiz,vectsize,cg_idx,gsc_idx
4775  real(dp) :: doti,dotr,sum,xnorm
4776  !real(dp) :: cpu, wall, gflops
4777 #ifdef DEBUG_MODE
4778  character(len=500) :: msg
4779 #endif
4780 !arrays
4781  integer :: cgindex(nvec), gscindex(nvec)
4782  real(dp) :: buffer2(2),tsec(2)
4783  real(dp),allocatable :: rblockvectorbx(:,:),rblockvectorx(:,:),rgramxbx(:,:)
4784  complex(dpc),allocatable :: cblockvectorbx(:,:),cblockvectorx(:,:), cgramxbx(:,:)
4785 
4786 ! *************************************************************************
4787 
4788 #ifdef DEBUG_MODE
4789  !Make sure imaginary part at G=0 vanishes
4790  if (istwf_k == 2 .and. me_g0 == 1) then
4791    do ivec=1,nvec
4792      if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>zero)then
4793      ! if(abs(vecnm(2,1+nelem*(ivec-1)+icg))>tol16)then
4794        write(msg,'(2a,3i0,2es16.6,a,a)')&
4795        ' For istwf_k = 2, observed the following element of vecnm :',ch10,&
4796        nelem,ivec,icg,vecnm(1:2,1+nelem*(ivec-1)+icg), ch10,' with a non-negligible imaginary part.'
4797        ABI_BUG(msg)
4798      end if
4799    end do
4800  end if
4801 #endif
4802 
4803  ! Nothing to do if ortalgo=-1
4804  if(ortalgo==-1) return
4805 
4806  !call wrtout(std_out, sjoin(" Begin wavefunction orthogonalization with ortalgo:", itoa(ortalgo)))
4807  !call cwtime(cpu, wall, gflops, "start")
4808 
4809  do ivec=1,nvec
4810    cgindex(ivec)=nelem*(ivec-1)+icg+1
4811    gscindex(ivec)=nelem*(ivec-1)+igsc+1
4812  end do
4813 
4814  if (ortalgo==3) then
4815    ! =========================
4816    ! First (new new) algorithm
4817    ! =========================
4818    ! NEW VERSION: avoid copies, use ZHERK for NC
4819    cg_idx = cgindex(1)
4820    if (useoverlap == 1) then
4821      gsc_idx = gscindex(1)
4822      call cgpaw_cholesky(nelem, nvec, vecnm(1,cg_idx), ovl_vecnm(1,gsc_idx), istwf_k, me_g0, comm)
4823    else
4824      call cgnc_cholesky(nelem, nvec, vecnm(1,cg_idx), istwf_k, me_g0, comm, use_gemm=.FALSE.)
4825    end if
4826 
4827  else if (ortalgo==1) then
4828     ! =======================
4829     ! Second (new) algorithm
4830     ! =======================
4831     ! This first algorithm seems to be more efficient especially in the parallel band-FFT mode.
4832 
4833    if(istwf_k==1) then
4834      vectsize=nelem
4835      ABI_MALLOC(cgramxbx,(nvec,nvec))
4836      ABI_MALLOC(cblockvectorx,(vectsize,nvec))
4837      ABI_MALLOC(cblockvectorbx,(vectsize,nvec))
4838      call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorx,1,x_cplx=2)
4839      if (useoverlap == 1) then
4840        call abi_xcopy(nvec*vectsize,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2)
4841      else
4842        call abi_xcopy(nvec*vectsize,vecnm(:,cgindex(1):cgindex(nvec)-1),1,cblockvectorbx,1,x_cplx=2)
4843      end if
4844      call abi_xorthonormalize(cblockvectorx,cblockvectorbx,nvec,comm,cgramxbx,vectsize)
4845      call abi_xcopy(nvec*vectsize,cblockvectorx,1,vecnm(:,cgindex(1):cgindex(nvec)-1),1,x_cplx=2)
4846      if (useoverlap == 1) then
4847        call abi_xtrsm('r','u','n','n',vectsize,nvec,cone,cgramxbx,nvec,cblockvectorbx,vectsize)
4848        call abi_xcopy(nvec*vectsize,cblockvectorbx,1,ovl_vecnm(:,gscindex(1):gscindex(nvec)-1),1,x_cplx=2)
4849      end if
4850      ABI_FREE(cgramxbx)
4851      ABI_FREE(cblockvectorx)
4852      ABI_FREE(cblockvectorbx)
4853 
4854    else if (istwf_k==2) then
4855      ! Pack real and imaginary part of the wavefunctions.
4856      rvectsiz=nelem
4857      vectsize=2*nelem; if(me_g0==1) vectsize=vectsize-1
4858      ABI_MALLOC(rgramxbx,(nvec,nvec))
4859      ABI_MALLOC(rblockvectorx,(vectsize,nvec))
4860      ABI_MALLOC(rblockvectorbx,(vectsize,nvec))
4861      do ivec=1,nvec
4862        if (me_g0 == 1) then
4863          call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorx (1,ivec),1)
4864          call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorx(2,ivec),1)
4865          call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorx(rvectsiz+1,ivec),1)
4866          if (useoverlap == 1) then
4867            call abi_xcopy(1,ovl_vecnm(1,gscindex(ivec)),1,rblockvectorbx(1,ivec),1)
4868            call abi_xcopy(rvectsiz-1,ovl_vecnm(1,gscindex(ivec)+1),2,rblockvectorbx(2,ivec),1)
4869            call abi_xcopy(rvectsiz-1,ovl_vecnm(2,gscindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1)
4870          else
4871            call abi_xcopy(1,vecnm(1,cgindex(ivec)),1,rblockvectorbx(1,ivec),1)
4872            call abi_xcopy(rvectsiz-1,vecnm(1,cgindex(ivec)+1),2,rblockvectorbx(2,ivec),1)
4873            call abi_xcopy(rvectsiz-1,vecnm(2,cgindex(ivec)+1),2,rblockvectorbx(rvectsiz+1,ivec),1)
4874          end if
4875          rblockvectorx (2:vectsize,ivec)=rblockvectorx (2:vectsize,ivec)*sqrt2
4876          rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)*sqrt2
4877        else
4878          call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorx(1,ivec),1)
4879          call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorx(rvectsiz+1,ivec),1)
4880          if (useoverlap == 1) then
4881            call abi_xcopy(rvectsiz,ovl_vecnm(1,gscindex(ivec)),2,rblockvectorbx(1,ivec),1)
4882            call abi_xcopy(rvectsiz,ovl_vecnm(2,gscindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1)
4883          else
4884            call abi_xcopy(rvectsiz,vecnm(1,cgindex(ivec)),2,rblockvectorbx(1,ivec),1)
4885            call abi_xcopy(rvectsiz,vecnm(2,cgindex(ivec)),2,rblockvectorbx(rvectsiz+1,ivec),1)
4886          end if
4887          rblockvectorx (1:vectsize,ivec)=rblockvectorx (1:vectsize,ivec)*sqrt2
4888          rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)*sqrt2
4889        end if
4890      end do
4891 
4892      call ortho_reim(rblockvectorx,rblockvectorbx,nvec,comm,rgramxbx,vectsize)
4893 
4894      do ivec=1,nvec
4895        ! Unpack results
4896        if (me_g0 == 1) then
4897          call abi_xcopy(1,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),1)
4898          vecnm(2,cgindex(ivec))=zero
4899          rblockvectorx(2:vectsize,ivec)=rblockvectorx(2:vectsize,ivec)/sqrt2
4900          call abi_xcopy(rvectsiz-1,rblockvectorx(2,ivec),1,vecnm(1,cgindex(ivec)+1),2)
4901          call abi_xcopy(rvectsiz-1,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)+1),2)
4902        else
4903          rblockvectorx(1:vectsize,ivec)=rblockvectorx(1:vectsize,ivec)/sqrt2
4904          call abi_xcopy(rvectsiz,rblockvectorx(1,ivec),1,vecnm(1,cgindex(ivec)),2)
4905          call abi_xcopy(rvectsiz,rblockvectorx(rvectsiz+1,ivec),1,vecnm(2,cgindex(ivec)),2)
4906        end if
4907 
4908        if(useoverlap == 1) then
4909          call abi_xtrsm('r','u','n','n',vectsize,nvec,one,rgramxbx,nvec,rblockvectorbx,vectsize)
4910          if (me_g0 == 1) then
4911            call abi_xcopy(1,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),1)
4912            ovl_vecnm(2,gscindex(ivec))=zero
4913            rblockvectorbx(2:vectsize,ivec)=rblockvectorbx(2:vectsize,ivec)/sqrt2
4914            call abi_xcopy(rvectsiz-1,rblockvectorbx(2,ivec),1,ovl_vecnm(1,gscindex(ivec)+1),2)
4915            call abi_xcopy(rvectsiz-1,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)+1),2)
4916          else
4917            rblockvectorbx(1:vectsize,ivec)=rblockvectorbx(1:vectsize,ivec)/sqrt2
4918            call abi_xcopy(rvectsiz,rblockvectorbx(1,ivec),1,ovl_vecnm(1,gscindex(ivec)),2)
4919            call abi_xcopy(rvectsiz,rblockvectorbx(rvectsiz+1,ivec),1,ovl_vecnm(2,gscindex(ivec)),2)
4920          end if
4921        end if
4922      end do
4923      ABI_FREE(rgramxbx)
4924      ABI_FREE(rblockvectorx)
4925      ABI_FREE(rblockvectorbx)
4926    end if
4927 
4928  else if (ortalgo==4) then
4929    ! else if (ANY(ortalgo==(/0,2/))) then
4930 
4931    cg_idx = cgindex(1)
4932    if (useoverlap==0) then
4933      call cgnc_gramschmidt(nelem,nvec,vecnm(1,cg_idx),istwf_k,me_g0,comm)
4934    else
4935      gsc_idx = gscindex(1)
4936      call cgpaw_gramschmidt(nelem,nvec,vecnm(1,cg_idx),ovl_vecnm(1,gsc_idx),istwf_k,me_g0,comm)
4937    end if
4938 
4939  else if (ANY(ortalgo==(/0,2/))) then
4940    !  =======================
4941    !  Third (old) algorithm
4942    !  =======================
4943    ! TODO: This algo should be removed. Ref files should be updated though.
4944 
4945    do ivec=1,nvec
4946      ! Normalize each vecnm(n,m) in turn:
4947 
4948      if (useoverlap==1) then ! Using overlap S...
4949        if(istwf_k/=2)then
4950          sum=zero;ii0=1
4951        else
4952          if (me_g0 ==1) then
4953            sum=half*ovl_vecnm(1,1+nelem*(ivec-1)+igsc)*vecnm(1,1+nelem*(ivec-1)+icg)
4954            ii0=2
4955          else
4956            sum=zero;ii0=1
4957          end if
4958        end if
4959 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm)
4960        do ii=ii0+nelem*(ivec-1),nelem*ivec
4961          sum=sum+vecnm(1,ii+icg)*ovl_vecnm(1,ii+igsc)+vecnm(2,ii+icg)*ovl_vecnm(2,ii+igsc)
4962        end do
4963 
4964      else ! Without overlap...
4965        if(istwf_k/=2)then
4966          sum=zero;ii0=1
4967        else
4968          if (me_g0 ==1) then
4969            sum=half*vecnm(1,1+nelem*(ivec-1)+icg)**2
4970            ii0=2
4971          else
4972            sum=zero;ii0=1
4973          end if
4974        end if
4975 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:sum) SHARED(icg,ivec,nelem,vecnm)
4976        do ii=ii0+nelem*(ivec-1)+icg,nelem*ivec+icg
4977          sum=sum+vecnm(1,ii)**2+vecnm(2,ii)**2
4978        end do
4979      end if
4980 
4981      call timab(48,1,tsec)
4982      call xmpi_sum(sum,comm,ierr)
4983      call timab(48,2,tsec)
4984 
4985      if(istwf_k>=2)sum=two*sum
4986      xnorm = sqrt(abs(sum)) ;  sum=1.0_dp/xnorm
4987 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,vecnm)
4988      do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg
4989        vecnm(1,ii)=vecnm(1,ii)*sum
4990        vecnm(2,ii)=vecnm(2,ii)*sum
4991      end do
4992      if (useoverlap==1) then
4993 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,sum,ovl_vecnm)
4994        do ii=1+nelem*(ivec-1)+igsc,nelem*ivec+igsc
4995          ovl_vecnm(1,ii)=ovl_vecnm(1,ii)*sum
4996          ovl_vecnm(2,ii)=ovl_vecnm(2,ii)*sum
4997        end do
4998      end if
4999 
5000 !    Remove projection in all higher states.
5001      if (ivec<nvec) then
5002 
5003        if(istwf_k==1)then
5004 !        Cannot use time-reversal symmetry
5005 
5006          if (useoverlap==1) then ! Using overlap.
5007            do ivec2=ivec+1,nvec
5008 !            First compute scalar product
5009              dotr=zero ; doti=zero
5010              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
5011 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm)
5012              do ii=1,nelem
5013                dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
5014                doti=doti+vecnm(1,ii1+ii)*ovl_vecnm(2,ii2+ii)-vecnm(2,ii1+ii)*ovl_vecnm(1,ii2+ii)
5015              end do
5016 
5017              call timab(48,1,tsec)
5018              buffer2(1)=doti;buffer2(2)=dotr
5019              call xmpi_sum(buffer2,comm,ierr)
5020              call timab(48,2,tsec)
5021              doti=buffer2(1)
5022              dotr=buffer2(2)
5023 
5024 !            Then subtract the appropriate amount of the lower state
5025              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5026 #ifdef FC_INTEL
5027 !            DIR$ ivdep
5028 #endif
5029 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm)
5030              do ii=1,nelem
5031                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii)
5032                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii)
5033              end do
5034 
5035              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
5036              do ii=1,nelem
5037                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)&
5038 &               -dotr*ovl_vecnm(1,ii1+ii)&
5039 &               +doti*ovl_vecnm(2,ii1+ii)
5040                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)&
5041                -doti*ovl_vecnm(1,ii1+ii)&
5042 &               -dotr*ovl_vecnm(2,ii1+ii)
5043              end do
5044            end do
5045          else
5046 !          ----- No overlap -----
5047            do ivec2=ivec+1,nvec
5048 !            First compute scalar product
5049              dotr=zero ; doti=zero
5050              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5051 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:doti,dotr) SHARED(ii1,ii2,nelem,vecnm)
5052              do ii=1,nelem
5053                dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+&
5054 &               vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
5055                doti=doti+vecnm(1,ii1+ii)*vecnm(2,ii2+ii)-&
5056 &               vecnm(2,ii1+ii)*vecnm(1,ii2+ii)
5057              end do
5058 !            Init mpi_comm
5059              buffer2(1)=doti
5060              buffer2(2)=dotr
5061              call timab(48,1,tsec)
5062              call xmpi_sum(buffer2,comm,ierr)
5063 !            call xmpi_sum(doti,spaceComm,ierr)
5064 !            call xmpi_sum(dotr,spaceComm,ierr)
5065              call timab(48,2,tsec)
5066              doti=buffer2(1)
5067              dotr=buffer2(2)
5068 
5069 !            Then subtract the appropriate amount of the lower state
5070 #ifdef FC_INTEL
5071 !            DIR$ ivdep
5072 #endif
5073 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm)
5074              do ii=1,nelem
5075                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+&
5076 &               doti*vecnm(2,ii1+ii)
5077                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-&
5078 &               dotr*vecnm(2,ii1+ii)
5079              end do
5080            end do
5081 
5082          end if  ! Test on useoverlap
5083 
5084        else if(istwf_k==2)then
5085 !        At gamma point use of time-reversal symmetry saves cpu time.
5086 
5087          if (useoverlap==1) then
5088 !          ----- Using overlap -----
5089            do ivec2=ivec+1,nvec
5090 !            First compute scalar product
5091              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
5092              if (me_g0 ==1) then
5093                dotr=half*vecnm(1,ii1+1)*ovl_vecnm(1,ii2+1)
5094 !              Avoid double counting G=0 contribution
5095 !              Imaginary part of vecnm at G=0 should be zero,so only take real part
5096 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5097                do ii=2,nelem
5098                  dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+&
5099 &                 vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
5100                end do
5101              else
5102                dotr=0._dp
5103 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5104                do ii=1,nelem
5105                  dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+&
5106 &                 vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
5107                end do
5108              end if
5109 
5110              dotr=two*dotr
5111 
5112              call timab(48,1,tsec)
5113              call xmpi_sum(dotr,comm,ierr)
5114              call timab(48,2,tsec)
5115 
5116 !            Then subtract the appropriate amount of the lower state
5117              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5118 #ifdef FC_INTEL
5119 !            DIR$ ivdep
5120 #endif
5121 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
5122              do ii=1,nelem
5123                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
5124                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
5125              end do
5126              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
5127              do ii=1,nelem
5128                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii)
5129                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii)
5130              end do
5131            end do
5132          else
5133 !          ----- No overlap -----
5134            do ivec2=ivec+1,nvec
5135 !            First compute scalar product
5136              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5137              if (me_g0 ==1) then
5138 !              Avoid double counting G=0 contribution
5139 !              Imaginary part of vecnm at G=0 should be zero,so only take real part
5140                dotr=half*vecnm(1,ii1+1)*vecnm(1,ii2+1)
5141 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5142                do ii=2,nelem
5143                  dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
5144                end do
5145              else
5146                dotr=0._dp
5147 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5148                do ii=1,nelem
5149                  dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
5150                end do
5151              end if
5152              dotr=two*dotr
5153 
5154              call timab(48,1,tsec)
5155              call xmpi_sum(dotr,comm,ierr)
5156              call timab(48,2,tsec)
5157 
5158 !            Then subtract the appropriate amount of the lower state
5159 #ifdef FC_INTEL
5160 !            DIR$ ivdep
5161 #endif
5162 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
5163              do ii=1,nelem
5164                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
5165                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
5166              end do
5167            end do
5168          end if  ! Test on useoverlap
5169 
5170        else
5171 !        At other special points,use of time-reversal symmetry saves cpu time.
5172 
5173          if (useoverlap==1) then
5174 !          ----- Using overlap -----
5175            do ivec2=ivec+1,nvec
5176 !            First compute scalar product
5177              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+igsc
5178 !            Avoid double counting G=0 contribution
5179 !            Imaginary part of vecnm at G=0 should be zero,so only take real part
5180              dotr=zero
5181 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5182              do ii=1,nelem
5183                dotr=dotr+vecnm(1,ii1+ii)*ovl_vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*ovl_vecnm(2,ii2+ii)
5184              end do
5185              dotr=two*dotr
5186 
5187              call timab(48,1,tsec)
5188              call xmpi_sum(dotr,comm,ierr)
5189              call timab(48,2,tsec)
5190 
5191 !            Then subtract the appropriate amount of the lower state
5192              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5193 #ifdef FC_INTEL
5194 !            DIR$ ivdep
5195 #endif
5196 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
5197              do ii=1,nelem
5198                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
5199                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
5200              end do
5201              ii1=nelem*(ivec-1)+igsc;ii2=nelem*(ivec2-1)+igsc
5202              do ii=1,nelem
5203                ovl_vecnm(1,ii2+ii)=ovl_vecnm(1,ii2+ii)-dotr*ovl_vecnm(1,ii1+ii)
5204                ovl_vecnm(2,ii2+ii)=ovl_vecnm(2,ii2+ii)-dotr*ovl_vecnm(2,ii1+ii)
5205              end do
5206            end do
5207          else
5208 !          ----- No overlap -----
5209            do ivec2=ivec+1,nvec
5210 !            First compute scalar product
5211              ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5212 !            Avoid double counting G=0 contribution
5213 !            Imaginary part of vecnm at G=0 should be zero,so only take real part
5214              dotr=zero
5215 !$OMP PARALLEL DO PRIVATE(ii) REDUCTION(+:dotr) SHARED(ii1,ii2,nelem,vecnm)
5216              do ii=1,nelem
5217                dotr=dotr+vecnm(1,ii1+ii)*vecnm(1,ii2+ii)+vecnm(2,ii1+ii)*vecnm(2,ii2+ii)
5218              end do
5219              dotr=two*dotr
5220 
5221              call timab(48,1,tsec)
5222              call xmpi_sum(dotr,comm,ierr)
5223              call timab(48,2,tsec)
5224 
5225 !            Then subtract the appropriate amount of the lower state
5226 !$OMP PARALLEL DO PRIVATE(ii) SHARED(dotr,ii1,ii2,nelem,vecnm)
5227              do ii=1,nelem
5228                vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)
5229                vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-dotr*vecnm(2,ii1+ii)
5230              end do
5231            end do
5232          end if
5233 
5234        end if ! End use of time-reversal symmetry
5235      end if  ! Test on "ivec"
5236    end do ! end loop over vectors (or bands) with index ivec :
5237 
5238  else
5239    ABI_ERROR(sjoin("Wrong value for ortalgo:", itoa(ortalgo)))
5240  end if
5241 
5242  !call cwtime_report(sjoin(" pw_orthon with ortalgo: ", itoa(ortalgo)), cpu, wall, gflops)
5243 
5244 end subroutine pw_orthon

m_cgtools/pw_orthon_paw [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 pw_orthon_paw

FUNCTION

 Normalize nvec complex vectors each of length nelem and then orthogonalize by modified Gram-Schmidt.
 The overlap matrix <c_m|S|c_n> (S can be identity) has to be provided as input, and is overwritten.

INPUTS

  icg=shift to be given to the location of the data in cg(=vecnm)
  mcg=maximum size of second dimension of cg(=vecnm)
  nelem=number of complex elements in each vector
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  nvec=number of vectors to be orthonormalized
  ortalgo= option for the choice of the algorithm
         -1: no orthogonalization (direct return)
          0: do orthogonalization
  comm=MPI communicator

SIDE EFFECTS

  cprj(optional)=<p_i|c_n> coefficients, updated to keep them consistent with the WF at output
  ovl_mat=overlap matrix <c_m|S|c_n> for m<=n
  vecnm= input: vectors to be orthonormalized; array of nvec column
                vectors,each of length nelem,shifted by icg
                This array is complex or else real(dp) of twice length
         output: orthonormalized set of vectors

NOTES

 Note that each vector has an arbitrary phase which is not fixed in this routine.

SOURCE

5279 subroutine pw_orthon_cprj(icg,mcg,nelem,nspinor,nvec,ortalgo,ovl_mat,vecnm,cprj)
5280 
5281  use m_abi_linalg
5282 
5283 !Arguments ------------------------------------
5284 !scalars
5285  integer,intent(in) :: icg,mcg,nelem,nspinor,nvec,ortalgo
5286 !arrays
5287  real(dp),intent(inout) :: ovl_mat(nvec*(nvec+1)),vecnm(2,mcg)
5288  type(pawcprj_type),intent(inout),optional,target :: cprj(:,:)
5289 
5290 !Local variables-------------------------------
5291 !scalars
5292  logical :: do_cprj
5293  integer :: ii,ii1,ii2,ivec,ivec2,ivec3,iv1l,iv2l,iv3l,iv1r,iv2r,iv3r,ncprj
5294  real(dp) :: doti,dotr,summ,xnorm
5295 !arrays
5296  real(dp) :: ovl_row_tmp(2*nvec),ovl_col_tmp(2*nvec)
5297  real(dp) :: re,im
5298 
5299 ! *************************************************************************
5300 
5301 !Nothing to do if ortalgo=-1
5302  if(ortalgo==-1) return
5303 
5304  do_cprj=.false.
5305  if (present(cprj)) then
5306    do_cprj=.true.
5307    ncprj = size(cprj,2)
5308    if (ncprj/=nspinor*nvec) then
5309      ABI_ERROR('bad size for cprj')
5310    end if
5311  end if
5312 
5313  ! The overlap matrix is : ovl(i,j) = <psi_i|S|psi_j> = (<psi_j|S|psi_i>)^*
5314  ! The row index stands for the "left"  band index
5315  ! The column index stands for the "right" band index
5316  ! Only the upper triangular part of the (complex) overlap matrix is stored, so only elements with i<=j.
5317  ! They are stored in the following order: ovl(1,1),ovl(1,2),ovl(2,2),ovl(1,3),ovl(2,3),...
5318  ! so:
5319  ! -- shift for the ith row    : 2.(i.(i-1)/2) = i.(i-1)
5320  ! -- shift for the ith column : 2.(i-1)+1 = 2.i-1
5321  ! => index of real part of elem in the jth column and ith row (=ovl(i,j)) : 2.i-1+j.(j-1) (for i<=j)
5322  ! => index of imaginary part = index of real part + 1
5323  ! After orthogonalizing the first n vectors, we have:
5324  ! for i<=n, i<=j : ovl(i,j) = delta_ij
5325 
5326  do ivec=1,nvec
5327 
5328    ! First we normalize the current vector
5329    iv1r = ivec*(ivec-1) ! ith row
5330    iv1l = 2*ivec-1      ! ith column
5331    ! ovl(i1,i1) = <psi_i1|S|psi_i1>
5332    summ = ovl_mat(iv1r+iv1l)
5333    xnorm = sqrt(abs(summ)) ;  summ=1.0_dp/xnorm
5334 !$OMP PARALLEL DO PRIVATE(ii) SHARED(icg,ivec,nelem,summ,vecnm)
5335    do ii=1+nelem*(ivec-1)+icg,nelem*ivec+icg
5336      vecnm(1,ii)=vecnm(1,ii)*summ
5337      vecnm(2,ii)=vecnm(2,ii)*summ
5338    end do
5339 !  Apply the normalization to cprj coeffs
5340    if (do_cprj) call pawcprj_axpby(zero,summ,cprj(:,nspinor*(ivec-1)+1:nspinor*ivec),cprj(:,nspinor*(ivec-1)+1:nspinor*ivec))
5341 
5342    ! As the norm of |psi_i1> changed, we update the overlap matrix accordingly.
5343    ! From previous iterations, we already have:
5344    ! ovl(i2,i1) = <psi_i2|S|psi_i1> = 0 for i2<i1
5345    ! so we need to change only:
5346    ! ovl(i1,i2) = <psi_i1|S|psi_i2> for i1<=i2
5347    do ivec2=ivec,nvec
5348      iv2r=ivec2*(ivec2-1)
5349      if (ivec<ivec2) then
5350        ovl_mat(iv2r+iv1l  ) = ovl_mat(iv2r+iv1l  )*summ
5351        ovl_mat(iv2r+iv1l+1) = ovl_mat(iv2r+iv1l+1)*summ
5352      else if (ivec==ivec2) then
5353        ovl_mat(iv2r+iv1l  ) = ovl_mat(iv2r+iv1l  )*summ*summ
5354        ovl_mat(iv2r+iv1l+1) = ovl_mat(iv2r+iv1l+1)*summ*summ
5355        re = ovl_mat(iv2r+iv1l  )
5356        im = ovl_mat(iv2r+iv1l+1)
5357        if (abs(re-1)>tol10.or.abs(im)>tol10) then
5358          write(std_out,'(a,es21.10e3)') '(pw_ortho) ovl (re)',re
5359          write(std_out,'(a,es21.10e3)') '(pw_ortho) ovl (im)',im
5360          ABI_WARNING('In pw_orthon_cprj : the result should be equal to one!')
5361        end if
5362      end if
5363    end do
5364 
5365 !  Remove projection in all higher states.
5366    if (ivec<nvec) then
5367 
5368      do ivec2=ivec+1,nvec
5369 
5370        iv2r = ivec2*(ivec2-1)
5371        iv2l = 2*ivec2-1
5372        ! (dotr,doti) = <psi_i1|S|psi_i2>
5373        dotr = ovl_mat(iv2r+iv1l  )
5374        doti = ovl_mat(iv2r+iv1l+1)
5375 
5376 !      Then subtract the appropriate amount of the lower state
5377        ii1=nelem*(ivec-1)+icg;ii2=nelem*(ivec2-1)+icg
5378        ! |psi'_i2> = |psi_i2> - <psi_i1|S|psi_i2> |psi_i1>
5379 !$OMP PARALLEL DO PRIVATE(ii) SHARED(doti,dotr,ii1,ii2,nelem,vecnm)
5380        do ii=1,nelem
5381          vecnm(1,ii2+ii)=vecnm(1,ii2+ii)-dotr*vecnm(1,ii1+ii)+doti*vecnm(2,ii1+ii)
5382          vecnm(2,ii2+ii)=vecnm(2,ii2+ii)-doti*vecnm(1,ii1+ii)-dotr*vecnm(2,ii1+ii)
5383        end do
5384        if (do_cprj) call pawcprj_zaxpby((/-dotr,-doti/),(/one,zero/),cprj(:,nspinor*(ivec-1)+1:nspinor*ivec),&
5385 &                                                                    cprj(:,nspinor*(ivec2-1)+1:nspinor*ivec2))
5386        ! As |psi_i2> changed, we update the overlap matrix accordingly.
5387        ! We have: <psi'_i3|S|psi'_i2> = <psi'_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi'_i3|S|psi_i1>
5388        ! Remember that i2>i1.
5389        ! For i3<=i2, we compute the new column i2.
5390        ! For i3<i1:
5391        ! (1) <psi'_i3|S|psi'_i2> = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i3|S|psi_i1>
5392        !                         = <psi_i3|S|psi_i2>
5393        ! as for i3<i1 we have <psi_i3|S|psi_i1> = 0
5394        ! For i1<=i3<i2:
5395        ! (2) <psi'_i3|S|psi'_i2> = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i3|S|psi_i1>
5396        !                         = <psi_i3|S|psi_i2> - <psi_i1|S|psi_i2> (<psi_i1|S|psi_i3>)^*
5397        ! For i3=i2:
5398        ! (3) <psi'_i3|S|psi'_i2> =  <psi'_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi'_i2|S|psi_i1>
5399        !                         =   <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1>
5400        !                           - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1> + <psi_i1|S|psi_i2> <psi_1|S|psi_1> <psi_i2|S|psi_i1>
5401        !                         =   <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> <psi_i2|S|psi_i1>
5402        !                         =   <psi_i2|S|psi_i2> - <psi_i1|S|psi_i2> (<psi_i1|S|psi_i2>)^*
5403        ! so the case i3=i2 (3) is equivalent to the case i1<=i3<i2 (2) with i3=i2.
5404        ! Here we compute (2) and (3) in a temporary array:
5405        do ivec3=ivec,ivec2
5406          iv3r=ivec3*(ivec3-1)
5407          iv3l=2*ivec3-1
5408          ovl_col_tmp(iv3l  ) = ovl_mat(iv2r+iv3l  ) - dotr*ovl_mat(iv3r+iv1l) - doti*ovl_mat(iv3r+iv1l+1)
5409          ovl_col_tmp(iv3l+1) = ovl_mat(iv2r+iv3l+1) - doti*ovl_mat(iv3r+iv1l) + dotr*ovl_mat(iv3r+iv1l+1)
5410        end do
5411        ! For i2<i3, we compute the new row i2.
5412        ! (4) <psi'_i2|S|psi_i3> = <psi_i2|S|psi_i3> - <psi_i2|S|psi_i1> <psi_i1|S|psi_i3>
5413        !                        = <psi_i2|S|psi_i3> - (<psi_i1|S|psi_i2>)^* <psi_i1|S|psi_i3>
5414        ! Here we compute (4) in a temporary array:
5415        do ivec3=ivec2+1,nvec
5416          iv3r=ivec3*(ivec3-1)
5417          iv3l=2*ivec3-1
5418          ovl_row_tmp(iv3l  ) = ovl_mat(iv3r+iv2l  ) - dotr*ovl_mat(iv3r+iv1l) - doti*ovl_mat(iv3r+iv1l+1)
5419          ovl_row_tmp(iv3l+1) = ovl_mat(iv3r+iv2l+1) + doti*ovl_mat(iv3r+iv1l) - dotr*ovl_mat(iv3r+iv1l+1)
5420        end do
5421        ! We update the column i2 (starting from ivec and not 1, thanks to (1))
5422        do ivec3=ivec,ivec2
5423          iv3l=2*ivec3-1
5424          ovl_mat(iv2r+iv3l  ) = ovl_col_tmp(iv3l  )
5425          ovl_mat(iv2r+iv3l+1) = ovl_col_tmp(iv3l+1)
5426        end do
5427        ! We update the row i2
5428        do ivec3=ivec2+1,nvec
5429          iv3r=ivec3*(ivec3-1)
5430          iv3l=2*ivec3-1
5431          ovl_mat(iv3r+iv2l  ) = ovl_row_tmp(iv3l  )
5432          ovl_mat(iv3r+iv2l+1) = ovl_row_tmp(iv3l+1)
5433        end do
5434      end do
5435 
5436    end if  ! Test on "ivec"
5437 
5438 !end loop over vectors (or bands) with index ivec :
5439  end do
5440 
5441 end subroutine pw_orthon_cprj

m_cgtools/set_istwfk [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

  set_istwfk

FUNCTION

  Returns the value of istwfk associated to the input k-point.

INPUTS

  kpoint(3)=The k-point in reduced coordinates.

OUTPUT

  istwfk= Integer flag internally used in the code to define the storage mode of the wavefunctions.
  It also define the algorithm used to apply an operator in reciprocal space as well as the FFT
  algorithm used to go from G- to r-space and vice versa.

   1 => time-reversal cannot be used
   2 => use time-reversal at the Gamma point.
   3 => use time-reversal symmetry for k=(1/2, 0 , 0 )
   4 => use time-reversal symmetry for k=( 0 , 0 ,1/2)
   5 => use time-reversal symmetry for k=(1/2, 0 ,1/2)
   6 => use time-reversal symmetry for k=( 0 ,1/2, 0 )
   7 => use time-reversal symmetry for k=(1/2,1/2, 0 )
   8 => use time-reversal symmetry for k=( 0 ,1/2,1/2)
   9 => use time-reversal symmetry for k=(1/2,1/2,1/2)

  Useful relations:
   u_k(G) = u_{k+G0}(G-G0); u_{-k}(-G) = u_k(G)^*
  and therefore:
   u_{G0/2}(G) = u_{G0/2}(-G-G0)^*.

SOURCE

902 integer pure function set_istwfk(kpoint) result(istwfk)
903 
904 !Arguments ------------------------------------
905  real(dp),intent(in) :: kpoint(3)
906 
907 !Local variables-------------------------------
908 !scalars
909  integer :: bit0,ii
910 !arrays
911  integer :: bit(3)
912 
913 ! *************************************************************************
914 
915  bit0=1
916 
917  do ii=1,3
918    if (DABS(kpoint(ii))<tol10) then
919      bit(ii)=0
920    else if (DABS(kpoint(ii)-half)<tol10 ) then
921      bit(ii)=1
922    else
923      bit0=0
924    end if
925  end do
926 
927  if (bit0==0) then
928    istwfk=1
929  else
930    istwfk=2+bit(1)+4*bit(2)+2*bit(3) ! Note the inversion between bit(2) and bit(3)
931  end if
932 
933 end function set_istwfk

m_cgtools/sqnorm_g [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 sqnorm_g

FUNCTION

 Compute the square of the norm of one complex vector vecti, in reciprocal space
 Take into account the storage mode of the vector (istwf_k)

INPUTS

  istwf_k=option parameter that describes the storage of wfs
  npwsp= (effective) number of planewaves at this k point.
  vect(2,npwsp)=the vector in reciprocal space (npw*nspinor, usually)
  me_g0=1 if this processors treats G=0, 0 otherwise.
  comm=MPI communicator for MPI sum.

OUTPUT

  dotr= <vect|vect>

SOURCE

956 subroutine sqnorm_g(dotr, istwf_k, npwsp, vect, me_g0, comm)
957 
958 !Arguments ------------------------------------
959 !scalars
960  integer,intent(in) :: istwf_k,npwsp,me_g0,comm
961  real(dp),intent(out) :: dotr
962 !arrays
963  real(dp),intent(in) :: vect(2,npwsp)
964 
965 !Local variables-------------------------------
966 !scalars
967  integer :: ierr
968 
969 ! *************************************************************************
970 
971  if (istwf_k==1) then ! General k-point
972    !dotr = cg_real_zdotc(npwsp,vect,vect)
973    dotr = cg_dznrm2(npwsp, vect)
974    dotr = dotr * dotr
975 
976  else
977    if (istwf_k == 2 .and. me_g0 == 1) then
978      ! Gamma k-point and I have G=0
979      dotr=half*vect(1,1)**2
980      dotr = dotr + cg_real_zdotc(npwsp-1, vect(1,2), vect(1,2))
981    else
982      ! Other TR k-points
983      dotr = cg_real_zdotc(npwsp, vect, vect)
984    end if
985    dotr=two*dotr
986  end if
987 
988  if (xmpi_comm_size(comm)>1) call xmpi_sum(dotr,comm,ierr)
989 
990 end subroutine sqnorm_g

m_cgtools/sqnorm_v [ Functions ]

[ Top ] [ m_cgtools ] [ Functions ]

NAME

 sqnorm_v

FUNCTION

 Compute square of the norm of a potential (integral over FFT grid), to obtain
 a square residual-like quantity (so the sum of product of values
 is NOT divided by the number of FFT points, and NOT multiplied by the primitive cell volume).
 Take into account the spin components of the potentials (nspden),
 and sum over them.

INPUTS

  cplex=if 1, real space function on FFT grid is REAL, if 2, COMPLEX
  nfft= (effective) number of FFT grid points (for this processor)
  nspden=number of spin-density components
  opt_storage: 0, if potential is stored as V^up-up, V^dn-dn, Re[V^up-dn], Im[V^up-dn]
               1, if potential is stored as V, B_x, B_y, Bz  (B=magn. field)
  pot(cplex*nfft,nspden)=real space potential on FFT grid

OUTPUT

  norm2= value of the square of the norm

SOURCE

1599 subroutine sqnorm_v(cplex,nfft,norm2,nspden,opt_storage,pot,mpi_comm_sphgrid)
1600 
1601 !Arguments ------------------------------------
1602 !scalars
1603  integer,intent(in) :: cplex,nfft,nspden,opt_storage
1604  integer,intent(in),optional :: mpi_comm_sphgrid
1605  real(dp),intent(out) :: norm2
1606 !arrays
1607  real(dp),intent(in) :: pot(cplex*nfft,nspden)
1608 
1609 !Local variables-------------------------------
1610 !scalars
1611  integer :: ierr,ifft,ispden,nproc_sphgrid
1612  real(dp) :: ar
1613 
1614 ! *************************************************************************
1615 
1616 !Real or complex inputs are coded
1617 
1618  norm2=zero
1619  do ispden=1,min(nspden,2)
1620 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(cplex,ispden,nfft,pot) REDUCTION(+:norm2)
1621    do ifft=1,cplex*nfft
1622      norm2=norm2 + pot(ifft,ispden)**2
1623    end do
1624  end do
1625  if (nspden==4) then
1626    ar=zero
1627    do ispden=3,4
1628 !$OMP PARALLEL DO PRIVATE(ifft) SHARED(cplex,ispden,nfft,pot) REDUCTION(+:ar)
1629      do ifft=1,cplex*nfft
1630        ar=ar + pot(ifft,ispden)**2
1631      end do
1632    end do
1633    if (opt_storage==0) then
1634      if (cplex==1) then
1635        norm2=norm2+two*ar
1636      else
1637        norm2=norm2+ar
1638      end if
1639    else
1640      norm2=half*(norm2+ar)
1641    end if
1642  end if
1643 
1644 !MPIWF reduction (addition) on norm2 is needed here
1645  if(present(mpi_comm_sphgrid)) then
1646    nproc_sphgrid=xmpi_comm_size(mpi_comm_sphgrid)
1647    if(nproc_sphgrid>1)then
1648      call xmpi_sum(norm2,mpi_comm_sphgrid,ierr)
1649    end if
1650  end if
1651 
1652 end subroutine sqnorm_v