TABLE OF CONTENTS


ABINIT/m_opernla_ylm_mv [ Modules ]

[ Top ] [ Modules ]

NAME

  m_opernla_ylm_mv

FUNCTION

COPYRIGHT

  Copyright (C) 2021-2024 ABINIT group (LB,MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

15 #if defined HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "abi_common.h"
20 
21 module m_opernla_ylm_mv
22 
23  use defs_basis
24  use m_abicore
25  use m_errors
26  use m_xmpi
27 #if defined HAVE_OPENMP
28  use OMP_LIB
29 #endif
30 
31  use defs_abitypes, only : MPI_type
32  use m_time,        only : timab
33 
34  implicit none
35 
36  private

ABINIT/opernla_ylm_mv [ Functions ]

[ Top ] [ Functions ]

NAME

 opernla_ylm_mv

FUNCTION

 "matrix-vector" alternative implementation of "opernla_ylm".

 For a given wave-function |c>, get all projected scalars
 <p_lmn|c> where |p_lmn> are non-local projectors
   With:
   <p_lmn|c>=4pi/sqrt(vol) (i)^l Sum_g[c(g).f_nl(g).Y_lm(g).exp(2pi.i.g.R)]

 Here this is done in 3 steps:
 (1) compute for every g   : scal(g)   = c(g).exp(2pi.i.g.R)
 (2) compute for every lmn : scal(lmn) = Sum_g[scal(g).f_nl(g).Y_lm(g)]
 (3) compute for every lmn : <p_lmn|c> = 4pi/sqrt(vol).(i)^l.scal(lmn)

 Step (2) is a real-matrix/complex-vector multiplication, here two options are possible:
 - case nloalg(1)=2 : compute the real and imaginary parts separately using two calls of DGMEV
 - case nloalg(1)=3 : in order to read the matrix only once, we compute both real and imaginary parts at the same time "by hand"

 Depending on the achitecture and the available blas library, one option could be more interesting than an other...

INPUTS

  choice=chooses possible output:
         if choice>=0: compute projected scalars
         if choice<0: same as choice>0 but use already computed projected scalars
  cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1)
        2 if <p_lmn|c> scalars are complex
  dimffnl=second dimension of ffnl
  ffnl(npw,dimffnl,nlmn)= nonlocal quantities containing nonlocal form factors
  ia3=gives the number of the first atom in the subset presently treated
  indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn
  istwf_k=option parameter that describes the storage of wfs
  matblk=dimension of the array ph3d
  mpi_enreg=information about MPI parallelization
  nincat=number of atoms in the subset here treated
  nlmn=number of (l,m,n) numbers for current type of atom
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  npw=number of plane waves in reciprocal space
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  ph3d(2,npw,matblk)=three-dimensional phase factors
  ucvol=unit cell volume (bohr^3)
  vect(2,npw*my_nspinor)=starting vector in reciprocal space

OUTPUT

  gx(cplex,nlmn,nincat,nspinor)= projected scalars

SIDE EFFECTS

NOTES

 1-Not available yet for openMP
 2-Operate for one type of atom, and within this given type of atom,
   for a subset of at most nincat atoms.
 3-projector derivatives (abs(choice)>1) are not implemented yet

SOURCE

106 subroutine opernla_ylm_mv(choice,cplex,dimffnl,ffnl,gx,&
107 &       ia3,indlmn,istwf_k,matblk,mpi_enreg,nincat,nlmn,&
108 &       nloalg,npw,nspinor,ph3d,ucvol,vect)
109 
110 !Arguments ------------------------------------
111 !scalars
112  integer,intent(in) :: choice,cplex,dimffnl,ia3,istwf_k,matblk
113  integer,intent(in) :: nincat,nlmn,npw,nspinor
114  real(dp),intent(in) :: ucvol
115  type(MPI_type),intent(in) :: mpi_enreg
116 !arrays
117  integer,intent(in) :: indlmn(6,nlmn),nloalg(3)
118  real(dp),intent(in),target :: ffnl(npw,dimffnl,nlmn)
119  real(dp),intent(in) :: ph3d(2,npw,matblk)
120  real(dp),intent(in) :: vect(:,:)
121  real(dp),intent(out) :: gx(cplex,nlmn,nincat,nspinor)
122 
123 !Local variables-------------------------------
124 !scalars
125  logical :: use_dgemv
126  integer :: ia,iaph3d,ierr,il,ilmn,ipw,ipw0,ipwshft,ispinor,jpw
127  real(dp) :: wt
128 !arrays
129  real(dp) :: buffer_r,buffer_i,tsec(2)
130  real(dp),pointer :: ffnl_loc(:,:)
131  real(dp),allocatable :: scali(:),scalr(:)
132  real(dp),allocatable :: scalr_lmn(:),scali_lmn(:)
133  complex(dp) :: ctmp,cil(4)
134 ! *************************************************************************
135 
136  if (choice==-1) return
137 
138 !Some checks
139  if (abs(choice)>1) then
140    ABI_ERROR('Only abs(choice)<=1 is available for now.')
141  end if
142  if (nloalg(1)<2.or.nloalg(1)>10) then
143    ABI_ERROR('nloalg(1) should be between 2 and 10.')
144  end if
145 ! nthreads=1
146 !#if defined HAVE_OPENMP
147 ! nthreads=OMP_GET_NUM_THREADS()
148 !#endif
149 ! if (nthreads>1) then
150 !   ABI_ERROR('Only nthreads=1 is available for now.')
151 ! end if
152 
153  use_dgemv = nloalg(1)==2.or.nloalg(1)==5.or.nloalg(1)==7
154  if (choice>=0.or.abs(choice)>1) then
155    if (use_dgemv) then
156      if(opernla_mv_dgemv_counter>=0) opernla_mv_dgemv_counter = opernla_mv_dgemv_counter + 1
157    else
158      if(opernla_mv_counter>=0) opernla_mv_counter = opernla_mv_counter + 1
159    end if
160  end if
161 
162 !Useful variables
163  wt=four_pi/sqrt(ucvol);if (cplex==1) wt=2.d0*wt
164  ipw0=1;if (istwf_k==2.and.mpi_enreg%me_g0_fft==1) ipw0=2
165 
166 !Allocate work space
167  ABI_MALLOC(scalr,(npw))
168  ABI_MALLOC(scali,(npw))
169  ABI_MALLOC(scalr_lmn,(nlmn))
170  ABI_MALLOC(scali_lmn,(nlmn))
171 
172  ffnl_loc => ffnl(:,1,:)
173 
174 ! i^l
175  cil(1) = ( 1.0_DP, 0.0_DP) * wt
176  cil(2) = ( 0.0_DP, 1.0_DP) * wt
177  cil(3) = (-1.0_DP, 0.0_DP) * wt
178  cil(4) = ( 0.0_DP,-1.0_DP) * wt
179 
180 !$OMP PARALLEL PRIVATE(il,ilmn,ipw,jpw), &
181 !$OMP PRIVATE(ispinor,ipwshft,ia,iaph3d)
182 
183 !Loop on spinorial components
184  do ispinor =1,nspinor
185    ipwshft=(ispinor-1)*npw
186 
187 !  Loop on atoms (blocking)
188    do ia=1,nincat
189      iaph3d=ia;if (nloalg(2)>0) iaph3d=ia+ia3-1
190 !    Step (1) : Compute scal(g) = c(g).exp(2pi.i.g.R)
191 !$OMP DO
192      do ipw=ipw0,npw
193        jpw=ipw+ipwshft
194        scalr(ipw)=(vect(1,jpw)*ph3d(1,ipw,iaph3d)-vect(2,jpw)*ph3d(2,ipw,iaph3d))
195        scali(ipw)=(vect(2,jpw)*ph3d(1,ipw,iaph3d)+vect(1,jpw)*ph3d(2,ipw,iaph3d))
196      end do
197 !$OMP END DO
198 
199 !$OMP SINGLE
200      if (ipw0==2) then
201        scalr(1)=half*vect(1,1+ipwshft)*ph3d(1,1,iaph3d)
202        scali(1)=half*vect(1,1+ipwshft)*ph3d(2,1,iaph3d)
203      end if
204 !$OMP END SINGLE
205 
206 !    --------------------------------------------------------------------
207 !    ALL CHOICES:
208 !    Accumulate Gx
209 !    --------------------------------------------------------------------
210 
211      if (choice>=0) then
212 
213 !      Step (2) : Compute scal(lmn) = Sum_g[scal(g).f_nl(g).Y_lm(g)]
214        if (use_dgemv) then
215          call DGEMV('T',npw,nlmn,1.0_DP,ffnl_loc,npw,scalr,1,0.0_DP,scalr_lmn,1)
216          call DGEMV('T',npw,nlmn,1.0_DP,ffnl_loc,npw,scali,1,0.0_DP,scali_lmn,1)
217        else
218          do ilmn=1,nlmn
219 !           do ipw=1,npw
220 !             scalr_lmn(ilmn) = scalr_lmn(ilmn) + scalr(ipw) * ffnl_loc(ipw,ilmn)
221 !             scali_lmn(ilmn) = scali_lmn(ilmn) + scali(ipw) * ffnl_loc(ipw,ilmn)
222 !           end do
223 !$OMP SINGLE
224            buffer_r = 0.0_DP
225            buffer_i = 0.0_DP
226 !$OMP END SINGLE
227 !$OMP DO REDUCTION(+:buffer_r,buffer_i)
228            do ipw=1,npw
229              buffer_r = buffer_r + scalr(ipw) * ffnl_loc(ipw,ilmn)
230              buffer_i = buffer_i + scali(ipw) * ffnl_loc(ipw,ilmn)
231            end do
232 !$OMP END DO
233 !$OMP SINGLE
234            scalr_lmn(ilmn) = buffer_r
235            scali_lmn(ilmn) = buffer_i
236 !$OMP END SINGLE
237          end do
238        end if
239 !      Step (3) : Compute gx(lmn) = 4pi/sqrt(vol) (i)^l scal(lmn)
240 !$OMP SINGLE
241        if (cplex==2) then
242          do ilmn=1,nlmn
243            il=mod(indlmn(1,ilmn),4)+1
244            ctmp = cil(il) * cmplx(scalr_lmn(ilmn),scali_lmn(ilmn),kind=DP)
245            gx(1,ilmn,ia,ispinor) = real(ctmp)
246            gx(2,ilmn,ia,ispinor) = aimag(ctmp)
247          end do
248        else
249          do ilmn=1,nlmn
250            il=mod(indlmn(1,ilmn),4)+1
251            ctmp = cil(il) * cmplx(scalr_lmn(ilmn),scali_lmn(ilmn),kind=DP)
252            gx(1,ilmn,ia,ispinor) = real(ctmp)
253          end do
254        end if
255 !$OMP END SINGLE
256 
257      end if
258 
259    end do ! End loop on atoms
260 
261  end do !  End loop on spinorial components
262 !$OMP END PARALLEL
263 
264 !Deallocate temporary space
265  ABI_FREE(scalr)
266  ABI_FREE(scali)
267  ABI_FREE(scalr_lmn)
268  ABI_FREE(scali_lmn)
269 
270 !Has to reduce arrays in case of FFT parallelization
271  if (mpi_enreg%nproc_fft>1) then
272    call timab(48,1,tsec)
273    if (choice>=0) then
274      call xmpi_sum(gx,mpi_enreg%comm_fft,ierr)
275    end if
276    call timab(48,2,tsec)
277  end if
278 
279 end subroutine opernla_ylm_mv