TABLE OF CONTENTS


ABINIT/m_opernlb_ylm_mv [ Modules ]

[ Top ] [ Modules ]

NAME

  m_opernlb_ylm_mv

FUNCTION

COPYRIGHT

  Copyright (C) 2021-2024 ABINIT group (LB,MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

15 #if defined HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "abi_common.h"
20 
21 module m_opernlb_ylm_mv
22 
23  use defs_basis
24  use m_abicore
25  use m_errors
26 #if defined HAVE_OPENMP
27  use OMP_LIB
28 #endif
29 
30  implicit none
31 
32  private

ABINIT/opernlb_ylm_mv [ Functions ]

[ Top ] [ Functions ]

NAME

 opernlb_ylm_mv

FUNCTION

 "matrix-vector" alternative implementation of "opernlrb_ylm".

 * Operate with the non-local part of the hamiltonian,
   from projected scalars to reciprocal space.
 * Operate with the non-local projectors and the overlap matrix,
   from projected scalars to reciprocal space.

   The input is gxfac (gxfac_sij):
   gxfac(lmn) = Sum_l'm'n' D_l'm'n'.<p_l'm'n|c> (or S_l'm'n' for gxfac_sij)
   and here we compute :
   Sum_lmn <g|p_lmn> gxfac(lmn) = 4pi/sqrt(vol) exp(-2pi.i.g.R) Sum_lmn (-i)^l f_nl(g).Y_lm(g) gxfac(lmn)
   Here this is done in 3 steps:
   (1) compute for every lmn : gxfac_(lmn) = 4pi/sqrt(vol).(-i)^l.gxfac(lmn)
   (2) compute for every g   : scal(g)     = Sum_lmn f_nl(g).Y_lm(g).gxfac_(lmn)
   (3) compute for every g   : vect(g)     = exp(-2pi.i.g.R).scal(g)

   Step (2) is a real-matrix/complex-vector multiplication, here two options are possible:
   - case nloalg(1)=2 : compute the real and imaginary parts separately using two calls of DGMEV
   - case nloalg(1)=3 : in order to read the matrix only once, we compute both real and imaginary parts at the same time "by hand"

   Depending on the achitecture and the available blas library, one option could be more interesting than an other...

INPUTS

  choice=chooses possible output (see below)
  cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1)
        2 if <p_lmn|c> scalars are complex
  cplex_fac=1 if gxfac scalars are real, 2 if gxfac scalars are complex
  dimffnl=second dimension of ffnl
  ffnl(npw,dimffnl,nlmn)= nonlocal quantities containing nonlocal form factors
  gxfac(cplex_fac,nlmn,nincat,nspinor)= reduced projected scalars related to Vnl (NL operator)
  gxfac_sij(cplex,nlmn,nincat,nspinor*(paw_opt/3))= reduced projected scalars related to Sij (overlap)
  ia3=gives the number of the first atom in the subset presently treated
  indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn
  matblk=dimension of the array ph3d
  nincat=number of atoms in the subset here treated
  nlmn=number of (l,m,n) numbers for current type of atom
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  npw=number of plane waves in reciprocal space
  nspinor=number of spinorial components of the wavefunctions (on current proc)
  paw_opt= define the nonlocal operator concerned with:
           paw_opt=0 : Norm-conserving Vnl (use of Kleinman-Bylander ener.)
           paw_opt=1 : PAW nonlocal part of H (use of Dij coeffs)
           paw_opt=2 : PAW: (Vnl-lambda.Sij) (Sij=overlap matrix)
           paw_opt=3 : PAW overlap matrix (Sij)
           paw_opt=4 : both PAW nonlocal part of H (Dij) and overlap matrix (Sij)
  ph3d(2,npw,matblk)=three-dimensional phase factors
  ucvol=unit cell volume (bohr^3)

OUTPUT

  (see side effects)

SIDE EFFECTS

 --if (paw_opt=0)
    vectout(2,npwout*my_nspinor*ndat)=result of the aplication of the concerned operator
                or one of its derivatives to the input vect.
 --if (paw_opt=0, 1 or 4)
    vect(2,npwout*nspinor)=result of the aplication of the concerned operator
                or one of its derivatives to the input vect.:
      if (choice=1)  <G|V_nonlocal|vect_in>
  if (paw_opt=2)
    vect(2,npwout*nspinor)=final vector in reciprocal space:
      if (choice=1)  <G|V_nonlocal-lamdba.(I+S)|vect_in> (note: not including <G|I|c>)
 --if (paw_opt=3 or 4)
    svect(2,npwout*nspinor)=result of the aplication of Sij (overlap matrix)
                  or one of its derivatives to the input vect.:
      if (choice=1)  <G|I+S|vect_in> (note: not including <G|I|c>)

NOTES

 1-No openMP available for now
 2-Operate for one type of atom, and within this given type of atom,
   for a subset of at most nincat atoms.
 3-projector derivatives (abs(choice)>1) are not implemented yet

SOURCE

123 subroutine opernlb_ylm_mv(choice,cplex,cplex_fac,&
124 &                      dimffnl,ffnl,gxfac,gxfac_sij,&
125 &                      ia3,indlmn,matblk,nincat,nlmn,nloalg,npw,&
126 &                      nspinor,paw_opt,ph3d,svect,ucvol,vect)
127 
128 !Arguments ------------------------------------
129 !scalars
130  integer,intent(in) :: choice,cplex,cplex_fac,dimffnl,ia3,matblk,nincat
131  integer,intent(in) :: nlmn,npw,nspinor,paw_opt
132  real(dp),intent(in) :: ucvol
133 !arrays
134  integer,intent(in) ::  indlmn(6,nlmn),nloalg(3)
135  real(dp),intent(in),target :: ffnl(npw,dimffnl,nlmn)
136  real(dp),intent(in) :: gxfac(cplex_fac,nlmn,nincat,nspinor)
137  real(dp),intent(in) :: gxfac_sij(cplex,nlmn,nincat,nspinor*(paw_opt/3))
138  real(dp),intent(in) :: ph3d(2,npw,matblk)
139  real(dp),intent(inout) :: svect(:,:),vect(:,:)
140 !Local variables-------------------------------
141 !Arrays
142 !scalars
143  logical :: use_dgemv
144  integer :: ia,iaph3d
145  integer :: il,ilmn,ipw,jpw,ipwshft,ispinor
146  real(dp) :: buffer_r,buffer_i,wt
147 !arrays
148 ! real(dp) :: tsec(2)
149  real(dp) :: gxfac_(nlmn,2),gxfacs_(nlmn,2)
150  real(dp),allocatable :: scalr(:),scali(:)
151  real(dp),pointer :: ffnl_loc(:,:)
152  complex(dp) :: ctmp, cil(4)
153 
154 ! *************************************************************************
155 
156  DBG_ENTER("COLL")
157 
158 !Some checks
159 ! nthreads=1
160 !#if defined HAVE_OPENMP
161 ! nthreads=OMP_GET_NUM_THREADS()
162 !#endif
163 ! if (nthreads>1) then
164 !   ABI_ERROR('Only nthreads=1 is available for now.')
165 ! end if
166 
167  if (abs(choice)>1) then
168    ABI_ERROR('Only abs(choice)<=1 is available for now.')
169  end if
170  if (nloalg(1)<2.or.nloalg(1)>10) then
171    ABI_ERROR('nloalg(1) should be between 2 or 10.')
172  end if
173 
174  use_dgemv = nloalg(1)==2.or.nloalg(1)==6.or.nloalg(1)==10
175  if (use_dgemv) then
176    if(opernlb_mv_dgemv_counter>=0) then
177      opernlb_mv_dgemv_counter = opernlb_mv_dgemv_counter + 1
178      if (paw_opt==4) opernlb_mv_dgemv_counter = opernlb_mv_dgemv_counter + 1
179    end if
180  else
181    if(opernlb_mv_counter>=0) then
182      opernlb_mv_counter = opernlb_mv_counter + 1
183      if (paw_opt==4) opernlb_mv_counter = opernlb_mv_counter + 1
184    end if
185  end if
186 
187 !Inits
188  wt=four_pi/sqrt(ucvol)
189 
190  ffnl_loc => ffnl(:,1,:)
191 
192 
193  ABI_MALLOC(scalr,(npw))
194  ABI_MALLOC(scali,(npw))
195 
196 !$OMP PARALLEL PRIVATE(buffer_r,buffer_i,cil,il,ilmn,ipw,jpw,ctmp), &
197 !$OMP PRIVATE(ispinor,ipwshft,ia,iaph3d,gxfac_,gxfacs_)
198 
199 ! (-i)^l
200  cil(1) = ( 1.0_DP, 0.0_DP) * wt
201  cil(2) = ( 0.0_DP,-1.0_DP) * wt
202  cil(3) = (-1.0_DP, 0.0_DP) * wt
203  cil(4) = ( 0.0_DP, 1.0_DP) * wt
204 
205 ! if (paw_opt/=3) then
206 !   ABI_MALLOC(gxfac_,(nlmn,2))
207 ! end if
208 ! if (paw_opt>=3) then
209 !   ABI_MALLOC(gxfacs_,(nlmn,2))
210 ! end if
211 
212 !Loop on spinorial components
213  do ispinor=1,nspinor
214    ipwshft=(ispinor-1)*npw
215 
216 !  Loop on atoms (blocking)
217    do ia=1,nincat
218      iaph3d=ia;if (nloalg(2)>0) iaph3d=ia+ia3-1
219 !    Step (1) : scale gxfac with 4pi/sqr(omega).(-i)^l
220      if (paw_opt/=3) then
221        if (cplex_fac==2) then
222          do ilmn=1,nlmn
223            il=mod(indlmn(1,ilmn),4)+1
224            ctmp = cil(il) * cmplx( gxfac(1,ilmn,ia,ispinor), gxfac(2,ilmn,ia,ispinor), kind=DP )
225            gxfac_(ilmn,1) =  real(ctmp)
226            gxfac_(ilmn,2) = aimag(ctmp)
227          end do
228        else if (cplex_fac==1) then
229          do ilmn=1,nlmn
230            il=mod(indlmn(1,ilmn),4)+1
231            ctmp = cil(il) * gxfac(1,ilmn,ia,ispinor)
232            gxfac_(ilmn,1) =  real(ctmp)
233            gxfac_(ilmn,2) = aimag(ctmp)
234          end do
235        else
236          ABI_BUG('Error : should not be possible to be here')
237        end if
238      end if
239 
240 !    Step (1) bis: Scale gxfac_sij with 4pi/sqr(omega).(-i)^l
241      if (paw_opt>=3) then
242        if (cplex==2) then
243          do ilmn=1,nlmn
244            il=mod(indlmn(1,ilmn),4)+1
245            ctmp = cil(il) * cmplx( gxfac_sij(1,ilmn,ia,ispinor), gxfac_sij(2,ilmn,ia,ispinor), kind=DP )
246            gxfacs_(ilmn,1) =  real(ctmp)
247            gxfacs_(ilmn,2) = aimag(ctmp)
248          end do
249        else if (cplex==1) then
250          do ilmn=1,nlmn
251            il=mod(indlmn(1,ilmn),4)+1
252            ctmp = cil(il) * gxfac_sij(1,ilmn,ia,ispinor)
253            gxfacs_(ilmn,1) =  real(ctmp)
254            gxfacs_(ilmn,2) = aimag(ctmp)
255          end do
256        else
257          ABI_BUG('Error : should not be possible to be here')
258        end if
259      end if
260 
261 !    Compute <g|Vnl|c> (or derivatives) for each plane wave:
262      if (paw_opt/=3) then
263 
264 !      Step (2) scal(g) = Sum_lmn f_nl(g).Y_lm(g).gxfac_(lmn)
265        if (use_dgemv) then
266          call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfac_(:,1),1,0.0_DP,scalr,1)
267          call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfac_(:,2),1,0.0_DP,scali,1)
268        else
269 !         scalr(:) = zero
270 !         scali(:) = zero
271 !         do ilmn=1,nlmn
272 !           do ipw=1,npw
273 !             scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1)
274 !             scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2)
275 !           end do
276 !         end do
277 !$OMP DO
278          do ipw=1,npw
279            buffer_r = zero
280            buffer_i = zero
281            do ilmn=1,nlmn
282              buffer_r = buffer_r + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1)
283              buffer_i = buffer_i + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2)
284 !             scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1)
285 !             scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2)
286            end do
287            scalr(ipw) = buffer_r
288            scali(ipw) = buffer_i
289          end do
290 !$OMP END DO
291        end if
292 
293 !      Step (3) : vect(g) = exp(-2pi.i.g.R).scal(g)
294 !$OMP DO
295        do ipw=1,npw
296          jpw=ipw+ipwshft
297          vect(1,jpw)=vect(1,jpw)+scalr(ipw)*ph3d(1,ipw,iaph3d)+scali(ipw)*ph3d(2,ipw,iaph3d)
298          vect(2,jpw)=vect(2,jpw)-scalr(ipw)*ph3d(2,ipw,iaph3d)+scali(ipw)*ph3d(1,ipw,iaph3d)
299        end do
300 !$OMP END DO
301 
302      end if
303 
304 !    Compute <g|S|c> (or derivatives) for each plane wave:
305      if (paw_opt>=3) then
306 
307 !      Step (2) (bis) scal(g) = Sum_lmn f_nl(g).Y_lm(g).gxfacs_(lmn)
308        if (nloalg(1)==3) then
309 !         scalr(:) = zero
310 !         scali(:) = zero
311 !         do ilmn=1,nlmn
312 !           do ipw=1,npw
313 !             scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1)
314 !             scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2)
315 !           end do
316 !         end do
317 !$OMP DO
318          do ipw=1,npw
319            buffer_r = zero
320            buffer_i = zero
321            do ilmn=1,nlmn
322              buffer_r = buffer_r + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1)
323              buffer_i = buffer_i + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2)
324 !             scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1)
325 !             scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2)
326            end do
327            scalr(ipw) = buffer_r
328            scali(ipw) = buffer_i
329          end do
330 !$OMP END DO
331        else if (nloalg(1)==2) then
332          call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfacs_(:,1),1,0.0_DP,scalr,1)
333          call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfacs_(:,2),1,0.0_DP,scali,1)
334        end if
335 
336 !      Step (3) (bis) : svect(g) = exp(-2pi.i.g.R).scal(g)
337 !$OMP DO
338        do ipw=1,npw
339          jpw=ipw+ipwshft
340          svect(1,jpw)=svect(1,jpw)+scalr(ipw)*ph3d(1,ipw,iaph3d)+scali(ipw)*ph3d(2,ipw,iaph3d)
341          svect(2,jpw)=svect(2,jpw)-scalr(ipw)*ph3d(2,ipw,iaph3d)+scali(ipw)*ph3d(1,ipw,iaph3d)
342        end do
343 !$OMP END DO
344 
345      end if
346 
347 !    End loop on atoms
348    end do
349  end do !  End loop on spinors
350 ! if (paw_opt/=3) then
351 !   ABI_FREE(gxfac_)
352 ! end if
353 ! if (paw_opt>=3) then
354 !   ABI_FREE(gxfacs_)
355 ! end if
356 !$OMP END PARALLEL
357 
358  ABI_FREE(scalr)
359  ABI_FREE(scali)
360 
361 
362  DBG_EXIT("COLL")
363 
364 end subroutine opernlb_ylm_mv