TABLE OF CONTENTS
ABINIT/m_opernla_ylm_mv [ Modules ]
NAME
m_opernla_ylm_mv
FUNCTION
COPYRIGHT
Copyright (C) 2021-2024 ABINIT group (LB,MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
15 #if defined HAVE_CONFIG_H 16 #include "config.h" 17 #endif 18 19 #include "abi_common.h" 20 21 module m_opernla_ylm_mv 22 23 use defs_basis 24 use m_abicore 25 use m_errors 26 use m_xmpi 27 #if defined HAVE_OPENMP 28 use OMP_LIB 29 #endif 30 31 use defs_abitypes, only : MPI_type 32 use m_time, only : timab 33 34 implicit none 35 36 private
ABINIT/opernla_ylm_mv [ Functions ]
NAME
opernla_ylm_mv
FUNCTION
"matrix-vector" alternative implementation of "opernla_ylm". For a given wave-function |c>, get all projected scalars <p_lmn|c> where |p_lmn> are non-local projectors With: <p_lmn|c>=4pi/sqrt(vol) (i)^l Sum_g[c(g).f_nl(g).Y_lm(g).exp(2pi.i.g.R)] Here this is done in 3 steps: (1) compute for every g : scal(g) = c(g).exp(2pi.i.g.R) (2) compute for every lmn : scal(lmn) = Sum_g[scal(g).f_nl(g).Y_lm(g)] (3) compute for every lmn : <p_lmn|c> = 4pi/sqrt(vol).(i)^l.scal(lmn) Step (2) is a real-matrix/complex-vector multiplication, here two options are possible: - case nloalg(1)=2 : compute the real and imaginary parts separately using two calls of DGMEV - case nloalg(1)=3 : in order to read the matrix only once, we compute both real and imaginary parts at the same time "by hand" Depending on the achitecture and the available blas library, one option could be more interesting than an other...
INPUTS
choice=chooses possible output: if choice>=0: compute projected scalars if choice<0: same as choice>0 but use already computed projected scalars cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1) 2 if <p_lmn|c> scalars are complex dimffnl=second dimension of ffnl ffnl(npw,dimffnl,nlmn)= nonlocal quantities containing nonlocal form factors ia3=gives the number of the first atom in the subset presently treated indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn istwf_k=option parameter that describes the storage of wfs matblk=dimension of the array ph3d mpi_enreg=information about MPI parallelization nincat=number of atoms in the subset here treated nlmn=number of (l,m,n) numbers for current type of atom nloalg(3)=governs the choice of the algorithm for non-local operator. npw=number of plane waves in reciprocal space nspinor=number of spinorial components of the wavefunctions (on current proc) ph3d(2,npw,matblk)=three-dimensional phase factors ucvol=unit cell volume (bohr^3) vect(2,npw*my_nspinor)=starting vector in reciprocal space
OUTPUT
gx(cplex,nlmn,nincat,nspinor)= projected scalars
SIDE EFFECTS
NOTES
1-Not available yet for openMP 2-Operate for one type of atom, and within this given type of atom, for a subset of at most nincat atoms. 3-projector derivatives (abs(choice)>1) are not implemented yet
SOURCE
106 subroutine opernla_ylm_mv(choice,cplex,dimffnl,ffnl,gx,& 107 & ia3,indlmn,istwf_k,matblk,mpi_enreg,nincat,nlmn,& 108 & nloalg,npw,nspinor,ph3d,ucvol,vect) 109 110 !Arguments ------------------------------------ 111 !scalars 112 integer,intent(in) :: choice,cplex,dimffnl,ia3,istwf_k,matblk 113 integer,intent(in) :: nincat,nlmn,npw,nspinor 114 real(dp),intent(in) :: ucvol 115 type(MPI_type),intent(in) :: mpi_enreg 116 !arrays 117 integer,intent(in) :: indlmn(6,nlmn),nloalg(3) 118 real(dp),intent(in),target :: ffnl(npw,dimffnl,nlmn) 119 real(dp),intent(in) :: ph3d(2,npw,matblk) 120 real(dp),intent(in) :: vect(:,:) 121 real(dp),intent(out) :: gx(cplex,nlmn,nincat,nspinor) 122 123 !Local variables------------------------------- 124 !scalars 125 logical :: use_dgemv 126 integer :: ia,iaph3d,ierr,il,ilmn,ipw,ipw0,ipwshft,ispinor,jpw 127 real(dp) :: wt 128 !arrays 129 real(dp) :: buffer_r,buffer_i,tsec(2) 130 real(dp),pointer :: ffnl_loc(:,:) 131 real(dp),allocatable :: scali(:),scalr(:) 132 real(dp),allocatable :: scalr_lmn(:),scali_lmn(:) 133 complex(dp) :: ctmp,cil(4) 134 ! ************************************************************************* 135 136 if (choice==-1) return 137 138 !Some checks 139 if (abs(choice)>1) then 140 ABI_ERROR('Only abs(choice)<=1 is available for now.') 141 end if 142 if (nloalg(1)<2.or.nloalg(1)>10) then 143 ABI_ERROR('nloalg(1) should be between 2 and 10.') 144 end if 145 ! nthreads=1 146 !#if defined HAVE_OPENMP 147 ! nthreads=OMP_GET_NUM_THREADS() 148 !#endif 149 ! if (nthreads>1) then 150 ! ABI_ERROR('Only nthreads=1 is available for now.') 151 ! end if 152 153 use_dgemv = nloalg(1)==2.or.nloalg(1)==5.or.nloalg(1)==7 154 if (choice>=0.or.abs(choice)>1) then 155 if (use_dgemv) then 156 if(opernla_mv_dgemv_counter>=0) opernla_mv_dgemv_counter = opernla_mv_dgemv_counter + 1 157 else 158 if(opernla_mv_counter>=0) opernla_mv_counter = opernla_mv_counter + 1 159 end if 160 end if 161 162 !Useful variables 163 wt=four_pi/sqrt(ucvol);if (cplex==1) wt=2.d0*wt 164 ipw0=1;if (istwf_k==2.and.mpi_enreg%me_g0_fft==1) ipw0=2 165 166 !Allocate work space 167 ABI_MALLOC(scalr,(npw)) 168 ABI_MALLOC(scali,(npw)) 169 ABI_MALLOC(scalr_lmn,(nlmn)) 170 ABI_MALLOC(scali_lmn,(nlmn)) 171 172 ffnl_loc => ffnl(:,1,:) 173 174 ! i^l 175 cil(1) = ( 1.0_DP, 0.0_DP) * wt 176 cil(2) = ( 0.0_DP, 1.0_DP) * wt 177 cil(3) = (-1.0_DP, 0.0_DP) * wt 178 cil(4) = ( 0.0_DP,-1.0_DP) * wt 179 180 !$OMP PARALLEL PRIVATE(il,ilmn,ipw,jpw), & 181 !$OMP PRIVATE(ispinor,ipwshft,ia,iaph3d) 182 183 !Loop on spinorial components 184 do ispinor =1,nspinor 185 ipwshft=(ispinor-1)*npw 186 187 ! Loop on atoms (blocking) 188 do ia=1,nincat 189 iaph3d=ia;if (nloalg(2)>0) iaph3d=ia+ia3-1 190 ! Step (1) : Compute scal(g) = c(g).exp(2pi.i.g.R) 191 !$OMP DO 192 do ipw=ipw0,npw 193 jpw=ipw+ipwshft 194 scalr(ipw)=(vect(1,jpw)*ph3d(1,ipw,iaph3d)-vect(2,jpw)*ph3d(2,ipw,iaph3d)) 195 scali(ipw)=(vect(2,jpw)*ph3d(1,ipw,iaph3d)+vect(1,jpw)*ph3d(2,ipw,iaph3d)) 196 end do 197 !$OMP END DO 198 199 !$OMP SINGLE 200 if (ipw0==2) then 201 scalr(1)=half*vect(1,1+ipwshft)*ph3d(1,1,iaph3d) 202 scali(1)=half*vect(1,1+ipwshft)*ph3d(2,1,iaph3d) 203 end if 204 !$OMP END SINGLE 205 206 ! -------------------------------------------------------------------- 207 ! ALL CHOICES: 208 ! Accumulate Gx 209 ! -------------------------------------------------------------------- 210 211 if (choice>=0) then 212 213 ! Step (2) : Compute scal(lmn) = Sum_g[scal(g).f_nl(g).Y_lm(g)] 214 if (use_dgemv) then 215 call DGEMV('T',npw,nlmn,1.0_DP,ffnl_loc,npw,scalr,1,0.0_DP,scalr_lmn,1) 216 call DGEMV('T',npw,nlmn,1.0_DP,ffnl_loc,npw,scali,1,0.0_DP,scali_lmn,1) 217 else 218 do ilmn=1,nlmn 219 ! do ipw=1,npw 220 ! scalr_lmn(ilmn) = scalr_lmn(ilmn) + scalr(ipw) * ffnl_loc(ipw,ilmn) 221 ! scali_lmn(ilmn) = scali_lmn(ilmn) + scali(ipw) * ffnl_loc(ipw,ilmn) 222 ! end do 223 !$OMP SINGLE 224 buffer_r = 0.0_DP 225 buffer_i = 0.0_DP 226 !$OMP END SINGLE 227 !$OMP DO REDUCTION(+:buffer_r,buffer_i) 228 do ipw=1,npw 229 buffer_r = buffer_r + scalr(ipw) * ffnl_loc(ipw,ilmn) 230 buffer_i = buffer_i + scali(ipw) * ffnl_loc(ipw,ilmn) 231 end do 232 !$OMP END DO 233 !$OMP SINGLE 234 scalr_lmn(ilmn) = buffer_r 235 scali_lmn(ilmn) = buffer_i 236 !$OMP END SINGLE 237 end do 238 end if 239 ! Step (3) : Compute gx(lmn) = 4pi/sqrt(vol) (i)^l scal(lmn) 240 !$OMP SINGLE 241 if (cplex==2) then 242 do ilmn=1,nlmn 243 il=mod(indlmn(1,ilmn),4)+1 244 ctmp = cil(il) * cmplx(scalr_lmn(ilmn),scali_lmn(ilmn),kind=DP) 245 gx(1,ilmn,ia,ispinor) = real(ctmp) 246 gx(2,ilmn,ia,ispinor) = aimag(ctmp) 247 end do 248 else 249 do ilmn=1,nlmn 250 il=mod(indlmn(1,ilmn),4)+1 251 ctmp = cil(il) * cmplx(scalr_lmn(ilmn),scali_lmn(ilmn),kind=DP) 252 gx(1,ilmn,ia,ispinor) = real(ctmp) 253 end do 254 end if 255 !$OMP END SINGLE 256 257 end if 258 259 end do ! End loop on atoms 260 261 end do ! End loop on spinorial components 262 !$OMP END PARALLEL 263 264 !Deallocate temporary space 265 ABI_FREE(scalr) 266 ABI_FREE(scali) 267 ABI_FREE(scalr_lmn) 268 ABI_FREE(scali_lmn) 269 270 !Has to reduce arrays in case of FFT parallelization 271 if (mpi_enreg%nproc_fft>1) then 272 call timab(48,1,tsec) 273 if (choice>=0) then 274 call xmpi_sum(gx,mpi_enreg%comm_fft,ierr) 275 end if 276 call timab(48,2,tsec) 277 end if 278 279 end subroutine opernla_ylm_mv