TABLE OF CONTENTS
ABINIT/m_opernlb_ylm_mv [ Modules ]
NAME
m_opernlb_ylm_mv
FUNCTION
COPYRIGHT
Copyright (C) 2021-2024 ABINIT group (LB,MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
15 #if defined HAVE_CONFIG_H 16 #include "config.h" 17 #endif 18 19 #include "abi_common.h" 20 21 module m_opernlb_ylm_mv 22 23 use defs_basis 24 use m_abicore 25 use m_errors 26 #if defined HAVE_OPENMP 27 use OMP_LIB 28 #endif 29 30 implicit none 31 32 private
ABINIT/opernlb_ylm_mv [ Functions ]
NAME
opernlb_ylm_mv
FUNCTION
"matrix-vector" alternative implementation of "opernlrb_ylm". * Operate with the non-local part of the hamiltonian, from projected scalars to reciprocal space. * Operate with the non-local projectors and the overlap matrix, from projected scalars to reciprocal space. The input is gxfac (gxfac_sij): gxfac(lmn) = Sum_l'm'n' D_l'm'n'.<p_l'm'n|c> (or S_l'm'n' for gxfac_sij) and here we compute : Sum_lmn <g|p_lmn> gxfac(lmn) = 4pi/sqrt(vol) exp(-2pi.i.g.R) Sum_lmn (-i)^l f_nl(g).Y_lm(g) gxfac(lmn) Here this is done in 3 steps: (1) compute for every lmn : gxfac_(lmn) = 4pi/sqrt(vol).(-i)^l.gxfac(lmn) (2) compute for every g : scal(g) = Sum_lmn f_nl(g).Y_lm(g).gxfac_(lmn) (3) compute for every g : vect(g) = exp(-2pi.i.g.R).scal(g) Step (2) is a real-matrix/complex-vector multiplication, here two options are possible: - case nloalg(1)=2 : compute the real and imaginary parts separately using two calls of DGMEV - case nloalg(1)=3 : in order to read the matrix only once, we compute both real and imaginary parts at the same time "by hand" Depending on the achitecture and the available blas library, one option could be more interesting than an other...
INPUTS
choice=chooses possible output (see below) cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1) 2 if <p_lmn|c> scalars are complex cplex_fac=1 if gxfac scalars are real, 2 if gxfac scalars are complex dimffnl=second dimension of ffnl ffnl(npw,dimffnl,nlmn)= nonlocal quantities containing nonlocal form factors gxfac(cplex_fac,nlmn,nincat,nspinor)= reduced projected scalars related to Vnl (NL operator) gxfac_sij(cplex,nlmn,nincat,nspinor*(paw_opt/3))= reduced projected scalars related to Sij (overlap) ia3=gives the number of the first atom in the subset presently treated indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn matblk=dimension of the array ph3d nincat=number of atoms in the subset here treated nlmn=number of (l,m,n) numbers for current type of atom nloalg(3)=governs the choice of the algorithm for non-local operator. npw=number of plane waves in reciprocal space nspinor=number of spinorial components of the wavefunctions (on current proc) paw_opt= define the nonlocal operator concerned with: paw_opt=0 : Norm-conserving Vnl (use of Kleinman-Bylander ener.) paw_opt=1 : PAW nonlocal part of H (use of Dij coeffs) paw_opt=2 : PAW: (Vnl-lambda.Sij) (Sij=overlap matrix) paw_opt=3 : PAW overlap matrix (Sij) paw_opt=4 : both PAW nonlocal part of H (Dij) and overlap matrix (Sij) ph3d(2,npw,matblk)=three-dimensional phase factors ucvol=unit cell volume (bohr^3)
OUTPUT
(see side effects)
SIDE EFFECTS
--if (paw_opt=0) vectout(2,npwout*my_nspinor*ndat)=result of the aplication of the concerned operator or one of its derivatives to the input vect. --if (paw_opt=0, 1 or 4) vect(2,npwout*nspinor)=result of the aplication of the concerned operator or one of its derivatives to the input vect.: if (choice=1) <G|V_nonlocal|vect_in> if (paw_opt=2) vect(2,npwout*nspinor)=final vector in reciprocal space: if (choice=1) <G|V_nonlocal-lamdba.(I+S)|vect_in> (note: not including <G|I|c>) --if (paw_opt=3 or 4) svect(2,npwout*nspinor)=result of the aplication of Sij (overlap matrix) or one of its derivatives to the input vect.: if (choice=1) <G|I+S|vect_in> (note: not including <G|I|c>)
NOTES
1-No openMP available for now 2-Operate for one type of atom, and within this given type of atom, for a subset of at most nincat atoms. 3-projector derivatives (abs(choice)>1) are not implemented yet
SOURCE
123 subroutine opernlb_ylm_mv(choice,cplex,cplex_fac,& 124 & dimffnl,ffnl,gxfac,gxfac_sij,& 125 & ia3,indlmn,matblk,nincat,nlmn,nloalg,npw,& 126 & nspinor,paw_opt,ph3d,svect,ucvol,vect) 127 128 !Arguments ------------------------------------ 129 !scalars 130 integer,intent(in) :: choice,cplex,cplex_fac,dimffnl,ia3,matblk,nincat 131 integer,intent(in) :: nlmn,npw,nspinor,paw_opt 132 real(dp),intent(in) :: ucvol 133 !arrays 134 integer,intent(in) :: indlmn(6,nlmn),nloalg(3) 135 real(dp),intent(in),target :: ffnl(npw,dimffnl,nlmn) 136 real(dp),intent(in) :: gxfac(cplex_fac,nlmn,nincat,nspinor) 137 real(dp),intent(in) :: gxfac_sij(cplex,nlmn,nincat,nspinor*(paw_opt/3)) 138 real(dp),intent(in) :: ph3d(2,npw,matblk) 139 real(dp),intent(inout) :: svect(:,:),vect(:,:) 140 !Local variables------------------------------- 141 !Arrays 142 !scalars 143 logical :: use_dgemv 144 integer :: ia,iaph3d 145 integer :: il,ilmn,ipw,jpw,ipwshft,ispinor 146 real(dp) :: buffer_r,buffer_i,wt 147 !arrays 148 ! real(dp) :: tsec(2) 149 real(dp) :: gxfac_(nlmn,2),gxfacs_(nlmn,2) 150 real(dp),allocatable :: scalr(:),scali(:) 151 real(dp),pointer :: ffnl_loc(:,:) 152 complex(dp) :: ctmp, cil(4) 153 154 ! ************************************************************************* 155 156 DBG_ENTER("COLL") 157 158 !Some checks 159 ! nthreads=1 160 !#if defined HAVE_OPENMP 161 ! nthreads=OMP_GET_NUM_THREADS() 162 !#endif 163 ! if (nthreads>1) then 164 ! ABI_ERROR('Only nthreads=1 is available for now.') 165 ! end if 166 167 if (abs(choice)>1) then 168 ABI_ERROR('Only abs(choice)<=1 is available for now.') 169 end if 170 if (nloalg(1)<2.or.nloalg(1)>10) then 171 ABI_ERROR('nloalg(1) should be between 2 or 10.') 172 end if 173 174 use_dgemv = nloalg(1)==2.or.nloalg(1)==6.or.nloalg(1)==10 175 if (use_dgemv) then 176 if(opernlb_mv_dgemv_counter>=0) then 177 opernlb_mv_dgemv_counter = opernlb_mv_dgemv_counter + 1 178 if (paw_opt==4) opernlb_mv_dgemv_counter = opernlb_mv_dgemv_counter + 1 179 end if 180 else 181 if(opernlb_mv_counter>=0) then 182 opernlb_mv_counter = opernlb_mv_counter + 1 183 if (paw_opt==4) opernlb_mv_counter = opernlb_mv_counter + 1 184 end if 185 end if 186 187 !Inits 188 wt=four_pi/sqrt(ucvol) 189 190 ffnl_loc => ffnl(:,1,:) 191 192 193 ABI_MALLOC(scalr,(npw)) 194 ABI_MALLOC(scali,(npw)) 195 196 !$OMP PARALLEL PRIVATE(buffer_r,buffer_i,cil,il,ilmn,ipw,jpw,ctmp), & 197 !$OMP PRIVATE(ispinor,ipwshft,ia,iaph3d,gxfac_,gxfacs_) 198 199 ! (-i)^l 200 cil(1) = ( 1.0_DP, 0.0_DP) * wt 201 cil(2) = ( 0.0_DP,-1.0_DP) * wt 202 cil(3) = (-1.0_DP, 0.0_DP) * wt 203 cil(4) = ( 0.0_DP, 1.0_DP) * wt 204 205 ! if (paw_opt/=3) then 206 ! ABI_MALLOC(gxfac_,(nlmn,2)) 207 ! end if 208 ! if (paw_opt>=3) then 209 ! ABI_MALLOC(gxfacs_,(nlmn,2)) 210 ! end if 211 212 !Loop on spinorial components 213 do ispinor=1,nspinor 214 ipwshft=(ispinor-1)*npw 215 216 ! Loop on atoms (blocking) 217 do ia=1,nincat 218 iaph3d=ia;if (nloalg(2)>0) iaph3d=ia+ia3-1 219 ! Step (1) : scale gxfac with 4pi/sqr(omega).(-i)^l 220 if (paw_opt/=3) then 221 if (cplex_fac==2) then 222 do ilmn=1,nlmn 223 il=mod(indlmn(1,ilmn),4)+1 224 ctmp = cil(il) * cmplx( gxfac(1,ilmn,ia,ispinor), gxfac(2,ilmn,ia,ispinor), kind=DP ) 225 gxfac_(ilmn,1) = real(ctmp) 226 gxfac_(ilmn,2) = aimag(ctmp) 227 end do 228 else if (cplex_fac==1) then 229 do ilmn=1,nlmn 230 il=mod(indlmn(1,ilmn),4)+1 231 ctmp = cil(il) * gxfac(1,ilmn,ia,ispinor) 232 gxfac_(ilmn,1) = real(ctmp) 233 gxfac_(ilmn,2) = aimag(ctmp) 234 end do 235 else 236 ABI_BUG('Error : should not be possible to be here') 237 end if 238 end if 239 240 ! Step (1) bis: Scale gxfac_sij with 4pi/sqr(omega).(-i)^l 241 if (paw_opt>=3) then 242 if (cplex==2) then 243 do ilmn=1,nlmn 244 il=mod(indlmn(1,ilmn),4)+1 245 ctmp = cil(il) * cmplx( gxfac_sij(1,ilmn,ia,ispinor), gxfac_sij(2,ilmn,ia,ispinor), kind=DP ) 246 gxfacs_(ilmn,1) = real(ctmp) 247 gxfacs_(ilmn,2) = aimag(ctmp) 248 end do 249 else if (cplex==1) then 250 do ilmn=1,nlmn 251 il=mod(indlmn(1,ilmn),4)+1 252 ctmp = cil(il) * gxfac_sij(1,ilmn,ia,ispinor) 253 gxfacs_(ilmn,1) = real(ctmp) 254 gxfacs_(ilmn,2) = aimag(ctmp) 255 end do 256 else 257 ABI_BUG('Error : should not be possible to be here') 258 end if 259 end if 260 261 ! Compute <g|Vnl|c> (or derivatives) for each plane wave: 262 if (paw_opt/=3) then 263 264 ! Step (2) scal(g) = Sum_lmn f_nl(g).Y_lm(g).gxfac_(lmn) 265 if (use_dgemv) then 266 call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfac_(:,1),1,0.0_DP,scalr,1) 267 call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfac_(:,2),1,0.0_DP,scali,1) 268 else 269 ! scalr(:) = zero 270 ! scali(:) = zero 271 ! do ilmn=1,nlmn 272 ! do ipw=1,npw 273 ! scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1) 274 ! scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2) 275 ! end do 276 ! end do 277 !$OMP DO 278 do ipw=1,npw 279 buffer_r = zero 280 buffer_i = zero 281 do ilmn=1,nlmn 282 buffer_r = buffer_r + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1) 283 buffer_i = buffer_i + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2) 284 ! scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,1) 285 ! scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfac_(ilmn,2) 286 end do 287 scalr(ipw) = buffer_r 288 scali(ipw) = buffer_i 289 end do 290 !$OMP END DO 291 end if 292 293 ! Step (3) : vect(g) = exp(-2pi.i.g.R).scal(g) 294 !$OMP DO 295 do ipw=1,npw 296 jpw=ipw+ipwshft 297 vect(1,jpw)=vect(1,jpw)+scalr(ipw)*ph3d(1,ipw,iaph3d)+scali(ipw)*ph3d(2,ipw,iaph3d) 298 vect(2,jpw)=vect(2,jpw)-scalr(ipw)*ph3d(2,ipw,iaph3d)+scali(ipw)*ph3d(1,ipw,iaph3d) 299 end do 300 !$OMP END DO 301 302 end if 303 304 ! Compute <g|S|c> (or derivatives) for each plane wave: 305 if (paw_opt>=3) then 306 307 ! Step (2) (bis) scal(g) = Sum_lmn f_nl(g).Y_lm(g).gxfacs_(lmn) 308 if (nloalg(1)==3) then 309 ! scalr(:) = zero 310 ! scali(:) = zero 311 ! do ilmn=1,nlmn 312 ! do ipw=1,npw 313 ! scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1) 314 ! scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2) 315 ! end do 316 ! end do 317 !$OMP DO 318 do ipw=1,npw 319 buffer_r = zero 320 buffer_i = zero 321 do ilmn=1,nlmn 322 buffer_r = buffer_r + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1) 323 buffer_i = buffer_i + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2) 324 ! scalr(ipw) = scalr(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,1) 325 ! scali(ipw) = scali(ipw) + ffnl_loc(ipw,ilmn) * gxfacs_(ilmn,2) 326 end do 327 scalr(ipw) = buffer_r 328 scali(ipw) = buffer_i 329 end do 330 !$OMP END DO 331 else if (nloalg(1)==2) then 332 call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfacs_(:,1),1,0.0_DP,scalr,1) 333 call DGEMV('N',npw,nlmn,1.0_DP,ffnl_loc,npw,gxfacs_(:,2),1,0.0_DP,scali,1) 334 end if 335 336 ! Step (3) (bis) : svect(g) = exp(-2pi.i.g.R).scal(g) 337 !$OMP DO 338 do ipw=1,npw 339 jpw=ipw+ipwshft 340 svect(1,jpw)=svect(1,jpw)+scalr(ipw)*ph3d(1,ipw,iaph3d)+scali(ipw)*ph3d(2,ipw,iaph3d) 341 svect(2,jpw)=svect(2,jpw)-scalr(ipw)*ph3d(2,ipw,iaph3d)+scali(ipw)*ph3d(1,ipw,iaph3d) 342 end do 343 !$OMP END DO 344 345 end if 346 347 ! End loop on atoms 348 end do 349 end do ! End loop on spinors 350 ! if (paw_opt/=3) then 351 ! ABI_FREE(gxfac_) 352 ! end if 353 ! if (paw_opt>=3) then 354 ! ABI_FREE(gxfacs_) 355 ! end if 356 !$OMP END PARALLEL 357 358 ABI_FREE(scalr) 359 ABI_FREE(scali) 360 361 362 DBG_EXIT("COLL") 363 364 end subroutine opernlb_ylm_mv