TABLE OF CONTENTS
ABINIT/m_opernlc_ylm_ompgpu [ Modules ]
NAME
m_opernlc_ylm_ompgpu
FUNCTION
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group (MT) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
PARENTS
CHILDREN
SOURCE
19 #if defined HAVE_CONFIG_H 20 #include "config.h" 21 #endif 22 23 #include "abi_common.h" 24 25 module m_opernlc_ylm_ompgpu 26 27 use defs_basis 28 use m_errors 29 use m_abicore 30 use m_xmpi 31 32 use defs_abitypes, only : MPI_type 33 34 implicit none 35 36 private
ABINIT/opernlc_ylm_ompgpu [ Functions ]
NAME
opernlc_ylm_ompgpu
FUNCTION
* Operate with the non-local part of the hamiltonian, in order to reduce projected scalars * Operate with the non-local projectors and the overlap matrix, in order to reduce projected scalars
INPUTS
atindx1(natom)=index table for atoms (gives the absolute index of an atom from its rank in a block of atoms) cplex=1 if <p_lmn|c> scalars are real (equivalent to istwfk>1) 2 if <p_lmn|c> scalars are complex cplex_dgxdt(ndgxdt) = used only when cplex = 1 cplex_dgxdt(i) = 1 if dgxdt(1,i,:,:) is real, 2 if it is pure imaginary cplex_enl=1 if enl factors are real, 2 if they are complex cplex_fac=1 if gxfac scalars are real, 2 if gxfac scalars are complex dgxdt(cplex,ndgxdt,nlmn,nincat)=grads of projected scalars (only if optder>0) dimenl1,dimenl2=dimensions of enl (see enl) dimekbq=1 if enl factors do not contain a exp(-iqR) phase, 2 is they do enl(cplex_enl*dimenl1,dimenl2,nspinortot**2,dimekbq)= ->Norm conserving : ==== when paw_opt=0 ==== (Real) Kleinman-Bylander energies (hartree) dimenl1=lmnmax - dimenl2=ntypat dimekbq is 2 if Enl contains a exp(-iqR) phase, 1 otherwise ->PAW : ==== when paw_opt=1, 2 or 4 ==== (Real or complex, hermitian) Dij coefs to connect projectors dimenl1=cplex_enl*lmnmax*(lmnmax+1)/2 - dimenl2=natom These are complex numbers if cplex_enl=2 enl(:,:,1) contains Dij^up-up enl(:,:,2) contains Dij^dn-dn enl(:,:,3) contains Dij^up-dn (only if nspinor=2) enl(:,:,4) contains Dij^dn-up (only if nspinor=2) dimekbq is 2 if Dij contains a exp(-iqR) phase, 1 otherwise gx(cplex,nlmn,nincat*abs(enl_opt))= projected scalars iatm=absolute rank of first atom of the current block of atoms indlmn(6,nlmn)= array giving l,m,n,lm,ln,s for i=lmn itypat=type of atoms lambda=factor to be used when computing (Vln-lambda.S) - only for paw_opt=2 mpi_enreg=information about MPI parallelization natom=number of atoms in cell ndgxdt=second dimension of dgxdt ndgxdtfac=second dimension of dgxdtfac nincat=number of atoms in the subset here treated nlmn=number of (l,m,n) numbers for current type of atom nspinor= number of spinorial components of the wavefunctions (on current proc) nspinortot=total number of spinorial components of the wavefunctions optder=0=only gxfac is computed, 1=both gxfac and dgxdtfac are computed 2=gxfac, dgxdtfac and d2gxdtfac are computed paw_opt= define the nonlocal operator concerned with: paw_opt=0 : Norm-conserving Vnl (use of Kleinman-Bylander ener.) paw_opt=1 : PAW nonlocal part of H (use of Dij coeffs) paw_opt=2 : PAW: (Vnl-lambda.Sij) (Sij=overlap matrix) paw_opt=3 : PAW overlap matrix (Sij) paw_opt=4 : both PAW nonlocal part of H (Dij) and overlap matrix (Sij) sij(nlm*(nlmn+1)/2)=overlap matrix components (only if paw_opt=2, 3 or 4)
OUTPUT
if (paw_opt=0, 1, 2 or 4) gxfac(cplex_fac,nlmn,nincat,nspinor)= reduced projected scalars related to Vnl (NL operator) if (paw_opt=3 or 4) gxfac_sij(cplex,nlmn,nincat,nspinor)= reduced projected scalars related to Sij (overlap) if (optder==1.and.paw_opt=0, 1, 2 or 4) dgxdtfac(cplex_fac,ndgxdtfac,nlmn,nincat,nspinor)= gradients of gxfac related to Vnl (NL operator) if (optder==1.and.paw_opt=3 or 4) dgxdtfac_sij(cplex,ndgxdtfac,nlmn,nincat,nspinor)= gradients of gxfac related to Sij (overlap)
NOTES
This routine operates for one type of atom, and within this given type of atom, for a subset of at most nincat atoms. About the non-local factors symmetry: - The lower triangular part of the Dij matrix can be deduced from the upper one with the following relation: D^s2s1_ji = (D^s1s2_ij)^* where s1,s2 are spinor components - The Dij factors can contain a exp(-iqR) phase This phase does not have to be included in the symmetry rule For that reason, we first apply the real part (cos(qR).D^s1s2_ij) then, we apply the imaginary part (-sin(qR).D^s1s2_ij)
PARENTS
m_gpu_nonlop,m_nonlop_ylm
CHILDREN
xmpi_sum
SOURCE
135 subroutine opernlc_ylm_ompgpu(atindx1,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_enl,cplex_fac,& 136 & dgxdt,dgxdtfac,dgxdtfac_sij,d2gxdt,d2gxdtfac,d2gxdtfac_sij,dimenl1,dimenl2,dimekbq,enl,& 137 & gx,gxfac,gxfac_sij,iatm,indlmn,itypat,lambda,mpi_enreg,natom,ndgxdt,ndgxdtfac,& 138 & nd2gxdt,nd2gxdtfac,nincat,nlmn,nspinor,nspinortot,optder,paw_opt,sij,ndat,ibeg,iend,nprojs,ntypat) 139 140 !Arguments ------------------------------------ 141 !scalars 142 integer,intent(in) :: cplex,cplex_enl,cplex_fac,dimenl1,dimenl2,dimekbq,iatm,itypat 143 integer,intent(in) :: natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,nincat,nspinor,nspinortot,optder,paw_opt 144 integer,intent(inout) :: nlmn 145 integer,intent(in) :: ndat,ibeg,iend,nprojs,ntypat 146 real(dp) :: lambda(ndat) 147 type(MPI_type) , intent(in) :: mpi_enreg 148 !arrays 149 integer,intent(in) :: atindx1(natom),indlmn(6,nlmn,ntypat),cplex_dgxdt(ndgxdt),cplex_d2gxdt(nd2gxdt) 150 real(dp),intent(in) :: dgxdt(cplex,ndgxdt,nlmn,nincat,nspinor) 151 real(dp),intent(in) :: d2gxdt(cplex,nd2gxdt,nlmn,nincat,nspinor) 152 real(dp),intent(in),target :: enl(dimenl1,dimenl2,nspinortot**2,dimekbq) 153 real(dp),intent(inout) :: gx(cplex,nprojs,nspinor*ndat) 154 real(dp),intent(in) :: sij(:,:) 155 real(dp),intent(out),target :: dgxdtfac(cplex_fac,ndgxdtfac,nlmn,nincat,nspinor) 156 real(dp),intent(out) :: dgxdtfac_sij(cplex,ndgxdtfac,nlmn,nincat,nspinor*(paw_opt/3)) 157 real(dp),intent(out),target :: d2gxdtfac(cplex_fac,nd2gxdtfac,nlmn,nincat,nspinor) 158 real(dp),intent(out) :: d2gxdtfac_sij(cplex,nd2gxdtfac,nlmn,nincat,nspinor*(paw_opt/3)) 159 real(dp),intent(out),target :: gxfac(cplex_fac,nprojs,nspinor*ndat) 160 real(dp),intent(out) :: gxfac_sij(cplex,nprojs,nspinor*ndat) 161 162 !Tested usecases : 163 ! - Nvidia GPUs : FC_NVHPC + CUDA 164 ! - AMD GPUs : FC_LLVM + HIP 165 ! An eventual Intel implementation would use the OneAPI LLVM compiler. 166 ! Homemade CUDA/HIP interfaces would allow the use of GCC. 167 ! But it is likely that OpenMP performance won't be optimal outside GPU vendors compilers. 168 #ifdef HAVE_OPENMP_OFFLOAD 169 170 !Local variables------------------------------- 171 !Arrays 172 !scalars 173 integer :: cplex_,ia,ierr,ijlmn,ijspin,ilm,ilmn,i0lmn,iln,index_enl,iphase,ispinor,ispinor_index,idat 174 integer :: j0lmn,jilmn,jispin,jjlmn,jlm,jlmn,jspinor,jspinor_index,mu,shift 175 real(dp) :: sijr 176 !arrays 177 real(dp) :: enl_(2),gxfi(2),gxi(cplex),gxj(cplex) 178 real(dp),allocatable :: d2gxdtfac_offdiag(:,:,:,:,:),dgxdtfac_offdiag(:,:,:,:,:) 179 real(dp),allocatable :: gxfac_offdiag(:,:,:,:),gxfj(:,:) 180 real(dp),pointer :: d2gxdtfac_(:,:,:,:,:),dgxdtfac_(:,:,:,:,:),gxfac_(:,:,:) 181 real(dp),pointer :: enl_ptr(:,:,:) 182 183 ! ************************************************************************* 184 185 if (dimekbq==2) then 186 ABI_ERROR('dimekbq=2 not yet allowed with GPU !') 187 end if 188 189 DBG_ENTER("COLL") 190 191 !Parallelization over spinors treatment 192 shift=0;if (mpi_enreg%paral_spinor==1) shift=mpi_enreg%me_spinor 193 iphase=1 194 gxfac_ => gxfac 195 dgxdtfac_ => dgxdtfac 196 197 198 199 !Accumulate gxfac related to non-local operator (Norm-conserving) 200 !------------------------------------------------------------------- 201 if (paw_opt==0) then 202 !Enl is E(Kleinman-Bylander) 203 ABI_CHECK(cplex_enl/=2,"BUG: invalid cplex_enl=2!") 204 ABI_CHECK(cplex_fac==cplex,"BUG: invalid cplex_fac/=cplex!") 205 !$OMP TARGET TEAMS DISTRIBUTE & 206 !$OMP& PARALLEL DO COLLAPSE(3) & 207 !$OMP& MAP(to:gxfac_,gx,enl_ptr) & 208 !$OMP& IS_DEVICE_PTR(indlmn) & 209 !$OMP& PRIVATE(ispinor,ispinor_index,ia,ilmn,iln,enl_) 210 do ispinor=1,nspinor 211 do ia=1,nincat 212 do ilmn=1,nlmn 213 ispinor_index=ispinor+shift 214 iln=indlmn(5,ilmn,itypat) 215 enl_(1)=enl_ptr(iln,itypat,ispinor_index) 216 !TODO gxfac_(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 217 !& enl_(1)*gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 218 end do 219 end do 220 end do 221 !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO 222 end if 223 224 !Accumulate gxfac related to nonlocal operator (PAW) 225 !------------------------------------------------------------------- 226 if (paw_opt==1.or.paw_opt==2.or.paw_opt==4) then 227 !Enl is psp strength Dij or (Dij-lambda.Sij) 228 229 ! === Diagonal term(s) (up-up, down-down) 230 231 ! 1-Enl is real 232 if (cplex_enl==1) then 233 if (paw_opt==2) then 234 !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(4) & 235 !$OMP& MAP(to:gxfac,enl,atindx1,gx,sij,lambda) & 236 !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl,jlmn,j0lmn,jjlmn,ilmn,i0lmn,ijlmn) 237 do idat=1,ndat 238 do ispinor=1,nspinor 239 do ia=1,nincat 240 do jlmn=1,nlmn 241 ispinor_index=ispinor+shift 242 index_enl=atindx1(iatm+ia) 243 j0lmn=jlmn*(jlmn-1)/2 244 jjlmn=j0lmn+jlmn 245 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 246 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 247 & (enl(jjlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(jjlmn,itypat)) * & 248 & gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 249 do ilmn=1,jlmn-1 250 ijlmn=j0lmn+ilmn 251 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 252 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 253 & (enl(ijlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(ijlmn,itypat)) * & 254 & gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 255 end do 256 if(jlmn<nlmn) then 257 do ilmn=jlmn+1,nlmn 258 i0lmn=(ilmn*(ilmn-1)/2) 259 ijlmn=i0lmn+jlmn 260 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 261 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 262 & (enl(ijlmn,index_enl,ispinor_index,iphase)-lambda(idat) * sij(ijlmn,itypat)) *& 263 & gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 264 end do 265 end if 266 end do 267 end do 268 end do 269 end do 270 271 else 272 !$OMP TARGET TEAMS DISTRIBUTE & 273 !$OMP& PARALLEL DO COLLAPSE(4) & 274 !$OMP& MAP(to:enl,atindx1,gx,gxfac) & 275 !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl,jlmn,j0lmn,jjlmn,ilmn,i0lmn,ijlmn) 276 do idat=1,ndat 277 do ispinor=1,nspinor 278 do ia=1,nincat 279 do jlmn=1,nlmn 280 ispinor_index=ispinor+shift 281 index_enl=atindx1(iatm+ia) 282 j0lmn=jlmn*(jlmn-1)/2 283 jjlmn=j0lmn+jlmn 284 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 285 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 286 & enl(jjlmn,index_enl,ispinor_index,iphase) * & 287 & gx(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 288 do ilmn=1,jlmn-1 289 ijlmn=j0lmn+ilmn 290 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 291 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 292 & enl(ijlmn,index_enl,ispinor_index,iphase)*gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 293 end do 294 if(jlmn<nlmn) then 295 do ilmn=jlmn+1,nlmn 296 i0lmn=(ilmn*(ilmn-1)/2) 297 ijlmn=i0lmn+jlmn 298 gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)=& 299 & gxfac(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) + & 300 & enl(ijlmn,index_enl,ispinor_index,iphase)*gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 301 end do 302 end if 303 end do 304 end do 305 end do 306 end do 307 endif 308 309 310 ! 2-Enl is complex ===== D^ss'_ij=D^s's_ji^* 311 else 312 ABI_CHECK(cplex_fac==cplex_enl,"BUG: invalid cplex_fac/=cplex_enl!") 313 314 if (nspinortot==1) then ! -------------> NO SPINORS 315 if(paw_opt==2) then 316 !$OMP TARGET TEAMS DISTRIBUTE & 317 !$OMP& PARALLEL DO COLLAPSE(3) & 318 !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) & 319 !$OMP& IS_DEVICE_PTR(sij) & 320 !$OMP& PRIVATE(idat,ia,index_enl,jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,i0lmn,ijlmn,gxi) 321 do idat=1,ndat 322 do ia=1,nincat 323 do jlmn=1,nlmn 324 index_enl=atindx1(iatm+ia) 325 j0lmn=jlmn*(jlmn-1)/2 326 jjlmn=j0lmn+jlmn 327 enl_(1)=enl_ptr(2*jjlmn-1,index_enl,1)-lambda(idat)*sij(jjlmn,itypat) 328 gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,1) 329 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 330 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1) 331 if (cplex==2) then 332 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 333 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2) 334 end if 335 do ilmn=1,jlmn-1 336 ijlmn=j0lmn+ilmn 337 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1) 338 enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat) 339 gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) 340 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 341 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1) 342 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 343 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(1) 344 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 345 & gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1) 346 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 347 & gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxj(1) 348 if (cplex==2) then 349 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 350 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(2) 351 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 352 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2) 353 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 354 & gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxj(2) 355 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 356 & gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2) 357 end if 358 end do 359 if(jlmn<nlmn) then 360 do ilmn=jlmn+1,nlmn 361 i0lmn=ilmn*(ilmn-1)/2 362 ijlmn=i0lmn+jlmn 363 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1) 364 enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat) 365 gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1) 366 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 367 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1) 368 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))= & 369 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(1) 370 if (cplex==2) then 371 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 372 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(2) 373 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 374 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2) 375 end if 376 end do 377 end if 378 end do 379 end do 380 end do 381 else 382 !$OMP TARGET TEAMS DISTRIBUTE & 383 !$OMP& PARALLEL DO COLLAPSE(3) & 384 !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) & 385 !$OMP& IS_DEVICE_PTR(sij) & 386 !$OMP& PRIVATE(idat,ia,index_enl,jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,i0lmn,ijlmn,gxi) 387 do idat=1,ndat 388 do ia=1,nincat 389 do jlmn=1,nlmn 390 index_enl=atindx1(iatm+ia) 391 j0lmn=jlmn*(jlmn-1)/2 392 jjlmn=j0lmn+jlmn 393 enl_(1)=enl_ptr(2*jjlmn-1,index_enl,1) 394 gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,1) 395 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 396 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1) 397 if (cplex==2) then 398 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) + & 399 & enl_(1)*gxj(2) 400 end if 401 do ilmn=1,jlmn-1 402 ijlmn=j0lmn+ilmn 403 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1) 404 gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) 405 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 406 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1) 407 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 408 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxi(1) 409 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 410 & gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(1) 411 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 412 & gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxj(1) 413 if (cplex==2) then 414 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 415 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(2) 416 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 417 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(2) 418 gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 419 & gxfac_(1,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))-enl_(2)*gxj(2) 420 gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 421 & gxfac_(2,ilmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxj(2) 422 end if 423 end do 424 if(jlmn<nlmn) then 425 do ilmn=jlmn+1,nlmn 426 i0lmn=ilmn*(ilmn-1)/2 427 ijlmn=i0lmn+jlmn 428 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,1) 429 !TODO gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,1) 430 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 431 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(1)*gxi(1) 432 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = & 433 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1))+enl_(2)*gxi(1) 434 if (cplex==2) then 435 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) - & 436 & enl_(2)*gxi(2) 437 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) = gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,1+nspinor*(idat-1)) + & 438 & enl_(1)*gxi(2) 439 end if 440 end do 441 end if 442 end do 443 end do 444 end do 445 end if 446 447 else ! -------------> SPINORIAL CASE 448 449 ! === Diagonal term(s) (up-up, down-down) 450 451 !$OMP TARGET TEAMS DISTRIBUTE & 452 !$OMP& PARALLEL DO COLLAPSE(4) & 453 !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) & 454 !$OMP& IS_DEVICE_PTR(sij) & 455 !$OMP& PRIVATE(idat,ispinor,ispinor_index,ia,index_enl), & 456 !$OMP& PRIVATE(jlmn,j0lmn,jjlmn,enl_,gxj,ilmn,ijlmn,i0lmn,gxi) 457 do idat=1,ndat 458 do ispinor=1,nspinor 459 do ia=1,nincat 460 do jlmn=1,nlmn 461 ispinor_index=ispinor+shift 462 index_enl=atindx1(iatm+ia) 463 j0lmn=jlmn*(jlmn-1)/2 464 jjlmn=j0lmn+jlmn 465 enl_(1)=enl_ptr(2*jjlmn-1,index_enl,ispinor_index) 466 if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(jjlmn,itypat) 467 !TODO gxj(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 468 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 469 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxj(1) 470 if (cplex==2) then 471 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 472 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxj(2) 473 end if 474 do ilmn=1,jlmn-1 475 ijlmn=j0lmn+ilmn 476 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,ispinor_index) 477 if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat) 478 !TODO gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 479 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 480 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1) 481 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 482 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(1) 483 if (cplex==2) then 484 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 485 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(2) 486 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 487 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2) 488 end if 489 end do 490 if(jlmn<nlmn) then 491 do ilmn=jlmn+1,nlmn 492 i0lmn=ilmn*(ilmn-1)/2 493 ijlmn=i0lmn+jlmn 494 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,ispinor_index) 495 if (paw_opt==2) enl_(1)=enl_(1)-lambda(idat)*sij(ijlmn,itypat) 496 !TODO gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 497 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 498 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1) 499 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 500 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(1) 501 if (cplex==2) then 502 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 503 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(2) 504 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 505 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2) 506 end if 507 end do 508 end if 509 end do 510 end do 511 end do 512 end do 513 end if !nspinortot 514 end if !complex_enl 515 516 ! === Off-diagonal term(s) (up-down, down-up) 517 518 ! --- No parallelization over spinors --- 519 if (nspinortot==2.and.nspinor==nspinortot) then 520 ABI_CHECK(cplex_enl==2,"BUG: invalid cplex_enl/=2!") 521 ABI_CHECK(cplex_fac==cplex,"BUG: invalid cplex_fac/=cplex)!") 522 !$OMP TARGET TEAMS DISTRIBUTE & 523 !$OMP& PARALLEL DO COLLAPSE(4) & 524 !$OMP& MAP(to:gxfac_,gx,gxi,atindx1,gxj,enl_ptr) & 525 !$OMP& PRIVATE(idat,ispinor,jspinor,ia,index_enl), & 526 !$OMP& PRIVATE(jlmn,j0lmn,jjlmn,enl_,gxi,gxj,ilmn,i0lmn,ijlmn) 527 do idat=1,ndat 528 do ispinor=1,nspinortot 529 do ia=1,nincat 530 do jlmn=1,nlmn 531 jspinor=3-ispinor 532 index_enl=atindx1(iatm+ia) 533 j0lmn=jlmn*(jlmn-1)/2 534 jjlmn=j0lmn+jlmn 535 enl_(1:2)=enl_ptr(2*jjlmn-1:2*jjlmn,index_enl,2+ispinor ) 536 !TODO gxi(1:cplex)=gx(1:cplex,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 537 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(1) 538 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)-enl_(2)*gxi(1) 539 if (cplex==2) then 540 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(2)*gxi(2) 541 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(2) 542 end if 543 do ilmn=1,jlmn-1 544 ijlmn=j0lmn+ilmn 545 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,2+ispinor+(idat-1)*nspinor) 546 !TODO gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) 547 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(1) 548 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)-enl_(2)*gxi(1) 549 if (cplex==2) then 550 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(2)*gxi(2) 551 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)=gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,jspinor)+enl_(1)*gxi(2) 552 end if 553 end do 554 if(jlmn<nlmn) then 555 do ilmn=jlmn+1,nlmn 556 i0lmn=ilmn*(ilmn-1)/2 557 ijlmn=i0lmn+jlmn 558 enl_(1:2)=enl_ptr(2*ijlmn-1:2*ijlmn,index_enl,2+ispinor+(idat-1)*nspinor) 559 !TODO gxi(1:cplex)=gx(1:cplex,ilmn+(ia-1)*nlmn+ibeg,jspinor) 560 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 561 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(1) 562 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 563 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(2)*gxi(1) 564 if (cplex==2) then 565 gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 566 & gxfac_(1,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)-enl_(2)*gxi(2) 567 gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor) = & 568 & gxfac_(2,jlmn+(ia-1)*nlmn+ibeg,ispinor+(idat-1)*nspinor)+enl_(1)*gxi(2) 569 end if 570 end do 571 end if 572 end do 573 end do 574 end do 575 end do 576 577 ! --- Parallelization over spinors --- 578 else if (nspinortot==2.and.nspinor/=nspinortot) then 579 ABI_CHECK(cplex_enl==2,"BUG: invalid cplex_enl/=2!") 580 ABI_CHECK(cplex_fac==2,"BUG: invalid cplex_fac/=2!") 581 ABI_MALLOC(gxfac_offdiag,(cplex_fac,nlmn,nincat,nspinortot)) 582 !$OMP WORKSHARE 583 gxfac_offdiag(:,:,:,:)=zero 584 !$OMP END WORKSHARE 585 ispinor_index=mpi_enreg%me_spinor+1 586 jspinor_index=3-ispinor_index 587 if (ispinor_index==1) then 588 ijspin=3;jispin=4 589 else 590 ijspin=4;jispin=3 591 end if 592 !$OMP TARGET TEAMS DISTRIBUTE & 593 !$OMP& PARALLEL DO COLLAPSE(4) & 594 !$OMP& MAP(to:gx,gxi,atindx1,enl_ptr) & 595 !$OMP PRIVATE(idat,ia,index_enl,jlmn,j0lmn,ilmn,i0lmn,i0lmn,ijlmn,enl_,jilmn,gxi) 596 do idat=1,ndat 597 do ia=1,nincat 598 do jlmn=1,nlmn 599 do ilmn=1,nlmn 600 index_enl=atindx1(iatm+ia) 601 j0lmn=jlmn*(jlmn-1)/2 602 i0lmn=ilmn*(ilmn-1)/2 603 if (ilmn<=jlmn) then 604 ijlmn=j0lmn+ilmn 605 enl_(1)= enl_ptr(2*ijlmn-1,index_enl,ijspin) 606 enl_(2)=-enl_ptr(2*ijlmn ,index_enl,ijspin) 607 else 608 jilmn=i0lmn+jlmn 609 enl_(1)= enl_ptr(2*jilmn-1,index_enl,jispin) 610 enl_(2)= enl_ptr(2*jilmn ,index_enl,jispin) 611 end if 612 !TODO gxi(1:cplex)=gx(1:cplex,ilmn,ia,1+nspinor*(idat-1)) 613 gxfac_offdiag(1,jlmn,ia,jspinor_index)= & 614 & gxfac_offdiag(1,jlmn,ia,jspinor_index)+enl_(1)*gxi(1) 615 gxfac_offdiag(2,jlmn,ia,jspinor_index)= & 616 & gxfac_offdiag(2,jlmn,ia,jspinor_index)+enl_(2)*gxi(1) 617 if (cplex==2) then 618 gxfac_offdiag(1,jlmn,ia,jspinor_index)= & 619 & gxfac_offdiag(1,jlmn,ia,jspinor_index)-enl_(2)*gxi(2) 620 gxfac_offdiag(2,jlmn,ia,jspinor_index)= & 621 & gxfac_offdiag(2,jlmn,ia,jspinor_index)+enl_(1)*gxi(2) 622 end if 623 end do !ilmn 624 end do !jlmn 625 end do !iat 626 end do !idat 627 call xmpi_sum(gxfac_offdiag,mpi_enreg%comm_spinor,ierr) 628 !$OMP TARGET UPDATE FROM(gxfac_) 629 !gxfac_(:,:,:,1)=gxfac_(:,:,:,1)+gxfac_offdiag(:,:,:,ispinor_index) 630 ABI_FREE(gxfac_offdiag) 631 end if 632 633 end if !paw_opt 634 635 636 !Accumulate gxfac related to overlap (Sij) (PAW) 637 !------------------------------------------- ------------------------ 638 if (paw_opt==3.or.paw_opt==4) then ! Use Sij, overlap contribution 639 !$OMP TARGET TEAMS DISTRIBUTE & 640 !$OMP& PARALLEL DO COLLAPSE(4) & 641 !$OMP& MAP(to:sij,gx,gxfac_sij) & 642 !$OMP PRIVATE(idat, ispinor,ia,jlmn,j0lmn,jjlmn,jlm,ilmn,ilm,i0lmn,ijlmn) 643 do idat=1,ndat 644 do ispinor=1,nspinor 645 do ia=1,nincat 646 do jlmn=1,nlmn 647 j0lmn=jlmn*(jlmn-1)/2 648 jjlmn=j0lmn+jlmn 649 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)= & 650 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) & 651 + sij(jjlmn,itypat) * gx(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 652 do ilmn=1,jlmn-1 653 ijlmn=j0lmn+ilmn 654 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)= & 655 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) & 656 + sij(ijlmn,itypat) * gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 657 end do 658 if(jlmn<nlmn) then 659 do ilmn=jlmn+1,nlmn 660 i0lmn=ilmn*(ilmn-1)/2 661 ijlmn=i0lmn+jlmn 662 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor)=& 663 gxfac_sij(1:cplex,ibeg+jlmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) & 664 + sij(ijlmn,itypat) * gx(1:cplex,ibeg+ilmn+(ia-1)*nlmn,ispinor+(idat-1)*nspinor) 665 end do 666 end if 667 end do 668 end do 669 end do 670 end do 671 end if 672 673 #else 674 675 ABI_UNUSED((/cplex,cplex_enl,cplex_fac,dimenl1,dimenl2,dimekbq,iatm,itypat/)) 676 ABI_UNUSED((/natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,nincat,nspinor,nspinortot,optder,paw_opt/)) 677 ABI_UNUSED((/nlmn,ndat,ibeg,iend,nprojs,ntypat/)) 678 ABI_UNUSED((/atindx1,indlmn,cplex_dgxdt,cplex_d2gxdt/)) 679 ABI_UNUSED((/dgxdtfac,dgxdtfac_sij,d2gxdtfac,d2gxdtfac_sij,gxfac,gxfac_sij,dgxdt,d2gxdt,lambda,enl,gx,sij/)) 680 ABI_UNUSED_A(mpi_enreg) 681 ABI_BUG("Unhandled configuration for OpenMP GPU immplementation") 682 683 #endif 684 685 end subroutine opernlc_ylm_ompgpu