TABLE OF CONTENTS
ABINIT/lobpcgwf [ Functions ]
NAME
lobpcgwf
FUNCTION
this routine updates the whole wave functions at a given k-point, using the lobpcg method for a given spin-polarization, from a fixed hamiltonian but might also simply compute eigenvectors and eigenvalues at this k point. it will also update the matrix elements of the hamiltonian.
COPYRIGHT
Copyright (C) 1998-2022 ABINIT group (FBottin,GZ,AR,MT,FDahm) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
dtset <type(dataset_type)>=all input variales for this dataset gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array gsc kinpw(npw)=(modified) kinetic energy for each plane wave (hartree) mcg=second dimension of the cg array mgsc=second dimension of the gsc array mpi_enreg=information about MPI parallelization nband_k=number of bands at this k point for that spin polarization nbdblock : number of blocks npw_k=number of plane waves at this k point prtvol=control print volume and debugging output use_totvnlx=1 if one has to compute totvnlx
OUTPUT
resid_k(nband_k)=residuals for each states subham(nband_k*(nband_k+1))=the matrix elements of h If gs_hamk%usepaw==0: gsc(2,mgsc)=<g|s|c> matrix elements (s=overlap) totvnlx(nband_k*use_totvnlx,nband_k*use_totvnlx)=the matrix elements of vnl+vfockACE
SIDE EFFECTS
cg(2,mcg)=updated wavefunctions
SOURCE
80 subroutine lobpcgwf(cg,dtset,gs_hamk,gsc,icg,igsc,kinpw,mcg,mgsc,mpi_enreg,& 81 & nband_k,nbdblock,npw_k,prtvol,resid_k,subham,totvnlx,use_totvnlx) 82 83 84 use defs_basis 85 use m_abicore 86 use m_lobpcg 87 use m_abi_linalg 88 use m_wfutils 89 use m_xmpi 90 use m_errors 91 use, intrinsic :: iso_c_binding 92 use m_dtset 93 94 use defs_abitypes, only : mpi_type 95 use m_time, only : timab 96 use m_hamiltonian, only : gs_hamiltonian_type 97 use m_pawcprj, only : pawcprj_type 98 use m_getghc, only : getghc 99 use m_prep_kgb, only : prep_getghc 100 101 implicit none 102 103 !Arguments ------------------------------------ 104 integer,intent(in) :: icg,igsc,mcg,mgsc,nband_k,nbdblock,npw_k,prtvol,use_totvnlx 105 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 106 type(dataset_type),intent(in) :: dtset 107 type(mpi_type),intent(inout) :: mpi_enreg 108 real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc) 109 real(dp),intent(in) :: kinpw(npw_k) 110 real(dp),intent(out) :: resid_k(nband_k) 111 real(dp),intent(inout) :: subham(nband_k*(nband_k+1)) 112 real(dp),intent(inout) :: totvnlx((3-gs_hamk%istwf_k)*nband_k*use_totvnlx,nband_k*use_totvnlx) 113 114 !Local variables------------------------------- 115 integer, parameter :: tim_getghc=5 116 integer :: activepsize,activersize,bblocksize,bigorder,blocksize,cpopt 117 integer :: cond_try 118 integer :: iblocksize,iblock,ierr,ii,info,istwf_k,isubh 119 integer :: iterationnumber 120 integer :: iwavef,i1,i2,i3,i4,maxiterations,my_nspinor 121 integer :: nrestart,optekin,optpcon,restart 122 integer :: sij_opt,timopt,tim_wfcopy,tim_xeigen 123 integer :: tim_xortho,tim_xprecon,use_lapack_gpu,use_linalg_gpu,vectsize 124 logical :: gen_eigenpb 125 integer :: cplx 126 real(dp) :: condestgramb,deltae,deold,dum 127 complex(dpc) :: cminusone 128 real(dp) :: zvar(2) 129 logical :: havetoprecon 130 real(dp) :: tsec(2) 131 real(dp), allocatable :: gwavef(:,:),cwavef(:,:),gvnlxc(:,:) 132 real(dp), allocatable :: swavef(:,:) 133 real(dp), allocatable :: residualnorms(:),eigen(:) 134 real(dp), allocatable :: tmpeigen(:) 135 real(dp), allocatable :: pcon(:,:) 136 real(dp), allocatable :: blockvectorx(:,:),blockvectorvx(:,:),blockvectorax(:,:),blockvectorbx(:,:) 137 real(dp),allocatable :: blockvectorr(:,:),blockvectorvr(:,:),blockvectorar(:,:),blockvectorbr(:,:) 138 real(dp),allocatable :: blockvectorp(:,:),blockvectorvp(:,:),blockvectorap(:,:),blockvectorbp(:,:),blockvectordumm(:,:) 139 real(dp),allocatable :: blockvectory(:,:),blockvectorby(:,:),blockvectorz(:,:) 140 real(dp),allocatable :: gramxax(:,:),gramxar(:,:),gramxap(:,:),gramrar(:,:),gramrap(:,:),grampap(:,:) 141 real(dp),allocatable :: gramxbx(:,:),gramxbr(:,:),gramxbp(:,:),gramrbr(:,:),gramrbp(:,:),grampbp(:,:) 142 real(dp),allocatable :: coordx1(:,:),coordx2(:,:),coordx3(:,:),lambda(:,:),grama(:,:),gramb(:,:),gramyx(:,:) 143 real(dp),allocatable :: tmpgramb(:,:),transf3(:,:,:),transf5(:,:,:) 144 real(dp), allocatable :: tsubham(:,:) 145 type(pawcprj_type) :: cprj_dum(gs_hamk%natom,0) 146 character(len=500) :: message 147 character, dimension(2) :: cparam 148 type(c_ptr) :: A_gpu,C_gpu,coordx2_gpu,coordx3_gpu,bblockvector_gpu,gram_gpu 149 type(c_ptr) :: blockvectorr_gpu,blockvectorar_gpu,blockvectorbr_gpu 150 151 !Index of a given band 152 !gramindex(iblocksize)=(iblocksize-1)*cplx+1 153 154 ! ********************************************************************* 155 156 DBG_ENTER("COLL") 157 158 call timab(530,1,tsec) 159 if(abs(dtset%timopt)==4) then 160 call timab(520,1,tsec) 161 end if 162 163 !########################################################################### 164 !################ INITIALISATION ########################################## 165 !########################################################################### 166 167 !For timing 168 timopt=dtset%timopt 169 tim_wfcopy=584 170 !tim_xcopy=584 171 tim_xeigen=587 172 !tim_xgemm=532 173 tim_xortho=535 174 tim_xprecon=536 175 !tim_xtrsm=535 176 177 !Variables 178 maxiterations=dtset%nline 179 gen_eigenpb=(gs_hamk%usepaw==1) 180 my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor) 181 cminusone=-cone 182 istwf_k=gs_hamk%istwf_k 183 info = 0 184 cparam(1)='t' 185 cparam(2)='c' 186 187 !Depends on istwfk 188 if ( istwf_k == 2 ) then 189 cplx=1 190 if (mpi_enreg%me_g0 == 1) then 191 vectsize=2*npw_k*my_nspinor-1 192 else 193 vectsize=2*npw_k*my_nspinor 194 end if 195 else 196 cplx=2 197 vectsize=npw_k*my_nspinor 198 end if 199 200 !For preconditionning 201 optekin=0;if (dtset%wfoptalg>10) optekin=0 202 optpcon=1;if (dtset%wfoptalg>10) optpcon=0 203 204 !For communication 205 blocksize=mpi_enreg%nproc_fft 206 if(mpi_enreg%paral_kgb==1) blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp 207 !IF you want to compare with new lobpcg in sequential uncomment the following 208 !line 209 !blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp 210 211 !Iniitializations/allocations of GPU parallelism 212 use_linalg_gpu=0;use_lapack_gpu=0 213 if ((dtset%use_gpu_cuda==1).and. & 214 & (vectsize*blocksize*blocksize>dtset%gpu_linalg_limit)) use_linalg_gpu=1 215 #if defined HAVE_LINALG_MAGMA 216 use_lapack_gpu=use_linalg_gpu 217 #endif 218 if(use_linalg_gpu==1) then 219 call alloc_on_gpu(A_gpu,cplx*dp*vectsize*blocksize) 220 call alloc_on_gpu(C_gpu,cplx*dp*vectsize*blocksize) 221 call alloc_on_gpu(blockvectorr_gpu,cplx*dp*vectsize*blocksize) 222 call alloc_on_gpu(blockvectorar_gpu,cplx*dp*vectsize*blocksize) 223 call alloc_on_gpu(blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 224 call alloc_on_gpu(coordx2_gpu,cplx*dp*blocksize*blocksize) 225 call alloc_on_gpu(coordx3_gpu,cplx*dp*blocksize*blocksize) 226 end if 227 228 if(abs(dtset%timopt)==4) then 229 call timab(520,2,tsec) 230 end if 231 232 !########################################################################### 233 !################ BIG LOOP OVER BLOCKS #################################### 234 !########################################################################### 235 236 do iblock=1,nbdblock 237 238 if(abs(dtset%timopt)==4) then 239 call timab(521,1,tsec) 240 end if 241 242 havetoprecon=.true. 243 nrestart=0 244 bblocksize=(iblock-1)*blocksize 245 246 ! allocations 247 ABI_MALLOC(pcon,(npw_k,blocksize)) 248 ABI_MALLOC(blockvectorx,(cplx*vectsize,blocksize)) 249 ABI_MALLOC(blockvectorax,(cplx*vectsize,blocksize)) 250 ABI_MALLOC(blockvectorbx,(cplx*vectsize,blocksize)) 251 ABI_MALLOC(blockvectorr,(cplx*vectsize,blocksize)) 252 ABI_MALLOC(blockvectorar,(cplx*vectsize,blocksize)) 253 ABI_MALLOC(blockvectorbr,(cplx*vectsize,blocksize)) 254 ABI_MALLOC(blockvectorp,(cplx*vectsize,blocksize)) 255 ABI_MALLOC(blockvectorap,(cplx*vectsize,blocksize)) 256 ABI_MALLOC(blockvectorbp,(cplx*vectsize,blocksize)) 257 ABI_MALLOC(blockvectordumm,(cplx*vectsize,blocksize)) 258 ABI_MALLOC(gramxax,(cplx*blocksize,blocksize)) 259 ABI_MALLOC(gramxar,(cplx*blocksize,blocksize)) 260 ABI_MALLOC(gramxap,(cplx*blocksize,blocksize)) 261 ABI_MALLOC(gramrar,(cplx*blocksize,blocksize)) 262 ABI_MALLOC(gramrap,(cplx*blocksize,blocksize)) 263 ABI_MALLOC(grampap,(cplx*blocksize,blocksize)) 264 ABI_MALLOC(gramxbx,(cplx*blocksize,blocksize)) 265 ABI_MALLOC(gramxbr,(cplx*blocksize,blocksize)) 266 ABI_MALLOC(gramxbp,(cplx*blocksize,blocksize)) 267 ABI_MALLOC(gramrbr,(cplx*blocksize,blocksize)) 268 ABI_MALLOC(gramrbp,(cplx*blocksize,blocksize)) 269 ABI_MALLOC(grampbp,(cplx*blocksize,blocksize)) 270 ABI_MALLOC(transf3,(cplx*blocksize,blocksize,3)) 271 ABI_MALLOC(transf5,(cplx*blocksize,blocksize,5)) 272 ABI_MALLOC(lambda,(cplx*blocksize,blocksize)) 273 ABI_MALLOC(residualnorms,(blocksize)) 274 275 ABI_MALLOC(blockvectory,(cplx*vectsize,bblocksize)) 276 ABI_MALLOC(blockvectorby,(cplx*vectsize,bblocksize)) 277 ABI_MALLOC(gramyx,(cplx*bblocksize,blocksize)) 278 if (gs_hamk%usepaw==0) then 279 ABI_MALLOC(blockvectorvx,(cplx*vectsize,blocksize)) 280 ABI_MALLOC(blockvectorvr,(cplx*vectsize,blocksize)) 281 ABI_MALLOC(blockvectorvp,(cplx*vectsize,blocksize)) 282 end if 283 284 if(use_linalg_gpu==1) then 285 if(iblock/=1) then 286 call alloc_on_gpu(bblockvector_gpu,cplx*dp*vectsize*bblocksize) 287 call alloc_on_gpu(gram_gpu,cplx*dp*bblocksize*blocksize) 288 else 289 call alloc_on_gpu(bblockvector_gpu,cplx*dp*vectsize*blocksize) 290 call alloc_on_gpu(gram_gpu,cplx*dp*blocksize*blocksize) 291 end if 292 end if 293 294 ! Initialize global variables in m_wfutils. 295 call setWFParameter(cplx,mpi_enreg%me_g0,npw_k,my_nspinor,icg,igsc,blocksize) 296 297 ! transfer array of wf coeff in iblock to blockvectorx 298 call wfcopy('D',blocksize*vectsize,cg,1,blockvectorx,1,blocksize,iblock,'C',withbbloc=.true.,& 299 & timopt=timopt,tim_wfcopy=tim_wfcopy) 300 301 ! !!!!!!!!!!!!!!!!!!!!!!!! Begin if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!! 302 ! transfer array of wf coeff less than iblock to blockvectory 303 if(iblock /=1) then 304 call wfcopy('D',bblocksize*vectsize,cg,1,blockvectory,1,bblocksize,iblock,'C',withbbloc=.false.,& 305 & timopt=timopt,tim_wfcopy=tim_wfcopy) 306 307 if(gen_eigenpb) then 308 call wfcopy('D',bblocksize*vectsize,gsc,1,blockvectorby,1,bblocksize,iblock,'S',withbbloc=.false.,& 309 & timopt=timopt,tim_wfcopy=tim_wfcopy) 310 else 311 call abi_xcopy(vectsize*bblocksize,blockvectory,1,blockvectorby,1,x_cplx=x_cplx) 312 end if 313 314 ! b-orthogonalize x to the constraint y (supposed b-orthonormal) 315 ! blockvectorx=blockvectorx-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorx)) 316 317 call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,& 318 & vectsize,blockvectorx,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx) 319 320 if(abs(dtset%timopt)==3) then 321 call timab(533,1,tsec) 322 end if 323 call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr) 324 if(abs(dtset%timopt)==3) then 325 call timab(533,2,tsec) 326 end if 327 328 call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,& 329 & vectsize,gramyx,bblocksize,cone,blockvectorx,vectsize,x_cplx=x_cplx) 330 331 end if 332 ! !!!!!!!!!!!!!!!!!!!!!!!! End if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!! 333 334 ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize)) 335 ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize)) 336 ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize)) 337 ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize)) 338 339 call wfcopy('I',vectsize*blocksize,blockvectorx,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,& 340 & timopt=timopt,tim_wfcopy=tim_wfcopy) 341 342 if(abs(dtset%timopt)==4) then 343 call timab(521,2,tsec) 344 end if 345 if(abs(dtset%timopt)==4) then 346 call timab(526,1,tsec) 347 end if 348 349 cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1 350 351 if (mpi_enreg%paral_kgb==0) then 352 call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,& 353 & mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0) 354 else 355 call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,& 356 & prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.) 357 end if 358 if(abs(dtset%timopt)==4) then 359 call timab(526,2,tsec) 360 end if 361 if(abs(dtset%timopt)==4) then 362 call timab(522,1,tsec) 363 end if 364 365 if ( gen_eigenpb ) then 366 call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbx,1,blocksize,iblock,'W',withbbloc=.false.,& 367 & timopt=timopt,tim_wfcopy=tim_wfcopy) 368 else 369 call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvx,1,blocksize,iblock,'W',withbbloc=.false.,& 370 & timopt=timopt,tim_wfcopy=tim_wfcopy) 371 call abi_xcopy(vectsize*blocksize,blockvectorx,1,blockvectorbx,1,x_cplx=x_cplx) 372 end if 373 374 call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorax,1,blocksize,iblock,'W',withbbloc=.false.,& 375 & timopt=timopt,tim_wfcopy=tim_wfcopy) 376 377 ABI_FREE(cwavef) 378 ABI_FREE(gwavef) 379 ABI_FREE(gvnlxc) 380 ABI_FREE(swavef) 381 382 call abi_xorthonormalize(blockvectorx,blockvectorbx,blocksize,mpi_enreg%comm_bandspinorfft,gramxbx,vectsize,& 383 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 384 385 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorbx,vectsize,x_cplx=x_cplx) 386 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorax,vectsize,x_cplx=x_cplx) 387 388 if (gs_hamk%usepaw==0) then 389 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorvx,vectsize,x_cplx=x_cplx) 390 end if 391 392 ! Do rayleigh ritz on a in space x 393 ! gramxax=matmul(transpose(blockvectorx),blockvectorax) 394 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorx,& 395 & vectsize,blockvectorax,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 396 397 if(abs(dtset%timopt)==3) then 398 call timab(533,1,tsec) 399 end if 400 call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr) 401 if(abs(dtset%timopt)==3) then 402 call timab(533,2,tsec) 403 end if 404 ABI_MALLOC(eigen,(blocksize)) 405 406 call abi_xheev('v','u',blocksize,gramxax,blocksize,eigen,x_cplx=cplx,istwf_k=istwf_k, & 407 timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu) 408 409 ! blockvectorx=matmul(blockvectorx,gramxax) 410 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,& 411 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 412 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorx,1,x_cplx=x_cplx) 413 414 ! blockvectorax=matmul(blockvectorax,gramxax) 415 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,& 416 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 417 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorax,1,x_cplx=x_cplx) 418 419 ! blockvectorvx=matmul(blockvectorvx,gramxax) 420 if (gs_hamk%usepaw==0) then 421 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,& 422 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 423 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvx,1,x_cplx=x_cplx) 424 end if 425 426 ! blockvectorbx=matmul(blockvectorbx,gramxax) 427 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,& 428 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 429 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbx,1,x_cplx=x_cplx) 430 431 do iblocksize=1,blocksize 432 zvar=(/eigen(iblocksize),zero/) 433 call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 434 end do 435 ABI_FREE(eigen) 436 437 if(abs(dtset%timopt)==4) then 438 call timab(522,2,tsec) 439 end if 440 441 ! ########################################################################### 442 ! ################ PERFORM LOOP ON NLINE #################################### 443 ! ########################################################################### 444 ! now the main alogrithm 445 iter: do iterationnumber=1,maxiterations 446 447 if(abs(dtset%timopt)==4) then 448 call timab(523,1,tsec) 449 end if 450 451 ! Build residual 452 ! blockvectorr=blockvectorax-matmul(blockvectorx,lambda) 453 call xprecon(blockvectorbx,lambda,blocksize,& 454 & iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,& 455 & optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon) 456 457 residualnorms=sum(blockvectorr**2,dim=1) 458 459 if(abs(dtset%timopt)==3) then 460 call timab(533,1,tsec) 461 end if 462 call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr) 463 if(abs(dtset%timopt)==3) then 464 call timab(533,2,tsec) 465 end if 466 467 resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize) 468 469 ! If residual sufficiently small stop line minimizations 470 if (abs(maxval(residualnorms(1:blocksize)))<dtset%tolwfr) then 471 if (prtvol > 0) then 472 write(message, '(a,i0,a,i0,a,es12.4)' ) & 473 & ' lobpcgwf: block ',iblock,' converged after ',iterationnumber,& 474 & ' line minimizations: maxval(resid(1:blocksize)) =',maxval(residualnorms(1:blocksize)) 475 call wrtout(std_out,message,'PERS') 476 end if 477 havetoprecon=.false. 478 exit 479 end if 480 481 if(use_linalg_gpu==1) then 482 call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 483 end if 484 485 if(iblock /=1) then 486 ! Residuals orthogonal to blockvectorby 487 ! blockvectorr=blockvectorr-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorr)) 488 489 if(use_linalg_gpu==1) then 490 call copy_on_gpu(blockvectorby,bblockvector_gpu,cplx*dp*vectsize*bblocksize) 491 call gpu_xgemm(cplx,cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,bblockvector_gpu,& 492 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,bblocksize) 493 call copy_from_gpu(gramyx,gram_gpu,cplx*dp*bblocksize*blocksize) 494 else 495 call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,& 496 & vectsize,blockvectorr,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx) 497 end if 498 499 if(abs(dtset%timopt)==3) then 500 call timab(533,1,tsec) 501 end if 502 call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr) 503 if(abs(dtset%timopt)==3) then 504 call timab(533,2,tsec) 505 end if 506 507 if(use_linalg_gpu==1) then 508 call copy_on_gpu(gramyx,gram_gpu,cplx*dp*bblocksize*blocksize) 509 call copy_on_gpu(blockvectory,bblockvector_gpu,cplx*dp*vectsize*bblocksize) 510 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,bblocksize,cminusone,bblockvector_gpu,& 511 & vectsize,gram_gpu,bblocksize,cone,blockvectorr_gpu,vectsize) 512 else 513 call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,& 514 & vectsize,gramyx,bblocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx) 515 end if 516 517 end if 518 519 ! Residuals orthogonal to blockvectorx 520 ! blockvectorr=blockvectorr-matmul(blockvectorx,matmul((blockvectorbx)^T,blockvectorr)) 521 if(use_linalg_gpu==1) then 522 call copy_on_gpu(blockvectorbx,C_gpu,cplx*dp*vectsize*blocksize) 523 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,C_gpu,& 524 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 525 call copy_from_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize) 526 else 527 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,& 528 & vectsize,blockvectorr,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 529 end if 530 531 if(abs(dtset%timopt)==3) then 532 call timab(533,1,tsec) 533 end if 534 call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr) 535 if(abs(dtset%timopt)==3) then 536 call timab(533,2,tsec) 537 end if 538 539 if(use_linalg_gpu==1) then 540 call copy_on_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize) 541 call copy_on_gpu(blockvectorx,C_gpu,cplx*dp*vectsize*blocksize) 542 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cminusone,C_gpu,& 543 & vectsize,gram_gpu,blocksize,cone,blockvectorr_gpu,vectsize) 544 call copy_from_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 545 else 546 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cminusone,blockvectorx,& 547 & vectsize,gramxax,blocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx) 548 end if 549 550 ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize)) 551 ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize)) 552 ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize)) 553 ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize)) 554 555 call wfcopy('I',vectsize*blocksize,blockvectorr,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,& 556 & timopt=timopt,tim_wfcopy=tim_wfcopy) 557 558 cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1 559 560 if(abs(dtset%timopt)==4) then 561 call timab(523,2,tsec) 562 end if 563 if(abs(dtset%timopt)==4) then 564 call timab(526,1,tsec) 565 end if 566 567 if (mpi_enreg%paral_kgb==0) then 568 call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,& 569 & mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0) 570 else 571 call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,& 572 & prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.) 573 end if 574 575 if(abs(dtset%timopt)==4) then 576 call timab(526,2,tsec) 577 end if 578 if(abs(dtset%timopt)==4) then 579 call timab(524,1,tsec) 580 end if 581 582 if (gen_eigenpb) then 583 call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbr,1,blocksize,iblock,'W',withbbloc=.false.,& 584 & timopt=timopt,tim_wfcopy=tim_wfcopy) 585 else 586 call abi_xcopy(vectsize*blocksize,blockvectorr,1,blockvectorbr,1,x_cplx=x_cplx) 587 call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvr,1,blocksize,iblock,'W',withbbloc=.false.,& 588 & timopt=timopt,tim_wfcopy=tim_wfcopy) 589 end if 590 591 call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorar,1,blocksize,iblock,'W',withbbloc=.false.,& 592 & timopt=timopt,tim_wfcopy=tim_wfcopy) 593 594 ABI_FREE(cwavef) 595 ABI_FREE(gwavef) 596 ABI_FREE(gvnlxc) 597 ABI_FREE(swavef) 598 599 if(use_linalg_gpu==1) then 600 call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 601 call gpu_xorthonormalize(blockvectorr_gpu,blockvectorbr_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,& 602 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 603 call copy_from_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 604 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorbr_gpu,vectsize) 605 call copy_from_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 606 call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 607 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorar_gpu,vectsize) 608 call copy_from_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 609 if (gs_hamk%usepaw==0) then 610 call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize) 611 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 612 call copy_from_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize) 613 end if 614 call copy_from_gpu(gramrbr,gram_gpu,cplx*dp*blocksize*blocksize) 615 else 616 call abi_xorthonormalize(blockvectorr,blockvectorbr,blocksize,mpi_enreg%comm_bandspinorfft,gramrbr,vectsize,& 617 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 618 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorbr,vectsize,x_cplx=x_cplx) 619 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorar,vectsize,x_cplx=x_cplx) 620 if (gs_hamk%usepaw==0) then 621 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorvr,vectsize,x_cplx=x_cplx) 622 end if 623 end if 624 625 if(iterationnumber>1) then 626 if(use_linalg_gpu==1) then 627 call copy_on_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize) 628 call copy_on_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize) 629 call gpu_xorthonormalize(A_gpu,C_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,& 630 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 631 call copy_from_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize) 632 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,C_gpu,vectsize) 633 call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize) 634 call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize) 635 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 636 call copy_from_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize) 637 if (gs_hamk%usepaw==0) then 638 call copy_on_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize) 639 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 640 call copy_from_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize) 641 end if 642 call copy_from_gpu(grampbp,gram_gpu,cplx*dp*blocksize*blocksize) 643 else 644 ! call orthonormalize(blockvectorp,blockvectorbp,blockvectorap) 645 call abi_xorthonormalize(blockvectorp,blockvectorbp,blocksize,mpi_enreg%comm_bandspinorfft,grampbp,vectsize,& 646 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 647 ! blockvectorap=matmul(blockvectorap,grampbp) 648 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorbp,vectsize,x_cplx=x_cplx) 649 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorap,vectsize,x_cplx=x_cplx) 650 if (gs_hamk%usepaw==0) then 651 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorvp,vectsize,x_cplx=x_cplx) 652 end if 653 end if 654 end if 655 656 activersize=blocksize 657 if (iterationnumber==1) then 658 activepsize=0 659 restart=1 660 else 661 activepsize=blocksize 662 restart=0 663 end if 664 665 ! gramxar=matmul((blockvectorax)^T,blockvectorr) 666 ! gramrar=matmul((blockvectorar)^T,blockvectorr) 667 ! gramxax=matmul((blockvectorax)^T,blockvectorx) 668 if(use_linalg_gpu==1) then 669 call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize) 670 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,& 671 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 672 call copy_from_gpu(gramxar,gram_gpu,cplx*dp*blocksize*blocksize) 673 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar_gpu,& 674 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 675 call copy_from_gpu(gramrar,gram_gpu,cplx*dp*blocksize*blocksize) 676 call copy_on_gpu(blockvectorx,C_gpu,cplx*dp*vectsize*blocksize) 677 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,& 678 & vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 679 call copy_from_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize) 680 else 681 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 682 & vectsize,blockvectorr,vectsize,czero,gramxar,blocksize,x_cplx=x_cplx) 683 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,& 684 & vectsize,blockvectorr,vectsize,czero,gramrar,blocksize,x_cplx=x_cplx) 685 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 686 & vectsize,blockvectorx,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 687 end if 688 689 call abi_xcopy(blocksize*blocksize,gramxar,1,transf3(:,:,1),1,x_cplx=x_cplx) 690 call abi_xcopy(blocksize*blocksize,gramrar,1,transf3(:,:,2),1,x_cplx=x_cplx) 691 call abi_xcopy(blocksize*blocksize,gramxax,1,transf3(:,:,3),1,x_cplx=x_cplx) 692 if(abs(dtset%timopt)==3) then 693 call timab(533,1,tsec) 694 end if 695 call xmpi_sum(transf3,mpi_enreg%comm_bandspinorfft,ierr) 696 if(abs(dtset%timopt)==3) then 697 call timab(533,2,tsec) 698 end if 699 700 call abi_xcopy(blocksize*blocksize,transf3(:,:,1),1,gramxar,1,x_cplx=x_cplx) 701 call abi_xcopy(blocksize*blocksize,transf3(:,:,2),1,gramrar,1,x_cplx=x_cplx) 702 call abi_xcopy(blocksize*blocksize,transf3(:,:,3),1,gramxax,1,x_cplx=x_cplx) 703 704 ! gramxbx=matmul((blockvectorbx)^T,blockvectorx) 705 ! gramrbr=matmul((blockvectorbr)^T,blockvectorr) 706 ! gramxbr=matmul((blockvectorbx)^T,blockvectorr) 707 ! Note that the gramb matrix is more easier to construct than grama: 708 ! i) <x|B|x>=<r|B|r>=<p|B|p>=(1;0) 709 ! since the x, r and p blockvector are normalized 710 ! ii) <r|B|x>=(0;0) 711 ! since the x and r blockvector are orthogonalized 712 ! iii) The <p|B|r> and <p|B|x> have to be computed. 713 gramxbx(:,:)=zero 714 gramrbr(:,:)=zero 715 gramxbr(:,:)=zero 716 do iblocksize=1,blocksize 717 zvar=(/one,zero/) 718 call abi_xcopy(1,zvar,1,gramxbx(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 719 call abi_xcopy(1,zvar,1,gramrbr(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 720 end do 721 722 ! ########################################################################### 723 ! ################ PERFORM LOOP ON COND ##################################### 724 ! ########################################################################### 725 726 i1=0;i2=blocksize;i3=2*blocksize;i4=3*blocksize 727 cond: do cond_try=1,2 !2 when restart 728 if (restart==0) then 729 730 ! gramxap=matmul((blockvectorax)^T,blockvectorp) 731 ! gramrap=matmul((blockvectorar)^T,blockvectorp) 732 ! grampap=matmul((blockvectorap)^T,blockvectorp) 733 ! gramxbp=matmul((blockvectorbx)^T,blockvectorp) 734 ! gramrbp=matmul((blockvectorbr)^T,blockvectorp) 735 ! grampbp=matmul((blockvectorbp)^T,blockvectorp) 736 if(use_linalg_gpu==1) then 737 call copy_on_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize) 738 call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize) 739 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 740 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 741 call copy_from_gpu(gramxap,gram_gpu,cplx*dp*blocksize*blocksize) 742 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 743 & cone,blockvectorar_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 744 call copy_from_gpu(gramrap,gram_gpu,cplx*dp*blocksize*blocksize) 745 call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize) 746 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 747 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 748 call copy_from_gpu(grampap,gram_gpu,cplx*dp*blocksize*blocksize) 749 call copy_on_gpu(blockvectorbx,A_gpu,cplx*dp*vectsize*blocksize) 750 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 751 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 752 call copy_from_gpu(gramxbp,gram_gpu,cplx*dp*blocksize*blocksize) 753 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 754 & cone,blockvectorbr_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 755 call copy_from_gpu(gramrbp,gram_gpu,cplx*dp*blocksize*blocksize) 756 else 757 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 758 & vectsize,blockvectorp,vectsize,czero,gramxap,blocksize,x_cplx=x_cplx) 759 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,& 760 & vectsize,blockvectorp,vectsize,czero,gramrap,blocksize,x_cplx=x_cplx) 761 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorap,& 762 & vectsize,blockvectorp,vectsize,czero,grampap,blocksize,x_cplx=x_cplx) 763 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,& 764 & vectsize,blockvectorp,vectsize,czero,gramxbp,blocksize,x_cplx=x_cplx) 765 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbr,& 766 & vectsize,blockvectorp,vectsize,czero,gramrbp,blocksize,x_cplx=x_cplx) 767 end if 768 ! It's not necessary to compute the last one: <p|B|p>=(1;0) (see above) 769 transf5(:,:,1)=gramxap(:,:) 770 transf5(:,:,2)=gramrap(:,:) 771 transf5(:,:,3)=grampap(:,:) 772 transf5(:,:,4)=gramxbp(:,:) 773 transf5(:,:,5)=gramrbp(:,:) 774 if(abs(dtset%timopt)==3) then 775 call timab(533,1,tsec) 776 end if 777 call xmpi_sum(transf5,mpi_enreg%comm_bandspinorfft,ierr) 778 if(abs(dtset%timopt)==3) then 779 call timab(533,2,tsec) 780 end if 781 gramxap(:,:)=transf5(:,:,1) 782 gramrap(:,:)=transf5(:,:,2) 783 grampap(:,:)=transf5(:,:,3) 784 gramxbp(:,:)=transf5(:,:,4) 785 gramrbp(:,:)=transf5(:,:,5) 786 grampbp(:,:)=zero 787 do iblocksize=1,blocksize 788 zvar=(/one,zero/) 789 call abi_xcopy(1,zvar,1,grampbp(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 790 end do 791 bigorder=i4 792 ABI_MALLOC(grama,(cplx*i4,i4)) 793 ABI_MALLOC(gramb,(cplx*i4,i4)) 794 ABI_MALLOC(eigen,(i4)) 795 ! ABI_MALLOC(coordx,(cplx*i4,blocksize)) 796 ABI_MALLOC(coordx1,(cplx*blocksize,blocksize)) 797 ABI_MALLOC(coordx2,(cplx*blocksize,blocksize)) 798 ABI_MALLOC(coordx3,(cplx*blocksize,blocksize)) 799 grama(:,:)=zero;gramb(:,:)=zero 800 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax 801 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar 802 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxap 803 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar 804 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrap 805 grama(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampap 806 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx 807 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr 808 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxbp 809 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr 810 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrbp 811 gramb(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampbp 812 else 813 bigorder=i3 814 ABI_MALLOC(grama,(cplx*i3,i3)) 815 ABI_MALLOC(gramb,(cplx*i3,i3)) 816 ABI_MALLOC(eigen,(i3)) 817 ! ABI_MALLOC(coordx,(cplx*i3,blocksize)) 818 ABI_MALLOC(coordx1,(cplx*blocksize,blocksize)) 819 ABI_MALLOC(coordx2,(cplx*blocksize,blocksize)) 820 grama(:,:)=zero;gramb(:,:)=zero 821 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax 822 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar 823 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar 824 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx 825 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr 826 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr 827 end if 828 829 ABI_MALLOC(tmpgramb,(cplx*bigorder,bigorder)) 830 ABI_MALLOC(tmpeigen,(bigorder)) 831 tmpgramb=gramb 832 833 call abi_xheev('v','u',bigorder,tmpgramb,bigorder,tmpeigen,x_cplx=cplx,istwf_k=istwf_k, & 834 & timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu) 835 836 condestgramb=tmpeigen(bigorder)/tmpeigen(1) 837 ABI_FREE(tmpgramb) 838 ABI_FREE(tmpeigen) 839 840 if (condestgramb.gt.1d+5.or.condestgramb.lt.0.d0.or.info/=0) then 841 write(std_out,*)'condition number of the Gram matrix = ',condestgramb 842 if (cond_try==1.and.restart==0) then 843 ABI_FREE(grama) 844 ABI_FREE(gramb) 845 ABI_FREE(eigen) 846 ! ABI_FREE(coordx) 847 ABI_FREE(coordx1) 848 ABI_FREE(coordx2) 849 if(bigorder==i4) then 850 ABI_FREE(coordx3) 851 end if 852 if (nrestart.gt.1) then 853 ABI_WARNING('the minimization is stopped for this block') 854 exit iter 855 else 856 restart=1 857 nrestart=nrestart+1 858 call wrtout(std_out,'Lobpcgwf: restart performed',"PERS") 859 end if 860 else 861 ABI_WARNING('Gramm matrix ill-conditionned: results may be unpredictable') 862 end if 863 else 864 exit cond 865 end if 866 end do cond 867 868 ! ########################################################################### 869 ! ################ END LOOP ON COND ######################################### 870 ! ########################################################################### 871 872 call abi_xhegv(1,'v','u',bigorder,grama,bigorder,gramb,bigorder,eigen,x_cplx=cplx,istwf_k=istwf_k, & 873 timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu) 874 875 deltae=-one 876 do iblocksize=1,blocksize 877 call abi_xcopy(1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,zvar,1,x_cplx=x_cplx) 878 deltae=max(deltae,abs(cmplx(zvar(1),zvar(2))-eigen(iblocksize))) 879 zvar=(/eigen(iblocksize),zero/) 880 call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,x_cplx=x_cplx) 881 end do 882 883 ! DEBUG 884 ! write(std_out,*)'eigen',eigen(1:blocksize) 885 ! ENDDEBUG 886 887 ! coordx(1:bigorder*cplx,1:blocksize)=grama(1:bigorder*cplx,1:blocksize) 888 coordx1(:,:) = grama(1+i1*cplx : i2*cplx,1:blocksize) 889 coordx2(:,:) = grama(1+i2*cplx : i3*cplx,1:blocksize) 890 if(bigorder==i4) then 891 coordx3(:,:) = grama(1+i3*cplx : i4*cplx,1:blocksize) 892 end if 893 894 895 if(use_linalg_gpu==1) then 896 call copy_on_gpu(coordx2,coordx2_gpu,cplx*dp*blocksize*blocksize) 897 if(bigorder==i4) then 898 call copy_on_gpu(coordx3,coordx3_gpu,cplx*dp*blocksize*blocksize) 899 end if 900 end if 901 902 ABI_FREE(grama) 903 ABI_FREE(gramb) 904 ABI_FREE(eigen) 905 if (restart==0 .and. iterationnumber >1) then 906 907 ! blockvectorp=matmul(blockvectorr,coordx(i2+1:i3,:))+& 908 ! & matmul(blockvectorp,coordx(i3+1:i4,:)) 909 if(use_linalg_gpu==1) then 910 ! call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 911 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,& 912 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 913 call copy_on_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize) 914 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 915 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 916 call copy_from_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize) 917 else 918 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,& 919 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 920 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorp,& 921 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 922 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorp,1,x_cplx=x_cplx) 923 end if 924 925 ! blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))+& 926 ! & matmul(blockvectorap,coordx(i3+1:i4,:)) 927 if(use_linalg_gpu==1) then 928 ! call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 929 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,& 930 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 931 call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize) 932 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 933 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 934 call copy_from_gpu(blockvectorap,C_gpu,cplx*dp*vectsize*blocksize) 935 else 936 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,& 937 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 938 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorap,& 939 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 940 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorap,1,x_cplx=x_cplx) 941 end if 942 943 944 ! blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:))+& 945 ! & matmul(blockvectorvp,coordx(i3+1:i4,:)) 946 if (gs_hamk%usepaw==0) then 947 if(use_linalg_gpu==1) then 948 call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize) 949 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 950 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 951 call copy_on_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize) 952 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 953 & vectsize,coordx3_gpu,blocksize,cone,C_gpu,vectsize) 954 call copy_from_gpu(blockvectorvp,C_gpu,cplx*dp*vectsize*blocksize) 955 else 956 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,& 957 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 958 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvp,& 959 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 960 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvp,1,x_cplx=x_cplx) 961 end if 962 end if 963 964 ! blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))+& 965 ! & matmul(blockvectorbp,coordx(i3+1:i4,:)) 966 if(use_linalg_gpu==1) then 967 ! call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 968 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,& 969 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 970 call copy_on_gpu(blockvectorbp,A_gpu,cplx*dp*vectsize*blocksize) 971 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 972 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 973 call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize) 974 else 975 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,& 976 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 977 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbp,& 978 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 979 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbp,1,x_cplx=x_cplx) 980 end if 981 982 else 983 984 ! blockvectoSz =matmul(blockvectorr,coordx(i2+1:i3,:)) 985 if(use_linalg_gpu==1) then 986 ! call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 987 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,& 988 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 989 call copy_from_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize) 990 else 991 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,& 992 & vectsize,coordx2,blocksize,czero,blockvectorp,vectsize,x_cplx=x_cplx) 993 end if 994 995 ! blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:)) 996 if(use_linalg_gpu==1) then 997 ! call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 998 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,& 999 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1000 call copy_from_gpu(blockvectorap,C_gpu,cplx*dp*vectsize*blocksize) 1001 else 1002 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,& 1003 & vectsize,coordx2,blocksize,czero,blockvectorap,vectsize,x_cplx=x_cplx) 1004 end if 1005 ! blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:)) 1006 if (gs_hamk%usepaw==0) then 1007 if(use_linalg_gpu==1) then 1008 call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize) 1009 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1010 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1011 call copy_from_gpu(blockvectorvp,C_gpu,cplx*dp*vectsize*blocksize) 1012 else 1013 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,& 1014 & vectsize,coordx2,blocksize,czero,blockvectorvp,vectsize,x_cplx=x_cplx) 1015 end if 1016 end if 1017 1018 ! blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:)) 1019 if(use_linalg_gpu==1) then 1020 ! call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 1021 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,& 1022 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1023 call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize) 1024 else 1025 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,& 1026 & vectsize,coordx2,blocksize,czero,blockvectorbp,vectsize,x_cplx=x_cplx) 1027 end if 1028 end if 1029 1030 if(use_linalg_gpu==1) then 1031 call copy_on_gpu(coordx1,coordx2_gpu,cplx*dp*blocksize*blocksize) 1032 end if 1033 1034 ! blockvectorx = matmul(blockvectorx,coordx(i1+1:i2,:))+blockvectorp 1035 if(use_linalg_gpu==1) then 1036 call copy_on_gpu(blockvectorx,A_gpu,cplx*dp*vectsize*blocksize) 1037 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1038 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1039 call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize) 1040 else 1041 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,& 1042 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1043 end if 1044 blockvectorx = blockvectordumm+blockvectorp 1045 1046 ! blockvectorax= matmul(blockvectorax,coordx(i1+1:i2,:))+blockvectorap 1047 if(use_linalg_gpu==1) then 1048 call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize) 1049 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1050 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1051 call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize) 1052 else 1053 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,& 1054 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1055 end if 1056 blockvectorax = blockvectordumm+blockvectorap 1057 1058 ! blockvectorvx= matmul(blockvectorvx,coordx(i1+1:i2,:))+blockvectorvp 1059 if (gs_hamk%usepaw==0) then 1060 if(use_linalg_gpu==1) then 1061 call copy_on_gpu(blockvectorvx,A_gpu,cplx*dp*vectsize*blocksize) 1062 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1063 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1064 call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize) 1065 else 1066 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,& 1067 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1068 end if 1069 blockvectorvx = blockvectordumm+blockvectorvp 1070 end if 1071 1072 ! blockvectorbx= matmul(blockvectorbx,coordx(i1+1:i2,:))+blockvectorbp 1073 if(use_linalg_gpu==1) then 1074 call copy_on_gpu(blockvectorbx,A_gpu,cplx*dp*vectsize*blocksize) 1075 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1076 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1077 call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize) 1078 else 1079 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,& 1080 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1081 end if 1082 blockvectorbx = blockvectordumm+blockvectorbp 1083 1084 ! ABI_FREE(coordx) 1085 ABI_FREE(coordx1) 1086 ABI_FREE(coordx2) 1087 if(bigorder==i4) then 1088 ABI_FREE(coordx3) 1089 end if 1090 1091 ! Check convergence on energy and eventually exit 1092 if (iterationnumber==1) then 1093 deold=deltae 1094 else if (iterationnumber>1) then 1095 if ((abs(deltae)<0.005*abs(deold)).and.(iterationnumber/=maxiterations))then 1096 if(prtvol>=10)then 1097 write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) & 1098 & ' lobpcgwf: block',iblock,', line',iterationnumber,& 1099 & ', deltae=',deltae,' < 0.005*',deold,' =>skip lines !' 1100 call wrtout(std_out,message,'PERS') 1101 end if 1102 exit 1103 else if (abs(deltae)>0.005*abs(deold)) then 1104 if(prtvol>=10)then 1105 write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) & 1106 & ' lobpcgwf: block',iblock,', line',iterationnumber,& 1107 & ', deltae=',deltae,' > 0.005*',deold,' =>keep on working !' 1108 call wrtout(std_out,message,'PERS') 1109 end if 1110 end if 1111 end if 1112 1113 if(abs(dtset%timopt)==4) then 1114 call timab(524,2,tsec) 1115 end if 1116 1117 end do iter 1118 1119 ! ########################################################################### 1120 ! ################## END LOOP ON NLINE ###################################### 1121 ! ########################################################################### 1122 1123 if(abs(dtset%timopt)==4) then 1124 call timab(525,1,tsec) 1125 end if 1126 1127 if (havetoprecon) then 1128 call xprecon(blockvectorbx,lambda,blocksize,& 1129 & iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,& 1130 & optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon) 1131 1132 residualnorms=sum(abs(blockvectorr)**2,dim=1) 1133 1134 if(abs(dtset%timopt)==3) then 1135 call timab(533,1,tsec) 1136 end if 1137 call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr) 1138 if(abs(dtset%timopt)==3) then 1139 call timab(533,2,tsec) 1140 end if 1141 1142 resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize) 1143 end if 1144 1145 call wfcopy('I',vectsize*blocksize,blockvectorx,1,cg,1,blocksize,iblock,'C',withbbloc=.true.,& 1146 & timopt=timopt,tim_wfcopy=tim_wfcopy) 1147 1148 if(gen_eigenpb) then 1149 call wfcopy('I',vectsize*blocksize,blockvectorbx,1,gsc,1,blocksize,iblock,'S',withbbloc=.true.,& 1150 & timopt=timopt,tim_wfcopy=tim_wfcopy) 1151 end if 1152 1153 ! The Vnl+VFockACE part of the Hamiltonian is no more stored in the packed form such as it was the case for subvnlx(:). 1154 ! Now, the full matrix is stored in totvnlx(:,:). This trick permits: 1155 ! 1) to avoid the reconstruction of the total matrix in vtowfk.F90 (double loop over bands) 1156 ! 2) to use two optimized matrix-matrix blas routine for general (in lobpcgccwf.F90) or hermitian (in vtowfk.F90) 1157 ! operators, zgemm.f and zhemm.f respectively, rather than a triple loop in both cases. 1158 iwavef=iblock*blocksize 1159 isubh=1+2*bblocksize*(bblocksize+1)/2 1160 1161 ABI_MALLOC(blockvectorz,(cplx*vectsize,iwavef)) 1162 if(bblocksize > 0 ) then 1163 call abi_xcopy(bblocksize*vectsize,blockvectory(:,1:bblocksize),1,blockvectorz(:,1:bblocksize),1,x_cplx=x_cplx) 1164 end if 1165 call abi_xcopy( blocksize*vectsize,blockvectorx(:,1:blocksize) ,1,blockvectorz(:,bblocksize+1:iwavef),1,x_cplx=x_cplx) 1166 1167 ABI_MALLOC(tsubham,(cplx*iwavef,blocksize)) 1168 tsubham(:,:)=zero 1169 call abi_xgemm(cparam(cplx),'n',iwavef,blocksize,vectsize,cone,blockvectorz,vectsize,& 1170 & blockvectorax,vectsize,czero,tsubham,iwavef,x_cplx=x_cplx) 1171 1172 if (gs_hamk%usepaw==0) then 1173 ! MG FIXME: Here gfortran4.9 allocates temporary array for C in abi_d2zgemm. 1174 call abi_xgemm(cparam(cplx),'n',blocksize,iwavef,vectsize,cone,blockvectorvx,vectsize,& 1175 & blockvectorz,vectsize,czero,totvnlx(cplx*bblocksize+1:cplx*iwavef,1:iwavef),blocksize,x_cplx=x_cplx) 1176 end if 1177 1178 do iblocksize=1,blocksize 1179 do ii=1,bblocksize+iblocksize 1180 if ( cplx == 1 ) then 1181 subham(isubh) = tsubham(ii,iblocksize) 1182 subham(isubh+1)= zero 1183 else 1184 subham(isubh) = tsubham(2*ii-1,iblocksize) 1185 subham(isubh+1)= tsubham(2*ii ,iblocksize) 1186 end if 1187 isubh=isubh+2 1188 end do 1189 end do 1190 ABI_FREE(tsubham) 1191 ABI_FREE(blockvectorz) 1192 ! comm for subham and subvnlx are made in vtowfk 1193 1194 ABI_FREE(pcon) 1195 ABI_FREE(blockvectory) 1196 ABI_FREE(blockvectorby) 1197 ABI_FREE(gramyx) 1198 ABI_FREE(blockvectorx) 1199 ABI_FREE(blockvectorax) 1200 ABI_FREE(blockvectorbx) 1201 ABI_FREE(blockvectorr) 1202 ABI_FREE(blockvectorar) 1203 ABI_FREE(blockvectorbr) 1204 ABI_FREE(blockvectorp) 1205 ABI_FREE(blockvectorap) 1206 ABI_FREE(blockvectorbp) 1207 if (gs_hamk%usepaw==0) then 1208 ABI_FREE(blockvectorvx) 1209 ABI_FREE(blockvectorvp) 1210 ABI_FREE(blockvectorvr) 1211 end if 1212 ABI_FREE(blockvectordumm) 1213 ABI_FREE(gramxax) 1214 ABI_FREE(gramxar) 1215 ABI_FREE(gramxap) 1216 ABI_FREE(gramrar) 1217 ABI_FREE(gramrap) 1218 ABI_FREE(grampap) 1219 ABI_FREE(gramxbx) 1220 ABI_FREE(gramxbr) 1221 ABI_FREE(gramxbp) 1222 ABI_FREE(gramrbr) 1223 ABI_FREE(gramrbp) 1224 ABI_FREE(grampbp) 1225 ABI_FREE(transf3) 1226 ABI_FREE(transf5) 1227 ABI_FREE(lambda) 1228 ABI_FREE(residualnorms) 1229 if(use_linalg_gpu==1) then 1230 call dealloc_on_gpu(bblockvector_gpu) 1231 call dealloc_on_gpu(gram_gpu) 1232 end if 1233 1234 end do ! End big loop over bands inside blocks 1235 1236 if(use_linalg_gpu==1) then 1237 call dealloc_on_gpu(blockvectorr_gpu) 1238 call dealloc_on_gpu(blockvectorar_gpu) 1239 call dealloc_on_gpu(blockvectorbr_gpu) 1240 call dealloc_on_gpu(A_gpu) 1241 call dealloc_on_gpu(C_gpu) 1242 call dealloc_on_gpu(coordx2_gpu) 1243 call dealloc_on_gpu(coordx3_gpu) 1244 !call gpu_linalg_shutdown() 1245 end if 1246 1247 if(abs(dtset%timopt)==4) then 1248 call timab(525,2,tsec) 1249 end if 1250 call timab(530,2,tsec) 1251 1252 DBG_ENTER("COLL") 1253 1254 contains 1255 1256 function gramindex(iblocksize) 1257 1258 integer :: gramindex,iblocksize 1259 gramindex=(iblocksize-1)*cplx+1 1260 end function gramindex 1261 1262 end subroutine lobpcgwf
ABINIT/m_lobpcgwf_old [ Modules ]
NAME
m_lobpcgwf_old
FUNCTION
COPYRIGHT
Copyright (C) 2008-2022 ABINIT group () This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_lobpcgwf_old 23 24 implicit none 25 26 private