TABLE OF CONTENTS
ABINIT/lobpcgwf [ Functions ]
NAME
lobpcgwf
FUNCTION
this routine updates the whole wave functions at a given k-point, using the lobpcg method for a given spin-polarization, from a fixed hamiltonian but might also simply compute eigenvectors and eigenvalues at this k point. it will also update the matrix elements of the hamiltonian.
COPYRIGHT
Copyright (C) 1998-2024 ABINIT group (FBottin,GZ,AR,MT,FDahm) this file is distributed under the terms of the gnu general public license, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . for the initials of contributors, see ~abinit/doc/developers/contributors.txt .
INPUTS
dtset <type(dataset_type)>=all input variales for this dataset gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k icg=shift to be applied on the location of data in the array cg igsc=shift to be applied on the location of data in the array gsc kinpw(npw)=(modified) kinetic energy for each plane wave (hartree) mcg=second dimension of the cg array mgsc=second dimension of the gsc array mpi_enreg=information about MPI parallelization nband_k=number of bands at this k point for that spin polarization nbdblock : number of blocks npw_k=number of plane waves at this k point prtvol=control print volume and debugging output use_totvnlx=1 if one has to compute totvnlx
OUTPUT
resid_k(nband_k)=residuals for each states subham(nband_k*(nband_k+1))=the matrix elements of h If gs_hamk%usepaw==0: gsc(2,mgsc)=<g|s|c> matrix elements (s=overlap) totvnlx(nband_k*use_totvnlx,nband_k*use_totvnlx)=the matrix elements of vnl+vfockACE
SIDE EFFECTS
cg(2,mcg)=updated wavefunctions
SOURCE
80 subroutine lobpcgwf(cg,dtset,gs_hamk,gsc,icg,igsc,kinpw,mcg,mgsc,mpi_enreg,& 81 & nband_k,nbdblock,npw_k,prtvol,resid_k,subham,totvnlx,use_totvnlx) 82 83 84 use defs_basis 85 use m_abicore 86 use m_lobpcg 87 use m_abi_linalg 88 use m_wfutils 89 use m_xmpi 90 use m_errors 91 use, intrinsic :: iso_c_binding 92 use m_dtset 93 94 use defs_abitypes, only : mpi_type 95 use m_time, only : timab 96 use m_hamiltonian, only : gs_hamiltonian_type 97 use m_pawcprj, only : pawcprj_type 98 use m_getghc, only : getghc 99 use m_prep_kgb, only : prep_getghc 100 101 implicit none 102 103 !Arguments ------------------------------------ 104 integer,intent(in) :: icg,igsc,mcg,mgsc,nband_k,nbdblock,npw_k,prtvol,use_totvnlx 105 type(gs_hamiltonian_type),intent(inout) :: gs_hamk 106 type(dataset_type),intent(in) :: dtset 107 type(mpi_type),intent(in) :: mpi_enreg 108 real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc) 109 real(dp),intent(in) :: kinpw(npw_k) 110 real(dp),intent(out) :: resid_k(nband_k) 111 real(dp),intent(inout) :: subham(nband_k*(nband_k+1)) 112 real(dp),intent(inout) :: totvnlx((3-gs_hamk%istwf_k)*nband_k*use_totvnlx,nband_k*use_totvnlx) 113 114 !Local variables------------------------------- 115 integer, parameter :: tim_getghc=5 116 integer :: activepsize,activersize,bblocksize,bigorder,blocksize,cpopt 117 integer :: cond_try 118 integer :: iblocksize,iblock,ierr,ii,info,istwf_k,isubh 119 integer :: iterationnumber 120 integer :: iwavef,i1,i2,i3,i4,maxiterations,my_nspinor 121 integer :: nrestart,optekin,optpcon,restart 122 integer :: sij_opt,timopt,tim_wfcopy,tim_xeigen 123 integer :: tim_xortho,tim_xprecon,use_lapack_gpu,use_linalg_gpu,vectsize 124 logical :: gen_eigenpb 125 integer :: cplx 126 real(dp) :: condestgramb,deltae,deold,dum 127 complex(dpc) :: cminusone 128 real(dp) :: zvar(2) 129 logical :: havetoprecon 130 real(dp) :: tsec(2) 131 real(dp), allocatable :: gwavef(:,:),cwavef(:,:),gvnlxc(:,:) 132 real(dp), allocatable :: swavef(:,:) 133 real(dp), allocatable :: residualnorms(:),eigen(:) 134 real(dp), allocatable :: tmpeigen(:) 135 real(dp), allocatable :: pcon(:,:) 136 real(dp), allocatable, target :: blockvectorx(:,:),blockvectorvx(:,:),blockvectorax(:,:),blockvectorbx(:,:) 137 real(dp), allocatable, target :: blockvectorr(:,:),blockvectorvr(:,:),blockvectorar(:,:),blockvectorbr(:,:) 138 real(dp), allocatable, target :: blockvectorp(:,:),blockvectorvp(:,:),blockvectorap(:,:),blockvectorbp(:,:),blockvectordumm(:,:) 139 real(dp), allocatable, target :: blockvectory(:,:),blockvectorby(:,:),blockvectorz(:,:) 140 real(dp), allocatable, target :: gramxax(:,:),gramxar(:,:),gramxap(:,:),gramrar(:,:),gramrap(:,:),grampap(:,:) 141 real(dp), allocatable, target :: gramxbx(:,:),gramxbr(:,:),gramxbp(:,:),gramrbr(:,:),gramrbp(:,:),grampbp(:,:) 142 real(dp), allocatable, target :: coordx1(:,:),coordx2(:,:),coordx3(:,:),lambda(:,:),grama(:,:),gramb(:,:),gramyx(:,:) 143 real(dp), allocatable :: tmpgramb(:,:),transf3(:,:,:),transf5(:,:,:) 144 real(dp), allocatable :: tsubham(:,:) 145 type(pawcprj_type) :: cprj_dum(gs_hamk%natom,0) 146 character(len=500) :: message 147 character, dimension(2) :: cparam 148 type(c_ptr) :: A_gpu,C_gpu,coordx2_gpu,coordx3_gpu,bblockvector_gpu,gram_gpu 149 type(c_ptr) :: blockvectorr_gpu,blockvectorar_gpu,blockvectorbr_gpu 150 151 !Index of a given band 152 !gramindex(iblocksize)=(iblocksize-1)*cplx+1 153 154 ! ********************************************************************* 155 156 DBG_ENTER("COLL") 157 158 call timab(530,1,tsec) 159 if(abs(dtset%timopt)==4) then 160 call timab(520,1,tsec) 161 end if 162 163 !########################################################################### 164 !################ INITIALISATION ########################################## 165 !########################################################################### 166 167 !For timing 168 timopt=dtset%timopt 169 tim_wfcopy=584 170 !tim_xcopy=584 171 tim_xeigen=587 172 !tim_xgemm=532 173 tim_xortho=535 174 tim_xprecon=536 175 !tim_xtrsm=535 176 177 !Variables 178 maxiterations=dtset%nline 179 gen_eigenpb=(gs_hamk%usepaw==1) 180 my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor) 181 cminusone=-cone 182 istwf_k=gs_hamk%istwf_k 183 info = 0 184 cparam(1)='t' 185 cparam(2)='c' 186 187 !Depends on istwfk 188 if ( istwf_k == 2 ) then 189 cplx=1 190 if (mpi_enreg%me_g0 == 1) then 191 vectsize=2*npw_k*my_nspinor-1 192 else 193 vectsize=2*npw_k*my_nspinor 194 end if 195 else 196 cplx=2 197 vectsize=npw_k*my_nspinor 198 end if 199 200 !For preconditionning 201 optekin=0;if (dtset%wfoptalg>10) optekin=0 202 optpcon=1;if (dtset%wfoptalg>10) optpcon=0 203 204 !For communication 205 !blocksize=mpi_enreg%nproc_fft 206 !if(mpi_enreg%paral_kgb==1) blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp 207 blocksize=nband_k/dtset%nblock_lobpcg 208 !IF you want to compare with new lobpcg in sequential uncomment the following 209 !line 210 !blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp 211 212 !Iniitializations/allocations of GPU parallelism 213 use_linalg_gpu=0;use_lapack_gpu=0 214 if ((dtset%gpu_option==ABI_GPU_LEGACY).and. & 215 & (vectsize*blocksize*blocksize>dtset%gpu_linalg_limit)) use_linalg_gpu=1 216 #if defined HAVE_LINALG_MAGMA 217 use_lapack_gpu=use_linalg_gpu 218 #endif 219 if(use_linalg_gpu==1) then 220 call alloc_on_gpu(A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 221 call alloc_on_gpu(C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 222 call alloc_on_gpu(blockvectorr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 223 call alloc_on_gpu(blockvectorar_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 224 call alloc_on_gpu(blockvectorbr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 225 call alloc_on_gpu(coordx2_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 226 call alloc_on_gpu(coordx3_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 227 end if 228 229 if(abs(dtset%timopt)==4) then 230 call timab(520,2,tsec) 231 end if 232 233 !########################################################################### 234 !################ BIG LOOP OVER BLOCKS #################################### 235 !########################################################################### 236 237 do iblock=1,nbdblock 238 239 if(abs(dtset%timopt)==4) then 240 call timab(521,1,tsec) 241 end if 242 243 havetoprecon=.true. 244 nrestart=0 245 bblocksize=(iblock-1)*blocksize 246 247 ! allocations 248 ABI_MALLOC(pcon,(npw_k,blocksize)) 249 ABI_MALLOC(blockvectorx,(cplx*vectsize,blocksize)) 250 ABI_MALLOC(blockvectorax,(cplx*vectsize,blocksize)) 251 ABI_MALLOC(blockvectorbx,(cplx*vectsize,blocksize)) 252 ABI_MALLOC(blockvectorr,(cplx*vectsize,blocksize)) 253 ABI_MALLOC(blockvectorar,(cplx*vectsize,blocksize)) 254 ABI_MALLOC(blockvectorbr,(cplx*vectsize,blocksize)) 255 ABI_MALLOC(blockvectorp,(cplx*vectsize,blocksize)) 256 ABI_MALLOC(blockvectorap,(cplx*vectsize,blocksize)) 257 ABI_MALLOC(blockvectorbp,(cplx*vectsize,blocksize)) 258 ABI_MALLOC(blockvectordumm,(cplx*vectsize,blocksize)) 259 ABI_MALLOC(gramxax,(cplx*blocksize,blocksize)) 260 ABI_MALLOC(gramxar,(cplx*blocksize,blocksize)) 261 ABI_MALLOC(gramxap,(cplx*blocksize,blocksize)) 262 ABI_MALLOC(gramrar,(cplx*blocksize,blocksize)) 263 ABI_MALLOC(gramrap,(cplx*blocksize,blocksize)) 264 ABI_MALLOC(grampap,(cplx*blocksize,blocksize)) 265 ABI_MALLOC(gramxbx,(cplx*blocksize,blocksize)) 266 ABI_MALLOC(gramxbr,(cplx*blocksize,blocksize)) 267 ABI_MALLOC(gramxbp,(cplx*blocksize,blocksize)) 268 ABI_MALLOC(gramrbr,(cplx*blocksize,blocksize)) 269 ABI_MALLOC(gramrbp,(cplx*blocksize,blocksize)) 270 ABI_MALLOC(grampbp,(cplx*blocksize,blocksize)) 271 ABI_MALLOC(transf3,(cplx*blocksize,blocksize,3)) 272 ABI_MALLOC(transf5,(cplx*blocksize,blocksize,5)) 273 ABI_MALLOC(lambda,(cplx*blocksize,blocksize)) 274 ABI_MALLOC(residualnorms,(blocksize)) 275 276 ABI_MALLOC(blockvectory,(cplx*vectsize,bblocksize)) 277 ABI_MALLOC(blockvectorby,(cplx*vectsize,bblocksize)) 278 ABI_MALLOC(gramyx,(cplx*bblocksize,blocksize)) 279 if (gs_hamk%usepaw==0) then 280 ABI_MALLOC(blockvectorvx,(cplx*vectsize,blocksize)) 281 ABI_MALLOC(blockvectorvr,(cplx*vectsize,blocksize)) 282 ABI_MALLOC(blockvectorvp,(cplx*vectsize,blocksize)) 283 end if 284 285 if(use_linalg_gpu==1) then 286 if(iblock/=1) then 287 call alloc_on_gpu(bblockvector_gpu, INT(cplx, c_size_t)*dp*vectsize*bblocksize) 288 call alloc_on_gpu(gram_gpu, INT(cplx, c_size_t)*dp*bblocksize*blocksize) 289 else 290 call alloc_on_gpu(bblockvector_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 291 call alloc_on_gpu(gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 292 end if 293 end if 294 295 ! Initialize global variables in m_wfutils. 296 call setWFParameter(cplx,mpi_enreg%me_g0,npw_k,my_nspinor,icg,igsc,blocksize) 297 298 ! transfer array of wf coeff in iblock to blockvectorx 299 call wfcopy('D',blocksize*vectsize,cg,1,blockvectorx,1,blocksize,iblock,'C',withbbloc=.true.,& 300 & timopt=timopt,tim_wfcopy=tim_wfcopy) 301 302 ! !!!!!!!!!!!!!!!!!!!!!!!! Begin if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!! 303 ! transfer array of wf coeff less than iblock to blockvectory 304 if(iblock /=1) then 305 call wfcopy('D',bblocksize*vectsize,cg,1,blockvectory,1,bblocksize,iblock,'C',withbbloc=.false.,& 306 & timopt=timopt,tim_wfcopy=tim_wfcopy) 307 308 if(gen_eigenpb) then 309 call wfcopy('D',bblocksize*vectsize,gsc,1,blockvectorby,1,bblocksize,iblock,'S',withbbloc=.false.,& 310 & timopt=timopt,tim_wfcopy=tim_wfcopy) 311 else 312 call abi_xcopy(vectsize*bblocksize,blockvectory,1,blockvectorby,1,x_cplx=x_cplx) 313 end if 314 315 ! b-orthogonalize x to the constraint y (supposed b-orthonormal) 316 ! blockvectorx=blockvectorx-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorx)) 317 318 call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,& 319 & vectsize,blockvectorx,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx) 320 321 if(abs(dtset%timopt)==3) then 322 call timab(533,1,tsec) 323 end if 324 call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr) 325 if(abs(dtset%timopt)==3) then 326 call timab(533,2,tsec) 327 end if 328 329 call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,& 330 & vectsize,gramyx,bblocksize,cone,blockvectorx,vectsize,x_cplx=x_cplx) 331 332 end if 333 ! !!!!!!!!!!!!!!!!!!!!!!!! End if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!! 334 335 ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize)) 336 ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize)) 337 ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize)) 338 ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize)) 339 340 call wfcopy('I',vectsize*blocksize,blockvectorx,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,& 341 & timopt=timopt,tim_wfcopy=tim_wfcopy) 342 343 if(abs(dtset%timopt)==4) then 344 call timab(521,2,tsec) 345 end if 346 if(abs(dtset%timopt)==4) then 347 call timab(526,1,tsec) 348 end if 349 350 cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1 351 352 if (mpi_enreg%paral_kgb==0) then 353 call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,& 354 & mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0) 355 else 356 call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,& 357 & prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.) 358 end if 359 if(abs(dtset%timopt)==4) then 360 call timab(526,2,tsec) 361 end if 362 if(abs(dtset%timopt)==4) then 363 call timab(522,1,tsec) 364 end if 365 366 if ( gen_eigenpb ) then 367 call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbx,1,blocksize,iblock,'W',withbbloc=.false.,& 368 & timopt=timopt,tim_wfcopy=tim_wfcopy) 369 else 370 call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvx,1,blocksize,iblock,'W',withbbloc=.false.,& 371 & timopt=timopt,tim_wfcopy=tim_wfcopy) 372 call abi_xcopy(vectsize*blocksize,blockvectorx,1,blockvectorbx,1,x_cplx=x_cplx) 373 end if 374 375 call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorax,1,blocksize,iblock,'W',withbbloc=.false.,& 376 & timopt=timopt,tim_wfcopy=tim_wfcopy) 377 378 ABI_FREE(cwavef) 379 ABI_FREE(gwavef) 380 ABI_FREE(gvnlxc) 381 ABI_FREE(swavef) 382 383 call abi_xorthonormalize(blockvectorx,blockvectorbx,blocksize,mpi_enreg%comm_bandspinorfft,gramxbx,vectsize,& 384 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 385 386 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorbx,vectsize,x_cplx=x_cplx) 387 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorax,vectsize,x_cplx=x_cplx) 388 389 if (gs_hamk%usepaw==0) then 390 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorvx,vectsize,x_cplx=x_cplx) 391 end if 392 393 ! Do rayleigh ritz on a in space x 394 ! gramxax=matmul(transpose(blockvectorx),blockvectorax) 395 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorx,& 396 & vectsize,blockvectorax,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 397 398 if(abs(dtset%timopt)==3) then 399 call timab(533,1,tsec) 400 end if 401 call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr) 402 if(abs(dtset%timopt)==3) then 403 call timab(533,2,tsec) 404 end if 405 ABI_MALLOC(eigen,(blocksize)) 406 407 call abi_xheev('v','u',blocksize,gramxax,blocksize,eigen,x_cplx=cplx,istwf_k=istwf_k, & 408 timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu_magma=use_lapack_gpu) 409 410 ! blockvectorx=matmul(blockvectorx,gramxax) 411 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,& 412 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 413 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorx,1,x_cplx=x_cplx) 414 415 ! blockvectorax=matmul(blockvectorax,gramxax) 416 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,& 417 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 418 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorax,1,x_cplx=x_cplx) 419 420 ! blockvectorvx=matmul(blockvectorvx,gramxax) 421 if (gs_hamk%usepaw==0) then 422 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,& 423 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 424 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvx,1,x_cplx=x_cplx) 425 end if 426 427 ! blockvectorbx=matmul(blockvectorbx,gramxax) 428 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,& 429 & vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 430 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbx,1,x_cplx=x_cplx) 431 432 do iblocksize=1,blocksize 433 zvar=(/eigen(iblocksize),zero/) 434 call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 435 end do 436 ABI_FREE(eigen) 437 438 if(abs(dtset%timopt)==4) then 439 call timab(522,2,tsec) 440 end if 441 442 ! ########################################################################### 443 ! ################ PERFORM LOOP ON NLINE #################################### 444 ! ########################################################################### 445 ! now the main alogrithm 446 iter: do iterationnumber=1,maxiterations 447 448 if(abs(dtset%timopt)==4) then 449 call timab(523,1,tsec) 450 end if 451 452 ! Build residual 453 ! blockvectorr=blockvectorax-matmul(blockvectorx,lambda) 454 call xprecon(blockvectorbx,lambda,blocksize,& 455 & iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,& 456 & optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon) 457 458 residualnorms=sum(blockvectorr**2,dim=1) 459 460 if(abs(dtset%timopt)==3) then 461 call timab(533,1,tsec) 462 end if 463 call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr) 464 if(abs(dtset%timopt)==3) then 465 call timab(533,2,tsec) 466 end if 467 468 resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize) 469 470 ! If residual sufficiently small stop line minimizations 471 if (abs(maxval(residualnorms(1:blocksize)))<dtset%tolwfr_diago) then 472 if (prtvol > 0) then 473 write(message, '(a,i0,a,i0,a,es12.4)' ) & 474 & ' lobpcgwf: block ',iblock,' converged after ',iterationnumber,& 475 & ' line minimizations: maxval(resid(1:blocksize)) =',maxval(residualnorms(1:blocksize)) 476 call wrtout(std_out,message,'PERS') 477 end if 478 havetoprecon=.false. 479 exit 480 end if 481 482 if(use_linalg_gpu==1) then 483 call copy_on_gpu(blockvectorr, blockvectorr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 484 end if 485 486 if(iblock /=1) then 487 ! Residuals orthogonal to blockvectorby 488 ! blockvectorr=blockvectorr-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorr)) 489 490 if(use_linalg_gpu==1) then 491 call copy_on_gpu(blockvectorby, bblockvector_gpu, INT(cplx, c_size_t)*dp*vectsize*bblocksize) 492 call gpu_xgemm(cplx,cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,bblockvector_gpu,& 493 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,bblocksize) 494 call copy_from_gpu(gramyx, gram_gpu, INT(cplx, c_size_t)*dp*bblocksize*blocksize) 495 else 496 call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,& 497 & vectsize,blockvectorr,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx) 498 end if 499 500 if(abs(dtset%timopt)==3) then 501 call timab(533,1,tsec) 502 end if 503 call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr) 504 if(abs(dtset%timopt)==3) then 505 call timab(533,2,tsec) 506 end if 507 508 if(use_linalg_gpu==1) then 509 call copy_on_gpu(gramyx, gram_gpu, INT(cplx, c_size_t)*dp*bblocksize*blocksize) 510 call copy_on_gpu(blockvectory, bblockvector_gpu, INT(cplx, c_size_t)*dp*vectsize*bblocksize) 511 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,bblocksize,cminusone,bblockvector_gpu,& 512 & vectsize,gram_gpu,bblocksize,cone,blockvectorr_gpu,vectsize) 513 else 514 call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,& 515 & vectsize,gramyx,bblocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx) 516 end if 517 518 end if 519 520 ! Residuals orthogonal to blockvectorx 521 ! blockvectorr=blockvectorr-matmul(blockvectorx,matmul((blockvectorbx)^T,blockvectorr)) 522 if(use_linalg_gpu==1) then 523 call copy_on_gpu(blockvectorbx, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 524 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,C_gpu,& 525 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 526 call copy_from_gpu(gramxax, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 527 else 528 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,& 529 & vectsize,blockvectorr,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 530 end if 531 532 if(abs(dtset%timopt)==3) then 533 call timab(533,1,tsec) 534 end if 535 call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr) 536 if(abs(dtset%timopt)==3) then 537 call timab(533,2,tsec) 538 end if 539 540 if(use_linalg_gpu==1) then 541 call copy_on_gpu(gramxax, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 542 call copy_on_gpu(blockvectorx, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 543 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cminusone,C_gpu,& 544 & vectsize,gram_gpu,blocksize,cone,blockvectorr_gpu,vectsize) 545 call copy_from_gpu(blockvectorr, blockvectorr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 546 else 547 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cminusone,blockvectorx,& 548 & vectsize,gramxax,blocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx) 549 end if 550 551 ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize)) 552 ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize)) 553 ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize)) 554 ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize)) 555 556 call wfcopy('I',vectsize*blocksize,blockvectorr,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,& 557 & timopt=timopt,tim_wfcopy=tim_wfcopy) 558 559 cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1 560 561 if(abs(dtset%timopt)==4) then 562 call timab(523,2,tsec) 563 end if 564 if(abs(dtset%timopt)==4) then 565 call timab(526,1,tsec) 566 end if 567 568 if (mpi_enreg%paral_kgb==0) then 569 call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,& 570 & mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0) 571 else 572 call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,& 573 & prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.) 574 end if 575 576 if(abs(dtset%timopt)==4) then 577 call timab(526,2,tsec) 578 end if 579 if(abs(dtset%timopt)==4) then 580 call timab(524,1,tsec) 581 end if 582 583 if (gen_eigenpb) then 584 call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbr,1,blocksize,iblock,'W',withbbloc=.false.,& 585 & timopt=timopt,tim_wfcopy=tim_wfcopy) 586 else 587 call abi_xcopy(vectsize*blocksize,blockvectorr,1,blockvectorbr,1,x_cplx=x_cplx) 588 call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvr,1,blocksize,iblock,'W',withbbloc=.false.,& 589 & timopt=timopt,tim_wfcopy=tim_wfcopy) 590 end if 591 592 call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorar,1,blocksize,iblock,'W',withbbloc=.false.,& 593 & timopt=timopt,tim_wfcopy=tim_wfcopy) 594 595 ABI_FREE(cwavef) 596 ABI_FREE(gwavef) 597 ABI_FREE(gvnlxc) 598 ABI_FREE(swavef) 599 600 if(use_linalg_gpu==1) then 601 call copy_on_gpu(blockvectorbr, blockvectorbr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 602 call gpu_xorthonormalize(blockvectorr_gpu,blockvectorbr_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,& 603 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 604 call copy_from_gpu(blockvectorr, blockvectorr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 605 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorbr_gpu,vectsize) 606 call copy_from_gpu(blockvectorbr, blockvectorbr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 607 call copy_on_gpu(blockvectorar, blockvectorar_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 608 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorar_gpu,vectsize) 609 call copy_from_gpu(blockvectorar, blockvectorar_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 610 if (gs_hamk%usepaw==0) then 611 call copy_on_gpu(blockvectorvr, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 612 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 613 call copy_from_gpu(blockvectorvr, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 614 end if 615 call copy_from_gpu(gramrbr, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 616 else 617 call abi_xorthonormalize(blockvectorr,blockvectorbr,blocksize,mpi_enreg%comm_bandspinorfft,gramrbr,vectsize,& 618 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 619 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorbr,vectsize,x_cplx=x_cplx) 620 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorar,vectsize,x_cplx=x_cplx) 621 if (gs_hamk%usepaw==0) then 622 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorvr,vectsize,x_cplx=x_cplx) 623 end if 624 end if 625 626 if(iterationnumber>1) then 627 if(use_linalg_gpu==1) then 628 call copy_on_gpu(blockvectorp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 629 call copy_on_gpu(blockvectorbp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 630 call gpu_xorthonormalize(A_gpu,C_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,& 631 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 632 call copy_from_gpu(blockvectorp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 633 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,C_gpu,vectsize) 634 call copy_from_gpu(blockvectorbp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 635 call copy_on_gpu(blockvectorap, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 636 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 637 call copy_from_gpu(blockvectorap, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 638 if (gs_hamk%usepaw==0) then 639 call copy_on_gpu(blockvectorvp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 640 call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize) 641 call copy_from_gpu(blockvectorvp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 642 end if 643 call copy_from_gpu(grampbp, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 644 else 645 ! call orthonormalize(blockvectorp,blockvectorbp,blockvectorap) 646 call abi_xorthonormalize(blockvectorp,blockvectorbp,blocksize,mpi_enreg%comm_bandspinorfft,grampbp,vectsize,& 647 & x_cplx,timopt=timopt,tim_xortho=tim_xortho) 648 ! blockvectorap=matmul(blockvectorap,grampbp) 649 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorbp,vectsize,x_cplx=x_cplx) 650 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorap,vectsize,x_cplx=x_cplx) 651 if (gs_hamk%usepaw==0) then 652 call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorvp,vectsize,x_cplx=x_cplx) 653 end if 654 end if 655 end if 656 657 activersize=blocksize 658 if (iterationnumber==1) then 659 activepsize=0 660 restart=1 661 else 662 activepsize=blocksize 663 restart=0 664 end if 665 666 ! gramxar=matmul((blockvectorax)^T,blockvectorr) 667 ! gramrar=matmul((blockvectorar)^T,blockvectorr) 668 ! gramxax=matmul((blockvectorax)^T,blockvectorx) 669 if(use_linalg_gpu==1) then 670 call copy_on_gpu(blockvectorax, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 671 672 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,& 673 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 674 call copy_from_gpu(gramxar, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 675 676 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar_gpu,& 677 & vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize) 678 call copy_from_gpu(gramrar, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 679 680 call copy_on_gpu(blockvectorx, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 681 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,& 682 & vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 683 684 call copy_from_gpu(gramxax, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 685 else 686 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 687 & vectsize,blockvectorr,vectsize,czero,gramxar,blocksize,x_cplx=x_cplx) 688 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,& 689 & vectsize,blockvectorr,vectsize,czero,gramrar,blocksize,x_cplx=x_cplx) 690 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 691 & vectsize,blockvectorx,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx) 692 end if 693 694 call abi_xcopy(blocksize*blocksize,gramxar,1,transf3(:,:,1),1,x_cplx=x_cplx) 695 call abi_xcopy(blocksize*blocksize,gramrar,1,transf3(:,:,2),1,x_cplx=x_cplx) 696 call abi_xcopy(blocksize*blocksize,gramxax,1,transf3(:,:,3),1,x_cplx=x_cplx) 697 if(abs(dtset%timopt)==3) then 698 call timab(533,1,tsec) 699 end if 700 call xmpi_sum(transf3,mpi_enreg%comm_bandspinorfft,ierr) 701 if(abs(dtset%timopt)==3) then 702 call timab(533,2,tsec) 703 end if 704 705 call abi_xcopy(blocksize*blocksize,transf3(:,:,1),1,gramxar,1,x_cplx=x_cplx) 706 call abi_xcopy(blocksize*blocksize,transf3(:,:,2),1,gramrar,1,x_cplx=x_cplx) 707 call abi_xcopy(blocksize*blocksize,transf3(:,:,3),1,gramxax,1,x_cplx=x_cplx) 708 709 ! gramxbx=matmul((blockvectorbx)^T,blockvectorx) 710 ! gramrbr=matmul((blockvectorbr)^T,blockvectorr) 711 ! gramxbr=matmul((blockvectorbx)^T,blockvectorr) 712 ! Note that the gramb matrix is more easier to construct than grama: 713 ! i) <x|B|x>=<r|B|r>=<p|B|p>=(1;0) 714 ! since the x, r and p blockvector are normalized 715 ! ii) <r|B|x>=(0;0) 716 ! since the x and r blockvector are orthogonalized 717 ! iii) The <p|B|r> and <p|B|x> have to be computed. 718 gramxbx(:,:)=zero 719 gramrbr(:,:)=zero 720 gramxbr(:,:)=zero 721 do iblocksize=1,blocksize 722 zvar=(/one,zero/) 723 call abi_xcopy(1,zvar,1,gramxbx(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 724 call abi_xcopy(1,zvar,1,gramrbr(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 725 end do 726 727 ! ########################################################################### 728 ! ################ PERFORM LOOP ON COND ##################################### 729 ! ########################################################################### 730 731 i1=0;i2=blocksize;i3=2*blocksize;i4=3*blocksize 732 cond: do cond_try=1,2 !2 when restart 733 if (restart==0) then 734 735 ! gramxap=matmul((blockvectorax)^T,blockvectorp) 736 ! gramrap=matmul((blockvectorar)^T,blockvectorp) 737 ! grampap=matmul((blockvectorap)^T,blockvectorp) 738 ! gramxbp=matmul((blockvectorbx)^T,blockvectorp) 739 ! gramrbp=matmul((blockvectorbr)^T,blockvectorp) 740 ! grampbp=matmul((blockvectorbp)^T,blockvectorp) 741 if(use_linalg_gpu==1) then 742 call copy_on_gpu(blockvectorp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 743 call copy_on_gpu(blockvectorax,A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 744 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 745 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 746 call copy_from_gpu(gramxap, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 747 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 748 & cone,blockvectorar_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 749 call copy_from_gpu(gramrap, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 750 call copy_on_gpu(blockvectorap, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 751 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 752 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 753 call copy_from_gpu(grampap, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 754 call copy_on_gpu(blockvectorbx, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 755 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 756 & cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 757 call copy_from_gpu(gramxbp, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 758 call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,& 759 & cone,blockvectorbr_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize) 760 call copy_from_gpu(gramrbp, gram_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 761 else 762 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,& 763 & vectsize,blockvectorp,vectsize,czero,gramxap,blocksize,x_cplx=x_cplx) 764 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,& 765 & vectsize,blockvectorp,vectsize,czero,gramrap,blocksize,x_cplx=x_cplx) 766 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorap,& 767 & vectsize,blockvectorp,vectsize,czero,grampap,blocksize,x_cplx=x_cplx) 768 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,& 769 & vectsize,blockvectorp,vectsize,czero,gramxbp,blocksize,x_cplx=x_cplx) 770 call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbr,& 771 & vectsize,blockvectorp,vectsize,czero,gramrbp,blocksize,x_cplx=x_cplx) 772 end if 773 ! It's not necessary to compute the last one: <p|B|p>=(1;0) (see above) 774 transf5(:,:,1)=gramxap(:,:) 775 transf5(:,:,2)=gramrap(:,:) 776 transf5(:,:,3)=grampap(:,:) 777 transf5(:,:,4)=gramxbp(:,:) 778 transf5(:,:,5)=gramrbp(:,:) 779 if(abs(dtset%timopt)==3) then 780 call timab(533,1,tsec) 781 end if 782 call xmpi_sum(transf5,mpi_enreg%comm_bandspinorfft,ierr) 783 if(abs(dtset%timopt)==3) then 784 call timab(533,2,tsec) 785 end if 786 gramxap(:,:)=transf5(:,:,1) 787 gramrap(:,:)=transf5(:,:,2) 788 grampap(:,:)=transf5(:,:,3) 789 gramxbp(:,:)=transf5(:,:,4) 790 gramrbp(:,:)=transf5(:,:,5) 791 grampbp(:,:)=zero 792 do iblocksize=1,blocksize 793 zvar=(/one,zero/) 794 call abi_xcopy(1,zvar,1,grampbp(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx) 795 end do 796 bigorder=i4 797 ABI_MALLOC(grama,(cplx*i4,i4)) 798 ABI_MALLOC(gramb,(cplx*i4,i4)) 799 ABI_MALLOC(eigen,(i4)) 800 ! ABI_MALLOC(coordx,(cplx*i4,blocksize)) 801 ABI_MALLOC(coordx1,(cplx*blocksize,blocksize)) 802 ABI_MALLOC(coordx2,(cplx*blocksize,blocksize)) 803 ABI_MALLOC(coordx3,(cplx*blocksize,blocksize)) 804 grama(:,:)=zero;gramb(:,:)=zero 805 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax 806 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar 807 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxap 808 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar 809 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrap 810 grama(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampap 811 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx 812 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr 813 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxbp 814 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr 815 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrbp 816 gramb(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampbp 817 else 818 bigorder=i3 819 ABI_MALLOC(grama,(cplx*i3,i3)) 820 ABI_MALLOC(gramb,(cplx*i3,i3)) 821 ABI_MALLOC(eigen,(i3)) 822 ! ABI_MALLOC(coordx,(cplx*i3,blocksize)) 823 ABI_MALLOC(coordx1,(cplx*blocksize,blocksize)) 824 ABI_MALLOC(coordx2,(cplx*blocksize,blocksize)) 825 grama(:,:)=zero;gramb(:,:)=zero 826 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax 827 grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar 828 grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar 829 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx 830 gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr 831 gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr 832 end if 833 834 ABI_MALLOC(tmpgramb,(cplx*bigorder,bigorder)) 835 ABI_MALLOC(tmpeigen,(bigorder)) 836 tmpgramb=gramb 837 838 call abi_xheev('v','u',bigorder,tmpgramb,bigorder,tmpeigen,x_cplx=cplx,istwf_k=istwf_k, & 839 & timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu_magma=use_lapack_gpu) 840 841 condestgramb=tmpeigen(bigorder)/tmpeigen(1) 842 ABI_FREE(tmpgramb) 843 ABI_FREE(tmpeigen) 844 845 if (condestgramb.gt.1d+5.or.condestgramb.lt.0.d0.or.info/=0) then 846 write(std_out,*)'condition number of the Gram matrix = ',condestgramb 847 if (cond_try==1.and.restart==0) then 848 ABI_FREE(grama) 849 ABI_FREE(gramb) 850 ABI_FREE(eigen) 851 ! ABI_FREE(coordx) 852 ABI_FREE(coordx1) 853 ABI_FREE(coordx2) 854 if(bigorder==i4) then 855 ABI_FREE(coordx3) 856 end if 857 if (nrestart.gt.1) then 858 ABI_WARNING('the minimization is stopped for this block') 859 exit iter 860 else 861 restart=1 862 nrestart=nrestart+1 863 call wrtout(std_out,'Lobpcgwf: restart performed',"PERS") 864 end if 865 else 866 ABI_WARNING('Gramm matrix ill-conditionned: results may be unpredictable') 867 end if 868 else 869 exit cond 870 end if 871 end do cond 872 873 ! ########################################################################### 874 ! ################ END LOOP ON COND ######################################### 875 ! ########################################################################### 876 877 call abi_xhegv(1,'v','u',bigorder,grama,bigorder,gramb,bigorder,eigen,x_cplx=cplx,istwf_k=istwf_k, & 878 timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu_magma=use_lapack_gpu) 879 880 deltae=-one 881 do iblocksize=1,blocksize 882 call abi_xcopy(1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,zvar,1,x_cplx=x_cplx) 883 deltae=max(deltae,abs(cmplx(zvar(1),zvar(2))-eigen(iblocksize))) 884 zvar=(/eigen(iblocksize),zero/) 885 call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,x_cplx=x_cplx) 886 end do 887 888 ! DEBUG 889 ! write(std_out,*)'eigen',eigen(1:blocksize) 890 ! ENDDEBUG 891 892 ! coordx(1:bigorder*cplx,1:blocksize)=grama(1:bigorder*cplx,1:blocksize) 893 coordx1(:,:) = grama(1+i1*cplx : i2*cplx,1:blocksize) 894 coordx2(:,:) = grama(1+i2*cplx : i3*cplx,1:blocksize) 895 if(bigorder==i4) then 896 coordx3(:,:) = grama(1+i3*cplx : i4*cplx,1:blocksize) 897 end if 898 899 900 if(use_linalg_gpu==1) then 901 call copy_on_gpu(coordx2, coordx2_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 902 if(bigorder==i4) then 903 call copy_on_gpu(coordx3, coordx3_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 904 end if 905 end if 906 907 ABI_FREE(grama) 908 ABI_FREE(gramb) 909 ABI_FREE(eigen) 910 if (restart==0 .and. iterationnumber >1) then 911 912 ! blockvectorp=matmul(blockvectorr,coordx(i2+1:i3,:))+& 913 ! & matmul(blockvectorp,coordx(i3+1:i4,:)) 914 if(use_linalg_gpu==1) then 915 ! call copy_on_gpu(blockvectorr, blockvectorr_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 916 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,& 917 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 918 call copy_on_gpu(blockvectorp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 919 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 920 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 921 call copy_from_gpu(blockvectorp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 922 else 923 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,& 924 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 925 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorp,& 926 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 927 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorp,1,x_cplx=x_cplx) 928 end if 929 930 ! blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))+& 931 ! & matmul(blockvectorap,coordx(i3+1:i4,:)) 932 if(use_linalg_gpu==1) then 933 ! call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 934 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,& 935 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 936 call copy_on_gpu(blockvectorap, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 937 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 938 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 939 call copy_from_gpu(blockvectorap, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 940 else 941 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,& 942 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 943 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorap,& 944 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 945 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorap,1,x_cplx=x_cplx) 946 end if 947 948 949 ! blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:))+& 950 ! & matmul(blockvectorvp,coordx(i3+1:i4,:)) 951 if (gs_hamk%usepaw==0) then 952 if(use_linalg_gpu==1) then 953 call copy_on_gpu(blockvectorvr, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 954 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 955 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 956 call copy_on_gpu(blockvectorvp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 957 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 958 & vectsize,coordx3_gpu,blocksize,cone,C_gpu,vectsize) 959 call copy_from_gpu(blockvectorvp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 960 else 961 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,& 962 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 963 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvp,& 964 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 965 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvp,1,x_cplx=x_cplx) 966 end if 967 end if 968 969 ! blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))+& 970 ! & matmul(blockvectorbp,coordx(i3+1:i4,:)) 971 if(use_linalg_gpu==1) then 972 ! call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 973 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,& 974 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 975 call copy_on_gpu(blockvectorbp, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 976 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,& 977 & coordx3_gpu,blocksize,cone,C_gpu,vectsize) 978 call copy_from_gpu(blockvectorbp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 979 else 980 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,& 981 & vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 982 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbp,& 983 & vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx) 984 call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbp,1,x_cplx=x_cplx) 985 end if 986 987 else 988 989 ! blockvectoSz =matmul(blockvectorr,coordx(i2+1:i3,:)) 990 if(use_linalg_gpu==1) then 991 ! call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize) 992 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,& 993 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 994 call copy_from_gpu(blockvectorp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 995 else 996 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,& 997 & vectsize,coordx2,blocksize,czero,blockvectorp,vectsize,x_cplx=x_cplx) 998 end if 999 1000 ! blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:)) 1001 if(use_linalg_gpu==1) then 1002 ! call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize) 1003 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,& 1004 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1005 call copy_from_gpu(blockvectorap, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1006 else 1007 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,& 1008 & vectsize,coordx2,blocksize,czero,blockvectorap,vectsize,x_cplx=x_cplx) 1009 end if 1010 ! blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:)) 1011 if (gs_hamk%usepaw==0) then 1012 if(use_linalg_gpu==1) then 1013 call copy_on_gpu(blockvectorvr, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1014 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1015 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1016 call copy_from_gpu(blockvectorvp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1017 else 1018 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,& 1019 & vectsize,coordx2,blocksize,czero,blockvectorvp,vectsize,x_cplx=x_cplx) 1020 end if 1021 end if 1022 1023 ! blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:)) 1024 if(use_linalg_gpu==1) then 1025 ! call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize) 1026 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,& 1027 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1028 call copy_from_gpu(blockvectorbp, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1029 else 1030 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,& 1031 & vectsize,coordx2,blocksize,czero,blockvectorbp,vectsize,x_cplx=x_cplx) 1032 end if 1033 end if 1034 1035 if(use_linalg_gpu==1) then 1036 call copy_on_gpu(coordx1, coordx2_gpu, INT(cplx, c_size_t)*dp*blocksize*blocksize) 1037 end if 1038 1039 ! blockvectorx = matmul(blockvectorx,coordx(i1+1:i2,:))+blockvectorp 1040 if(use_linalg_gpu==1) then 1041 call copy_on_gpu(blockvectorx, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1042 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1043 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1044 call copy_from_gpu(blockvectordumm, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1045 else 1046 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,& 1047 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1048 end if 1049 blockvectorx = blockvectordumm+blockvectorp 1050 1051 ! blockvectorax= matmul(blockvectorax,coordx(i1+1:i2,:))+blockvectorap 1052 if(use_linalg_gpu==1) then 1053 call copy_on_gpu(blockvectorax, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1054 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1055 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1056 call copy_from_gpu(blockvectordumm, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1057 else 1058 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,& 1059 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1060 end if 1061 blockvectorax = blockvectordumm+blockvectorap 1062 1063 ! blockvectorvx= matmul(blockvectorvx,coordx(i1+1:i2,:))+blockvectorvp 1064 if (gs_hamk%usepaw==0) then 1065 if(use_linalg_gpu==1) then 1066 call copy_on_gpu(blockvectorvx, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1067 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1068 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1069 call copy_from_gpu(blockvectordumm, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1070 else 1071 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,& 1072 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1073 end if 1074 blockvectorvx = blockvectordumm+blockvectorvp 1075 end if 1076 1077 ! blockvectorbx= matmul(blockvectorbx,coordx(i1+1:i2,:))+blockvectorbp 1078 if(use_linalg_gpu==1) then 1079 call copy_on_gpu(blockvectorbx, A_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1080 call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,& 1081 & vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize) 1082 call copy_from_gpu(blockvectordumm, C_gpu, INT(cplx, c_size_t)*dp*vectsize*blocksize) 1083 else 1084 call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,& 1085 & vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx) 1086 end if 1087 blockvectorbx = blockvectordumm+blockvectorbp 1088 1089 ! ABI_FREE(coordx) 1090 ABI_FREE(coordx1) 1091 ABI_FREE(coordx2) 1092 if(bigorder==i4) then 1093 ABI_FREE(coordx3) 1094 end if 1095 1096 ! Check convergence on energy and eventually exit 1097 if (iterationnumber==1) then 1098 deold=deltae 1099 else if (iterationnumber>1) then 1100 if ((abs(deltae)<0.005*abs(deold)).and.(iterationnumber/=maxiterations))then 1101 if(prtvol>=10)then 1102 write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) & 1103 & ' lobpcgwf: block',iblock,', line',iterationnumber,& 1104 & ', deltae=',deltae,' < 0.005*',deold,' =>skip lines !' 1105 call wrtout(std_out,message,'PERS') 1106 end if 1107 exit 1108 else if (abs(deltae)>0.005*abs(deold)) then 1109 if(prtvol>=10)then 1110 write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) & 1111 & ' lobpcgwf: block',iblock,', line',iterationnumber,& 1112 & ', deltae=',deltae,' > 0.005*',deold,' =>keep on working !' 1113 call wrtout(std_out,message,'PERS') 1114 end if 1115 end if 1116 end if 1117 1118 if(abs(dtset%timopt)==4) then 1119 call timab(524,2,tsec) 1120 end if 1121 1122 end do iter 1123 1124 ! ########################################################################### 1125 ! ################## END LOOP ON NLINE ###################################### 1126 ! ########################################################################### 1127 1128 if(abs(dtset%timopt)==4) then 1129 call timab(525,1,tsec) 1130 end if 1131 1132 if (havetoprecon) then 1133 call xprecon(blockvectorbx,lambda,blocksize,& 1134 & iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,& 1135 & optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon) 1136 1137 residualnorms=sum(abs(blockvectorr)**2,dim=1) 1138 1139 if(abs(dtset%timopt)==3) then 1140 call timab(533,1,tsec) 1141 end if 1142 call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr) 1143 if(abs(dtset%timopt)==3) then 1144 call timab(533,2,tsec) 1145 end if 1146 1147 resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize) 1148 end if 1149 1150 call wfcopy('I',vectsize*blocksize,blockvectorx,1,cg,1,blocksize,iblock,'C',withbbloc=.true.,& 1151 & timopt=timopt,tim_wfcopy=tim_wfcopy) 1152 1153 if(gen_eigenpb) then 1154 call wfcopy('I',vectsize*blocksize,blockvectorbx,1,gsc,1,blocksize,iblock,'S',withbbloc=.true.,& 1155 & timopt=timopt,tim_wfcopy=tim_wfcopy) 1156 end if 1157 1158 ! The Vnl+VFockACE part of the Hamiltonian is no more stored in the packed form such as it was the case for subvnlx(:). 1159 ! Now, the full matrix is stored in totvnlx(:,:). This trick permits: 1160 ! 1) to avoid the reconstruction of the total matrix in vtowfk.F90 (double loop over bands) 1161 ! 2) to use two optimized matrix-matrix blas routine for general (in lobpcgccwf.F90) or hermitian (in vtowfk.F90) 1162 ! operators, zgemm.f and zhemm.f respectively, rather than a triple loop in both cases. 1163 iwavef=iblock*blocksize 1164 isubh=1+2*bblocksize*(bblocksize+1)/2 1165 1166 ABI_MALLOC(blockvectorz,(cplx*vectsize,iwavef)) 1167 if(bblocksize > 0 ) then 1168 call abi_xcopy(bblocksize*vectsize,blockvectory(:,1:bblocksize),1,blockvectorz(:,1:bblocksize),1,x_cplx=x_cplx) 1169 end if 1170 call abi_xcopy( blocksize*vectsize,blockvectorx(:,1:blocksize) ,1,blockvectorz(:,bblocksize+1:iwavef),1,x_cplx=x_cplx) 1171 1172 ABI_MALLOC(tsubham,(cplx*iwavef,blocksize)) 1173 tsubham(:,:)=zero 1174 call abi_xgemm(cparam(cplx),'n',iwavef,blocksize,vectsize,cone,blockvectorz,vectsize,& 1175 & blockvectorax,vectsize,czero,tsubham,iwavef,x_cplx=x_cplx) 1176 1177 if (gs_hamk%usepaw==0) then 1178 ! MG FIXME: Here gfortran4.9 allocates temporary array for C in abi_d2zgemm. 1179 call abi_xgemm(cparam(cplx),'n',blocksize,iwavef,vectsize,cone,blockvectorvx,vectsize,& 1180 & blockvectorz,vectsize,czero,totvnlx(cplx*bblocksize+1:cplx*iwavef,1:iwavef),blocksize,x_cplx=x_cplx) 1181 end if 1182 1183 do iblocksize=1,blocksize 1184 do ii=1,bblocksize+iblocksize 1185 if ( cplx == 1 ) then 1186 subham(isubh) = tsubham(ii,iblocksize) 1187 subham(isubh+1)= zero 1188 else 1189 subham(isubh) = tsubham(2*ii-1,iblocksize) 1190 subham(isubh+1)= tsubham(2*ii ,iblocksize) 1191 end if 1192 isubh=isubh+2 1193 end do 1194 end do 1195 ABI_FREE(tsubham) 1196 ABI_FREE(blockvectorz) 1197 ! comm for subham and subvnlx are made in vtowfk 1198 1199 ABI_FREE(pcon) 1200 ABI_FREE(blockvectory) 1201 ABI_FREE(blockvectorby) 1202 ABI_FREE(gramyx) 1203 ABI_FREE(blockvectorx) 1204 ABI_FREE(blockvectorax) 1205 ABI_FREE(blockvectorbx) 1206 ABI_FREE(blockvectorr) 1207 ABI_FREE(blockvectorar) 1208 ABI_FREE(blockvectorbr) 1209 ABI_FREE(blockvectorp) 1210 ABI_FREE(blockvectorap) 1211 ABI_FREE(blockvectorbp) 1212 if (gs_hamk%usepaw==0) then 1213 ABI_FREE(blockvectorvx) 1214 ABI_FREE(blockvectorvp) 1215 ABI_FREE(blockvectorvr) 1216 end if 1217 ABI_FREE(blockvectordumm) 1218 ABI_FREE(gramxax) 1219 ABI_FREE(gramxar) 1220 ABI_FREE(gramxap) 1221 ABI_FREE(gramrar) 1222 ABI_FREE(gramrap) 1223 ABI_FREE(grampap) 1224 ABI_FREE(gramxbx) 1225 ABI_FREE(gramxbr) 1226 ABI_FREE(gramxbp) 1227 ABI_FREE(gramrbr) 1228 ABI_FREE(gramrbp) 1229 ABI_FREE(grampbp) 1230 ABI_FREE(transf3) 1231 ABI_FREE(transf5) 1232 ABI_FREE(lambda) 1233 ABI_FREE(residualnorms) 1234 if(use_linalg_gpu==1) then 1235 call dealloc_on_gpu(bblockvector_gpu) 1236 call dealloc_on_gpu(gram_gpu) 1237 end if 1238 1239 end do ! End big loop over bands inside blocks 1240 1241 if(use_linalg_gpu==1) then 1242 call dealloc_on_gpu(blockvectorr_gpu) 1243 call dealloc_on_gpu(blockvectorar_gpu) 1244 call dealloc_on_gpu(blockvectorbr_gpu) 1245 call dealloc_on_gpu(A_gpu) 1246 call dealloc_on_gpu(C_gpu) 1247 call dealloc_on_gpu(coordx2_gpu) 1248 call dealloc_on_gpu(coordx3_gpu) 1249 !call gpu_linalg_shutdown() 1250 end if 1251 1252 if(abs(dtset%timopt)==4) then 1253 call timab(525,2,tsec) 1254 end if 1255 call timab(530,2,tsec) 1256 1257 DBG_ENTER("COLL") 1258 1259 contains 1260 1261 function gramindex(iblocksize) 1262 1263 integer :: gramindex,iblocksize 1264 gramindex=(iblocksize-1)*cplx+1 1265 end function gramindex 1266 1267 end subroutine lobpcgwf
ABINIT/m_lobpcgwf_old [ Modules ]
NAME
m_lobpcgwf_old
FUNCTION
COPYRIGHT
Copyright (C) 2008-2024 ABINIT group () This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 module m_lobpcgwf_old 23 24 implicit none 25 26 private