TABLE OF CONTENTS


ABINIT/lobpcgwf [ Functions ]

[ Top ] [ Functions ]

NAME

 lobpcgwf

FUNCTION

 this routine updates the whole wave functions at a given k-point,
 using the lobpcg method
 for a given spin-polarization, from a fixed hamiltonian
 but might also simply compute eigenvectors and eigenvalues at this k point.
 it will also update the matrix elements of the hamiltonian.

COPYRIGHT

 Copyright (C) 1998-2022 ABINIT group (FBottin,GZ,AR,MT,FDahm)
 this file is distributed under the terms of the
 gnu general public license, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 for the initials of contributors, see ~abinit/doc/developers/contributors.txt .

INPUTS

  dtset <type(dataset_type)>=all input variales for this dataset
  gs_hamk <type(gs_hamiltonian_type)>=all data for the hamiltonian at k
  icg=shift to be applied on the location of data in the array cg
  igsc=shift to be applied on the location of data in the array gsc
  kinpw(npw)=(modified) kinetic energy for each plane wave (hartree)
  mcg=second dimension of the cg array
  mgsc=second dimension of the gsc array
  mpi_enreg=information about MPI parallelization
  nband_k=number of bands at this k point for that spin polarization
  nbdblock : number of blocks
  npw_k=number of plane waves at this k point
  prtvol=control print volume and debugging output
  use_totvnlx=1 if one has to compute totvnlx

OUTPUT

  resid_k(nband_k)=residuals for each states
  subham(nband_k*(nband_k+1))=the matrix elements of h
  If gs_hamk%usepaw==0:
    gsc(2,mgsc)=<g|s|c> matrix elements (s=overlap)
    totvnlx(nband_k*use_totvnlx,nband_k*use_totvnlx)=the matrix elements of vnl+vfockACE

SIDE EFFECTS

  cg(2,mcg)=updated wavefunctions

SOURCE

  80 subroutine lobpcgwf(cg,dtset,gs_hamk,gsc,icg,igsc,kinpw,mcg,mgsc,mpi_enreg,&
  81 &                   nband_k,nbdblock,npw_k,prtvol,resid_k,subham,totvnlx,use_totvnlx)
  82 
  83 
  84  use defs_basis
  85  use m_abicore
  86  use m_lobpcg
  87  use m_abi_linalg
  88  use m_wfutils
  89  use m_xmpi
  90  use m_errors
  91  use, intrinsic :: iso_c_binding
  92  use m_dtset
  93 
  94  use defs_abitypes, only : mpi_type
  95  use m_time,        only : timab
  96  use m_hamiltonian, only : gs_hamiltonian_type
  97  use m_pawcprj,     only : pawcprj_type
  98  use m_getghc,      only : getghc
  99  use m_prep_kgb,    only : prep_getghc
 100 
 101  implicit none
 102 
 103 !Arguments ------------------------------------
 104  integer,intent(in) :: icg,igsc,mcg,mgsc,nband_k,nbdblock,npw_k,prtvol,use_totvnlx
 105  type(gs_hamiltonian_type),intent(inout) :: gs_hamk
 106  type(dataset_type),intent(in) :: dtset
 107  type(mpi_type),intent(inout) :: mpi_enreg
 108  real(dp),intent(inout) :: cg(2,mcg),gsc(2,mgsc)
 109  real(dp),intent(in) :: kinpw(npw_k)
 110  real(dp),intent(out) :: resid_k(nband_k)
 111  real(dp),intent(inout) :: subham(nband_k*(nband_k+1))
 112  real(dp),intent(inout) :: totvnlx((3-gs_hamk%istwf_k)*nband_k*use_totvnlx,nband_k*use_totvnlx)
 113 
 114 !Local variables-------------------------------
 115  integer, parameter :: tim_getghc=5
 116  integer :: activepsize,activersize,bblocksize,bigorder,blocksize,cpopt
 117  integer :: cond_try
 118  integer :: iblocksize,iblock,ierr,ii,info,istwf_k,isubh
 119  integer :: iterationnumber
 120  integer :: iwavef,i1,i2,i3,i4,maxiterations,my_nspinor
 121  integer :: nrestart,optekin,optpcon,restart
 122  integer :: sij_opt,timopt,tim_wfcopy,tim_xeigen
 123  integer :: tim_xortho,tim_xprecon,use_lapack_gpu,use_linalg_gpu,vectsize
 124  logical :: gen_eigenpb
 125  integer :: cplx
 126  real(dp) :: condestgramb,deltae,deold,dum
 127  complex(dpc) :: cminusone
 128  real(dp) :: zvar(2)
 129  logical :: havetoprecon
 130  real(dp) :: tsec(2)
 131  real(dp), allocatable :: gwavef(:,:),cwavef(:,:),gvnlxc(:,:)
 132  real(dp), allocatable :: swavef(:,:)
 133  real(dp), allocatable :: residualnorms(:),eigen(:)
 134  real(dp), allocatable :: tmpeigen(:)
 135  real(dp), allocatable :: pcon(:,:)
 136  real(dp), allocatable :: blockvectorx(:,:),blockvectorvx(:,:),blockvectorax(:,:),blockvectorbx(:,:)
 137  real(dp),allocatable  :: blockvectorr(:,:),blockvectorvr(:,:),blockvectorar(:,:),blockvectorbr(:,:)
 138  real(dp),allocatable  :: blockvectorp(:,:),blockvectorvp(:,:),blockvectorap(:,:),blockvectorbp(:,:),blockvectordumm(:,:)
 139  real(dp),allocatable  :: blockvectory(:,:),blockvectorby(:,:),blockvectorz(:,:)
 140  real(dp),allocatable  :: gramxax(:,:),gramxar(:,:),gramxap(:,:),gramrar(:,:),gramrap(:,:),grampap(:,:)
 141  real(dp),allocatable  :: gramxbx(:,:),gramxbr(:,:),gramxbp(:,:),gramrbr(:,:),gramrbp(:,:),grampbp(:,:)
 142  real(dp),allocatable  :: coordx1(:,:),coordx2(:,:),coordx3(:,:),lambda(:,:),grama(:,:),gramb(:,:),gramyx(:,:)
 143  real(dp),allocatable  :: tmpgramb(:,:),transf3(:,:,:),transf5(:,:,:)
 144  real(dp), allocatable :: tsubham(:,:)
 145  type(pawcprj_type) :: cprj_dum(gs_hamk%natom,0)
 146  character(len=500) :: message
 147  character, dimension(2) :: cparam
 148  type(c_ptr) :: A_gpu,C_gpu,coordx2_gpu,coordx3_gpu,bblockvector_gpu,gram_gpu
 149  type(c_ptr) :: blockvectorr_gpu,blockvectorar_gpu,blockvectorbr_gpu
 150 
 151 !Index of a given band
 152 !gramindex(iblocksize)=(iblocksize-1)*cplx+1
 153 
 154 ! *********************************************************************
 155 
 156  DBG_ENTER("COLL")
 157 
 158  call timab(530,1,tsec)
 159  if(abs(dtset%timopt)==4) then
 160    call timab(520,1,tsec)
 161  end if
 162 
 163 !###########################################################################
 164 !################ INITIALISATION  ##########################################
 165 !###########################################################################
 166 
 167 !For timing
 168  timopt=dtset%timopt
 169  tim_wfcopy=584
 170  !tim_xcopy=584
 171  tim_xeigen=587
 172  !tim_xgemm=532
 173  tim_xortho=535
 174  tim_xprecon=536
 175  !tim_xtrsm=535
 176 
 177 !Variables
 178  maxiterations=dtset%nline
 179  gen_eigenpb=(gs_hamk%usepaw==1)
 180  my_nspinor=max(1,dtset%nspinor/mpi_enreg%nproc_spinor)
 181  cminusone=-cone
 182  istwf_k=gs_hamk%istwf_k
 183  info = 0
 184  cparam(1)='t'
 185  cparam(2)='c'
 186 
 187 !Depends on istwfk
 188  if ( istwf_k == 2 ) then
 189    cplx=1
 190    if (mpi_enreg%me_g0 == 1) then
 191      vectsize=2*npw_k*my_nspinor-1
 192    else
 193      vectsize=2*npw_k*my_nspinor
 194    end if
 195  else
 196    cplx=2
 197    vectsize=npw_k*my_nspinor
 198  end if
 199 
 200 !For preconditionning
 201  optekin=0;if (dtset%wfoptalg>10) optekin=0
 202  optpcon=1;if (dtset%wfoptalg>10) optpcon=0
 203 
 204 !For communication
 205  blocksize=mpi_enreg%nproc_fft
 206  if(mpi_enreg%paral_kgb==1) blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp
 207  !IF you want to compare with new lobpcg in sequential uncomment the following
 208  !line
 209  !blocksize=mpi_enreg%nproc_band*mpi_enreg%bandpp
 210 
 211 !Iniitializations/allocations of GPU parallelism
 212  use_linalg_gpu=0;use_lapack_gpu=0
 213  if ((dtset%use_gpu_cuda==1).and. &
 214 & (vectsize*blocksize*blocksize>dtset%gpu_linalg_limit)) use_linalg_gpu=1
 215 #if defined HAVE_LINALG_MAGMA
 216  use_lapack_gpu=use_linalg_gpu
 217 #endif
 218  if(use_linalg_gpu==1) then
 219    call alloc_on_gpu(A_gpu,cplx*dp*vectsize*blocksize)
 220    call alloc_on_gpu(C_gpu,cplx*dp*vectsize*blocksize)
 221    call alloc_on_gpu(blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 222    call alloc_on_gpu(blockvectorar_gpu,cplx*dp*vectsize*blocksize)
 223    call alloc_on_gpu(blockvectorbr_gpu,cplx*dp*vectsize*blocksize)
 224    call alloc_on_gpu(coordx2_gpu,cplx*dp*blocksize*blocksize)
 225    call alloc_on_gpu(coordx3_gpu,cplx*dp*blocksize*blocksize)
 226  end if
 227 
 228  if(abs(dtset%timopt)==4) then
 229    call timab(520,2,tsec)
 230  end if
 231 
 232 !###########################################################################
 233 !################ BIG LOOP OVER BLOCKS  ####################################
 234 !###########################################################################
 235 
 236  do iblock=1,nbdblock
 237 
 238    if(abs(dtset%timopt)==4) then
 239      call timab(521,1,tsec)
 240    end if
 241 
 242    havetoprecon=.true.
 243    nrestart=0
 244    bblocksize=(iblock-1)*blocksize
 245 
 246 !  allocations
 247    ABI_MALLOC(pcon,(npw_k,blocksize))
 248    ABI_MALLOC(blockvectorx,(cplx*vectsize,blocksize))
 249    ABI_MALLOC(blockvectorax,(cplx*vectsize,blocksize))
 250    ABI_MALLOC(blockvectorbx,(cplx*vectsize,blocksize))
 251    ABI_MALLOC(blockvectorr,(cplx*vectsize,blocksize))
 252    ABI_MALLOC(blockvectorar,(cplx*vectsize,blocksize))
 253    ABI_MALLOC(blockvectorbr,(cplx*vectsize,blocksize))
 254    ABI_MALLOC(blockvectorp,(cplx*vectsize,blocksize))
 255    ABI_MALLOC(blockvectorap,(cplx*vectsize,blocksize))
 256    ABI_MALLOC(blockvectorbp,(cplx*vectsize,blocksize))
 257    ABI_MALLOC(blockvectordumm,(cplx*vectsize,blocksize))
 258    ABI_MALLOC(gramxax,(cplx*blocksize,blocksize))
 259    ABI_MALLOC(gramxar,(cplx*blocksize,blocksize))
 260    ABI_MALLOC(gramxap,(cplx*blocksize,blocksize))
 261    ABI_MALLOC(gramrar,(cplx*blocksize,blocksize))
 262    ABI_MALLOC(gramrap,(cplx*blocksize,blocksize))
 263    ABI_MALLOC(grampap,(cplx*blocksize,blocksize))
 264    ABI_MALLOC(gramxbx,(cplx*blocksize,blocksize))
 265    ABI_MALLOC(gramxbr,(cplx*blocksize,blocksize))
 266    ABI_MALLOC(gramxbp,(cplx*blocksize,blocksize))
 267    ABI_MALLOC(gramrbr,(cplx*blocksize,blocksize))
 268    ABI_MALLOC(gramrbp,(cplx*blocksize,blocksize))
 269    ABI_MALLOC(grampbp,(cplx*blocksize,blocksize))
 270    ABI_MALLOC(transf3,(cplx*blocksize,blocksize,3))
 271    ABI_MALLOC(transf5,(cplx*blocksize,blocksize,5))
 272    ABI_MALLOC(lambda,(cplx*blocksize,blocksize))
 273    ABI_MALLOC(residualnorms,(blocksize))
 274 
 275    ABI_MALLOC(blockvectory,(cplx*vectsize,bblocksize))
 276    ABI_MALLOC(blockvectorby,(cplx*vectsize,bblocksize))
 277    ABI_MALLOC(gramyx,(cplx*bblocksize,blocksize))
 278    if (gs_hamk%usepaw==0) then
 279      ABI_MALLOC(blockvectorvx,(cplx*vectsize,blocksize))
 280      ABI_MALLOC(blockvectorvr,(cplx*vectsize,blocksize))
 281      ABI_MALLOC(blockvectorvp,(cplx*vectsize,blocksize))
 282    end if
 283 
 284    if(use_linalg_gpu==1) then
 285      if(iblock/=1) then
 286        call alloc_on_gpu(bblockvector_gpu,cplx*dp*vectsize*bblocksize)
 287        call alloc_on_gpu(gram_gpu,cplx*dp*bblocksize*blocksize)
 288      else
 289        call alloc_on_gpu(bblockvector_gpu,cplx*dp*vectsize*blocksize)
 290        call alloc_on_gpu(gram_gpu,cplx*dp*blocksize*blocksize)
 291      end if
 292    end if
 293 
 294 ! Initialize global variables in m_wfutils.
 295    call setWFParameter(cplx,mpi_enreg%me_g0,npw_k,my_nspinor,icg,igsc,blocksize)
 296 
 297 !  transfer array of wf coeff in iblock to blockvectorx
 298    call wfcopy('D',blocksize*vectsize,cg,1,blockvectorx,1,blocksize,iblock,'C',withbbloc=.true.,&
 299 &   timopt=timopt,tim_wfcopy=tim_wfcopy)
 300 
 301 !  !!!!!!!!!!!!!!!!!!!!!!!! Begin if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!
 302 !  transfer array of wf coeff less than iblock to blockvectory
 303    if(iblock /=1) then
 304      call wfcopy('D',bblocksize*vectsize,cg,1,blockvectory,1,bblocksize,iblock,'C',withbbloc=.false.,&
 305 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
 306 
 307      if(gen_eigenpb) then
 308        call wfcopy('D',bblocksize*vectsize,gsc,1,blockvectorby,1,bblocksize,iblock,'S',withbbloc=.false.,&
 309 &       timopt=timopt,tim_wfcopy=tim_wfcopy)
 310      else
 311        call abi_xcopy(vectsize*bblocksize,blockvectory,1,blockvectorby,1,x_cplx=x_cplx)
 312      end if
 313 
 314 !    b-orthogonalize x to the constraint y (supposed b-orthonormal)
 315 !    blockvectorx=blockvectorx-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorx))
 316 
 317      call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,&
 318 &     vectsize,blockvectorx,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx)
 319 
 320      if(abs(dtset%timopt)==3) then
 321        call timab(533,1,tsec)
 322      end if
 323      call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr)
 324      if(abs(dtset%timopt)==3) then
 325        call timab(533,2,tsec)
 326      end if
 327 
 328      call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,&
 329 &     vectsize,gramyx,bblocksize,cone,blockvectorx,vectsize,x_cplx=x_cplx)
 330 
 331    end if
 332 !  !!!!!!!!!!!!!!!!!!!!!!!! End if iblock /=1 !!!!!!!!!!!!!!!!!!!!!!!!!!!
 333 
 334    ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize))
 335    ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize))
 336    ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize))
 337    ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize))
 338 
 339    call wfcopy('I',vectsize*blocksize,blockvectorx,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,&
 340 &   timopt=timopt,tim_wfcopy=tim_wfcopy)
 341 
 342    if(abs(dtset%timopt)==4) then
 343      call timab(521,2,tsec)
 344    end if
 345    if(abs(dtset%timopt)==4) then
 346      call timab(526,1,tsec)
 347    end if
 348 
 349    cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1
 350 
 351    if (mpi_enreg%paral_kgb==0) then
 352      call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,&
 353 &     mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0)
 354    else
 355      call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,&
 356 &     prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.)
 357    end if
 358    if(abs(dtset%timopt)==4) then
 359      call timab(526,2,tsec)
 360    end if
 361    if(abs(dtset%timopt)==4) then
 362      call timab(522,1,tsec)
 363    end if
 364 
 365    if ( gen_eigenpb ) then
 366      call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbx,1,blocksize,iblock,'W',withbbloc=.false.,&
 367 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
 368    else
 369      call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvx,1,blocksize,iblock,'W',withbbloc=.false.,&
 370 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
 371      call abi_xcopy(vectsize*blocksize,blockvectorx,1,blockvectorbx,1,x_cplx=x_cplx)
 372    end if
 373 
 374    call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorax,1,blocksize,iblock,'W',withbbloc=.false.,&
 375 &   timopt=timopt,tim_wfcopy=tim_wfcopy)
 376 
 377    ABI_FREE(cwavef)
 378    ABI_FREE(gwavef)
 379    ABI_FREE(gvnlxc)
 380    ABI_FREE(swavef)
 381 
 382    call abi_xorthonormalize(blockvectorx,blockvectorbx,blocksize,mpi_enreg%comm_bandspinorfft,gramxbx,vectsize,&
 383 &   x_cplx,timopt=timopt,tim_xortho=tim_xortho)
 384 
 385    call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorbx,vectsize,x_cplx=x_cplx)
 386    call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorax,vectsize,x_cplx=x_cplx)
 387 
 388    if (gs_hamk%usepaw==0) then
 389      call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramxbx,blocksize,blockvectorvx,vectsize,x_cplx=x_cplx)
 390    end if
 391 
 392 !  Do rayleigh ritz on a in space x
 393 !  gramxax=matmul(transpose(blockvectorx),blockvectorax)
 394    call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorx,&
 395 &   vectsize,blockvectorax,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx)
 396 
 397    if(abs(dtset%timopt)==3) then
 398      call timab(533,1,tsec)
 399    end if
 400    call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr)
 401    if(abs(dtset%timopt)==3) then
 402      call timab(533,2,tsec)
 403    end if
 404    ABI_MALLOC(eigen,(blocksize))
 405 
 406    call abi_xheev('v','u',blocksize,gramxax,blocksize,eigen,x_cplx=cplx,istwf_k=istwf_k, &
 407    timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu)
 408 
 409 !  blockvectorx=matmul(blockvectorx,gramxax)
 410    call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,&
 411 &   vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 412    call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorx,1,x_cplx=x_cplx)
 413 
 414 !  blockvectorax=matmul(blockvectorax,gramxax)
 415    call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,&
 416 &   vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 417    call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorax,1,x_cplx=x_cplx)
 418 
 419 !  blockvectorvx=matmul(blockvectorvx,gramxax)
 420    if (gs_hamk%usepaw==0) then
 421      call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,&
 422 &     vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 423      call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvx,1,x_cplx=x_cplx)
 424    end if
 425 
 426 !  blockvectorbx=matmul(blockvectorbx,gramxax)
 427    call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,&
 428 &   vectsize,gramxax,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 429    call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbx,1,x_cplx=x_cplx)
 430 
 431    do iblocksize=1,blocksize
 432      zvar=(/eigen(iblocksize),zero/)
 433      call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx)
 434    end do
 435    ABI_FREE(eigen)
 436 
 437    if(abs(dtset%timopt)==4) then
 438      call timab(522,2,tsec)
 439    end if
 440 
 441 !  ###########################################################################
 442 !  ################ PERFORM LOOP ON NLINE ####################################
 443 !  ###########################################################################
 444 !  now the main alogrithm
 445    iter: do iterationnumber=1,maxiterations
 446 
 447      if(abs(dtset%timopt)==4) then
 448        call timab(523,1,tsec)
 449      end if
 450 
 451 !    Build residual
 452 !    blockvectorr=blockvectorax-matmul(blockvectorx,lambda)
 453      call xprecon(blockvectorbx,lambda,blocksize,&
 454 &     iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,&
 455 &     optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon)
 456 
 457      residualnorms=sum(blockvectorr**2,dim=1)
 458 
 459      if(abs(dtset%timopt)==3) then
 460        call timab(533,1,tsec)
 461      end if
 462      call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr)
 463      if(abs(dtset%timopt)==3) then
 464        call timab(533,2,tsec)
 465      end if
 466 
 467      resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize)
 468 
 469 !    If residual sufficiently small stop line minimizations
 470      if (abs(maxval(residualnorms(1:blocksize)))<dtset%tolwfr) then
 471        if (prtvol > 0) then
 472          write(message, '(a,i0,a,i0,a,es12.4)' ) &
 473 &         ' lobpcgwf: block ',iblock,' converged after ',iterationnumber,&
 474 &         ' line minimizations: maxval(resid(1:blocksize)) =',maxval(residualnorms(1:blocksize))
 475          call wrtout(std_out,message,'PERS')
 476        end if
 477        havetoprecon=.false.
 478        exit
 479      end if
 480 
 481      if(use_linalg_gpu==1) then
 482        call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 483      end if
 484 
 485      if(iblock /=1) then
 486 !      Residuals orthogonal to blockvectorby
 487 !      blockvectorr=blockvectorr-matmul(blockvectory,matmul((blockvectorby)^T,blockvectorr))
 488 
 489        if(use_linalg_gpu==1) then
 490          call copy_on_gpu(blockvectorby,bblockvector_gpu,cplx*dp*vectsize*bblocksize)
 491          call gpu_xgemm(cplx,cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,bblockvector_gpu,&
 492 &         vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,bblocksize)
 493          call copy_from_gpu(gramyx,gram_gpu,cplx*dp*bblocksize*blocksize)
 494        else
 495          call abi_xgemm(cparam(cplx),'n',bblocksize,blocksize,vectsize,cone,blockvectorby,&
 496 &         vectsize,blockvectorr,vectsize,czero,gramyx,bblocksize,x_cplx=x_cplx)
 497        end if
 498 
 499        if(abs(dtset%timopt)==3) then
 500          call timab(533,1,tsec)
 501        end if
 502        call xmpi_sum(gramyx,mpi_enreg%comm_bandspinorfft,ierr)
 503        if(abs(dtset%timopt)==3) then
 504          call timab(533,2,tsec)
 505        end if
 506 
 507        if(use_linalg_gpu==1) then
 508          call copy_on_gpu(gramyx,gram_gpu,cplx*dp*bblocksize*blocksize)
 509          call copy_on_gpu(blockvectory,bblockvector_gpu,cplx*dp*vectsize*bblocksize)
 510          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,bblocksize,cminusone,bblockvector_gpu,&
 511 &         vectsize,gram_gpu,bblocksize,cone,blockvectorr_gpu,vectsize)
 512        else
 513          call abi_xgemm('n','n',vectsize,blocksize,bblocksize,cminusone,blockvectory,&
 514 &         vectsize,gramyx,bblocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx)
 515        end if
 516 
 517      end if
 518 
 519 !    Residuals orthogonal to blockvectorx
 520 !    blockvectorr=blockvectorr-matmul(blockvectorx,matmul((blockvectorbx)^T,blockvectorr))
 521      if(use_linalg_gpu==1) then
 522        call copy_on_gpu(blockvectorbx,C_gpu,cplx*dp*vectsize*blocksize)
 523        call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,C_gpu,&
 524 &       vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize)
 525        call copy_from_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize)
 526      else
 527        call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
 528 &       vectsize,blockvectorr,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx)
 529      end if
 530 
 531      if(abs(dtset%timopt)==3) then
 532        call timab(533,1,tsec)
 533      end if
 534      call xmpi_sum(gramxax,mpi_enreg%comm_bandspinorfft,ierr)
 535      if(abs(dtset%timopt)==3) then
 536        call timab(533,2,tsec)
 537      end if
 538 
 539      if(use_linalg_gpu==1) then
 540        call copy_on_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize)
 541        call copy_on_gpu(blockvectorx,C_gpu,cplx*dp*vectsize*blocksize)
 542        call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cminusone,C_gpu,&
 543 &       vectsize,gram_gpu,blocksize,cone,blockvectorr_gpu,vectsize)
 544        call copy_from_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 545      else
 546        call abi_xgemm('n','n',vectsize,blocksize,blocksize,cminusone,blockvectorx,&
 547 &       vectsize,gramxax,blocksize,cone,blockvectorr,vectsize,x_cplx=x_cplx)
 548      end if
 549 
 550      ABI_MALLOC(cwavef,(2,npw_k*my_nspinor*blocksize))
 551      ABI_MALLOC(gwavef,(2,npw_k*my_nspinor*blocksize))
 552      ABI_MALLOC(gvnlxc,(2,npw_k*my_nspinor*blocksize))
 553      ABI_MALLOC(swavef,(2,npw_k*my_nspinor*blocksize))
 554 
 555      call wfcopy('I',vectsize*blocksize,blockvectorr,1,cwavef,1,blocksize,iblock,'W',withbbloc=.false.,&
 556 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
 557 
 558      cpopt=-1;sij_opt=0;if (gen_eigenpb) sij_opt=1
 559 
 560      if(abs(dtset%timopt)==4) then
 561        call timab(523,2,tsec)
 562      end if
 563      if(abs(dtset%timopt)==4) then
 564        call timab(526,1,tsec)
 565      end if
 566 
 567      if (mpi_enreg%paral_kgb==0) then
 568        call getghc(cpopt,cwavef,cprj_dum,gwavef,swavef,gs_hamk,gvnlxc,dum,&
 569 &       mpi_enreg,blocksize,prtvol,sij_opt,tim_getghc,0)
 570      else
 571        call prep_getghc(cwavef,gs_hamk,gvnlxc,gwavef,swavef,dum,blocksize,mpi_enreg,&
 572 &       prtvol,sij_opt,cpopt,cprj_dum,already_transposed=.false.)
 573      end if
 574 
 575      if(abs(dtset%timopt)==4) then
 576        call timab(526,2,tsec)
 577      end if
 578      if(abs(dtset%timopt)==4) then
 579        call timab(524,1,tsec)
 580      end if
 581 
 582      if (gen_eigenpb) then
 583        call wfcopy('D',vectsize*blocksize,swavef,1,blockvectorbr,1,blocksize,iblock,'W',withbbloc=.false.,&
 584 &       timopt=timopt,tim_wfcopy=tim_wfcopy)
 585      else
 586        call abi_xcopy(vectsize*blocksize,blockvectorr,1,blockvectorbr,1,x_cplx=x_cplx)
 587        call wfcopy('D',vectsize*blocksize,gvnlxc,1,blockvectorvr,1,blocksize,iblock,'W',withbbloc=.false.,&
 588 &       timopt=timopt,tim_wfcopy=tim_wfcopy)
 589      end if
 590 
 591      call wfcopy('D',vectsize*blocksize,gwavef,1,blockvectorar,1,blocksize,iblock,'W',withbbloc=.false.,&
 592 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
 593 
 594      ABI_FREE(cwavef)
 595      ABI_FREE(gwavef)
 596      ABI_FREE(gvnlxc)
 597      ABI_FREE(swavef)
 598 
 599      if(use_linalg_gpu==1) then
 600        call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize)
 601        call gpu_xorthonormalize(blockvectorr_gpu,blockvectorbr_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,&
 602 &       x_cplx,timopt=timopt,tim_xortho=tim_xortho)
 603        call copy_from_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 604        call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorbr_gpu,vectsize)
 605        call copy_from_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize)
 606        call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize)
 607        call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,blockvectorar_gpu,vectsize)
 608        call copy_from_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize)
 609        if (gs_hamk%usepaw==0) then
 610          call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize)
 611          call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize)
 612          call copy_from_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize)
 613        end if
 614        call copy_from_gpu(gramrbr,gram_gpu,cplx*dp*blocksize*blocksize)
 615      else
 616        call abi_xorthonormalize(blockvectorr,blockvectorbr,blocksize,mpi_enreg%comm_bandspinorfft,gramrbr,vectsize,&
 617 &       x_cplx,timopt=timopt,tim_xortho=tim_xortho)
 618        call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorbr,vectsize,x_cplx=x_cplx)
 619        call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorar,vectsize,x_cplx=x_cplx)
 620        if (gs_hamk%usepaw==0) then
 621          call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,gramrbr,blocksize,blockvectorvr,vectsize,x_cplx=x_cplx)
 622        end if
 623      end if
 624 
 625      if(iterationnumber>1) then
 626        if(use_linalg_gpu==1) then
 627          call copy_on_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize)
 628          call copy_on_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize)
 629          call gpu_xorthonormalize(A_gpu,C_gpu,blocksize,mpi_enreg%comm_bandspinorfft,gram_gpu,vectsize,&
 630 &         x_cplx,timopt=timopt,tim_xortho=tim_xortho)
 631          call copy_from_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize)
 632          call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,C_gpu,vectsize)
 633          call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize)
 634          call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize)
 635          call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize)
 636          call copy_from_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize)
 637          if (gs_hamk%usepaw==0) then
 638            call copy_on_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize)
 639            call gpu_xtrsm(cplx,'r','u','n','n',vectsize,blocksize,cone,gram_gpu,blocksize,A_gpu,vectsize)
 640            call copy_from_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize)
 641          end if
 642          call copy_from_gpu(grampbp,gram_gpu,cplx*dp*blocksize*blocksize)
 643        else
 644 !        call orthonormalize(blockvectorp,blockvectorbp,blockvectorap)
 645          call abi_xorthonormalize(blockvectorp,blockvectorbp,blocksize,mpi_enreg%comm_bandspinorfft,grampbp,vectsize,&
 646 &         x_cplx,timopt=timopt,tim_xortho=tim_xortho)
 647 !        blockvectorap=matmul(blockvectorap,grampbp)
 648          call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorbp,vectsize,x_cplx=x_cplx)
 649          call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorap,vectsize,x_cplx=x_cplx)
 650          if (gs_hamk%usepaw==0) then
 651            call abi_xtrsm('r','u','n','n',vectsize,blocksize,cone,grampbp,blocksize,blockvectorvp,vectsize,x_cplx=x_cplx)
 652          end if
 653        end if
 654      end if
 655 
 656      activersize=blocksize
 657      if (iterationnumber==1) then
 658        activepsize=0
 659        restart=1
 660      else
 661        activepsize=blocksize
 662        restart=0
 663      end if
 664 
 665 !    gramxar=matmul((blockvectorax)^T,blockvectorr)
 666 !    gramrar=matmul((blockvectorar)^T,blockvectorr)
 667 !    gramxax=matmul((blockvectorax)^T,blockvectorx)
 668      if(use_linalg_gpu==1) then
 669        call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize)
 670        call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,&
 671 &       vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize)
 672        call copy_from_gpu(gramxar,gram_gpu,cplx*dp*blocksize*blocksize)
 673        call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar_gpu,&
 674 &       vectsize,blockvectorr_gpu,vectsize,czero,gram_gpu,blocksize)
 675        call copy_from_gpu(gramrar,gram_gpu,cplx*dp*blocksize*blocksize)
 676        call copy_on_gpu(blockvectorx,C_gpu,cplx*dp*vectsize*blocksize)
 677        call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,cone,A_gpu,&
 678 &       vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 679        call copy_from_gpu(gramxax,gram_gpu,cplx*dp*blocksize*blocksize)
 680      else
 681        call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,&
 682 &       vectsize,blockvectorr,vectsize,czero,gramxar,blocksize,x_cplx=x_cplx)
 683        call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,&
 684 &       vectsize,blockvectorr,vectsize,czero,gramrar,blocksize,x_cplx=x_cplx)
 685        call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,&
 686 &       vectsize,blockvectorx,vectsize,czero,gramxax,blocksize,x_cplx=x_cplx)
 687      end if
 688 
 689      call abi_xcopy(blocksize*blocksize,gramxar,1,transf3(:,:,1),1,x_cplx=x_cplx)
 690      call abi_xcopy(blocksize*blocksize,gramrar,1,transf3(:,:,2),1,x_cplx=x_cplx)
 691      call abi_xcopy(blocksize*blocksize,gramxax,1,transf3(:,:,3),1,x_cplx=x_cplx)
 692      if(abs(dtset%timopt)==3) then
 693        call timab(533,1,tsec)
 694      end if
 695      call xmpi_sum(transf3,mpi_enreg%comm_bandspinorfft,ierr)
 696      if(abs(dtset%timopt)==3) then
 697        call timab(533,2,tsec)
 698      end if
 699 
 700      call abi_xcopy(blocksize*blocksize,transf3(:,:,1),1,gramxar,1,x_cplx=x_cplx)
 701      call abi_xcopy(blocksize*blocksize,transf3(:,:,2),1,gramrar,1,x_cplx=x_cplx)
 702      call abi_xcopy(blocksize*blocksize,transf3(:,:,3),1,gramxax,1,x_cplx=x_cplx)
 703 
 704 !    gramxbx=matmul((blockvectorbx)^T,blockvectorx)
 705 !    gramrbr=matmul((blockvectorbr)^T,blockvectorr)
 706 !    gramxbr=matmul((blockvectorbx)^T,blockvectorr)
 707 !    Note that the gramb matrix is more easier to construct than grama:
 708 !    i) <x|B|x>=<r|B|r>=<p|B|p>=(1;0)
 709 !    since the x, r and p blockvector are normalized
 710 !    ii) <r|B|x>=(0;0)
 711 !    since the x and r blockvector are orthogonalized
 712 !    iii) The <p|B|r> and <p|B|x> have to be computed.
 713      gramxbx(:,:)=zero
 714      gramrbr(:,:)=zero
 715      gramxbr(:,:)=zero
 716      do iblocksize=1,blocksize
 717        zvar=(/one,zero/)
 718        call abi_xcopy(1,zvar,1,gramxbx(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx)
 719        call abi_xcopy(1,zvar,1,gramrbr(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx)
 720      end do
 721 
 722 !    ###########################################################################
 723 !    ################ PERFORM LOOP ON COND #####################################
 724 !    ###########################################################################
 725 
 726      i1=0;i2=blocksize;i3=2*blocksize;i4=3*blocksize
 727      cond: do cond_try=1,2 !2 when restart
 728        if (restart==0) then
 729 
 730 !        gramxap=matmul((blockvectorax)^T,blockvectorp)
 731 !        gramrap=matmul((blockvectorar)^T,blockvectorp)
 732 !        grampap=matmul((blockvectorap)^T,blockvectorp)
 733 !        gramxbp=matmul((blockvectorbx)^T,blockvectorp)
 734 !        gramrbp=matmul((blockvectorbr)^T,blockvectorp)
 735 !        grampbp=matmul((blockvectorbp)^T,blockvectorp)
 736          if(use_linalg_gpu==1) then
 737            call copy_on_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize)
 738            call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize)
 739            call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,&
 740 &           cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 741            call copy_from_gpu(gramxap,gram_gpu,cplx*dp*blocksize*blocksize)
 742            call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,&
 743 &           cone,blockvectorar_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 744            call copy_from_gpu(gramrap,gram_gpu,cplx*dp*blocksize*blocksize)
 745            call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize)
 746            call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,&
 747 &           cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 748            call copy_from_gpu(grampap,gram_gpu,cplx*dp*blocksize*blocksize)
 749            call copy_on_gpu(blockvectorbx,A_gpu,cplx*dp*vectsize*blocksize)
 750            call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,&
 751 &           cone,A_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 752            call copy_from_gpu(gramxbp,gram_gpu,cplx*dp*blocksize*blocksize)
 753            call gpu_xgemm(cplx,cparam(cplx),'n',blocksize,blocksize,vectsize,&
 754 &           cone,blockvectorbr_gpu,vectsize,C_gpu,vectsize,czero,gram_gpu,blocksize)
 755            call copy_from_gpu(gramrbp,gram_gpu,cplx*dp*blocksize*blocksize)
 756          else
 757            call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorax,&
 758 &           vectsize,blockvectorp,vectsize,czero,gramxap,blocksize,x_cplx=x_cplx)
 759            call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorar,&
 760 &           vectsize,blockvectorp,vectsize,czero,gramrap,blocksize,x_cplx=x_cplx)
 761            call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorap,&
 762 &           vectsize,blockvectorp,vectsize,czero,grampap,blocksize,x_cplx=x_cplx)
 763            call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbx,&
 764 &           vectsize,blockvectorp,vectsize,czero,gramxbp,blocksize,x_cplx=x_cplx)
 765            call abi_xgemm(cparam(cplx),'n',blocksize,blocksize,vectsize,cone,blockvectorbr,&
 766 &           vectsize,blockvectorp,vectsize,czero,gramrbp,blocksize,x_cplx=x_cplx)
 767          end if
 768 !        It's not necessary to compute the last one: <p|B|p>=(1;0) (see above)
 769          transf5(:,:,1)=gramxap(:,:)
 770          transf5(:,:,2)=gramrap(:,:)
 771          transf5(:,:,3)=grampap(:,:)
 772          transf5(:,:,4)=gramxbp(:,:)
 773          transf5(:,:,5)=gramrbp(:,:)
 774          if(abs(dtset%timopt)==3) then
 775            call timab(533,1,tsec)
 776          end if
 777          call xmpi_sum(transf5,mpi_enreg%comm_bandspinorfft,ierr)
 778          if(abs(dtset%timopt)==3) then
 779            call timab(533,2,tsec)
 780          end if
 781          gramxap(:,:)=transf5(:,:,1)
 782          gramrap(:,:)=transf5(:,:,2)
 783          grampap(:,:)=transf5(:,:,3)
 784          gramxbp(:,:)=transf5(:,:,4)
 785          gramrbp(:,:)=transf5(:,:,5)
 786          grampbp(:,:)=zero
 787          do iblocksize=1,blocksize
 788            zvar=(/one,zero/)
 789            call abi_xcopy(1,zvar,1,grampbp(cplx*(iblocksize-1)+1:cplx*iblocksize,iblocksize),1,x_cplx=x_cplx)
 790          end do
 791          bigorder=i4
 792          ABI_MALLOC(grama,(cplx*i4,i4))
 793          ABI_MALLOC(gramb,(cplx*i4,i4))
 794          ABI_MALLOC(eigen,(i4))
 795 !        ABI_MALLOC(coordx,(cplx*i4,blocksize))
 796          ABI_MALLOC(coordx1,(cplx*blocksize,blocksize))
 797          ABI_MALLOC(coordx2,(cplx*blocksize,blocksize))
 798          ABI_MALLOC(coordx3,(cplx*blocksize,blocksize))
 799          grama(:,:)=zero;gramb(:,:)=zero
 800          grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax
 801          grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar
 802          grama(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxap
 803          grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar
 804          grama(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrap
 805          grama(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampap
 806          gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx
 807          gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr
 808          gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i3+1:i4)=gramxbp
 809          gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr
 810          gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i3+1:i4)=gramrbp
 811          gramb(gramindex(i3+1):gramindex(i4)+cplx-1,i3+1:i4)=grampbp
 812        else
 813          bigorder=i3
 814          ABI_MALLOC(grama,(cplx*i3,i3))
 815          ABI_MALLOC(gramb,(cplx*i3,i3))
 816          ABI_MALLOC(eigen,(i3))
 817 !        ABI_MALLOC(coordx,(cplx*i3,blocksize))
 818          ABI_MALLOC(coordx1,(cplx*blocksize,blocksize))
 819          ABI_MALLOC(coordx2,(cplx*blocksize,blocksize))
 820          grama(:,:)=zero;gramb(:,:)=zero
 821          grama(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxax
 822          grama(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxar
 823          grama(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrar
 824          gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i1+1:i2)=gramxbx
 825          gramb(gramindex(i1+1):gramindex(i2)+cplx-1,i2+1:i3)=gramxbr
 826          gramb(gramindex(i2+1):gramindex(i3)+cplx-1,i2+1:i3)=gramrbr
 827        end if
 828 
 829        ABI_MALLOC(tmpgramb,(cplx*bigorder,bigorder))
 830        ABI_MALLOC(tmpeigen,(bigorder))
 831        tmpgramb=gramb
 832 
 833        call abi_xheev('v','u',bigorder,tmpgramb,bigorder,tmpeigen,x_cplx=cplx,istwf_k=istwf_k, &
 834 &       timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu)
 835 
 836        condestgramb=tmpeigen(bigorder)/tmpeigen(1)
 837        ABI_FREE(tmpgramb)
 838        ABI_FREE(tmpeigen)
 839 
 840        if (condestgramb.gt.1d+5.or.condestgramb.lt.0.d0.or.info/=0) then
 841          write(std_out,*)'condition number of the Gram matrix = ',condestgramb
 842          if (cond_try==1.and.restart==0) then
 843            ABI_FREE(grama)
 844            ABI_FREE(gramb)
 845            ABI_FREE(eigen)
 846 !          ABI_FREE(coordx)
 847            ABI_FREE(coordx1)
 848            ABI_FREE(coordx2)
 849            if(bigorder==i4) then
 850              ABI_FREE(coordx3)
 851            end if
 852            if (nrestart.gt.1) then
 853              ABI_WARNING('the minimization is stopped for this block')
 854              exit iter
 855            else
 856              restart=1
 857              nrestart=nrestart+1
 858              call wrtout(std_out,'Lobpcgwf: restart performed',"PERS")
 859            end if
 860          else
 861            ABI_WARNING('Gramm matrix ill-conditionned: results may be unpredictable')
 862          end if
 863        else
 864          exit cond
 865        end if
 866      end do cond
 867 
 868 !    ###########################################################################
 869 !    ################ END LOOP ON COND #########################################
 870 !    ###########################################################################
 871 
 872      call abi_xhegv(1,'v','u',bigorder,grama,bigorder,gramb,bigorder,eigen,x_cplx=cplx,istwf_k=istwf_k, &
 873      timopt=timopt,tim_xeigen=tim_xeigen,use_slk=dtset%use_slk,use_gpu=use_lapack_gpu)
 874 
 875      deltae=-one
 876      do iblocksize=1,blocksize
 877        call abi_xcopy(1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,zvar,1,x_cplx=x_cplx)
 878        deltae=max(deltae,abs(cmplx(zvar(1),zvar(2))-eigen(iblocksize)))
 879        zvar=(/eigen(iblocksize),zero/)
 880        call abi_xcopy(1,zvar,1,lambda(cplx*(iblocksize-1)+1,iblocksize),1,x_cplx=x_cplx)
 881      end do
 882 
 883 !    DEBUG
 884 !    write(std_out,*)'eigen',eigen(1:blocksize)
 885 !    ENDDEBUG
 886 
 887 !    coordx(1:bigorder*cplx,1:blocksize)=grama(1:bigorder*cplx,1:blocksize)
 888      coordx1(:,:) =  grama(1+i1*cplx : i2*cplx,1:blocksize)
 889      coordx2(:,:) =  grama(1+i2*cplx : i3*cplx,1:blocksize)
 890      if(bigorder==i4) then
 891        coordx3(:,:) =  grama(1+i3*cplx : i4*cplx,1:blocksize)
 892      end if
 893 
 894 
 895      if(use_linalg_gpu==1) then
 896        call copy_on_gpu(coordx2,coordx2_gpu,cplx*dp*blocksize*blocksize)
 897        if(bigorder==i4) then
 898          call copy_on_gpu(coordx3,coordx3_gpu,cplx*dp*blocksize*blocksize)
 899        end if
 900      end if
 901 
 902      ABI_FREE(grama)
 903      ABI_FREE(gramb)
 904      ABI_FREE(eigen)
 905      if (restart==0 .and. iterationnumber >1) then
 906 
 907 !      blockvectorp=matmul(blockvectorr,coordx(i2+1:i3,:))+&
 908 !      &               matmul(blockvectorp,coordx(i3+1:i4,:))
 909        if(use_linalg_gpu==1) then
 910 !        call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 911          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,&
 912 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
 913          call copy_on_gpu(blockvectorp,A_gpu,cplx*dp*vectsize*blocksize)
 914          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,&
 915 &         coordx3_gpu,blocksize,cone,C_gpu,vectsize)
 916          call copy_from_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize)
 917        else
 918          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,&
 919 &         vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 920          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorp,&
 921 &         vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx)
 922          call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorp,1,x_cplx=x_cplx)
 923        end if
 924 
 925 !      blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))+&
 926 !      &                matmul(blockvectorap,coordx(i3+1:i4,:))
 927        if(use_linalg_gpu==1) then
 928 !        call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize)
 929          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,&
 930 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
 931          call copy_on_gpu(blockvectorap,A_gpu,cplx*dp*vectsize*blocksize)
 932          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,&
 933 &         coordx3_gpu,blocksize,cone,C_gpu,vectsize)
 934          call copy_from_gpu(blockvectorap,C_gpu,cplx*dp*vectsize*blocksize)
 935        else
 936          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,&
 937 &         vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 938          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorap,&
 939 &         vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx)
 940          call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorap,1,x_cplx=x_cplx)
 941        end if
 942 
 943 
 944 !      blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:))+&
 945 !      &                matmul(blockvectorvp,coordx(i3+1:i4,:))
 946        if (gs_hamk%usepaw==0) then
 947          if(use_linalg_gpu==1) then
 948            call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize)
 949            call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
 950 &           vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
 951            call copy_on_gpu(blockvectorvp,A_gpu,cplx*dp*vectsize*blocksize)
 952            call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
 953 &           vectsize,coordx3_gpu,blocksize,cone,C_gpu,vectsize)
 954            call copy_from_gpu(blockvectorvp,C_gpu,cplx*dp*vectsize*blocksize)
 955          else
 956            call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,&
 957 &           vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 958            call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvp,&
 959 &           vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx)
 960            call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorvp,1,x_cplx=x_cplx)
 961          end if
 962        end if
 963 
 964 !      blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))+&
 965 !      &                matmul(blockvectorbp,coordx(i3+1:i4,:))
 966        if(use_linalg_gpu==1) then
 967 !        call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize)
 968          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,&
 969 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
 970          call copy_on_gpu(blockvectorbp,A_gpu,cplx*dp*vectsize*blocksize)
 971          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,vectsize,&
 972 &         coordx3_gpu,blocksize,cone,C_gpu,vectsize)
 973          call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize)
 974        else
 975          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,&
 976 &         vectsize,coordx2,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
 977          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbp,&
 978 &         vectsize,coordx3,blocksize,cone,blockvectordumm,vectsize,x_cplx=x_cplx)
 979          call abi_xcopy(vectsize*blocksize,blockvectordumm,1,blockvectorbp,1,x_cplx=x_cplx)
 980        end if
 981 
 982      else
 983 
 984 !      blockvectoSz =matmul(blockvectorr,coordx(i2+1:i3,:))
 985        if(use_linalg_gpu==1) then
 986 !        call copy_on_gpu(blockvectorr,blockvectorr_gpu,cplx*dp*vectsize*blocksize)
 987          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorr_gpu,&
 988 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
 989          call copy_from_gpu(blockvectorp,C_gpu,cplx*dp*vectsize*blocksize)
 990        else
 991          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorr,&
 992 &         vectsize,coordx2,blocksize,czero,blockvectorp,vectsize,x_cplx=x_cplx)
 993        end if
 994 
 995 !      blockvectorap=matmul(blockvectorar,coordx(i2+1:i3,:))
 996        if(use_linalg_gpu==1) then
 997 !        call copy_on_gpu(blockvectorar,blockvectorar_gpu,cplx*dp*vectsize*blocksize)
 998          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorar_gpu,&
 999 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1000          call copy_from_gpu(blockvectorap,C_gpu,cplx*dp*vectsize*blocksize)
1001        else
1002          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorar,&
1003 &         vectsize,coordx2,blocksize,czero,blockvectorap,vectsize,x_cplx=x_cplx)
1004        end if
1005 !      blockvectorvp=matmul(blockvectorvr,coordx(i2+1:i3,:))
1006        if (gs_hamk%usepaw==0) then
1007          if(use_linalg_gpu==1) then
1008            call copy_on_gpu(blockvectorvr,A_gpu,cplx*dp*vectsize*blocksize)
1009            call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
1010 &           vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1011            call copy_from_gpu(blockvectorvp,C_gpu,cplx*dp*vectsize*blocksize)
1012          else
1013            call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvr,&
1014 &           vectsize,coordx2,blocksize,czero,blockvectorvp,vectsize,x_cplx=x_cplx)
1015          end if
1016        end if
1017 
1018 !      blockvectorbp=matmul(blockvectorbr,coordx(i2+1:i3,:))
1019        if(use_linalg_gpu==1) then
1020 !        call copy_on_gpu(blockvectorbr,blockvectorbr_gpu,cplx*dp*vectsize*blocksize)
1021          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,blockvectorbr_gpu,&
1022 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1023          call copy_from_gpu(blockvectorbp,C_gpu,cplx*dp*vectsize*blocksize)
1024        else
1025          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbr,&
1026 &         vectsize,coordx2,blocksize,czero,blockvectorbp,vectsize,x_cplx=x_cplx)
1027        end if
1028      end if
1029 
1030      if(use_linalg_gpu==1) then
1031        call copy_on_gpu(coordx1,coordx2_gpu,cplx*dp*blocksize*blocksize)
1032      end if
1033 
1034 !    blockvectorx = matmul(blockvectorx,coordx(i1+1:i2,:))+blockvectorp
1035      if(use_linalg_gpu==1) then
1036        call copy_on_gpu(blockvectorx,A_gpu,cplx*dp*vectsize*blocksize)
1037        call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
1038 &       vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1039        call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize)
1040      else
1041        call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorx,&
1042 &       vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
1043      end if
1044      blockvectorx = blockvectordumm+blockvectorp
1045 
1046 !    blockvectorax= matmul(blockvectorax,coordx(i1+1:i2,:))+blockvectorap
1047      if(use_linalg_gpu==1) then
1048        call copy_on_gpu(blockvectorax,A_gpu,cplx*dp*vectsize*blocksize)
1049        call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
1050 &       vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1051        call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize)
1052      else
1053        call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorax,&
1054 &       vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
1055      end if
1056      blockvectorax = blockvectordumm+blockvectorap
1057 
1058 !    blockvectorvx= matmul(blockvectorvx,coordx(i1+1:i2,:))+blockvectorvp
1059      if (gs_hamk%usepaw==0) then
1060        if(use_linalg_gpu==1) then
1061          call copy_on_gpu(blockvectorvx,A_gpu,cplx*dp*vectsize*blocksize)
1062          call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
1063 &         vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1064          call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize)
1065        else
1066          call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorvx,&
1067 &         vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
1068        end if
1069        blockvectorvx = blockvectordumm+blockvectorvp
1070      end if
1071 
1072 !    blockvectorbx= matmul(blockvectorbx,coordx(i1+1:i2,:))+blockvectorbp
1073      if(use_linalg_gpu==1) then
1074        call copy_on_gpu(blockvectorbx,A_gpu,cplx*dp*vectsize*blocksize)
1075        call gpu_xgemm(cplx,'n','n',vectsize,blocksize,blocksize,cone,A_gpu,&
1076 &       vectsize,coordx2_gpu,blocksize,czero,C_gpu,vectsize)
1077        call copy_from_gpu(blockvectordumm,C_gpu,cplx*dp*vectsize*blocksize)
1078      else
1079        call abi_xgemm('n','n',vectsize,blocksize,blocksize,cone,blockvectorbx,&
1080 &       vectsize,coordx1,blocksize,czero,blockvectordumm,vectsize,x_cplx=x_cplx)
1081      end if
1082      blockvectorbx = blockvectordumm+blockvectorbp
1083 
1084 !    ABI_FREE(coordx)
1085      ABI_FREE(coordx1)
1086      ABI_FREE(coordx2)
1087      if(bigorder==i4) then
1088        ABI_FREE(coordx3)
1089      end if
1090 
1091 !    Check convergence on energy and eventually exit
1092      if (iterationnumber==1) then
1093        deold=deltae
1094      else if (iterationnumber>1) then
1095        if ((abs(deltae)<0.005*abs(deold)).and.(iterationnumber/=maxiterations))then
1096          if(prtvol>=10)then
1097            write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) &
1098 &           ' lobpcgwf: block',iblock,', line',iterationnumber,&
1099 &           ', deltae=',deltae,' < 0.005*',deold,' =>skip lines !'
1100            call wrtout(std_out,message,'PERS')
1101          end if
1102          exit
1103        else if (abs(deltae)>0.005*abs(deold)) then
1104          if(prtvol>=10)then
1105            write(message, '(2(a,i4),1x,a,1p,e12.4,a,e12.4,a)' ) &
1106 &           ' lobpcgwf: block',iblock,', line',iterationnumber,&
1107 &           ', deltae=',deltae,' > 0.005*',deold,' =>keep on working !'
1108            call wrtout(std_out,message,'PERS')
1109          end if
1110        end if
1111      end if
1112 
1113      if(abs(dtset%timopt)==4) then
1114        call timab(524,2,tsec)
1115      end if
1116 
1117    end do iter
1118 
1119 !  ###########################################################################
1120 !  ################## END LOOP ON NLINE ######################################
1121 !  ###########################################################################
1122 
1123    if(abs(dtset%timopt)==4) then
1124      call timab(525,1,tsec)
1125    end if
1126 
1127    if (havetoprecon) then
1128      call xprecon(blockvectorbx,lambda,blocksize,&
1129 &     iterationnumber,kinpw,mpi_enreg,npw_k,my_nspinor,&
1130 &     optekin,optpcon,pcon,blockvectorax,blockvectorr,vectsize,timopt=timopt,tim_xprecon=tim_xprecon)
1131 
1132      residualnorms=sum(abs(blockvectorr)**2,dim=1)
1133 
1134      if(abs(dtset%timopt)==3) then
1135        call timab(533,1,tsec)
1136      end if
1137      call xmpi_sum(residualnorms,mpi_enreg%comm_bandspinorfft,ierr)
1138      if(abs(dtset%timopt)==3) then
1139        call timab(533,2,tsec)
1140      end if
1141 
1142      resid_k(bblocksize+1:bblocksize+blocksize)=residualnorms(1:blocksize)
1143    end if
1144 
1145    call wfcopy('I',vectsize*blocksize,blockvectorx,1,cg,1,blocksize,iblock,'C',withbbloc=.true.,&
1146 &   timopt=timopt,tim_wfcopy=tim_wfcopy)
1147 
1148    if(gen_eigenpb) then
1149      call wfcopy('I',vectsize*blocksize,blockvectorbx,1,gsc,1,blocksize,iblock,'S',withbbloc=.true.,&
1150 &     timopt=timopt,tim_wfcopy=tim_wfcopy)
1151    end if
1152 
1153 !  The Vnl+VFockACE part of the Hamiltonian is no more stored in the packed form such as it was the case for subvnlx(:).
1154 !  Now, the full matrix is stored in totvnlx(:,:). This trick permits:
1155 !  1) to avoid the reconstruction of the total matrix in vtowfk.F90 (double loop over bands)
1156 !  2) to use two optimized matrix-matrix blas routine for general (in lobpcgccwf.F90) or hermitian (in vtowfk.F90)
1157 !  operators, zgemm.f and zhemm.f respectively, rather than a triple loop in both cases.
1158    iwavef=iblock*blocksize
1159    isubh=1+2*bblocksize*(bblocksize+1)/2
1160 
1161    ABI_MALLOC(blockvectorz,(cplx*vectsize,iwavef))
1162    if(bblocksize > 0 ) then
1163      call abi_xcopy(bblocksize*vectsize,blockvectory(:,1:bblocksize),1,blockvectorz(:,1:bblocksize),1,x_cplx=x_cplx)
1164    end if
1165    call abi_xcopy( blocksize*vectsize,blockvectorx(:,1:blocksize) ,1,blockvectorz(:,bblocksize+1:iwavef),1,x_cplx=x_cplx)
1166 
1167    ABI_MALLOC(tsubham,(cplx*iwavef,blocksize))
1168    tsubham(:,:)=zero
1169    call abi_xgemm(cparam(cplx),'n',iwavef,blocksize,vectsize,cone,blockvectorz,vectsize,&
1170 &   blockvectorax,vectsize,czero,tsubham,iwavef,x_cplx=x_cplx)
1171 
1172    if (gs_hamk%usepaw==0) then
1173      ! MG FIXME: Here gfortran4.9 allocates temporary array for C in abi_d2zgemm.
1174      call abi_xgemm(cparam(cplx),'n',blocksize,iwavef,vectsize,cone,blockvectorvx,vectsize,&
1175 &     blockvectorz,vectsize,czero,totvnlx(cplx*bblocksize+1:cplx*iwavef,1:iwavef),blocksize,x_cplx=x_cplx)
1176    end if
1177 
1178    do iblocksize=1,blocksize
1179      do ii=1,bblocksize+iblocksize
1180        if ( cplx == 1 ) then
1181          subham(isubh)  = tsubham(ii,iblocksize)
1182          subham(isubh+1)= zero
1183        else
1184          subham(isubh)  = tsubham(2*ii-1,iblocksize)
1185          subham(isubh+1)= tsubham(2*ii  ,iblocksize)
1186        end if
1187        isubh=isubh+2
1188      end do
1189    end do
1190    ABI_FREE(tsubham)
1191    ABI_FREE(blockvectorz)
1192 !  comm for subham and subvnlx are made in vtowfk
1193 
1194    ABI_FREE(pcon)
1195    ABI_FREE(blockvectory)
1196    ABI_FREE(blockvectorby)
1197    ABI_FREE(gramyx)
1198    ABI_FREE(blockvectorx)
1199    ABI_FREE(blockvectorax)
1200    ABI_FREE(blockvectorbx)
1201    ABI_FREE(blockvectorr)
1202    ABI_FREE(blockvectorar)
1203    ABI_FREE(blockvectorbr)
1204    ABI_FREE(blockvectorp)
1205    ABI_FREE(blockvectorap)
1206    ABI_FREE(blockvectorbp)
1207    if (gs_hamk%usepaw==0) then
1208      ABI_FREE(blockvectorvx)
1209      ABI_FREE(blockvectorvp)
1210      ABI_FREE(blockvectorvr)
1211    end if
1212    ABI_FREE(blockvectordumm)
1213    ABI_FREE(gramxax)
1214    ABI_FREE(gramxar)
1215    ABI_FREE(gramxap)
1216    ABI_FREE(gramrar)
1217    ABI_FREE(gramrap)
1218    ABI_FREE(grampap)
1219    ABI_FREE(gramxbx)
1220    ABI_FREE(gramxbr)
1221    ABI_FREE(gramxbp)
1222    ABI_FREE(gramrbr)
1223    ABI_FREE(gramrbp)
1224    ABI_FREE(grampbp)
1225    ABI_FREE(transf3)
1226    ABI_FREE(transf5)
1227    ABI_FREE(lambda)
1228    ABI_FREE(residualnorms)
1229    if(use_linalg_gpu==1) then
1230      call dealloc_on_gpu(bblockvector_gpu)
1231      call dealloc_on_gpu(gram_gpu)
1232    end if
1233 
1234  end do  ! End big loop over bands inside blocks
1235 
1236  if(use_linalg_gpu==1) then
1237    call dealloc_on_gpu(blockvectorr_gpu)
1238    call dealloc_on_gpu(blockvectorar_gpu)
1239    call dealloc_on_gpu(blockvectorbr_gpu)
1240    call dealloc_on_gpu(A_gpu)
1241    call dealloc_on_gpu(C_gpu)
1242    call dealloc_on_gpu(coordx2_gpu)
1243    call dealloc_on_gpu(coordx3_gpu)
1244    !call gpu_linalg_shutdown()
1245  end if
1246 
1247  if(abs(dtset%timopt)==4) then
1248    call timab(525,2,tsec)
1249  end if
1250  call timab(530,2,tsec)
1251 
1252  DBG_ENTER("COLL")
1253 
1254  contains
1255 
1256    function gramindex(iblocksize)
1257 
1258    integer :: gramindex,iblocksize
1259    gramindex=(iblocksize-1)*cplx+1
1260  end function gramindex
1261 
1262 end subroutine lobpcgwf

ABINIT/m_lobpcgwf_old [ Modules ]

[ Top ] [ Modules ]

NAME

   m_lobpcgwf_old

FUNCTION

COPYRIGHT

  Copyright (C) 2008-2022 ABINIT group ()
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 module m_lobpcgwf_old
23 
24  implicit none
25 
26  private