TABLE OF CONTENTS


ABINIT/m_exc_build [ Modules ]

[ Top ] [ Modules ]

NAME

  m_exc_build

FUNCTION

  Build BSE Hamiltonian in e-h reprensentation.

COPYRIGHT

  Copyright (C) 1992-2009 EXC group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida)
  Copyright (C) 2009-2024 ABINIT group (L.Reining, V.Olevano, F.Sottile, S.Albrecht, G.Onida, M.Giantomassi)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

17 #if defined HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "abi_common.h"
22 
23 module m_exc_build
24 
25  use defs_basis
26  use m_abicore
27  use m_bs_defs
28  use m_bse_io
29  use m_xmpi
30  use m_errors
31  use m_screen
32 #if defined HAVE_MPI2
33  use mpi
34 #endif
35  use m_hdr
36 
37  use m_wfd,          only : wfdgw_t, wave_t, WFD_STORED
38  use defs_datatypes, only : pseudopotential_type
39  use m_gwdefs,       only : czero_gw, cone_gw, GW_TOLQ0
40  use m_time,         only : cwtime, timab
41  use m_io_tools,     only : get_unit, open_file
42  use m_hide_blas,    only : xdotc, xgemv
43  use m_geometry,     only : normv
44  use m_crystal,      only : crystal_t
45  use m_gsphere,      only : gsphere_t
46  use m_vcoul,        only : vcoul_t
47  use m_bz_mesh,      only : kmesh_t, findqg0
48  use m_pawpwij,      only : pawpwff_t, pawpwij_t, pawpwij_init, pawpwij_free, paw_rho_tw_g
49  use m_pawang,       only : pawang_type
50  use m_pawtab,       only : pawtab_type
51  use m_pawcprj,      only : pawcprj_type, pawcprj_alloc, pawcprj_free
52  use m_paw_sym,      only : paw_symcprj_op
53  use m_oscillators,  only : rho_tw_g, sym_rhotwgq0
54 
55  implicit none
56 
57 #if defined HAVE_MPI1
58  include 'mpif.h'
59 #endif
60 
61  private

m_exc_build/exc_build_block [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_block

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite matrix
    elements of a plane wave.
  Wfd<wfdgw_t>=Handler for the wavefunctions.
    prtvol=Verbosity level.
  rhxtwg_q0
  is_resonant
  fname
  comm=MPI communicator.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use DFT energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approximated as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

SOURCE

 151 subroutine exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,Wfd,W,Hdr_bse,&
 152 &  nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,rhxtwg_q0,is_resonant,fname)
 153 
 154 !Arguments ------------------------------------
 155 !scalars
 156  integer,intent(in) :: nfftot_osc
 157  character(len=*),intent(in) :: fname
 158  logical,intent(in) :: is_resonant
 159  type(excparam),intent(in) :: BSp
 160  type(screen_t),intent(inout) :: W
 161  type(kmesh_t),intent(in) :: Kmesh,Qmesh
 162  type(crystal_t),intent(in) :: Cryst
 163  type(vcoul_t),intent(in) :: Vcp
 164  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
 165  type(Pseudopotential_type),intent(in) :: Psps
 166  type(Hdr_type),intent(inout) :: Hdr_bse
 167  type(pawang_type),intent(in) :: Pawang
 168  type(wfdgw_t),target,intent(inout) :: Wfd
 169 !arrays
 170  integer,intent(in) :: ngfft_osc(18)
 171  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
 172  complex(gwpc),intent(in) :: rhxtwg_q0(BSp%npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Wfd%nkibz,Wfd%nsppol)
 173  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
 174  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
 175 
 176 !Local variables ------------------------------
 177 !scalars
 178  integer,parameter :: map2sphere=1,ndat1=1,master=0
 179  integer(i8b) :: bsize_my_block
 180  integer :: nspinor,nsppol,ISg,mpi_err,tmp_size,ngx
 181  integer :: ik_bz,ikp_bz,col_glob,itpk_min,itpk_max
 182  integer :: dim_rtwg,bsh_unt,ncol,dump_unt,npweps
 183 #ifdef HAVE_MPI_IO
 184  integer :: amode,mpi_fh,hmat_type,offset_err,old_type
 185  integer(XMPI_OFFSET_KIND) :: ehdr_offset,my_offset
 186  logical,parameter :: is_fortran_file=.TRUE.
 187 #endif
 188  integer :: neh1,neh2,ig,nblocks
 189  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
 190  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
 191  integer :: iv,ivp,ic,icp,jj,nrows,sender,my_ncols
 192  integer :: use_padfft,prev_nrows,spin1,spin2,block
 193  integer :: ierr,nproc,my_rank,mgfft_osc,fftalga_osc,comm
 194  integer(i8b) :: tot_nels,prev_nels,prev_ncols,nels,ir,it,itp,ist,iend,my_hsize
 195  real(dp) :: faq,kx_fact,cputime,walltime,gflops
 196  complex(spc) :: http,ctemp
 197  complex(dpc) :: ph_mkpt,ph_mkt,ene_t,ene_tp
 198  logical,parameter :: with_umklp=.FALSE.
 199  logical :: use_mpiio,do_coulomb_term,do_exchange_term,w_is_diagonal,isirred
 200  logical :: is_qeq0
 201  character(len=500) :: msg
 202  type(wave_t),pointer :: wave_ck,  wave_ckp, wave_vk, wave_vkp
 203 !arrays
 204  integer :: bidx(2,4),g0(3),spin_ids(2,3)
 205  integer(i8b) :: nels_block(3)
 206  integer :: my_cols(2),my_rows(2),proc_end(2),proc_start(2)
 207  integer :: my_extrema(2,2),sender_extrema(2,2),my_starts(2),my_ends(2)
 208  integer,allocatable :: igfftg0(:),ktabr_k(:),ktabr_kp(:),id_tab(:)
 209  integer,allocatable :: ncols_of(:)
 210  integer(i8b),allocatable :: t_start(:),t_stop(:),hsize_of(:)
 211  integer,allocatable :: col_start(:),col_stop(:)
 212  integer,allocatable :: gbound(:,:)
 213  real(dp) :: kbz(3),kpbz(3),qbz(3),spinrot_k(4),spinrot_kp(4),kmkp(3),tsec(2)
 214  complex(dpc),allocatable :: my_bsham(:),buffer(:),buffer_2d(:,:),my_kxssp(:,:),prev_col(:)
 215 !DBYG
 216  complex(dpc),allocatable :: acoeffs(:),bcoeffs(:),ccoeffs(:) ! Coeff of W = a/q^2 + b/q + c
 217  integer :: a_unt, b_unt, c_unt
 218  complex(dpc) :: aatmp, bbtmp, cctmp
 219  complex(gwpc),allocatable :: aa_vpv(:),aa_cpc(:),aa_ctccp(:)
 220  complex(gwpc),allocatable :: bb_vpv1(:),bb_cpc1(:),bb_ctccp1(:)
 221  complex(gwpc),allocatable :: bb_vpv2(:),bb_cpc2(:),bb_ctccp2(:)
 222  complex(gwpc),allocatable :: cc_vpv(:),cc_cpc(:),cc_ctccp(:)
 223  complex(dpc),allocatable :: abuffer(:),aprev_col(:)
 224  complex(dpc),allocatable :: bbuffer(:),bprev_col(:)
 225  complex(dpc),allocatable :: cbuffer(:),cprev_col(:)
 226  character(len=fnlen) :: tmpfname
 227  integer :: ii
 228 !END DBYG
 229  complex(gwpc),allocatable :: vc_sqrt_qbz(:)
 230  complex(gwpc),allocatable :: rhotwg1(:),rhotwg2(:),rhxtwg_vpv(:),rhxtwg_cpc(:),ctccp(:)
 231  complex(gwpc),target,allocatable :: ur_ckp(:),ur_vkp(:),ur_vk(:),ur_ck(:)
 232  complex(gwpc),ABI_CONTIGUOUS pointer :: ptur_ckp(:),ptur_vkp(:),ptur_vk(:),ptur_ck(:)
 233  type(pawcprj_type),target,allocatable :: Cp_tmp1(:,:),Cp_tmp2(:,:)
 234  type(pawcprj_type),target,allocatable :: Cp_tmp3(:,:),Cp_tmp4(:,:)
 235  type(pawcprj_type),allocatable :: Cp_ckp(:,:),Cp_vkp(:,:)
 236  type(pawcprj_type),allocatable :: Cp_vk(:,:),Cp_ck(:,:)
 237  type(pawcprj_type),pointer :: ptcp_ckp(:,:),ptcp_vkp(:,:),ptcp_vk(:,:),ptcp_ck(:,:)
 238  type(pawpwij_t),allocatable :: Pwij_q(:)
 239 #ifdef HAVE_MPI_IO
 240  integer(XMPI_OFFSET_KIND) :: tmp_off,my_offpad
 241  integer(XMPI_OFFSET_KIND),allocatable :: bsize_frecord(:),offset_of_block(:)
 242 #endif
 243 #ifdef DEV_MG_DEBUG_MODE
 244  integer,allocatable :: ttp_check(:,:)
 245 #endif
 246 
 247 !************************************************************************
 248 
 249  call timab(680,1,tsec)
 250  call timab(681,1,tsec)
 251 
 252  DBG_ENTER("COLL")
 253 
 254  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
 255  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
 256 
 257  if (Wfd%nsppol==2) then
 258    ABI_WARNING("nsppol==2 is still under testing")
 259  end if
 260  !
 261  ! MPI variables.
 262  comm    = Wfd%comm
 263  nproc   = Wfd%nproc
 264  my_rank = Wfd%my_rank
 265 
 266  !
 267  ! Basic constants.
 268  nspinor = Wfd%nspinor
 269  nsppol  = Wfd%nsppol
 270  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
 271  npweps = Bsp%npweps
 272  !
 273  ! Prepare the FFT tables to have u(r) on the ngfft_osc mesh.
 274  mgfft_osc = MAXVAL(ngfft_osc(1:3))
 275  fftalga_osc = ngfft_osc(7)/100
 276  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) then
 277    call wfd%change_ngfft(Cryst,Psps,ngfft_osc)
 278  end if
 279 
 280  ABI_MALLOC(igfftg0,(npweps))
 281  ABI_MALLOC(ktabr_k,(nfftot_osc))
 282  ABI_MALLOC(ktabr_kp,(nfftot_osc))
 283  ABI_MALLOC(id_tab,(nfftot_osc))
 284  id_tab = (/(ic, ic=1,nfftot_osc)/)
 285  !
 286  ! Workspace arrays for wavefunctions and oscillator matrix elements.
 287  ABI_MALLOC(rhxtwg_vpv,(npweps))
 288  ABI_MALLOC(rhxtwg_cpc,(npweps))
 289 
 290  if (BSp%prep_interp) then
 291    call wrtout(std_out,"Preparing BSE interpolation")
 292    ABI_MALLOC(aa_vpv,(npweps))
 293    ABI_MALLOC(bb_vpv1,(npweps))
 294    ABI_MALLOC(bb_vpv2,(npweps))
 295    ABI_MALLOC(cc_vpv,(npweps))
 296 
 297    ABI_MALLOC(aa_cpc,(npweps))
 298    ABI_MALLOC(bb_cpc1,(npweps))
 299    ABI_MALLOC(bb_cpc2,(npweps))
 300    ABI_MALLOC(cc_cpc,(npweps))
 301  end if
 302 
 303  ABI_MALLOC(ur_ckp,(nspinor*nfftot_osc))
 304  ABI_MALLOC(ur_vkp,(nspinor*nfftot_osc))
 305  ABI_MALLOC(ur_ck ,(nspinor*nfftot_osc))
 306  ABI_MALLOC(ur_vk ,(nspinor*nfftot_osc))
 307 
 308  if (Wfd%usepaw==1) then
 309    ABI_MALLOC(Cp_vk,(Wfd%natom,nspinor))
 310    call pawcprj_alloc(Cp_vk,0,Wfd%nlmn_atm)
 311    ABI_MALLOC(Cp_ck,(Wfd%natom,nspinor))
 312    call pawcprj_alloc(Cp_ck,0,Wfd%nlmn_atm)
 313    ABI_MALLOC(Cp_ckp,(Wfd%natom,nspinor))
 314    call pawcprj_alloc(Cp_ckp,0,Wfd%nlmn_atm)
 315    ABI_MALLOC(Cp_vkp,(Wfd%natom,nspinor))
 316    call pawcprj_alloc(Cp_vkp,0,Wfd%nlmn_atm)
 317 
 318    ABI_MALLOC(Cp_tmp1,(Wfd%natom,nspinor))
 319    call pawcprj_alloc(Cp_tmp1,0,Wfd%nlmn_atm)
 320    ABI_MALLOC(Cp_tmp2,(Wfd%natom,nspinor))
 321    call pawcprj_alloc(Cp_tmp2,0,Wfd%nlmn_atm)
 322    ABI_MALLOC(Cp_tmp3,(Wfd%natom,nspinor))
 323    call pawcprj_alloc(Cp_tmp3,0,Wfd%nlmn_atm)
 324    ABI_MALLOC(Cp_tmp4,(Wfd%natom,nspinor))
 325    call pawcprj_alloc(Cp_tmp4,0,Wfd%nlmn_atm)
 326  end if
 327  !
 328  ! Identify the index of q==0
 329  iqbz0=0
 330  do iq_bz=1,Qmesh%nbz
 331    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
 332  end do
 333  ABI_CHECK(iqbz0/=0,"q=0 not found")
 334  !
 335  ! Treat the spin polarization.
 336  spin_ids(:,1) = (/1,1/)
 337  spin_ids(:,2) = (/2,2/)
 338  spin_ids(:,3) = (/1,2/)
 339 
 340  nblocks=1
 341  kx_fact=two
 342  nels_block(:)=0
 343  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
 344  tot_nels=nels_block(1)
 345 
 346  if (nsppol==2) then
 347    nblocks=3
 348    kx_fact=one
 349    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
 350    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
 351    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
 352    tot_nels= SUM(nels_block)
 353  end if
 354  !
 355  ! Distribute the calculation of the matrix elements among the nodes.
 356  ! * tstart and t_stop give the initial and final transition index treated by each node.
 357  ! * my_hsize is the number of transitions treated by this processor
 358  ! * my_cols(1:2) gives the initial and final column treated by this node.
 359  !
 360  use_mpiio=.FALSE.
 361 #ifdef HAVE_MPI_IO
 362  use_mpiio = (nproc>1)
 363 #endif
 364  use_mpiio=.FALSE.
 365  !use_mpiio=.TRUE.
 366 
 367  if (is_resonant) then
 368    if (use_mpiio) then
 369      write(msg,'(2a,f6.2,a)')&
 370 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",two*tot_nels*dpc*b2Gb," [Gb]."
 371    else
 372      write(msg,'(2a,f6.2,a)')&
 373 &      ". Writing resonant excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 374    end if
 375  else
 376    if (use_mpiio) then
 377      write(msg,'(2a,f6.2,a)')&
 378 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname)," via MPI-IO; file size= ",tot_nels*2*dpc*b2Gb," [Gb]."
 379    else
 380      write(msg,'(2a,f6.2,a)')&
 381 &      ". Writing coupling excitonic Hamiltonian on file "//TRIM(fname),"; file size= ",two*dpc*tot_nels*b2Gb," [Gb]."
 382    end if
 383  end if
 384  call wrtout([std_out, ab_out], msg, do_flush=.True.)
 385  !
 386  ! Master writes the BSE header with Fortran IO.
 387  if (my_rank==master) then
 388 
 389    if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="write") /= 0) then
 390       ABI_ERROR(msg)
 391    end if
 392    call exc_write_bshdr(bsh_unt,Bsp,Hdr_bse)
 393    ! To force the writing (needed for MPI-IO).
 394    close(bsh_unt)
 395 
 396    if (.not.use_mpiio) then ! Reopen the file and skip the header.
 397      if (open_file(fname,msg,newunit=bsh_unt,form="unformatted",action="readwrite") /= 0) then
 398         ABI_ERROR(msg)
 399      end if
 400      call exc_skip_bshdr(bsh_unt,ierr)
 401    end if
 402 
 403    if (BSp%prep_interp) then
 404      tmpfname = fname
 405      ii = LEN_TRIM(fname)
 406      tmpfname(ii-2:ii+1) = 'ABSR'
 407      if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="write") /= 0) then
 408        ABI_ERROR(msg)
 409      end if
 410      tmpfname(ii-2:ii+1) = 'BBSR'
 411      if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="write") /= 0) then
 412        ABI_ERROR(msg)
 413      end if
 414      tmpfname(ii-2:ii+1) = 'CBSR'
 415      if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="write") /= 0) then
 416        ABI_ERROR(msg)
 417      end if
 418      call exc_write_bshdr(a_unt,Bsp,Hdr_bse)
 419      call exc_write_bshdr(b_unt,Bsp,Hdr_bse)
 420      call exc_write_bshdr(c_unt,Bsp,Hdr_bse)
 421      close(a_unt)
 422      close(b_unt)
 423      close(c_unt)
 424      if (.not.use_mpiio) then ! Reopen the file and skip the header.
 425        tmpfname(ii-2:ii+1) = 'ABSR'
 426        if (open_file(tmpfname,msg,newunit=a_unt,form='unformatted',action="readwrite") /= 0) then
 427           ABI_ERROR(msg)
 428        end if
 429        call exc_skip_bshdr(a_unt,ierr)
 430        tmpfname(ii-2:ii+1) = 'BBSR'
 431        if (open_file(tmpfname,msg,newunit=b_unt,form='unformatted',action="readwrite") /= 0) then
 432           ABI_ERROR(msg)
 433        end if
 434        call exc_skip_bshdr(b_unt,ierr)
 435        tmpfname(ii-2:ii+1) = 'CBSR'
 436        if (open_file(tmpfname,msg,newunit=c_unt,form='unformatted',action="readwrite") /= 0) then
 437           ABI_ERROR(msg)
 438        end if
 439        call exc_skip_bshdr(c_unt,ierr)
 440      end if
 441    end if
 442  end if
 443 
 444  call xmpi_barrier(comm)
 445 
 446  if (use_mpiio) then
 447 #ifdef HAVE_MPI_IO
 448    ! Open the file with MPI-IO
 449    amode = MPI_MODE_RDWR
 450    !amode = MPI_MODE_CREATE + MPI_MODE_RDWR,
 451 
 452    call MPI_FILE_OPEN(comm, fname, amode, MPI_INFO_NULL, mpi_fh, mpi_err)
 453    ABI_CHECK_MPI(mpi_err,"opening: "//TRIM(fname))
 454 
 455    ! Skip the header.
 456    call exc_skip_bshdr_mpio(mpi_fh,xmpio_collective,ehdr_offset)
 457 
 458    ! Precompute the offset of the each block including the Fortran markers.
 459    ABI_MALLOC(offset_of_block,(nblocks))
 460    offset_of_block(1) = ehdr_offset
 461    do block=2,nblocks
 462      tmp_off = offset_of_block(block-1) + nels_block(block-1)*xmpi_bsize_dpc
 463      tmp_off = tmp_off + Bsp%nreh(block-1)*2*xmpio_bsize_frm  ! markers.
 464      offset_of_block(block) = tmp_off
 465    end do
 466 #endif
 467  end if
 468 
 469  call timab(681,2,tsec)
 470 
 471  do block=1,nsppol
 472    !
 473    ! Indices used to loop over bands.
 474    ! bidx contains the starting and final indices used to loop over bands.
 475    !
 476    !      (b3,b4)
 477    !         |... ...|
 478    ! (b1,b2) |... ...|
 479    !
 480    ! Resonant matrix is given by
 481    !      (v',c')
 482    !       |... ...|
 483    ! (v,c) |... ...|
 484    !
 485    ! Coupling matrix is given by
 486    !       (c',v')
 487    !       |... ...|
 488    ! (v,c) |... ...|
 489 
 490    if (is_resonant) then
 491      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 492      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 493      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
 494      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
 495    else
 496      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
 497      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
 498      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
 499      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
 500    end if
 501 
 502    spin1 = spin_ids(1,block)
 503    spin2 = spin_ids(2,block)
 504 
 505    do_coulomb_term  = (Bsp%use_coulomb_term .and. (spin1==spin2))
 506    do_exchange_term = (Bsp%exchange_term>0)
 507    w_is_diagonal    = BSp%use_diagonal_Wgg
 508    !
 509    ! Distribution of the matrix elements among the nodes.
 510    ! Note that rank0 will get the first transitions.
 511    nels=nels_block(block)
 512    ABI_MALLOC(t_start,(0:nproc-1))
 513    ABI_MALLOC(t_stop,(0:nproc-1))
 514    call xmpi_split_work2_i8b(nels,nproc,t_start,t_stop)
 515 
 516    ABI_MALLOC(hsize_of,(0:nproc-1))
 517    hsize_of=0
 518    do rank=0,nproc-1
 519      if (t_stop(rank)>=t_start(rank)) hsize_of(rank) = t_stop(rank)-t_start(rank)+1
 520      !write(std_out,*)"nels",nels,hsize_of(rank)
 521    end do
 522 
 523    my_hsize = hsize_of(my_rank)
 524    if (my_hsize<=0) then
 525      write(msg,'(a,i0)')"Wrong number of transitions: my_hsize= ",my_hsize
 526      ABI_ERROR(msg)
 527    end if
 528    if (my_hsize /= INT(my_hsize,KIND=i4b)) then
 529      write(msg,'(a,i0)')"Size of local block too large for a default integer, Increase the number of CPUs: my_hsize= ",my_hsize
 530      ABI_ERROR(msg)
 531    end if
 532 
 533    my_cols=0
 534    do itp=1,Bsp%nreh(block)
 535      do it=1,itp
 536        ir = it + itp*(itp-1_i8b)/2
 537        if (ir==t_start(my_rank)) then
 538          my_rows(1) = it
 539          my_cols(1) = itp
 540        end if
 541        if (ir==t_stop(my_rank)) then
 542          my_rows(2) = it
 543          my_cols(2) = itp
 544        end if
 545      end do
 546    end do
 547 
 548    my_starts = [my_rows(1),my_cols(1)]
 549    my_ends   = [my_rows(2),my_cols(2)]
 550    !
 551    ! Announce the treatment of submatrix treated by each node.
 552    bsize_my_block = 2*dpc*my_hsize
 553    write(msg,'(4(a,i0))')' Treating ',my_hsize,'/',nels,' matrix elements, from column ',my_cols(1),' up to column ',my_cols(2)
 554    call wrtout(std_out, msg)
 555 
 556    if (is_resonant) then
 557      write(msg,'(a,f8.1,a)')' Calculating resonant blocks. Memory required: ',bsize_my_block*b2Mb,' [Mb] <<< MEM'
 558    else
 559      write(msg,'(a,f8.1,a)')' Calculating coupling blocks. Memory required: ',bsize_my_block*b2Mb,' [Mb] <<< MEM'
 560    end if
 561    call wrtout(std_out, msg)
 562 
 563    ! Allocate big (scalable) buffer to store the BS matrix on this node.
 564    ABI_MALLOC_OR_DIE(my_bsham,(t_start(my_rank):t_stop(my_rank)), ierr)
 565 
 566    if (BSp%prep_interp) then
 567      ! Allocate big (scalable) buffers to store a,b,c coeffients
 568      ABI_MALLOC_OR_DIE(acoeffs,(t_start (my_rank):t_stop(my_rank)), ierr)
 569      ABI_MALLOC_OR_DIE(bcoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 570      ABI_MALLOC_OR_DIE(ccoeffs,(t_start(my_rank):t_stop(my_rank)), ierr)
 571    end if
 572 
 573    if (do_coulomb_term) then ! Construct Coulomb term.
 574 
 575      call timab(682,1,tsec) ! exc_build_ham(Coulomb)
 576 
 577      write(msg,'(a,2i2,a)')" Calculating direct Coulomb term for (spin1,spin2) ",spin1,spin2," using full W_{GG'} ..."
 578      if (w_is_diagonal) then
 579         write(msg,'(a,2i2,a)')&
 580 &        " Calculating direct Coulomb term for (spin1, spin2) ",spin1,spin2," using diagonal approximation for W_{GG'} ..."
 581      end if
 582      call wrtout(std_out, msg)
 583 
 584      ABI_MALLOC(ctccp,(npweps))
 585 
 586      if (BSp%prep_interp) then
 587        ABI_MALLOC(aa_ctccp,(npweps))
 588        ABI_MALLOC(bb_ctccp1,(npweps))
 589        ABI_MALLOC(bb_ctccp2,(npweps))
 590        ABI_MALLOC(cc_ctccp,(npweps))
 591      end if
 592 
 593      ABI_MALLOC(vc_sqrt_qbz,(npweps))
 594 
 595 #ifdef DEV_MG_DEBUG_MODE
 596      ABI_MALLOC(ttp_check,(BSp%nreh(block),BSp%nreh(block)))
 597      ttp_check=0
 598 #endif
 599 
 600      do ikp_bz=1,BSp%nkbz ! Loop over kp
 601        ! NOTE: this way of looping is good for bulk but it's not optimal in the
 602        !       case of systems sampled only at Gamma e.g. isolated systems in which
 603        !       one should take advantage of Hermiticity by looping over c-v !!!!
 604 
 605        ! Check whether (vp,cp,ikp_bz,spin2) belongs to the set of columns treated by me for some vp,cp
 606        ! Be careful since vcks2t contains zeros corresponding to transitions that should be skipped.
 607        itpk_min = MINVAL(Bsp%vcks2t(:,:,ikp_bz,spin2), MASK=(Bsp%vcks2t(:,:,ikp_bz,spin2)>0) )
 608        itpk_max = MAXVAL(Bsp%vcks2t(:,:,ikp_bz,spin2))
 609        if (my_cols(2)<itpk_min .or. my_cols(1)>itpk_max) CYCLE
 610 
 611        write(msg,'(3(a,i0))')" status: ",ikp_bz,"/",BSp%nkbz," done by node ",my_rank
 612        call wrtout(std_out, msg, do_flush=.True.)
 613 
 614        ! * Get ikp_ibz, non-symmorphic phase, ph_mkpt, and symmetries from ikp_bz.
 615        call kmesh%get_BZ_item(ikp_bz,kpbz,ikp_ibz,isym_kp,itim_kp,ph_mkpt,isirred=isirred)
 616        !ABI_CHECK(isirred,"not irred!")
 617        !ABI_CHECK(ph_mkpt == cone, "Wrong phase!")
 618 
 619        ktabr_kp(:) = ktabr(:,ikp_bz)
 620        spinrot_kp(:)=Cryst%spinrot(:,isym_kp)
 621        !ABI_CHECK(ALL(ktabr_kp == id_tab), "wrong tab")
 622 
 623        do ik_bz=1,ikp_bz ! Loop over k
 624          !
 625          ! * Get ik_ibz, non-symmorphic phase, ph_mkt, and symmetries from ik_bz
 626          call kmesh%get_BZ_item(ik_bz,kbz,ik_ibz,isym_k,itim_k,ph_mkt,isirred=isirred)
 627          !ABI_CHECK(isirred,"not irred!")
 628          !ABI_CHECK(ph_mkt == cone, "Wrong phase!")
 629 
 630          ktabr_k(:) = ktabr(:,ik_bz)
 631          spinrot_k(:)=Cryst%spinrot(:,isym_k)
 632          !ABI_CHECK(ALL(ktabr_k == id_tab), "wrong tab")
 633          !if(itim_k==2) CYCLE ! time-reversal or not
 634          !
 635          ! * Find q = K-KP-G0 in the full BZ.
 636          kmkp = Kmesh%bz(:,ik_bz) - Kmesh%bz(:,ikp_bz)
 637          call findqg0(iq_bz,g0,kmkp,Qmesh%nbz,Qmesh%bz,BSp%mG0)
 638 
 639          ! Evaluate the tables needed for the padded FFT performed in rhotwg. Note that we have
 640          ! to pass G-G0 to sphereboundary instead of G as we need FFT results on the shifted G-sphere,
 641          ! If Gamma is not inside G-G0 one has to disable FFT padding as sphereboundary will give wrong tables.
 642          ! * Get the G-G0 shift for the FFT of the oscillators.
 643          !
 644          ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
 645          call Gsph_c%fft_tabs(g0,mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
 646 #ifdef FC_IBM
 647  ! XLF does not deserve this optimization (problem with [v67mbpt][t03])
 648  use_padfft = 0
 649 #endif
 650          if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
 651          if (use_padfft==0) then
 652            ABI_FREE(gbound)
 653            ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
 654          end if
 655          !
 656          ! * Get iq_ibz, and symmetries from iq_bz
 657          call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
 658          is_qeq0 = (normv(qbz,Cryst%gmet,'G')<GW_TOLQ0)
 659 
 660          ! Symmetrize em1(omega=0)
 661          call screen_symmetrizer(W,iq_bz,Cryst,Gsph_c,Qmesh,Vcp)
 662          !
 663          ! * Set up table of |q_BZ+G|
 664          if (iq_ibz==1) then
 665            do ig=1,npweps
 666              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 667              vc_sqrt_qbz(isg)=Vcp%vcqlwl_sqrt(ig,1)
 668            end do
 669          else
 670            do ig=1,npweps
 671              isg = Gsph_c%rottb(ig,itim_q,isym_q)
 672              vc_sqrt_qbz(isg) = Vcp%vc_sqrt(ig,iq_ibz)
 673            end do
 674          end if
 675 
 676          ! === Evaluate oscillator matrix elements ===
 677          ! * $ <phj/r|e^{-i(q+G)}|phi/r> - <tphj/r|e^{-i(q+G)}|tphi/r> $ in packed form.
 678          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then
 679            ABI_MALLOC(Pwij_q,(Cryst%ntypat))
 680            call pawpwij_init(Pwij_q,npweps,Qmesh%bz(:,iq_bz),Gsph_c%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
 681          end if
 682 
 683          ! =======================================
 684          ! === Loop over the four band indices ===
 685          ! =======================================
 686          do ic=bidx(1,2),bidx(2,2) !do ic=BSp%lumo,BSp%nbnds
 687 
 688            ABI_CHECK(wfd%get_wave_ptr(ic, ik_ibz, spin1, wave_ck, msg) == 0, msg)
 689            if (wave_ck%has_ur == WFD_STORED) then
 690              ptur_ck => wave_ck%ur
 691            else
 692              call wfd%get_ur(ic,ik_ibz,spin1,ur_ck)
 693              ptur_ck => ur_ck
 694            end if
 695            !
 696            ! Get cprj for this (c,kbz,s1) in the BZ.
 697            ! phase due to the umklapp G0 in k-q is already included.
 698            if (Wfd%usepaw==1) then
 699              if (wave_ck%has_cprj == WFD_STORED) then
 700                ptcp_ck => wave_ck%cprj
 701              else
 702                call wfd%get_cprj(ic,ik_ibz,spin1,Cryst,Cp_tmp1,sorted=.FALSE.)
 703                ptcp_ck => Cp_tmp1
 704              end if
 705              call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ck,Cp_ck)
 706            end if
 707 
 708            do icp=bidx(1,4),bidx(2,4)  !do icp=BSp%lumo,BSp%nbnds
 709              ! Calculate matrix-elements rhxtwg_cpc
 710              !
 711              if (ik_bz==ikp_bz) then ! Already in memory.
 712                rhxtwg_cpc(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,icp,ic,ik_ibz,spin1),Gsph_c)
 713 
 714              else
 715                ! Calculate matrix element from wfr.
 716                ! TODO: change the order of the loops.
 717 
 718                ABI_CHECK(wfd%get_wave_ptr(icp, ikp_ibz, spin2, wave_ckp, msg) == 0, msg)
 719                if (wave_ckp%has_ur == WFD_STORED) then
 720                  ptur_ckp => wave_ckp%ur
 721                else
 722                  call wfd%get_ur(icp,ikp_ibz,spin2,ur_ckp)
 723                  ptur_ckp => ur_ckp
 724                end if
 725 
 726                ! Load cprj for this (c,k,s2) in the BZ.
 727                ! Do not care about umklapp G0 in k-q as the phase is already included.
 728                if (Wfd%usepaw==1) then
 729                  if (wave_ckp%has_cprj == WFD_STORED) then
 730                    ptcp_ckp =>  wave_ckp%cprj
 731                  else
 732                    call wfd%get_cprj(icp,ikp_ibz,spin2,Cryst,Cp_tmp2,sorted=.FALSE.)
 733                    ptcp_ckp =>  Cp_tmp2
 734                  end if
 735                  call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_ckp,Cp_ckp)
 736                end if
 737 
 738                call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 739                  ptur_ckp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 740                  ptur_ck ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 741                  dim_rtwg,rhxtwg_cpc)
 742 
 743                if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 744                  call paw_rho_tw_g(cryst,Pwij_q,npweps,dim_rtwg,nspinor,Gsph_c%gvec, Cp_ckp,Cp_ck,rhxtwg_cpc)
 745                end if
 746              end if
 747 
 748              if (BSp%prep_interp) then
 749                aa_cpc = rhxtwg_cpc
 750                aa_cpc(2:) = czero
 751                bb_cpc1 = vc_sqrt_qbz*rhxtwg_cpc
 752                bb_cpc1(1) = czero
 753                bb_cpc2 = rhxtwg_cpc
 754                bb_cpc2(2:) = czero
 755 
 756                if(ik_bz == ikp_bz) then
 757                   ! Enforce orthogonality of the wavefunctions.
 758                   if(icp == ic) then
 759                     aa_cpc(1) = cone
 760                     bb_cpc2(1) = cone
 761                   else
 762                     aa_cpc(1) = czero
 763                     bb_cpc2(1) = czero
 764                   end if
 765                end if
 766 
 767                ! MG TODO: a does not require a call to w0gemv
 768                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,aa_cpc,aa_ctccp)
 769                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc1,bb_ctccp1)
 770                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,bb_cpc2,bb_ctccp2)
 771 
 772                cc_cpc = vc_sqrt_qbz*rhxtwg_cpc
 773                cc_cpc(1) = czero
 774 
 775                call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,cc_cpc,cc_ctccp)
 776              end if
 777 
 778              ! Prepare sum_GG' rho_c'c*(G) W_qbz(G,G') rho_v'v(G')
 779              ! First sum on G: sum_G rho_c'c(G) W_qbz*(G,G') (W_qbz conjugated)
 780              rhxtwg_cpc = rhxtwg_cpc * vc_sqrt_qbz
 781              call screen_w0gemv(W,"C",npweps,nspinor,w_is_diagonal,cone_gw,czero_gw,rhxtwg_cpc,ctccp)
 782 
 783              do iv=bidx(1,1),bidx(2,1)    !do iv=BSp%lomo,BSp%homo
 784                it = BSp%vcks2t(iv,ic,ik_bz,spin1); if (it==0) CYCLE ! ir-uv-cutoff
 785                ene_t = BSp%Trans(it,spin1)%en
 786 
 787                ! TODO: use this but change the order of the loops.
 788                ABI_CHECK(wfd%get_wave_ptr(iv, ik_ibz, spin1, wave_vk, msg) == 0, msg)
 789 
 790                if (wave_vk%has_ur == WFD_STORED) then
 791                  ptur_vk => wave_vk%ur
 792                else
 793                  call wfd%get_ur(iv,ik_ibz,spin1,ur_vk)
 794                  ptur_vk => ur_vk
 795                end if
 796                !
 797                ! Load cprj for this (v,k,s1) in the BZ.
 798                ! Do not care about umklapp G0 in k-q as the phase is already included.
 799                if (Wfd%usepaw==1) then
 800                  if (wave_vk%has_cprj == WFD_STORED) then
 801                    ptcp_vk => wave_vk%cprj
 802                  else
 803                    call wfd%get_cprj(iv,ik_ibz,spin1,Cryst,Cp_tmp3,sorted=.FALSE.)
 804                    ptcp_vk => Cp_tmp3
 805                  end if
 806                  call paw_symcprj_op(ik_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vk,Cp_vk)
 807                end if
 808 
 809                do ivp=bidx(1,3),bidx(2,3) !do ivp=BSp%lomo,BSp%homo
 810 
 811                  if (is_resonant) then
 812                    itp = BSp%vcks2t(ivp,icp,ikp_bz,spin2)
 813                  else ! have to exchange band indices
 814                    itp = BSp%vcks2t(icp,ivp,ikp_bz,spin2)
 815                  end if
 816 
 817                  if (itp==0) CYCLE ! ir-uv-cutoff
 818 
 819                  ! FIXME Temporary work around, when ikp_bz == ik it might happen that itp<it
 820                  ! should rewrite the loops using contracted k-dependent indices for bands
 821                  if (itp<it) CYCLE
 822 
 823                  ir = it + itp*(itp-1)/2
 824                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) CYCLE
 825 
 826                  ene_tp = BSp%Trans(itp,spin2)%en
 827 
 828                  ! ============================================
 829                  ! === Calculate matrix elements rhxtwg_vpv ===
 830                  ! ============================================
 831                  if (ik_bz==ikp_bz) then
 832                    ! Already in memory.
 833                    rhxtwg_vpv(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,ivp,iv,ik_ibz,spin1),Gsph_c)
 834 
 835                  else
 836 
 837                    ABI_CHECK(wfd%get_wave_ptr(ivp, ikp_ibz, spin2, wave_vkp, msg) == 0, msg)
 838 
 839                    ! Calculate matrix element from wfr.
 840                    if (wave_vkp%has_ur == WFD_STORED) then
 841                      ptur_vkp => wave_vkp%ur
 842                    else
 843                      call wfd%get_ur(ivp,ikp_ibz,spin2,ur_vkp)
 844                      ptur_vkp => ur_vkp
 845                    end if
 846                    !
 847                    ! Load cprj for this (vp,kp,s2) in the BZ.
 848                    ! Do not care about umklapp G0 in k-q as the phase is already included.
 849                    if (Wfd%usepaw==1) then
 850                      if (wave_vkp%has_cprj == WFD_STORED) then
 851                        ptcp_vkp =>  wave_vkp%cprj
 852                      else
 853                        call wfd%get_cprj(ivp,ikp_ibz,spin2,Cryst,Cp_tmp4,sorted=.FALSE.)
 854                        ptcp_vkp => Cp_tmp4
 855                      end if
 856                      call paw_symcprj_op(ikp_bz,nspinor,1,Cryst,Kmesh,Pawtab,Pawang,ptcp_vkp,Cp_vkp)
 857                    end if
 858 
 859                    call rho_tw_g(nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere,use_padfft,igfftg0,gbound,&
 860                      ptur_vkp,itim_kp,ktabr_kp,ph_mkpt,spinrot_kp,&
 861                      ptur_vk ,itim_k ,ktabr_k ,ph_mkt ,spinrot_k ,&
 862                      dim_rtwg,rhxtwg_vpv)
 863 
 864                    if (Wfd%usepaw==1) then ! Add PAW onsite contribution.
 865                      call paw_rho_tw_g(cryst,Pwij_q,npweps,dim_rtwg,nspinor,Gsph_c%gvec,Cp_vkp,Cp_vk,rhxtwg_vpv)
 866                    end if
 867                  end if
 868 
 869                  ! Index in the global Hamiltonian matrix.
 870                  ir = it + itp*(itp-1_i8b)/2
 871 
 872                  if (ir<t_start(my_rank).or.ir>t_stop(my_rank)) then
 873                    write(msg,'(a,3(1x,i0))')" Gonna SIGFAULT, ir, t_start, t_stop ",ir,t_start(my_rank),t_stop(my_rank)
 874                    ABI_ERROR(msg)
 875                  end if
 876                  !ABI_CHECK(itp >= it,"itp < it")
 877 
 878                  if (BSp%prep_interp) then
 879                    ! Save a,b, c coefficients.
 880                    aa_vpv = rhxtwg_vpv
 881                    aa_vpv(2:) = czero
 882                    bb_vpv1 = rhxtwg_vpv
 883                    bb_vpv1(2:) = czero
 884                    bb_vpv2 = vc_sqrt_qbz*rhxtwg_vpv
 885                    bb_vpv2(1) = czero
 886 
 887                    if (ik_bz == ikp_bz) then
 888                       ! Enforce orthogonality of the wavefunctions.
 889                       if (ivp == iv) then
 890                         aa_vpv(1) = cone
 891                         bb_vpv1(1) = cone
 892                       else
 893                         aa_vpv(1) = czero
 894                         bb_vpv1(1) = czero
 895                       end if
 896                    end if
 897 
 898                    cc_vpv = vc_sqrt_qbz*rhxtwg_vpv
 899                    cc_vpv(1) = czero
 900 
 901                    aatmp = -faq * xdotc(npweps,aa_ctccp,1,aa_vpv,1)
 902                    bbtmp = -faq * xdotc(npweps,bb_ctccp1,1,bb_vpv1,1)-faq*xdotc(npweps,bb_ctccp2,1,bb_vpv2,1)
 903                    cctmp = -faq * xdotc(npweps,cc_ctccp,1,cc_vpv,1)
 904 
 905                    acoeffs(ir) = aatmp
 906                    bcoeffs(ir) = bbtmp
 907                    ccoeffs(ir) = cctmp
 908                  end if
 909 
 910                  ! sum_G2 rho_c'c(G) W_qbz(G,G') rho_v'v(G')
 911                  rhxtwg_vpv = vc_sqrt_qbz * rhxtwg_vpv
 912                  http = - faq * xdotc(npweps,ctccp,1,rhxtwg_vpv,1)
 913 
 914                  ! Save result taking into account the symmetry of the matrix.
 915                  ! Note that the diagonal of the resonant block is not forced to be real
 916                  my_bsham(ir) = http
 917 
 918 #ifdef DEV_MG_DEBUG_MODE
 919                  ttp_check(it,itp) = ttp_check(it,itp)+1
 920 #endif
 921                end do !ivp
 922              end do !iv
 923            end do !icp
 924          end do !ic
 925 
 926          ABI_FREE(gbound)
 927 
 928          if (Wfd%usepaw==1.and.ik_bz/=ikp_bz) then ! Free the onsite contribution for this q.
 929            call pawpwij_free(Pwij_q)
 930            ABI_FREE(Pwij_q)
 931          end if
 932 
 933        end do ! ik_bz
 934      end do ! Fat loop over ikp_bz
 935 
 936 #ifdef DEV_MG_DEBUG_MODE
 937      do itp=1,BSp%nreh(block)
 938        do it=1,BSp%nreh(block)
 939         ir = it + itp*(itp-1_i8b)/2
 940          if (itp>=it .and. ttp_check(it,itp) /= 1) then
 941            if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
 942              write(std_out,*)"WARN: upper triangle is not 1 ",it,itp,ttp_check(it,itp)
 943              write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 944              write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 945            end if
 946          end if
 947          if (itp< it .and. ttp_check(it,itp) /= 0) then
 948            write(std_out,*)"WARN: then lower triangle is not 0 ",it,itp,ttp_check(it,itp)
 949            write(std_out,*)TRIM(repr_trans(Bsp%Trans(it ,spin1)))
 950            write(std_out,*)TRIM(repr_trans(Bsp%Trans(itp,spin2)))
 951          end if
 952        end do
 953      end do
 954      ierr = SUM(SUM(ttp_check,DIM=2),DIM=1)
 955      if (ierr/=my_hsize) then
 956        write(msg,'(a,2i0)')"ierr/=my_hsize",ierr,my_hsize
 957        ABI_ERROR(msg)
 958      end if
 959      ABI_FREE(ttp_check)
 960 #endif
 961 
 962      ABI_FREE(ctccp)
 963      if(Bsp%prep_interp) then
 964        ABI_FREE(aa_ctccp)
 965        ABI_FREE(bb_ctccp1)
 966        ABI_FREE(bb_ctccp2)
 967        ABI_FREE(cc_ctccp)
 968      end if
 969 
 970      ABI_FREE(vc_sqrt_qbz)
 971      call wrtout(std_out,' Coulomb term completed')
 972 
 973      call timab(682,2,tsec) ! exc_build_ham(Coulomb)
 974    end if ! do_coulomb_term
 975    !
 976    ! =====================
 977    ! === Exchange term ===
 978    ! =====================
 979    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
 980    ! TODO might used enlarged G-sphere for better convergence.
 981    if (do_exchange_term) then
 982 
 983      !call exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
 984      ! &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham,comm)
 985 
 986      call timab(683,1,tsec) ! exc_build_ham(exchange)
 987 
 988      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
 989      call wrtout(std_out, msg)
 990 
 991      ABI_MALLOC(rhotwg1,(npweps))
 992      ABI_MALLOC(rhotwg2,(npweps))
 993 
 994      ngx = Gsph_x%ng
 995      ABI_MALLOC(vc_sqrt_qbz,(ngx))
 996 
 997      ! * Get iq_ibz, and symmetries from iq_bz.
 998      iq_bz = iqbz0 ! q = 0 -> iqbz0
 999      call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1000 
1001      ! * Set up table of |q(BZ)+G|
1002      if (iq_ibz==1) then
1003        do ig=1,ngx
1004          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1005          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1006        end do
1007      else
1008         ABI_ERROR("iq_ibz should be 1")
1009      end if
1010 
1011      do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1012 
1013        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1014        ene_tp = Bsp%Trans(itp,spin2)%en
1015        ikp_bz = Bsp%Trans(itp,spin2)%k
1016        ivp    = Bsp%Trans(itp,spin2)%v
1017        icp    = Bsp%Trans(itp,spin2)%c
1018 
1019        ikp_ibz = Kmesh%tab (ikp_bz)
1020        isym_kp = Kmesh%tabo(ikp_bz)
1021        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1022 
1023        if (is_resonant) then
1024          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1025        else ! Code for coupling block.
1026          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1027        end if
1028        !
1029        ! Multiply by the Coulomb term.
1030         do ig=2,npweps
1031           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1032         end do
1033 
1034        do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1035          ir = it + itp*(itp-1_i8b)/2
1036          if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1037 
1038          ene_t = Bsp%Trans(it,spin1)%en
1039          ik_bz = Bsp%Trans(it,spin1)%k
1040          iv    = Bsp%Trans(it,spin1)%v
1041          ic    = Bsp%Trans(it,spin1)%c
1042 
1043          ik_ibz = Kmesh%tab(ik_bz)
1044          isym_k = Kmesh%tabo(ik_bz)
1045          itim_k = (3-Kmesh%tabi(ik_bz))/2
1046          !if (itim_k==2) CYCLE ! time-reversal or not
1047 
1048          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1049          !
1050          ! sum over G/=0
1051          ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1052          ctemp = faq * kx_fact * ctemp
1053 
1054          ! exchange term is non divergent !
1055          if (BSp%prep_interp) then
1056            ccoeffs(ir) = ccoeffs(ir) + ctemp
1057          end if
1058 
1059          my_bsham(ir) = my_bsham(ir) + ctemp
1060        end do !it
1061      end do !itp
1062 
1063      ABI_FREE(rhotwg1)
1064      ABI_FREE(rhotwg2)
1065      ABI_FREE(vc_sqrt_qbz)
1066 
1067      call timab(683,2,tsec) ! exc_build_ham(exchange)
1068    end if ! do_exchange_term
1069    !
1070    ! =====================
1071    ! === Diagonal term ===
1072    ! =====================
1073    if (is_resonant .and. spin1==spin2) then
1074      write(msg,'(a,2i2,a)')" Adding diagonal term for (spin1,spin2) ",spin1,spin2," ..."
1075      call wrtout(std_out, msg)
1076      do it=1,BSp%nreh(block)
1077        ir = it + it*(it-1_i8b)/2
1078        if (ir>=t_start(my_rank) .and. ir<=t_stop(my_rank)) my_bsham(ir) = my_bsham(ir) + Bsp%Trans(it,spin1)%en
1079      end do
1080    end if
1081 
1082    if (.FALSE.) then
1083      dump_unt = get_unit()
1084      msg=' Coupling Hamiltonian matrix elements: '
1085      if (is_resonant) msg=' Reasonant Hamiltonian matrix elements: '
1086      call wrtout(dump_unt, msg)
1087      call wrtout(dump_unt,'    k  v  c  s      k" v" c" s"       H')
1088      do itp=1,BSp%nreh(block)
1089        ikp_bz = Bsp%Trans(itp,spin2)%k
1090        ivp    = Bsp%Trans(itp,spin2)%v
1091        icp    = Bsp%Trans(itp,spin2)%c
1092        do it=1,itp
1093          ik_bz = Bsp%Trans(it,spin1)%k
1094          iv    = Bsp%Trans(it,spin1)%v
1095          ic    = Bsp%Trans(it,spin1)%c
1096          ir = it + itp*(itp-1_i8b)/2
1097          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1098            http = my_bsham(ir)
1099            !if (ABS(http) > tol3) then
1100            write(msg,'(2(i0,1x),2(i5,3i3,3x),2f7.3)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1101            call wrtout(dump_unt, msg)
1102            !end if
1103          end if
1104        end do
1105      end do
1106    end if
1107 
1108 !DBYG
1109    if (.False.) then
1110      dump_unt = get_unit()
1111      dump_unt = 999
1112      msg=' Coupling Hamiltonian matrix elements: '
1113      if (is_resonant) msg=' Resonant Hamiltonian matrix elements: '
1114      call wrtout(dump_unt, msg)
1115      call wrtout(dump_unt,'    k v  c  s      k" v" c" s"       H')
1116      do itp=1,BSp%nreh(block)
1117        ikp_bz = Bsp%Trans(itp,spin2)%k
1118        ivp    = Bsp%Trans(itp,spin2)%v
1119        icp    = Bsp%Trans(itp,spin2)%c
1120        do it=1,BSp%nreh(block)
1121          ik_bz = Bsp%Trans(it,spin1)%k
1122          iv    = Bsp%Trans(it,spin1)%v
1123          ic    = Bsp%Trans(it,spin1)%c
1124          if(it > itp) then
1125            ir = itp+it*(it-1_i8b)/2
1126          else
1127            ir = it + itp*(itp-1_i8b)/2
1128          end if
1129          if (ir>=t_start(my_rank).and.ir<=t_stop(my_rank)) then
1130            if(it > itp) then
1131              http = CONJG(my_bsham(ir))
1132              if (BSp%prep_interp) then
1133                aatmp = CONJG(acoeffs(ir))
1134                bbtmp = CONJG(bcoeffs(ir))
1135                cctmp = CONJG(ccoeffs(ir))
1136              end if
1137            else
1138              http = my_bsham(ir)
1139              if (BSp%prep_interp) then
1140                aatmp = acoeffs(ir)
1141                bbtmp = bcoeffs(ir)
1142                cctmp = ccoeffs(ir)
1143              end if
1144            end if
1145            if (it == itp) http = http - Bsp%Trans(it,spin1)%en
1146            !if (ABS(http) > tol3) then
1147            if (BSp%prep_interp) then
1148              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20,2f24.20,2f24.20,2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,&
1149 &   spin2, http, aatmp, bbtmp, cctmp
1150            else
1151              write(msg,'(2(i0,1x),2(i5,3i3,3x),2f24.20)')it,itp, ik_bz,iv,ic,spin1, ikp_bz,ivp,icp,spin2, http
1152            end if
1153            call wrtout(dump_unt, msg)
1154            !end if
1155          end if
1156        end do
1157      end do
1158    end if
1159 
1160    call timab(684,1,tsec) ! exc_build_ham(synchro)
1161    call xmpi_barrier(comm)
1162    call timab(684,2,tsec) ! exc_build_ham(synchro)
1163    !
1164    ! =================================
1165    ! === Write Hamiltonian on disk ===
1166    ! =================================
1167    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1168    if (use_mpiio) then
1169 #ifdef HAVE_MPI_IO
1170      ! Write the Hamiltonian with collective MPI-IO.
1171      if (BSp%prep_interp) then
1172        ABI_ERROR("Preparation of interpolation technique not yet coded with MPI-IO")
1173      end if
1174      ABI_CHECK(nsppol==1,"nsppol==2 not coded, offset is wrong")
1175      !
1176      old_type = MPI_DOUBLE_COMPLEX
1177      call xmpio_create_fherm_packed(my_starts,my_ends,is_fortran_file,my_offset,old_type,hmat_type,offset_err)
1178 
1179      if (offset_err/=0) then
1180        write(msg,"(3a)")&
1181 &        "Global position index cannot be stored in a standard Fortran integer. ",ch10,&
1182 &        "BSE matrix cannot be written with a single MPI-IO call. "
1183        ABI_ERROR(msg)
1184      end if
1185      !
1186      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1187      my_offset = offset_of_block(block) + my_offset
1188 
1189      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1190      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1191 
1192      call MPI_TYPE_FREE(hmat_type,mpi_err)
1193      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1194 
1195      if (hsize_of(my_rank) /= INT(hsize_of(my_rank),kind=i4b) ) then
1196        ABI_ERROR("Wraparound error")
1197      end if
1198 
1199      tmp_size = INT(hsize_of(my_rank))
1200      call MPI_FILE_WRITE_ALL(mpi_fh, my_bsham, tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1201      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1202 
1203      ! It seems that personal calls in make the code stuck
1204      !if (is_fortran_file .and. my_rank==master) then ! Master writes the Fortran record markers.
1205      ! Write the Fortran record markers.
1206      neh2=BSp%nreh(block)
1207      ABI_MALLOC(bsize_frecord,(neh2))
1208      bsize_frecord = (/(col_glob * xmpi_bsize_dpc, col_glob=1,neh2)/)
1209      ! ehdr_offset points to the end of the header.
1210      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1211      my_offset = offset_of_block(block)
1212      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1213      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1214      ABI_FREE(bsize_frecord)
1215 #else
1216      ABI_BUG("You should not be here!")
1217 #endif
1218    else
1219      ! Use FORTRAN IO with sequential access mode.
1220      ! * Each node sends its data to master node.
1221      ! * Blocks are distributed according to the rank of the node.
1222      ! * Matrix is written by columns hence make sure that the last column is completely written.
1223      call cwtime(cputime,walltime,gflops,"start")
1224 
1225      if (my_rank==master) then
1226        prev_nrows=0; if (my_cols(2) /= my_rows(2)) prev_nrows = my_rows(2)
1227        ncol = my_cols(2)-my_cols(1)+1
1228        ist=1
1229        do jj=1,ncol
1230          col_glob = my_starts(2) + jj - 1
1231          nrows = col_glob; if (jj==ncol) nrows=my_rows(2)
1232          iend = ist + nrows -1
1233          write(bsh_unt) my_bsham(ist:iend)
1234          if (BSp%prep_interp) then
1235            write(a_unt) acoeffs(ist:iend)
1236            write(b_unt) bcoeffs(ist:iend)
1237            write(c_unt) ccoeffs(ist:iend)
1238          end if
1239          ist=iend+1
1240        end do
1241        write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(my_rank)
1242        ABI_CHECK(iend==hsize_of(my_rank),msg)
1243        ABI_FREE(my_bsham)
1244        if (BSp%prep_interp) then
1245          ABI_FREE(acoeffs)
1246          ABI_FREE(bcoeffs)
1247          ABI_FREE(ccoeffs)
1248        end if
1249      end if
1250 
1251      call xmpi_barrier(comm)
1252      !
1253      ! Collect data from the other nodes.
1254      do sender=1,nproc-1
1255        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1256        if (all(my_rank /= [sender, master])) goto 100
1257 
1258        if (my_rank==master)  then
1259          ABI_MALLOC(buffer,(hsize_of(sender)))
1260          if (BSp%prep_interp) then
1261            ABI_MALLOC(abuffer,(hsize_of(sender)))
1262            ABI_MALLOC(bbuffer,(hsize_of(sender)))
1263            ABI_MALLOC(cbuffer,(hsize_of(sender)))
1264          end if
1265        end if
1266        tmp_size = INT(hsize_of(sender),kind=i4b)
1267        call xmpi_exch(my_bsham,tmp_size,sender,buffer,master,comm,10*block+1,mpi_err)
1268        if (BSp%prep_interp) then
1269          call xmpi_exch(acoeffs,tmp_size,sender,abuffer,master,comm,10*block+2,mpi_err)
1270          call xmpi_exch(bcoeffs,tmp_size,sender,bbuffer,master,comm,10*block+3,mpi_err)
1271          call xmpi_exch(ccoeffs,tmp_size,sender,cbuffer,master,comm,10*block+4,mpi_err)
1272        end if
1273 
1274        ! TODO Be careful with the MPI TAG here, add optional Arguments in xmpi_exch so that the TAG can be specified!
1275        proc_start = (/my_rows(1),my_cols(1)/)
1276        proc_end   = (/my_rows(2),my_cols(2)/)
1277        my_extrema(:,1) = proc_start
1278        my_extrema(:,2) = proc_end
1279 
1280        sender_extrema = my_extrema ! just to avoid NAN on sender. xechh_mpi is not well designed
1281        call xmpi_exch(my_extrema,4,sender,sender_extrema,master,comm,10*block+5,mpi_err)
1282 
1283        if (my_rank==master) then
1284           proc_start = sender_extrema(:,1)
1285           proc_end   = sender_extrema(:,2)
1286           !write(std_out,*)"proc_start, proc_end",proc_start,proc_end
1287 
1288          if (prev_nrows>0) then ! backspace the file if the last record written was not complete.
1289            !write(std_out,*)" master node had to call backspace"
1290            backspace(bsh_unt)
1291            ABI_MALLOC(prev_col,(prev_nrows))
1292            read(bsh_unt) prev_col
1293            backspace(bsh_unt)
1294 
1295            if (BSp%prep_interp) then
1296              backspace(a_unt)
1297              ABI_MALLOC(aprev_col,(prev_nrows))
1298              read(a_unt) aprev_col
1299              backspace(a_unt)
1300 
1301              backspace(b_unt)
1302              ABI_MALLOC(bprev_col,(prev_nrows))
1303              read(b_unt) bprev_col
1304              backspace(b_unt)
1305 
1306              backspace(c_unt)
1307              ABI_MALLOC(cprev_col,(prev_nrows))
1308              read(c_unt) cprev_col
1309              backspace(c_unt)
1310            end if
1311          end if
1312          !
1313          ! Write the columns owned by sender.
1314          ncol = proc_end(2)-proc_start(2)+1
1315          ist=1
1316          do jj=1,ncol
1317            col_glob = proc_start(2) + jj-1
1318            nrows = col_glob
1319            if (jj==1   )  nrows=col_glob - proc_start(1) + 1
1320            if (jj==ncol) then
1321              nrows=proc_end(1)
1322              if (ncol==1)  nrows=proc_end(1) - proc_start(1) + 1
1323            end if
1324            iend = ist + nrows -1
1325            !write(std_out,*)"Using nrows, ist, iend=",nrows,ist,iend
1326            if (jj==1 .and. prev_nrows>0) then ! join prev_col and this subcolumn.
1327              write(bsh_unt) CMPLX(prev_col,kind=dpc),CMPLX(buffer(ist:iend),kind=dpc)
1328              if (BSp%prep_interp) then
1329                write(a_unt) CMPLX(aprev_col,kind=dpc),CMPLX(abuffer(ist:iend),kind=dpc)
1330                write(b_unt) CMPLX(bprev_col,kind=dpc),CMPLX(bbuffer(ist:iend),kind=dpc)
1331                write(c_unt) CMPLX(cprev_col,kind=dpc),CMPLX(cbuffer(ist:iend),kind=dpc)
1332              end if
1333              prev_nrows = prev_nrows + iend-ist+1
1334            else
1335              write(bsh_unt) CMPLX(buffer(ist:iend),kind=dpc)
1336              if (BSp%prep_interp) then
1337                write(a_unt) CMPLX(abuffer(ist:iend),kind=dpc)
1338                write(b_unt) CMPLX(bbuffer(ist:iend),kind=dpc)
1339                write(c_unt) CMPLX(cbuffer(ist:iend),kind=dpc)
1340              end if
1341              prev_nrows=0
1342            end if
1343            ist=iend+1
1344          end do
1345          if (ncol>1) then ! Reset prev_nrows if a new column has begun.
1346            prev_nrows = proc_end(1)
1347            if (proc_end(1) == proc_end(2)) prev_nrows = 0
1348          end if
1349          if (iend/=hsize_of(sender)) then
1350            write(msg,'(2(a,i0))')" Wraparound error: iend=",iend," my_hsize=",hsize_of(sender)
1351            ABI_ERROR(msg)
1352          end if
1353          ABI_SFREE(prev_col)
1354          if (BSp%prep_interp) then
1355            ABI_SFREE(aprev_col)
1356            ABI_SFREE(bprev_col)
1357            ABI_SFREE(cprev_col)
1358          end if
1359          ABI_FREE(buffer)
1360          if (BSp%prep_interp) then
1361            ABI_FREE(abuffer)
1362            ABI_FREE(bbuffer)
1363            ABI_FREE(cbuffer)
1364          end if
1365        end if ! master
1366        !
1367 100    call xmpi_barrier(comm)
1368      end do ! sender
1369 
1370      call cwtime(cputime,walltime,gflops,"stop")
1371      write(msg,'(2(a,f9.1),a)')" Fortran-IO completed. cpu_time: ",cputime,"[s], walltime: ",walltime," [s]"
1372      call wrtout(std_out, msg, do_flush=.True.)
1373    end if ! use_mpiio
1374    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1375    !
1376    ABI_SFREE(my_bsham)
1377    if (BSp%prep_interp) then
1378      ABI_SFREE(acoeffs)
1379      ABI_SFREE(bcoeffs)
1380      ABI_SFREE(ccoeffs)
1381    end if
1382    ABI_FREE(t_start)
1383    ABI_FREE(t_stop)
1384    ABI_FREE(hsize_of)
1385  end do ! block
1386  !
1387  ! ===========================================
1388  ! === Exchange term for spin_up spin_down ===
1389  ! ===========================================
1390 
1391  if (nsppol==2) then
1392    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1393    block=3
1394    neh1=BSp%nreh(1)
1395    neh2=BSp%nreh(2)
1396    !
1397    ! The oscillators at q=0 are available on each node for both spin.
1398    ! Here the calculation of the block is parallelized over columns.
1399    ABI_MALLOC(col_start,(0:nproc-1))
1400    ABI_MALLOC(col_stop,(0:nproc-1))
1401    call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop)
1402 
1403    my_cols(1) = col_start(my_rank)
1404    my_cols(2) = col_stop (my_rank)
1405    if (my_cols(2)-my_cols(1)<=0) then
1406      ABI_ERROR("One of the processors has zero columns!")
1407    end if
1408 
1409    ABI_MALLOC(ncols_of,(0:nproc-1))
1410    ncols_of=0
1411    do rank=0,nproc-1
1412      if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1413    end do
1414 
1415    ABI_FREE(col_start)
1416    ABI_FREE(col_stop)
1417    !
1418    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1419    ! TODO might used enlarged G-sphere for better convergence.
1420    ! Note that my_kxssp is always written on file when nsppol=2, even when
1421    ! non-local field effects are neglected.
1422    ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1423    my_kxssp=czero
1424 
1425    if (do_exchange_term) then
1426      spin1=1; spin2=2
1427      write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1428      call wrtout(std_out, msg)
1429 
1430      ABI_MALLOC(rhotwg1,(npweps))
1431      ABI_MALLOC(rhotwg2,(npweps))
1432 
1433      ngx = Gsph_x%ng
1434      ABI_MALLOC(vc_sqrt_qbz,(ngx))
1435      !
1436      ! * Get iq_ibz, and symmetries from iq_bz.
1437      iq_bz = iqbz0 ! q = 0 -> iqbz0
1438      call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1439      !
1440      ! * Set up table of |q(BZ)+G|
1441      if (iq_ibz==1) then
1442        do ig=1,ngx
1443          ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1444          vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1445        end do
1446      else
1447         ABI_ERROR("iq_ibz should be 1")
1448      end if
1449 
1450      do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
1451 
1452        if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1453        ene_tp = Bsp%Trans(itp,spin2)%en
1454        ikp_bz = Bsp%Trans(itp,spin2)%k
1455        ivp    = Bsp%Trans(itp,spin2)%v
1456        icp    = Bsp%Trans(itp,spin2)%c
1457 
1458        ikp_ibz = Kmesh%tab (ikp_bz)
1459        isym_kp = Kmesh%tabo(ikp_bz)
1460        itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1461 
1462        if (is_resonant) then
1463          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1464        else ! Code for coupling block.
1465          rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1466        end if
1467        !
1468        ! Multiply by the Coulomb term.
1469         do ig=2,npweps
1470           rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1471         end do
1472 
1473        do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
1474 
1475          ene_t = Bsp%Trans(it,spin1)%en
1476          ik_bz = Bsp%Trans(it,spin1)%k
1477          iv    = Bsp%Trans(it,spin1)%v
1478          ic    = Bsp%Trans(it,spin1)%c
1479 
1480          ik_ibz = Kmesh%tab(ik_bz)
1481          isym_k = Kmesh%tabo(ik_bz)
1482          itim_k = (3-Kmesh%tabi(ik_bz))/2
1483          !if (itim_k==2) CYCLE ! time-reversal or not
1484 
1485          rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1486          !
1487          ! sum over G/=0
1488          ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1489          ctemp = faq * kx_fact * ctemp
1490 
1491          my_kxssp(it,itp) = ctemp
1492        end do !it
1493      end do !itp
1494 
1495      ABI_FREE(rhotwg1)
1496      ABI_FREE(rhotwg2)
1497      ABI_FREE(vc_sqrt_qbz)
1498    end if ! do_exchange_term
1499    call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1500    !
1501    ! =====================================
1502    ! === Write the Hamiltonian on disk ===
1503    ! =====================================
1504    call timab(685,1,tsec) ! exc_build_ham(write_ham)
1505 
1506    if (use_mpiio) then
1507 #ifdef HAVE_MPI_IO
1508      my_ncols=ncols_of(my_rank); old_type=MPI_DOUBLE_COMPLEX
1509      call xmpio_create_fsubarray_2D((/neh1,my_ncols/),(/neh1,my_ncols/),(/1,1/),old_type,hmat_type,my_offpad,mpi_err)
1510      ABI_CHECK_MPI(mpi_err,"fsubarray_2D")
1511      !
1512      ! Each node uses a different offset to skip the header and the blocks written by the other CPUs.
1513      prev_nels=0
1514      prev_ncols=0
1515      if (my_rank>0) then
1516        prev_ncols = SUM(ncols_of(0:my_rank-1))
1517        prev_nels = neh1*prev_ncols
1518      end if
1519      tmp_off = prev_nels*xmpi_bsize_dpc + prev_ncols*2*xmpio_bsize_frm
1520 
1521      my_offset = offset_of_block(block) + tmp_off + my_offpad
1522 
1523      call MPI_FILE_SET_VIEW(mpi_fh, my_offset, MPI_BYTE, hmat_type, 'native', MPI_INFO_NULL, mpi_err)
1524      ABI_CHECK_MPI(mpi_err,"SET_VIEW")
1525 
1526      call MPI_TYPE_FREE(hmat_type,mpi_err)
1527      ABI_CHECK_MPI(mpi_err,"MPI_TYPE_FREE")
1528 
1529      tmp_size = INT(neh1*my_ncols)
1530      call MPI_FILE_WRITE_ALL(mpi_fh, my_kxssp,tmp_size, MPI_DOUBLE_COMPLEX, MPI_STATUS_IGNORE, mpi_err)
1531      ABI_CHECK_MPI(mpi_err,"FILE_WRITE")
1532 
1533      ! It seems that personal calls in make the code stuck
1534      ! Master writes the Fortran record markers.
1535      ABI_MALLOC(bsize_frecord,(neh2))
1536      bsize_frecord = neh1 * xmpi_bsize_dpc
1537      ! ehdr_offset points to the end of the header.
1538      !call xmpio_write_frmarkers(mpi_fh,ehdr_offset,xmpio_collective,neh2,bsize_frecord,mpi_err)
1539      my_offset = offset_of_block(block)
1540      call xmpio_write_frmarkers(mpi_fh,my_offset,xmpio_collective,neh2,bsize_frecord,ierr)
1541      ABI_CHECK(ierr==0,"Error while writing Fortran markers")
1542      ABI_FREE(bsize_frecord)
1543 #else
1544      ABI_BUG("You should not be here")
1545 #endif
1546    else
1547      ! Use FORTRAN IO with sequential access mode.
1548      ! * Each node sends its data to master node.
1549      ! * Columns are distributed according to the rank of the node.
1550      if (my_rank==master) then
1551        do jj=my_cols(1),my_cols(2)
1552          write(bsh_unt) my_kxssp(:,jj)
1553        end do
1554        ABI_FREE(my_kxssp)
1555      end if
1556 
1557      call xmpi_barrier(comm)
1558      !
1559      ! Collect data from the other nodes.
1560      do sender=1,nproc-1
1561        ! If I'm not involved, jump to the end of the loop and wait there (sequential IO? Of course!)
1562        if (all(my_rank /= [sender, master])) goto 200
1563 
1564        if (my_rank==master)  then
1565          ABI_MALLOC(buffer_2d,(neh1,ncols_of(sender)))
1566        end if
1567        call xmpi_exch(my_kxssp,neh1*ncols_of(sender),sender,buffer_2d,master,comm,5,mpi_err)
1568        !
1569        if (my_rank==master) then ! Write the columns owned by sender.
1570          do jj=1,ncols_of(sender)
1571            write(bsh_unt) buffer_2d(:,jj)
1572          end do
1573          ABI_FREE(buffer_2d)
1574        end if ! master
1575        !
1576 200    call xmpi_barrier(comm)
1577      end do ! sender
1578    end if
1579    call timab(685,2,tsec) ! exc_build_ham(write_ham)
1580 
1581    ABI_FREE(ncols_of)
1582    ABI_SFREE(my_kxssp)
1583  end if
1584 
1585  ! Close the file.
1586  if (use_mpiio) then
1587 #ifdef HAVE_MPI_IO
1588    call MPI_FILE_CLOSE(mpi_fh, mpi_err)
1589    ABI_CHECK_MPI(mpi_err,"FILE_CLOSE")
1590    ABI_FREE(offset_of_block)
1591 #endif
1592  end if
1593 
1594  ! master closes the Fortran files.
1595  if (my_rank==master) then
1596    close(bsh_unt)
1597    if (BSp%prep_interp) then
1598      close(a_unt)
1599      close(b_unt)
1600      close(c_unt)
1601    end if
1602  end if
1603 
1604  ! Free memory.
1605  ABI_FREE(igfftg0)
1606  ABI_FREE(ktabr_k)
1607  ABI_FREE(id_tab)
1608  ABI_FREE(ktabr_kp)
1609  ABI_FREE(rhxtwg_vpv)
1610  ABI_FREE(rhxtwg_cpc)
1611  if (BSp%prep_interp) then
1612    ABI_FREE(aa_vpv)
1613    ABI_FREE(bb_vpv1)
1614    ABI_FREE(bb_vpv2)
1615    ABI_FREE(cc_vpv)
1616    ABI_FREE(aa_cpc)
1617    ABI_FREE(bb_cpc1)
1618    ABI_FREE(bb_cpc2)
1619    ABI_FREE(cc_cpc)
1620  end if
1621  ABI_FREE(ur_ckp)
1622  ABI_FREE(ur_vkp)
1623  ABI_FREE(ur_vk)
1624  ABI_FREE(ur_ck)
1625 
1626  ! Deallocation for PAW.
1627  if (Wfd%usepaw==1) then
1628    call pawcprj_free(Cp_vk)
1629    ABI_FREE(Cp_vk)
1630    call pawcprj_free(Cp_ck)
1631    ABI_FREE(Cp_ck)
1632    call pawcprj_free(Cp_ckp)
1633    ABI_FREE(Cp_ckp)
1634    call pawcprj_free(Cp_vkp)
1635    ABI_FREE(Cp_vkp)
1636    call pawcprj_free(Cp_tmp1)
1637    ABI_FREE(Cp_tmp1)
1638    call pawcprj_free(Cp_tmp2)
1639    ABI_FREE(Cp_tmp2)
1640    call pawcprj_free(Cp_tmp3)
1641    ABI_FREE(Cp_tmp3)
1642    call pawcprj_free(Cp_tmp4)
1643    ABI_FREE(Cp_tmp4)
1644  end if
1645 
1646  call xmpi_barrier(comm)
1647 
1648  DBG_EXIT("COLL")
1649 
1650  call timab(680,2,tsec)
1651 
1652 end subroutine exc_build_block

m_exc_build/exc_build_ham [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_ham

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  BS_files<excfiles>=File names internally used in the BS code.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  ktabr(nfftot_osc,BSp%nkbz)=The FFT index of $(R^{-1}(r-\tau))$ where R is symmetry needed to obtains
    the k-points from the irreducible image.  Used to symmetrize u_Sk where S = \transpose R^{-1}
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  W<screen_t>=Data type gathering info and data for W.
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Pawang<pawang_type>=PAW angular mesh and related data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  Wfd<wfdgw_t>=Handler for the wavefunctions.

OUTPUT

  The excitonic Hamiltonian is saved on an external binary file (see below).

SOURCE

2092 subroutine exc_build_ham(BSp,BS_files,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2093 & Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff)
2094 
2095 !Arguments ------------------------------------
2096 !scalars
2097  integer,intent(in) :: nfftot_osc
2098  type(excparam),intent(in) :: BSp
2099  type(excfiles),intent(in) :: BS_files
2100  type(screen_t),intent(inout) :: W
2101  type(kmesh_t),intent(in) :: Kmesh,Qmesh
2102  type(crystal_t),intent(in) :: Cryst
2103  type(vcoul_t),intent(in) :: Vcp
2104  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
2105  type(Pseudopotential_type),intent(in) :: Psps
2106  type(Hdr_type),intent(inout) :: Hdr_bse
2107  type(pawang_type),intent(in) :: Pawang
2108  type(wfdgw_t),target,intent(inout) :: Wfd
2109 !arrays
2110  integer,intent(in) :: ngfft_osc(18)
2111  integer,intent(in) :: ktabr(nfftot_osc,Kmesh%nbz)
2112  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat*Wfd%usepaw)
2113  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2114 
2115 !Local variables ------------------------------
2116 !scalars
2117  logical :: do_resonant,do_coupling
2118  !character(len=500) :: msg
2119 !arrays
2120  real(dp) :: tsec(2)
2121  complex(gwpc),allocatable :: all_mgq0(:,:,:,:,:)
2122 
2123 !************************************************************************
2124 
2125  call timab(670,1,tsec)
2126 
2127  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2128  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2129 
2130  if (BSp%have_complex_ene) then
2131    ABI_ERROR("Complex energies are not supported yet")
2132  end if
2133 
2134  ! Do we have to compute some block?
2135  do_resonant = (BS_files%in_hreso == BSE_NOFILE)
2136  do_coupling = (BS_files%in_hcoup == BSE_NOFILE)
2137 
2138  if (BSp%use_coupling == 0) then
2139    if (.not.do_resonant) then
2140      call wrtout(std_out,"Will skip the calculation of resonant block (will use BSR file)")
2141      goto 100
2142    end if
2143  else
2144    if (.not. do_resonant .and. .not. do_coupling) then
2145      call wrtout(std_out,"Will skip the calculation of both resonant and coupling block (will use BSR and BSC files)")
2146      goto 100
2147    end if
2148  end if
2149 
2150  ! Compute M_{k,q=0}^{b,b}(G) for all k-points in the IBZ and each pair b, b'
2151  ! used for the exchange part and part of the Coulomb term.
2152  call wrtout(std_out," Calculating all matrix elements for q=0 to save CPU time")
2153 
2154  call wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,Psps,Pawtab,Paw_pwff,&
2155 &  Bsp%lomo_spin,Bsp%homo_spin,Bsp%humo_spin,nfftot_osc,ngfft_osc,Bsp%npweps,all_mgq0)
2156 
2157  ! ========================
2158  ! ==== Resonant Block ====
2159  ! ========================
2160  if (do_resonant) then
2161    call timab(672,1,tsec)
2162    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2163 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.TRUE.,BS_files%out_hreso)
2164    call timab(672,2,tsec)
2165  end if
2166 
2167  ! ========================
2168  ! ==== Coupling Block ====
2169  ! ========================
2170  if (do_coupling.and.BSp%use_coupling>0) then
2171    call timab(673,1,tsec)
2172    call exc_build_block(BSp,Cryst,Kmesh,Qmesh,ktabr,Gsph_x,Gsph_c,Vcp,&
2173 &    Wfd,W,Hdr_bse,nfftot_osc,ngfft_osc,Psps,Pawtab,Pawang,Paw_pwff,all_mgq0,.FALSE.,BS_files%out_hcoup)
2174    call timab(673,2,tsec)
2175  end if
2176 
2177  ! Free memory.
2178  ABI_FREE(all_mgq0)
2179 
2180 100 call timab(670,2,tsec)
2181 
2182 end subroutine exc_build_ham

m_exc_build/exc_build_v [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  exc_build_v

FUNCTION

  Calculate and write the excitonic Hamiltonian on an external binary file (Fortran file open
  in random mode) for subsequent treatment in the Bethe-Salpeter code.

INPUTS

  BSp<excparam>=The parameters for the Bethe-Salpeter calculation.
  Cryst<crystal_t>=Info on the crystalline structure.
  Kmesh<kmesh_t>=The list of k-points in the BZ, IBZ and symmetry tables.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=Info on the G-sphere used to describe wavefunctions and W (the largest one is actually stored).
  Gsph_c<gsphere_t>=Info on the G-sphere used to describe the correlation part.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  rhxtwg_q0
  is_resonant
  comm=MPI communicator.

OUTPUT

NOTES

  *) Version for K_V = K_C (q=0), thus KP_V = KP_C
  *) No exchange limit: use DFT energies in case.
  *) Symmetry of H(-k-k') = H*(k k') not used.
  *) Coulomb term can be approximated as diagonal in G.
  *) Valence bands treated from lomo on.
  *) Symmetries of the sub-blocks are used to reduce the number of elements to calculate.

            ____________
           |_(cv)__(vc)_|
   H_exc = |  R      C  |
           | -C*    -R* |

   where C is symmetric and R is Hermitian provided that the QP energies are real.

  For nsppol=1 ==> R = diag-W+2v; C = -W+2v
  since the Hamiltonian can be diagonalized in the spin-singlet basis set thanks to
  the fact that spin triplet does not contribute to the optical limit of epsilon.

  For nsppol=2 ==> R = diag-W+v; C = -W+v
  Now the matrix elements depend on the spin of the transitions but only those
  transitions in which the spin of the electron and of the hole are equal contribute
  to the macroscopic dielectric function. Moreover only the exchange term can connect transitions of different spin.
  When nsppol==2 the transitions are ordered using | (cv up) | (cv dwn) | (vc up) | (vc down) |

  The resonant block is given by:

      |  (v'c' up)       | (v'c' dwn)   |
      -----------------------------------           where v_{-+} = v_{+-}^H when the momentum of the photon is neglected.
      | [diag-W+v]++     |      v+-     | (vc up)   Note that v_{+-} is not Hermitian due to the presence of different spins.
  R = -----------------------------------           Actually it reduces to a Hermitian matrix when the system is not spin polarized.
      |     v-+          | [diag-W+v]-- | (vc dwn)  but in this case one should use nsppol=1.
      -----------------------------------           As a consequence the entire matrix is calculated and stored on file.

  The coupling block is given by:

      |  (c'v' up)   |    (c'v dwn)     |
      -----------------------------------           where v_{-+} = v_{+-}^t when the momentum of the photon is neglected.
      | [-W+v]++     |      v+-         | (vc up)   Also in this case the entire matrix v_{+-} has to be calculated
  C = -----------------------------------           and stored on file.
      |     v-+      |    [-W+v]--      | (vc dwn)
      -----------------------------------

SOURCE

1722 subroutine exc_build_v(spin1,spin2,nsppol,npweps,Bsp,Cryst,Kmesh,Qmesh,Gsph_x,Gsph_c,Vcp,&
1723 &  is_resonant,rhxtwg_q0,nproc,my_rank,t_start,t_stop,my_bsham)
1724 
1725 !Arguments ------------------------------------
1726 !scalars
1727  integer,intent(in) :: spin1,spin2,nsppol,npweps,nproc,my_rank
1728  logical,intent(in) :: is_resonant
1729  type(excparam),intent(in) :: BSp
1730  type(kmesh_t),intent(in) :: Kmesh,Qmesh
1731  type(crystal_t),intent(in) :: Cryst
1732  type(vcoul_t),intent(in) :: Vcp
1733  type(gsphere_t),intent(in) :: Gsph_x,Gsph_c
1734 !arrays
1735  integer(i8b),intent(in) :: t_start(0:nproc-1),t_stop(0:nproc-1)
1736  complex(gwpc),intent(in) :: rhxtwg_q0(npweps,BSp%lomo_min:BSp%humo_max,BSp%lomo_min:BSp%humo_max,Kmesh%nibz,nsppol)
1737  complex(dpc),intent(inout) :: my_bsham(t_start(my_rank):t_stop(my_rank))
1738 
1739 !Local variables ------------------------------
1740 !scalars
1741  integer :: ISg,ngx,ik_bz,ikp_bz,dim_rtwg
1742  integer :: neh1,neh2,ig,nblocks
1743  integer :: ik_ibz,itim_k,ikp_ibz,itim_kp,isym_k,isym_kp
1744  integer :: iq_bz,iq_ibz,isym_q,itim_q,iqbz0,rank
1745  integer :: iv,ivp,ic,icp
1746  integer :: block
1747  integer(i8b) :: tot_nels,ir,it,itp
1748  real(dp) :: faq,kx_fact
1749  complex(spc) :: ctemp
1750  character(len=500) :: msg
1751 !arrays
1752  integer :: bidx(2,4),spin_ids(2,3)
1753  integer(i8b) :: nels_block(3)
1754  integer :: my_cols(2),my_rows(2) !,proc_end(2),proc_start(2)
1755  integer,allocatable :: ncols_of(:)
1756  integer,allocatable :: col_start(:),col_stop(:)
1757  real(dp) :: qbz(3),tsec(2) !kbz(3),kpbz(3),
1758  complex(dpc),allocatable :: my_kxssp(:,:)
1759  complex(gwpc),allocatable :: vc_sqrt_qbz(:),rhotwg1(:),rhotwg2(:)
1760 
1761 !************************************************************************
1762 
1763  DBG_ENTER("COLL")
1764 
1765  write(msg,'(a,2i2,a)')" Calculating exchange term for (spin1,spin2) ",spin1,spin2," ..."
1766  call wrtout(std_out, msg)
1767 
1768  ! Basic constants.
1769  dim_rtwg=1; faq = one/(Cryst%ucvol*Kmesh%nbz)
1770 
1771  ! Identify the index of q==0
1772  iqbz0=0
1773  do iq_bz=1,Qmesh%nbz
1774    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
1775  end do
1776  ABI_CHECK(iqbz0/=0,"q=0 not found")
1777  !
1778  ! Treat the spin polarization.
1779  spin_ids(:,1) = (/1,1/)
1780  spin_ids(:,2) = (/2,2/)
1781  spin_ids(:,3) = (/1,2/)
1782 
1783  nblocks=1
1784  kx_fact=two
1785  nels_block(:)=0
1786  nels_block(1)=BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2
1787  tot_nels=nels_block(1)
1788 
1789  if (nsppol==2) then
1790    nblocks=3
1791    kx_fact=one
1792    nels_block(1) = BSp%nreh(1)*(BSp%nreh(1)+1_i8b)/2   ! Only the upper triangle for block 1 and 2
1793    nels_block(2) = BSp%nreh(2)*(BSp%nreh(2)+1_i8b)/2
1794    nels_block(3) = BSp%nreh(1)*BSp%nreh(2)*1_i8b       ! Note: Block 3 does not have symmetries.
1795    tot_nels= SUM(nels_block)
1796  end if
1797  !
1798  ! Distribute the calculation of the matrix elements among the nodes.
1799  ! * tstart and t_stop give the initial and final transition index treated by each node.
1800  ! * my_hsize is the number of transitions treated by this processor
1801  ! * my_cols(1:2) gives the initial and final column treated by this node.
1802  !
1803  do block=1,nsppol
1804    !
1805    ! Indices used to loop over bands.
1806    ! bidx contains the starting and final indices used to loop over bands.
1807    !
1808    !      (b3,b4)
1809    !         |... ...|
1810    ! (b1,b2) |... ...|
1811    !
1812    ! Resonant matrix is given by
1813    !      (v',c')
1814    !       |... ...|
1815    ! (v,c) |... ...|
1816    !
1817    ! Coupling matrix is given by
1818    !       (c',v')
1819    !       |... ...|
1820    ! (v,c) |... ...|
1821 
1822    if (is_resonant) then
1823      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1824      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1825      bidx(:,3) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b3
1826      bidx(:,4) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b4
1827    else
1828      bidx(:,1) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b1
1829      bidx(:,2) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b2
1830      bidx(:,3) = [BSp%lumo_spin(block),BSp%humo_spin(block)] ! range for b3
1831      bidx(:,4) = [BSp%lomo_spin(block),BSp%homo_spin(block)] ! range for b4
1832    end if
1833 
1834    !spin1 = spin_ids(1,block)
1835    !spin2 = spin_ids(2,block)
1836 
1837    my_cols=0
1838    do itp=1,Bsp%nreh(block)
1839      do it=1,itp
1840        ir = it + itp*(itp-1_i8b)/2
1841        if (ir==t_start(my_rank)) then
1842          my_rows(1) = it
1843          my_cols(1) = itp
1844        end if
1845        if (ir==t_stop(my_rank)) then
1846          my_rows(2) = it
1847          my_cols(2) = itp
1848        end if
1849      end do
1850    end do
1851 
1852    ! Allocate big (scalable) buffer to store the BS matrix on this node.
1853    !ABI_MALLOC(my_bsham,(t_start(my_rank):t_stop(my_rank)))
1854    !
1855    ! =====================
1856    ! === Exchange term ===
1857    ! =====================
1858    ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1859    ! TODO might used enlarged G-sphere for better convergence.
1860 !if (do_exchange_term) then
1861    call timab(683,1,tsec) ! exc_build_ham(exchange)
1862 
1863    ABI_MALLOC(rhotwg1,(npweps))
1864    ABI_MALLOC(rhotwg2,(npweps))
1865 
1866    ngx = Gsph_x%ng
1867    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1868 
1869    ! * Get iq_ibz, and symmetries from iq_bz.
1870    iq_bz = iqbz0 ! q = 0 -> iqbz0
1871    call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1872 
1873    ! * Set up table of |q(BZ)+G|
1874    if (iq_ibz==1) then
1875      do ig=1,ngx
1876        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1877        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1878      end do
1879    else
1880       ABI_ERROR("iq_ibz should be 1")
1881    end if
1882 
1883    do itp=1,BSp%nreh(block) ! Loop over transition tp = (kp,vp,cp,spin2)
1884 
1885      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
1886      ikp_bz = Bsp%Trans(itp,spin2)%k
1887      ivp    = Bsp%Trans(itp,spin2)%v
1888      icp    = Bsp%Trans(itp,spin2)%c
1889 
1890      ikp_ibz = Kmesh%tab (ikp_bz)
1891      isym_kp = Kmesh%tabo(ikp_bz)
1892      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
1893 
1894      if (is_resonant) then
1895        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
1896      else ! Code for coupling block.
1897        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
1898      end if
1899      !
1900      ! Multiply by the Coulomb term.
1901       do ig=2,npweps
1902         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
1903       end do
1904 
1905      do it=1,itp ! Loop over transition t = (k,v,c,spin1)
1906        ir = it + itp*(itp-1_i8b)/2
1907        if (ir<t_start(my_rank) .or. ir>t_stop(my_rank)) CYCLE
1908 
1909        ik_bz   = Bsp%Trans(it,spin1)%k
1910        iv      = Bsp%Trans(it,spin1)%v
1911        ic      = Bsp%Trans(it,spin1)%c
1912 
1913        ik_ibz = Kmesh%tab(ik_bz)
1914        isym_k = Kmesh%tabo(ik_bz)
1915        itim_k = (3-Kmesh%tabi(ik_bz))/2
1916        !if (itim_k==2) CYCLE ! time-reversal or not
1917 
1918        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
1919        !
1920        ! sum over G/=0
1921        ctemp = xdotc(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
1922        ctemp = faq * kx_fact * ctemp
1923 
1924        ! exchange term is non divergent !
1925        !if (BSp%prep_interp) then
1926        !  ccoeffs(ir) = ccoeffs(ir) + ctemp
1927        !end if
1928 
1929        my_bsham(ir) = my_bsham(ir) + ctemp
1930      end do !it
1931    end do !itp
1932 
1933    ABI_FREE(rhotwg1)
1934    ABI_FREE(rhotwg2)
1935    ABI_FREE(vc_sqrt_qbz)
1936 
1937    call timab(683,2,tsec) ! exc_build_ham(exchange)
1938 !end if ! do_exchange_term
1939  end do ! block
1940 
1941  !
1942  ! ===========================================
1943  ! === Exchange term for spin_up spin_down ===
1944  ! ===========================================
1945 
1946 if (nsppol==2) then
1947  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
1948  block=3
1949  neh1=BSp%nreh(1)
1950  neh2=BSp%nreh(2)
1951  !
1952  ! The oscillators at q=0 are available on each node for both spin.
1953  ! Here the calculation of the block is parallelized over columns.
1954  ABI_MALLOC(col_start,(0:nproc-1))
1955  ABI_MALLOC(col_stop,(0:nproc-1))
1956  call xmpi_split_work2_i4b(neh2,nproc,col_start,col_stop) !check this but it should be OK.
1957 
1958  my_cols(1) = col_start(my_rank)
1959  my_cols(2) = col_stop (my_rank)
1960  if (my_cols(2)-my_cols(1)<=0) then
1961    ABI_ERROR("One of the processors has zero columns!")
1962  end if
1963 
1964  ABI_MALLOC(ncols_of,(0:nproc-1))
1965  ncols_of=0
1966  do rank=0,nproc-1
1967    if (col_stop(rank)>=col_start(rank)) ncols_of(rank) = col_stop(rank)-col_start(rank)+1
1968  end do
1969 
1970  ABI_FREE(col_start)
1971  ABI_FREE(col_stop)
1972  !
1973  ! TODO might add treatment of <psi|q+G|psi> for q+G -> 0
1974  ! TODO might used enlarged G-sphere for better convergence.
1975  ! Note that my_kxssp is always written on file when nsppol=2, even when
1976  ! non-local field effects are neglected.
1977  ABI_MALLOC(my_kxssp,(neh1,my_cols(1):my_cols(2)))
1978  my_kxssp=czero
1979 
1980  !if (do_exchange_term) then
1981    !spin1=1; spin2=2
1982    ABI_MALLOC(rhotwg1,(npweps))
1983    ABI_MALLOC(rhotwg2,(npweps))
1984 
1985    ngx = Gsph_x%ng
1986    ABI_MALLOC(vc_sqrt_qbz,(ngx))
1987    !
1988    ! * Get iq_ibz, and symmetries from iq_bz.
1989    iq_bz = iqbz0 ! q = 0 -> iqbz0
1990    call qmesh%get_BZ_item(iq_bz,qbz,iq_ibz,isym_q,itim_q)
1991    !
1992    ! * Set up table of |q(BZ)+G|
1993    if (iq_ibz==1) then
1994      do ig=1,ngx
1995        ISg = Gsph_x%rottb(ig,itim_q,isym_q)
1996        vc_sqrt_qbz(ISg)=Vcp%vcqlwl_sqrt(ig,1)
1997      end do
1998    else
1999       ABI_ERROR("iq_ibz should be 1")
2000    end if
2001 
2002    do itp=1,neh2 ! Loop over transition tp = (kp,vp,cp,spin2)
2003 
2004      if (itp<my_cols(1) .or. itp>my_cols(2)) CYCLE ! I dont have this column.
2005      ikp_bz = Bsp%Trans(itp,spin2)%k
2006      ivp    = Bsp%Trans(itp,spin2)%v
2007      icp    = Bsp%Trans(itp,spin2)%c
2008 
2009      ikp_ibz = Kmesh%tab (ikp_bz)
2010      isym_kp = Kmesh%tabo(ikp_bz)
2011      itim_kp = (3-Kmesh%tabi(ikp_bz))/2
2012 
2013      if (is_resonant) then
2014        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,ivp,icp,ikp_ibz,spin2),Gsph_c)
2015      else ! Code for coupling block.
2016        rhotwg2(:) = sym_rhotwgq0(itim_kp,isym_kp,dim_rtwg,npweps,rhxtwg_q0(:,icp,ivp,ikp_ibz,spin2),Gsph_c)
2017      end if
2018      !
2019      ! Multiply by the Coulomb term.
2020       do ig=2,npweps
2021         rhotwg2(ig) = rhotwg2(ig) * vc_sqrt_qbz(ig) * vc_sqrt_qbz(ig)
2022       end do
2023 
2024      do it=1,neh1 ! Loop over transition t = (k,v,c,spin1) FULL matrix.
2025        ik_bz = Bsp%Trans(it,spin1)%k
2026        iv    = Bsp%Trans(it,spin1)%v
2027        ic    = Bsp%Trans(it,spin1)%c
2028 
2029        ik_ibz = Kmesh%tab(ik_bz)
2030        isym_k = Kmesh%tabo(ik_bz)
2031        itim_k = (3-Kmesh%tabi(ik_bz))/2
2032        !if (itim_k==2) CYCLE ! time-reversal or not
2033 
2034        rhotwg1(:) = sym_rhotwgq0(itim_k,isym_k,dim_rtwg,npweps,rhxtwg_q0(:,iv,ic,ik_ibz,spin1),Gsph_c)
2035        !
2036        ! sum over G/=0
2037        ctemp = XDOTC(npweps-1,rhotwg1(2:),1,rhotwg2(2:),1)
2038        ctemp = faq * kx_fact * ctemp
2039 
2040        my_kxssp(it,itp) = ctemp
2041      end do !it
2042    end do !itp
2043 
2044    ABI_FREE(rhotwg1)
2045    ABI_FREE(rhotwg2)
2046    ABI_FREE(vc_sqrt_qbz)
2047  !end if ! do_exchange_term
2048  call timab(686,2,tsec) ! exc_build_ham(exch.spin)
2049 
2050  ABI_FREE(ncols_of)
2051  ABI_SFREE(my_kxssp)
2052  end if
2053 
2054  DBG_EXIT("COLL")
2055 
2056 end subroutine exc_build_v

m_exc_build/wfd_all_mgq0 [ Functions ]

[ Top ] [ m_exc_build ] [ Functions ]

NAME

  wfd_all_mgq0

FUNCTION

INPUTS

  Wfd<wfdgw_t>=Handler for the wavefunctions.
  Cryst<crystal_t>=Info on the crystalline structure.
  Qmesh<kmesh_t>=The list of q-points for epsilon^{-1} and related symmetry tables.
  Gsph_x<gsphere_t>=G-sphere with the G-vectors in mgq0.
  Vcp<vcoul_t>=The Coulomb interaction in reciprocal space. A cutoff can be used
  Psps<Pseudopotential_type>=Variables related to pseudopotentials
  Pawtab(Psps%ntypat)<pawtab_type>=PAW tabulated starting data.
  Paw_pwff(Cryst%ntypat*Wfd%usepaw)<pawpwff_t>=Form factor used to calculate the onsite mat. elements of a plane wave.
  lomo_spin(Wfd%nsppol)=Lowest occupied band for each spin
  homo_spin(Wfd%nsppol)=Highest occupied band for each spin
  humo_spin(Wfd%nsppol)=Highest unoccupied band for each spin
  nfftot_osc=Total Number of FFT points used for the oscillator matrix elements.
  ngfft_osc(18)=Info on the FFT algorithm used to calculate the oscillator matrix elements.
  npweps=Number of G-vectors in mgq0.

OUTPUT

   mgq0(npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol)
     Allocated here and filled with the matrix elements on each node.

SOURCE

2213 subroutine wfd_all_mgq0(Wfd,Cryst,Qmesh,Gsph_x,Vcp,&
2214 & Psps,Pawtab,Paw_pwff,lomo_spin,homo_spin,humo_spin,nfftot_osc,ngfft_osc,npweps,mgq0)
2215 
2216 !Arguments ------------------------------------
2217 !scalars
2218  integer,intent(in) :: nfftot_osc,npweps
2219  type(kmesh_t),intent(in) :: Qmesh
2220  type(crystal_t),intent(in) :: Cryst
2221  type(vcoul_t),intent(in) :: Vcp
2222  type(gsphere_t),intent(in) :: Gsph_x
2223  type(Pseudopotential_type),intent(in) :: Psps
2224  type(wfdgw_t),target,intent(inout) :: Wfd
2225 !arrays
2226  integer,intent(in) :: lomo_spin(Wfd%nsppol),homo_spin(Wfd%nsppol),humo_spin(Wfd%nsppol)
2227  integer,intent(in) :: ngfft_osc(18)
2228  complex(gwpc),allocatable,intent(out) :: mgq0(:,:,:,:,:)
2229  type(Pawtab_type),intent(in) :: Pawtab(Psps%ntypat)
2230  type(pawpwff_t),intent(in) :: Paw_pwff(Psps%ntypat*Wfd%usepaw)
2231 
2232 !Local variables ------------------------------
2233 !scalars
2234  integer,parameter :: map2sphere1=1,dim_rtwg1=1,ndat1=1
2235  integer :: use_padfft,mgfft_osc,fftalga_osc,ii
2236  integer :: ik_ibz,itim_k,isym_k,iq_bz,iq_ibz,isym_q,itim_q,iqbz0
2237  integer :: ierr,iv,ic,spin,lomo_min,humo_max !,inv_ipw,ipw
2238  real(dp) :: cpu,wall,gflops !q0vol,fcc_const
2239  complex(dpc) :: ph_mkt
2240  character(len=500) :: msg
2241  type(wave_t),pointer :: wave_v, wave_c
2242 !arrays
2243  integer,allocatable :: igfftg0(:),task_distrib(:,:,:,:)
2244  integer,allocatable :: gbound(:,:),id_tab(:)
2245  real(dp) :: qbz(3),spinrot_k(4),tsec(2)
2246  complex(gwpc),allocatable :: rhotwg1(:)
2247  complex(gwpc),target,allocatable :: ur1(:),ur2(:)
2248  complex(gwpc),ABI_CONTIGUOUS pointer :: ptr_ur1(:),ptr_ur2(:)
2249  type(pawcprj_type),allocatable :: Cp1(:,:),Cp2(:,:)
2250  type(pawpwij_t),allocatable :: Pwij_q0(:)
2251 
2252 !************************************************************************
2253 
2254  call timab(671,1,tsec)
2255 
2256  ABI_CHECK(Wfd%nspinor==1,"nspinor==2 not coded")
2257  ABI_CHECK(nfftot_osc==PRODUCT(ngfft_osc(1:3)),"mismatch in FFT size")
2258 
2259  lomo_min = MINVAL(lomo_spin); humo_max = MAXVAL(humo_spin)
2260 
2261  if ( ANY(ngfft_osc(1:3) /= Wfd%ngfft(1:3)) ) call wfd%change_ngfft(Cryst,Psps,ngfft_osc)
2262 
2263  mgfft_osc   = MAXVAL(ngfft_osc(1:3))
2264  fftalga_osc = ngfft_osc(7)/100 !; fftalgc_osc=MOD(ngfft_osc(7),10)
2265 
2266  ! (temporary) Table used for the wavefunction in the IBZ.
2267  ABI_MALLOC(id_tab, (Wfd%nfft))
2268  id_tab = (/(ii, ii=1,Wfd%nfft)/)
2269 
2270  ! Analytic integration of 4pi/q^2 over the volume element:
2271  ! $4pi/V \int_V d^3q 1/q^2 =4pi bz_geometric_factor V^(-2/3)$
2272  ! i_sz=4*pi*bz_geometry_factor*q0_vol**(-two_thirds) where q0_vol= V_BZ/N_k
2273  ! bz_geometry_factor: sphere=7.79, fcc=7.44, sc=6.188, bcc=6.946, wz=5.255
2274  ! (see gwa.pdf, appendix A.4)
2275 
2276  ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2277  ! analytic integration of q**-2 over the volume element:
2278  ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2279 
2280  ! q0vol = (8.0*pi**3) / (Cryst%ucvol*Kmesh%nbz)
2281  ! fcc_const = SQRT(7.44*q0vol**(-2.0/3.0))
2282  ! rtw = (6.0*pi**2/(Cryst%ucvol*Kmesh%nkbz))**(1./3.)
2283  ! Average of (q+q')**-2 integration for head of Coulomb matrix
2284  ! INTRTW(QL) = (2*pi*rtw + pi*(rtw**2/QL-QL)*LOG((QL+rtw)/(QL-rtw)))
2285  ! &              * (Cryst%ucvol*Kmesh%nbz)/(2*pi)**3. * QL*QL
2286 
2287  if (Wfd%usepaw==1) then
2288    ABI_MALLOC(Cp1,(Wfd%natom,Wfd%nspinor))
2289    call pawcprj_alloc(Cp1,0,Wfd%nlmn_atm)
2290    ABI_MALLOC(Cp2,(Wfd%natom,Wfd%nspinor))
2291    call pawcprj_alloc(Cp2,0,Wfd%nlmn_atm)
2292  end if
2293 
2294  ABI_MALLOC(ur1,(nfftot_osc*Wfd%nspinor))
2295  ABI_MALLOC(ur2,(nfftot_osc*Wfd%nspinor))
2296 
2297  ! Identify q==0
2298  iqbz0=0
2299  do iq_bz=1,Qmesh%nbz
2300    if (ALL(ABS(Qmesh%bz(:,iq_bz))<tol3)) iqbz0 = iq_bz
2301  end do
2302  ABI_CHECK(iqbz0/=0,"q=0 not found in q-point list!")
2303 
2304  ! * Get iq_ibz, and symmetries from iqbz0.
2305  call qmesh%get_BZ_item(iqbz0,qbz,iq_ibz,isym_q,itim_q)
2306 
2307  if (Wfd%usepaw==1) then ! Prepare onsite contributions at q==0
2308    ABI_MALLOC(Pwij_q0,(Cryst%ntypat))
2309    call pawpwij_init(Pwij_q0,npweps,Qmesh%bz(:,iqbz0),Gsph_x%gvec,Cryst%rprimd,Psps,Pawtab,Paw_pwff)
2310  end if
2311  !
2312  ! Tables for the FFT of the oscillators.
2313  !  a) FFT index of the G sphere (only vertical transitions, unlike cchi0, no need to shift the sphere).
2314  !  b) gbound table for the zero-padded FFT performed in rhotwg.
2315  ABI_MALLOC(igfftg0,(Gsph_x%ng))
2316  ABI_MALLOC(gbound,(2*mgfft_osc+8,2))
2317  call Gsph_x%fft_tabs((/0,0,0/),mgfft_osc,ngfft_osc,use_padfft,gbound,igfftg0)
2318  if ( ANY(fftalga_osc == (/2,4/)) ) use_padfft=0 ! Pad-FFT is not coded in rho_tw_g
2319 #ifdef FC_IBM
2320  ! XLF does not deserve this optimization (problem with [v67mbpt][t03])
2321  use_padfft = 0
2322 #endif
2323  if (use_padfft==0) then
2324    ABI_FREE(gbound)
2325    ABI_MALLOC(gbound,(2*mgfft_osc+8,2*use_padfft))
2326  end if
2327 
2328  ABI_MALLOC(rhotwg1,(npweps))
2329 
2330  ABI_MALLOC_OR_DIE(mgq0, (npweps,lomo_min:humo_max,lomo_min:humo_max,Wfd%nkibz,Wfd%nsppol), ierr)
2331  mgq0 = czero
2332 
2333  call cwtime(cpu,wall,gflops,"start")
2334 
2335  do spin=1,Wfd%nsppol
2336    ! Distribute the calculation of the matrix elements.
2337    ! processors have the entire set of wavefunctions hence we divide the workload
2338    ! without checking if the pair of states is available. Last dimension is fake.
2339    ABI_MALLOC(task_distrib,(lomo_spin(spin):humo_spin(spin),lomo_spin(spin):humo_spin(spin),Wfd%nkibz,1))
2340    call xmpi_distab(Wfd%nproc,task_distrib)
2341 
2342    ! loop over the k-points in IBZ
2343    do ik_ibz=1,Wfd%nkibz
2344      if ( ALL(task_distrib(:,:,ik_ibz,1)/= Wfd%my_rank) ) CYCLE
2345 
2346      ! Don't need to symmetrize the wavefunctions.
2347      itim_k=1; isym_k=1; ph_mkt=cone; spinrot_k=Cryst%spinrot(:,isym_k)
2348 
2349      do iv=lomo_spin(spin),humo_spin(spin) ! Loop over band V
2350        if ( ALL(task_distrib(:,iv,ik_ibz,1)/=Wfd%my_rank) ) CYCLE
2351 
2352        ABI_CHECK(wfd%get_wave_ptr(iv, ik_ibz, spin, wave_v, msg) == 0, msg)
2353 
2354        if (wave_v%has_ur == WFD_STORED) then
2355          ptr_ur1 =>  wave_v%ur
2356        else
2357          call wfd%get_ur(iv,ik_ibz,spin,ur1)
2358          ptr_ur1 =>  ur1
2359        end if
2360 
2361        if (Wfd%usepaw==1) call wfd%get_cprj(iv,ik_ibz,spin,Cryst,Cp1,sorted=.FALSE.)
2362 
2363        ! Loop over band C
2364        do ic=lomo_spin(spin),humo_spin(spin)
2365          if ( task_distrib(ic,iv,ik_ibz,1)/=Wfd%my_rank ) CYCLE
2366 
2367          ABI_CHECK(wfd%get_wave_ptr(ic, ik_ibz, spin, wave_c, msg) == 0, msg)
2368 
2369          if (wave_c%has_ur == WFD_STORED) then
2370            ptr_ur2 =>  wave_c%ur
2371          else
2372            call wfd%get_ur(ic,ik_ibz,spin,ur2)
2373            ptr_ur2 =>  ur2
2374          end if
2375 
2376          if (Wfd%usepaw==1) call wfd%get_cprj(ic,ik_ibz,spin,Cryst,Cp2,sorted=.FALSE.)
2377 
2378          call rho_tw_g(Wfd%nspinor,npweps,nfftot_osc,ndat1,ngfft_osc,map2sphere1,use_padfft,igfftg0,gbound,&
2379            ptr_ur1,1,id_tab,ph_mkt,spinrot_k,&
2380            ptr_ur2,1,id_tab,ph_mkt,spinrot_k,&
2381            dim_rtwg1,rhotwg1)
2382 
2383          if (Wfd%usepaw==1) then
2384            ! Add PAW onsite contribution.
2385            call paw_rho_tw_g(cryst,Pwij_q0,npweps,dim_rtwg1,Wfd%nspinor,Gsph_x%gvec,Cp1,Cp2,rhotwg1)
2386          end if
2387 
2388          ! If q=0 treat Exchange and Coulomb-term independently
2389          if (iv <= homo_spin(spin) .and. ic <= homo_spin(spin) .or. &
2390              iv >  homo_spin(spin) .and. ic >  homo_spin(spin)) then
2391 
2392            if (iv/=ic) then !COULOMB term: C/=V: ignore them
2393              rhotwg1(1) = czero_gw
2394            else
2395              ! If q=0 and C=V then set up rho-twiddle(G=0) to reflect an
2396              ! analytic integration of q**-2 over the volume element:
2397              ! <q**-2> = 7.44 V**(-2/3)   (for fcc cell)
2398              !rhotwg1(1) = fcc_const * qpg(1,iqbz0)
2399              rhotwg1(1) = SQRT(GWPC_CMPLX(Vcp%i_sz,zero)) / Vcp%vcqlwl_sqrt(1,1)
2400              !if (vcut) rhotwg1(1) = 1.0
2401            end if
2402 
2403          else
2404            ! At present this term is set to zero
2405            ! EXCHANGE term: limit value.
2406            ! Set up rho-twiddle(G=0) using small vector q instead of zero and k.p perturbation theory (see notes)
2407            rhotwg1(1) = czero_gw
2408          end if
2409 
2410          mgq0(:,iv,ic,ik_ibz,spin) = rhotwg1(:)
2411        end do !ic
2412      end do !iv
2413    end do !ik_ibz
2414 
2415    ABI_FREE(task_distrib)
2416  end do !spin
2417 
2418  ! TODO: One can speedup the calculation by computing the upper triangle of the
2419  ! matrix in (b,b') space and then take advantage of the symmetry property:
2420  !
2421  !   M_{k,0}{{bb'}(G)^* = M{k,0}{b'b'}(-G)
2422 
2423 #if 0
2424  !!!! $OMP PARALLEL DO COLLAPSE(3) PRIVATE(inv_ipw)
2425  do spin=1,Wfd%nsppol
2426    do ik_ibz=1,Wfd%nkibz
2427      do iv=lomo_spin(spin),humo_spin(spin)
2428        do ic=1,iv-1
2429          do ipw=1,npweps
2430            inv_ipw = gsph_x%g2mg(ipw)
2431            mgq0(inv_ipw,ic,iv,ik_ibz,spin) = mgq0(ipw,iv,ic,ik_ibz,spin)
2432          end do
2433        end do
2434      end do
2435    end do
2436  end do
2437 #endif
2438  !
2439  ! Gather matrix elements on each node.
2440  call xmpi_sum(mgq0,Wfd%comm,ierr)
2441 
2442  call cwtime(cpu,wall,gflops,"stop")
2443  write(msg,'(2(a,f9.6))')"cpu_time = ",cpu,", wall_time = ",wall
2444  call wrtout(std_out, msg)
2445 
2446  ABI_FREE(rhotwg1)
2447  ABI_FREE(igfftg0)
2448  ABI_FREE(gbound)
2449  ABI_FREE(ur1)
2450  ABI_FREE(ur2)
2451  ABI_FREE(id_tab)
2452 
2453  if (Wfd%usepaw==1) then
2454    ! Deallocation for PAW.
2455    call pawpwij_free(Pwij_q0)
2456    ABI_FREE(Pwij_q0)
2457    call pawcprj_free(Cp1)
2458    ABI_FREE(Cp1)
2459    call pawcprj_free(Cp2)
2460    ABI_FREE(Cp2)
2461  end if
2462 
2463  call timab(671,2,tsec)
2464 
2465 end subroutine wfd_all_mgq0