TABLE OF CONTENTS


ABINIT/density_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 density_rec

FUNCTION

 This routine computes the density using  the coefficients corresponding to
 continued fraction at a point from a fixed potential.

INPUTS

  coordx, coordy, coordz=coordonnees of the computed point
  an, bn2 : coefficient given by density_rec. Input if get_rec_coef=0, output else
  nrec=order of density_rec
  fermie=fermi energy (Hartree)
  tsmear=temperature (Hartree)
  rtrotter=real trotter parameter
  tol=tolerance criteria for stopping density_rec
  inf_ucvol=infinitesimal unit cell volume
  dim_trott = max(0,2*trotter-1)

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator

SIDE EFFECTS

NOTES

  at this time :
       - exppot should be replaced by ?
       - coord should be replaced by ?
       - need a rectangular box (rmet diagonal matrix)

SOURCE

1543 subroutine density_rec(an,bn2,rho_out,nrec, &
1544 &                     fermie,tsmear,rtrotter, &
1545 &                     dim_trott,tol,inf_ucvol)
1546 
1547 !Arguments -------------------------------
1548 !scalars
1549  integer,intent(in) :: nrec
1550  integer,intent(in) :: dim_trott
1551  real(dp),intent(in) :: fermie,tol,tsmear,inf_ucvol,rtrotter
1552  real(dp), intent(out) :: rho_out
1553 !arrays
1554  real(dp),intent(in) :: an(0:nrec),bn2(0:nrec)
1555 !Local variables-------------------------------
1556 !not used, debugging purpose only
1557 !for debugging purpose, detailled printing only once for density and ekin
1558 !scalars
1559  integer, parameter :: minrec = 3
1560  integer  :: irec
1561  real(dp) :: beta,mult,prod_b2,error,errold
1562  real(dp) :: pi_on_rtrotter,twortrotter,exp1,exp2
1563  complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0
1564 ! character(len=500) :: msg
1565 !arrays
1566  real(dp) :: tsec(2)
1567  complex(dpc) :: acc_rho(0:nrec)
1568  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
1569  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
1570 !**************************************************************************
1571 
1572  call timab(605,1,tsec)
1573 
1574 !##############################################################
1575 !--Initialisation of metrics
1576  mult = two/inf_ucvol   !non-spined system
1577  beta = one/tsmear
1578 
1579 !--Variables for optimisation
1580  pi_on_rtrotter = pi/rtrotter
1581  twortrotter = two*rtrotter
1582  exp1 = exp((beta*fermie)/(rtrotter))
1583  exp2 = exp(beta*fermie/(twortrotter))
1584  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
1585  coeef_mu = cmplx(one/exp2,zero,dp)
1586 
1587  N = czero;  D = cone
1588  facrec0 = cone
1589  Nold = czero; Dold = czero
1590 !--Initialisation of accumulated density
1591  acc_rho = czero
1592 !--Initialisation of estimated error
1593  prod_b2 = twortrotter/exp1
1594  errold = zero
1595 
1596 
1597 !##############################################################
1598 !--Main loop
1599  maindo : do irec = 0, nrec
1600 
1601 !  ######################################################
1602 !  --Density computation
1603 !  !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai)
1604 !  !and for c =exp(-beta*fermie/(two*rtrotter)
1605 
1606    call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,&
1607 &   facrec0,coeef_mu,exp1,&
1608 &   an(irec),bn2(irec),&
1609 &   N,D,Nold,Dold)
1610 
1611    if(irec/=nrec .and. irec>=minrec)then
1612      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit maindo
1613    end if
1614    errold = mult*error
1615  end do maindo
1616 !--Accumulated density
1617  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
1618 
1619  call timab(605,2,tsec)
1620 
1621  end subroutine density_rec

ABINIT/entropyrec [ Functions ]

[ Top ] [ Functions ]

NAME

 entropyrec

FUNCTION

 This routine computes the local part of the entropy at a point using a path integral,
 in the recursion method.

  an, bn2 : coefficient given by the recursion.
  nrec=order of recursion
  trotter=trotter parameter
  multce=a multiplicator for computing entropy ; 2 for non-spin-polarized system
  debug_rec=debug variable
  n_pt_integ=number of points of integration for the path integral
  xmax =max point of integration on the real axis

OUTPUT

  ent_out=entropy at the point
  ent_out1,ent_out2,ent_out3,ent_out4=debug entropy at the point

NOTES

  at this time :
       - multce should be not used
       - the routine should be integraly rewrited and use the routine recursion.
       - only modified for p /= 0

SOURCE

 947 subroutine entropyrec(an,bn2,nrec,trotter,ent_out,multce,debug_rec, &
 948 &                     n_pt_integ,xmax,&
 949 &                     ent_out1,ent_out2,ent_out3,ent_out4)
 950 
 951 !Arguments -------------------------------
 952 !scalars
 953  integer,intent(in) :: n_pt_integ,nrec,trotter
 954  logical,intent(in) :: debug_rec
 955  real(dp), intent(in) :: multce,xmax
 956  real(dp),intent(out) :: ent_out,ent_out1,ent_out2,ent_out3,ent_out4
 957 !arrays
 958  real(dp),intent(in) :: an(0:nrec),bn2(0:nrec)
 959 
 960 !Local variables-------------------------------
 961 !scalars
 962  integer, parameter :: level = 7
 963  integer, save :: first_en = 1
 964  integer :: ii,kk,n_pt_integ_path2,n_pt_integ_path3
 965  real(dp) :: arg,epsilo,step,twotrotter,xmin,dr_step
 966  complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ent_acc,ent_acc1,ent_acc2
 967  complex(dpc) :: ent_acc3,ent_acc4
 968  complex(dpc) :: funczero,z_path,zj
 969  complex(dpc) ::delta_calc
 970  character(len=500) :: msg
 971 !arrays
 972  real(dp) :: tsec(2)
 973  real(dp) :: iif,factor
 974 
 975 
 976 ! *************************************************************************
 977 
 978 
 979  call timab(610,1,tsec)
 980 
 981 !structured debugging if debug_rec=T : print detailled result the first time we enter entropyrec
 982 
 983  if(debug_rec .and. first_en==1)then
 984    write(msg,'(a)')' '
 985    call wrtout(std_out,msg,'PERS')
 986    write(msg,'(a)')' entropyrec : enter '
 987    call wrtout(std_out,msg,'PERS')
 988    write(msg,'(a,i6)')'n_pt_integ ' , n_pt_integ
 989    call wrtout(std_out,msg,'COLL')
 990  end if
 991 
 992  ent_out = zero
 993  ent_out1 = zero
 994  ent_out2 = zero
 995  ent_out3 = zero
 996  ent_out4 = zero
 997  ent_acc = czero
 998  ent_acc1 = czero
 999  ent_acc2 = czero
1000  ent_acc3 = czero
1001  ent_acc4 = czero
1002 
1003 !path parameters
1004  twotrotter = max(two*real(trotter,dp),one)
1005  if(trotter==0)then
1006    factor = tol5
1007    arg =pi*three_quarters
1008    zj = cmplx(-one,one-sin(arg),dp)
1009  else
1010    factor = xmax/ten
1011    arg = pi/twotrotter
1012    zj = cmplx( cos(arg) , sin(arg),dp )
1013  end if
1014 
1015  epsilo = factor*sin( arg )
1016  xmin = factor*cos( arg )
1017  step = (xmax-xmin)/real(n_pt_integ,dp)
1018 
1019 !####################################################################
1020 ![xmax + i*epsilo,xmin + i*epsilo]
1021  dr_step = one/real(n_pt_integ,dp)
1022  path1:  do ii = 0,n_pt_integ
1023    z_path = cmplx(xmin+real(ii,dp)*(xmax-xmin)*dr_step,epsilo,dp)
1024    dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp)
1025 
1026    Nold = czero
1027    Dold = cone
1028    N = cone
1029    D = z_path - cmplx(an(0),zero,dp)
1030 
1031    do kk=1,nrec
1032      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1033      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1034 
1035      Nold = N
1036      Dold = D
1037      N = Nnew
1038      D = Dnew
1039 
1040      if(kk/=nrec)then
1041        if((bn2(kk+1)<tol14))exit
1042      end if
1043    end do
1044 
1045 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1046    delta_calc = func1_rec(z_path**twotrotter)*(N/D)*dz_path
1047    if(ii==0.or.ii==n_pt_integ)then
1048      ent_acc  = ent_acc  + half*delta_calc
1049      ent_acc1 = ent_acc1 + half*delta_calc
1050    else
1051      ent_acc  = ent_acc  + delta_calc
1052      ent_acc1 = ent_acc1 + delta_calc
1053    end if
1054  end do path1
1055 
1056 
1057 !####################################################################
1058 ![1/2zj,0]
1059  if(epsilo/step>100.d0)then
1060    n_pt_integ_path2 = int((factor*abs(zj))/step)+1
1061  else
1062    n_pt_integ_path2 = 100
1063  end if
1064 
1065  if(trotter/=0)then
1066    n_pt_integ_path3 = 0
1067    dr_step = one/real(n_pt_integ_path2,dp)
1068    dz_path = -cmplx(xmin,epsilo,dp)*dr_step
1069    path5:  do ii = 0,n_pt_integ_path2
1070      z_path = cmplx(real(ii,dp)*xmin,real(ii,dp)*epsilo,dp)*dr_step
1071      if(abs(z_path)>tol14)then
1072        Nold = czero
1073        Dold = cone
1074        N = cone
1075        D = z_path - cmplx(an(0),zero,dp)
1076        do kk=1,nrec
1077          Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1078          Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1079          Nold = N
1080          Dold = D
1081          N = Nnew
1082          D = Dnew
1083          if(kk/=nrec)then
1084            if((bn2(kk+1)<tol14))exit
1085          end if
1086        end do
1087 
1088 !      <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1089        if(abs(z_path)**twotrotter>tiny(one)) then
1090          funczero = func1_rec(z_path**twotrotter)
1091        else
1092          funczero = czero
1093        end if
1094        delta_calc = funczero*N/D*dz_path
1095        if(ii==0.or.ii==n_pt_integ_path2)then
1096          ent_acc  = ent_acc  + half*delta_calc
1097          if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc
1098        else
1099          ent_acc  = ent_acc  + funczero*delta_calc
1100          if(debug_rec) ent_acc3 = ent_acc3 + funczero*delta_calc
1101        end if
1102      end if
1103    end do path5
1104 
1105  else  ! trotter==0
1106 
1107    n_pt_integ_path3 = max(100,int((epsilo*half*pi)/real(step,dp))+1)
1108    dr_step = one/real(n_pt_integ_path3,dp)
1109    path6:  do ii = 0,n_pt_integ_path3
1110      iif=half*pi*real(ii,dp)*dr_step
1111      z_path = epsilo*cmplx(-cos(iif),1-sin(iif),dp)
1112      dz_path = epsilo*cmplx(sin(iif),-cos(iif),dp)*half*pi*dr_step
1113      if(abs(z_path)**twotrotter>tol14)then
1114        Nold = czero
1115        Dold = cone
1116        N = cone
1117        D = z_path - cmplx(an(0),zero,dp)
1118        do kk=1,nrec
1119          Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1120          Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1121          Nold = N
1122          Dold = D
1123          N = Nnew
1124          D = Dnew
1125          if(kk/=nrec .and. bn2(kk+1)<tol14) exit !-EXIT
1126        end do
1127 
1128 !      <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1129        delta_calc = func1_rec(z_path**twotrotter) * N/D * dz_path
1130        if(ii==0.or.ii==n_pt_integ_path3)then
1131          ent_acc  = ent_acc + half*delta_calc
1132          if(debug_rec) ent_acc3 = ent_acc3 + half*delta_calc
1133        else
1134          ent_acc  = ent_acc + delta_calc    !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1135          if(debug_rec) ent_acc3 = ent_acc3 + delta_calc  !<r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1136        end if
1137      end if
1138    end do path6
1139 
1140  end if
1141 
1142  if(first_en==1 .and. debug_rec) then
1143    write(msg,'(a,i5,2a,i5,2a,i5,2a,es11.4,2a,es11.4,2a,es11.4)')&
1144 &   'n_pt_path  =',n_pt_integ,ch10,&
1145 &   'n_pt_path2 =',n_pt_integ_path2,ch10,&
1146 &   'n_pt_path3 =',n_pt_integ_path3,ch10,&
1147 &   'xmin       =',xmin,ch10,&
1148 &   'xmax       =',xmax,ch10,&
1149 &   'epsilon    =',epsilo
1150    call wrtout(std_out,msg,'COLL')
1151    first_en = 0
1152  end if
1153 
1154 !####################################################################
1155 ![xmax,xmax+i*epsilo]
1156  dr_step = one/real(n_pt_integ_path2,dp)
1157  dz_path = cmplx(zero,epsilo*dr_step,dp)
1158  path4:  do ii = 0,n_pt_integ_path2
1159    z_path = cmplx(xmax,real(ii,dp)*epsilo*dr_step,dp)
1160 
1161    Nold = czero
1162    Dold = cone
1163    N = cone
1164    D = z_path - cmplx(an(0),zero,dp)
1165 
1166    do kk=1,nrec
1167      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1168      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1169 
1170      Nold = N
1171      Dold = D
1172      N = Nnew
1173      D = Dnew
1174 
1175      if(kk/=nrec)then
1176        if((bn2(kk+1)<tol14))exit
1177      end if
1178    end do
1179 
1180 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1181    delta_calc = func1_rec(z_path**twotrotter)*N/D*dz_path
1182    if(ii==0.or.ii==n_pt_integ_path2)then
1183 
1184      ent_acc =  ent_acc  + half*delta_calc
1185      if(debug_rec) ent_acc2 = ent_acc2 + half*delta_calc
1186    else
1187      ent_acc  = ent_acc  + delta_calc
1188      if(debug_rec) ent_acc2 = ent_acc2 + delta_calc
1189    end if
1190  end do path4
1191 
1192 
1193  ent_out  = multce*real(ent_acc*cmplx(zero,-piinv,dp),dp)
1194  if(debug_rec) then
1195    ent_out1 = multce*real(ent_acc1*cmplx(zero,-piinv,dp),dp)
1196    ent_out2 = multce*real(ent_acc2*cmplx(zero,-piinv,dp),dp)
1197    ent_out3 = multce*real(ent_acc3*cmplx(zero,-piinv,dp),dp)
1198    ent_out4 = multce*real(ent_acc4*cmplx(zero,-piinv,dp),dp)
1199  end if
1200 
1201  call timab(610,2,tsec)
1202 
1203  contains
1204 
1205 !function to integrate over the path
1206 !func1_rec(z_path,twotrotter) =  ( z_path**twotrotter/(1+z_path**twotrotter)*log(1+1/z_path**twotrotter)+&    !- f*ln(f)
1207 !&1/(1+z_path**twotrotter)*log(1+z_path**twotrotter))       !- (1-f)*ln(1-f)
1208 
1209 !func1_rec(z_path_pow) =   z_path_pow/(cone+z_path_pow)*log(cone+cone/z_path_pow)+&    !- f*ln(f)
1210 !&cone/(cone+z_path_pow)*log(cone+z_path_pow)       !- (1-f)*ln(1-f)
1211 
1212 !other expression of func for a path like ro(t)*exp(2*i*pi/(2*p)*(j+1/2))
1213 
1214    function func1_rec(z)
1215 
1216    complex(dpc) :: func1_rec
1217    complex(dpc),intent(in) :: z
1218 
1219    func1_rec =   z/(cone+z)*log(cone+cone/z)+ cone/(cone+z)*log(cone+z)
1220 
1221  end function func1_rec
1222 
1223 end subroutine entropyrec

ABINIT/fermisolverec [ Functions ]

[ Top ] [ Functions ]

NAME

 fermisolverec

FUNCTION

 This routine computes the fermi energy in order to have a given number of
 valence electrons in the recursion method, using a Ridder s Method

INPUTS

  debug_rec=debugging variable
  nb_rec=order of recursion
  nb_point=number of discretization point in one dimension (=n1=n2=n3)
  temperature=temperature (Hartree)
  trotter=trotter parameter
  nelect=number of valence electrons (dtset%nelect)
  acc=accuracy for the fermi energy
  max_it=maximum number of iteration for the Ridder's Method
  long_tranche=number of point computed by thi proc
  mpi_enreg=information about MPI parallelization
  inf_ucvol=infinitesimal unit cell volume
  gputopo=true if topology gpu-cpu= 2 or 3

OUTPUT

SIDE EFFECTS

  fermie=fermi energy
  rho=density, recomputed for the new fermi energy
  a, b2 : coefficient given by recursion recomputed for the new fermi energy

NOTES

  at this time :

SOURCE

1260 subroutine fermisolverec(fermie,rho,a,b2,debug_rec,nb_rec, &
1261   &                      temperature,trotter,nelect, &
1262   &                      acc, max_it, &
1263   &                      long_tranche,mpi_enreg,&
1264   &                      inf_ucvol,gputopo)
1265 
1266 !Arguments -------------------------------
1267  !scalars
1268  integer,intent(in) :: long_tranche,max_it,nb_rec,trotter
1269  logical,intent(in) :: debug_rec,gputopo
1270  real(dp),intent(in) :: acc,inf_ucvol,nelect,temperature
1271  real(dp), intent(inout) :: fermie
1272  type(MPI_type),intent(in) :: mpi_enreg
1273  !arrays
1274  real(dp), intent(inout) :: a(0:nb_rec,long_tranche), b2(0:nb_rec,long_tranche)
1275  real(dp), intent(inout) :: rho(long_tranche)
1276 
1277 !Local variables-------------------------------
1278  !scalars
1279  integer  ::  ierr,ii,ipointlocal,nn,dim_trott
1280  real(dp) :: beta,fermieh,fermiel,fermiem,fermienew,nelecth,nelectl,nelectm
1281  real(dp) :: nelectnew,res_nelecth,res_nelectl,res_nelectm,res_nelectnew
1282  real(dp) :: rtrotter,ss,fermitol
1283  character(len=500) :: msg
1284  !arrays
1285  real(dp) :: tsec(2)
1286  real(dp) :: rhotry(long_tranche)
1287  !no_abirules
1288 #ifdef HAVE_GPU_CUDA
1289  integer :: swt_tm,npitch
1290  real(cudap) :: rhocu(long_tranche)
1291  real(dp) :: tsec2(2)
1292 #endif
1293 
1294 ! *************************************************************************
1295 
1296 #ifdef HAVE_GPU_CUDA
1297  swt_tm = 0
1298 #endif
1299 
1300  call timab(609,1,tsec)
1301 
1302  beta = one/temperature
1303  rtrotter  = max(half,real(trotter,dp))
1304  dim_trott = max(0,2*trotter-1)
1305 
1306  write(msg,'(a)')' -- fermisolverec ---------------------------------'
1307  call wrtout(std_out,msg,'COLL')
1308  if(debug_rec) then
1309    write (msg,'(a,d10.3)')' nelect= ',nelect
1310    call wrtout(std_out,msg,'COLL')
1311  end if
1312 !initialisation of fermiel
1313  fermiel = fermie
1314  call timab(609,2,tsec)
1315 
1316 !initialisation fermitol
1317  fermitol = acc
1318 #ifdef HAVE_GPU_CUDA_SP
1319  if(gputopo)  fermitol = 1.d-3
1320 #endif
1321 
1322  if(gputopo) then
1323 #ifdef HAVE_GPU_CUDA
1324    swt_tm = 1
1325 !  allocate array an and bn2 on gpu for computation of trotter formula
1326    call alloc_dens_cuda(long_tranche,nb_rec,dim_trott,npitch,&
1327 &   real(a,cudap),real(b2,cudap))
1328 
1329    call timab(617,1,tsec)
1330    call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1331 &   real(fermiel,cudap),real(temperature,cudap),&
1332 &   real(rtrotter,cudap),real(inf_ucvol,cudap),&
1333 &   real(tol14,cudap),&
1334 &   rhocu)
1335    rhotry = real(rhocu,dp)
1336    call timab(617,2,tsec)
1337 #endif
1338  else
1339    do ipointlocal = 1,long_tranche
1340      call density_rec(a(:,ipointlocal),&
1341 &     b2(:,ipointlocal),&
1342 &     rhotry(ipointlocal),&
1343 &     nb_rec,fermiel,temperature,rtrotter,dim_trott, &
1344 &     tol14,inf_ucvol)
1345    end do
1346  end if
1347 
1348  call timab(609,1,tsec)
1349  nelectl = sum(rhotry)
1350  call xmpi_sum( nelectl,mpi_enreg%comm_bandfft,ierr)
1351  res_nelectl = inf_ucvol*nelectl - nelect
1352 
1353  if (res_nelectl /= zero) then
1354 !  initialisation of fermih
1355 !  excess of electrons -> smaller fermi
1356    res_nelecth = zero
1357    ii = 1
1358    fermieh = fermie - ten*sign(one,res_nelectl)*temperature
1359    do while(ii<6 .and. res_nelecth*res_nelectl>=0)
1360      fermieh = fermieh - ten*sign(one,res_nelectl)*temperature
1361      call timab(609,2,tsec)
1362 
1363      if(gputopo) then
1364 #ifdef HAVE_GPU_CUDA
1365        call timab(617,1,tsec)
1366        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1367 &       real(fermieh,cudap),real(temperature,cudap),&
1368 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1369 &       real(tol14,cudap),&
1370 &       rhocu)
1371        rhotry = real(rhocu,dp)
1372        call timab(617,2,tsec)
1373 #endif
1374      else
1375        do ipointlocal = 1,long_tranche
1376          call density_rec(a(:,ipointlocal),  &
1377 &         b2(:,ipointlocal), &
1378 &         rhotry(ipointlocal), &
1379 &         nb_rec,fermieh,temperature,rtrotter,dim_trott, &
1380 &         tol14,inf_ucvol)
1381        end do
1382      end if
1383      call timab(609,1,tsec)
1384      nelecth = sum(rhotry)
1385      call xmpi_sum( nelecth,mpi_enreg%comm_bandfft ,ierr);
1386      res_nelecth = inf_ucvol*nelecth - nelect
1387 
1388      if(debug_rec) then
1389        write (msg,'(a,es11.4e2,a,es11.4e2)') ' Fermi energy interval',fermieh,' ',fermiel
1390        call wrtout(std_out,msg,'COLL')
1391      end if
1392      ii = ii +1
1393    end do
1394 
1395    if (res_nelecth*res_nelectl>0) then
1396      write (msg,'(4a)')' fermisolverec : ERROR- ',ch10,&
1397 &     ' initial guess for fermi energy doesnt permit to  find solutions in solver',ch10
1398      ABI_ERROR(msg)
1399    end if
1400 
1401 !  MAIN LOOP   ------------------------------------------------------
1402    main : do nn=1,max_it
1403 !    fermiem computation
1404      fermiem = 0.5d0*(fermiel+fermieh)
1405 
1406 !    nelectm = zero
1407      call timab(609,2,tsec)
1408 
1409      if(gputopo) then
1410 #ifdef HAVE_GPU_CUDA
1411        call timab(617,1,tsec)
1412        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1413 &       real(fermiem,cudap),real(temperature,cudap),&
1414 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1415 &       real(tol14,cudap),&
1416 &       rhocu)
1417        rhotry = real(rhocu,dp)
1418        call timab(617,2,tsec)
1419 #endif
1420      else
1421        do ipointlocal = 1,long_tranche
1422          call density_rec(a(:,ipointlocal),  &
1423 &         b2(:,ipointlocal), &
1424 &         rhotry(ipointlocal), &
1425 &         nb_rec,fermiem,temperature,rtrotter,dim_trott, &
1426 &         tol14,inf_ucvol)
1427        end do
1428      end if
1429 
1430      call timab(609,1,tsec)
1431      nelectm = sum(rhotry)
1432      call xmpi_sum( nelectm,mpi_enreg%comm_bandfft,ierr)
1433      res_nelectm = inf_ucvol*nelectm - nelect
1434 
1435 !    new guess
1436      ss = sqrt(res_nelectm**two-res_nelectl*res_nelecth)
1437      fermienew = fermiem + (fermiem-fermiel)*sign(one, res_nelectl-res_nelecth)*res_nelectm/ss
1438 
1439      call timab(609,2,tsec)
1440      if(gputopo) then
1441 #ifdef HAVE_GPU_CUDA
1442        call timab(617,1,tsec)
1443        call density_cuda(npitch,long_tranche,nb_rec,dim_trott,&
1444 &       real(fermienew,cudap),real(temperature,cudap),&
1445 &       real(rtrotter,cudap),real(inf_ucvol,cudap),&
1446 &       real(tol14,cudap),&
1447 &       rhocu)
1448        rhotry = real(rhocu,dp)
1449        call timab(617,2,tsec)
1450 #endif
1451      else
1452        do ipointlocal = 1,long_tranche
1453          call density_rec(a(:,ipointlocal),  &
1454 &         b2(:,ipointlocal), &
1455 &         rhotry(ipointlocal), &
1456 &         nb_rec,fermienew,temperature,rtrotter,dim_trott, &
1457 &         tol14,inf_ucvol)
1458        end do
1459      end if
1460 
1461      call timab(609,1,tsec)
1462      nelectnew = sum(rhotry)
1463      call xmpi_sum( nelectnew,mpi_enreg%comm_bandfft ,ierr);
1464      res_nelectnew = inf_ucvol*nelectnew - nelect
1465 
1466 !    fermiel et fermieh for new iteration
1467      if (sign(res_nelectm,res_nelectnew) /= res_nelectm) then
1468        fermiel = fermiem
1469        res_nelectl = res_nelectm
1470        fermieh = fermienew
1471        res_nelecth = res_nelectnew
1472      else if (sign(res_nelectl,res_nelectnew) /= res_nelectl) then
1473        fermieh = fermienew
1474        res_nelecth = res_nelectnew
1475      else if (sign(res_nelecth,res_nelectnew) /= res_nelecth) then
1476        fermiel = fermienew
1477        res_nelectl = res_nelectnew
1478      end if
1479 
1480 !    are we within the tolerance ?
1481      if ((abs(res_nelectnew) < fermitol).or.(nn == max_it)) then
1482        fermie = fermienew
1483        rho = rhotry
1484        if(debug_rec) then
1485          write (msg,'(a,es11.4e2,a,i4)')' err, num_iter ', res_nelectnew, ' ',nn
1486          call wrtout(std_out,msg,'COLL')
1487          write(msg,'(a,50a)')' ',('-',ii=1,50)
1488          call wrtout(std_out,msg,'COLL')
1489        end if
1490        exit main
1491      end if
1492 
1493    end do main
1494 
1495  end if
1496 
1497 #ifdef HAVE_GPU_CUDA
1498 !deallocate array on GPU
1499  if(gputopo) then
1500    call dealloc_dens_cuda()
1501  end if
1502  call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
1503  call xmpi_barrier(mpi_enreg%comm_bandfft)
1504  call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
1505 #endif
1506 
1507  call timab(609,2,tsec)
1508 end subroutine fermisolverec

ABINIT/first_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 first_rec

FUNCTION

 When recursion method is used, in the first step this routine
 compute some quantities which are used in the rest of the calculation.

COPYRIGHT

  Copyright (C) 2009-2024 ABINIT group (MMancini)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

INPUTS

  dtset <type(dataset_type)>=all input variables for this dataset:
   | recgratio =fine/coarse grid ratio
   | recptrott =trotter parameter
   | tsmear    =temperature
   | recrcut   =tut radius in recursion (range of iteration)
   | ngfft(18) =FFT grid used as real (fine) grid in recursion
  psps <type(pseudopotential_type)>=variables related to pseudo-potentials

OUTPUT

SIDE EFFECTS

  rset <type(recursion_type)>=variables related to recursion method
   | debug<logical> = T if debugging is used
   | inf <type(metricrec_type)>=information concerning the infinitesimal metrics
   | ngfftrec(18) =truncated (or not, if not ngfftrec=ngfft)FFT grid used as real grid in recursion.
   | nfftrec =product(ngfftrec(1:3))
   | tronc<logical> = T if truncation is effectively used
   | ZT_p = fourier transform of the green_kernel calculated on the fine grid

NOTES

SOURCE

2142 subroutine first_rec(dtset,psps,rset)
2143 
2144 !Arguments ------------------------------------
2145 ! scalars
2146  type(dataset_type),intent(in) :: dtset
2147  type(pseudopotential_type),intent(in) :: psps
2148  type(recursion_type),intent(inout) :: rset
2149 !Local variables-------------------------------
2150 !scalars
2151  integer  :: nfftrec,trotter,ii,dim_trott
2152  real(dp) :: tsmear,beta,rtrotter
2153  character(len=500) :: msg
2154 !arrays
2155  integer  :: ngfftrec(18)
2156  real(dp) :: tsec(2)
2157 #ifdef HAVE_GPU_CUDA
2158  integer  :: max_rec,ierr,testpts,swt_tm
2159  real(dp) :: rho,tm_ratio
2160  real(dp) :: time_cu,time_f
2161  type(recursion_type) :: rset_test
2162  type(recparall_type) :: parold
2163  integer :: trasl(3)
2164  real(dp) :: tsec2(2),tsec3(2)
2165  real(dp) :: aloc(0,1),b2loc(0,1)
2166  real(dp) :: dm_projec(0,0,0,1,1)
2167  real(dp) :: exppot(0:dtset%nfft-1)
2168  real(dp),allocatable :: exppotloc(:)
2169  real(cudap),allocatable :: aloc_cu(:),b2loc_cu(:)
2170 #endif
2171 
2172 ! *************************************************************************
2173 
2174  call timab(601,1,tsec)  !!--Start time-counter: initialisation
2175 
2176  ABI_WARNING("RECURSION")
2177  if(dtset%recgratio>1) then
2178    write(msg,'(a)')'COARSE GRID IS USED'
2179    call wrtout(std_out,msg,'COLL')
2180  end if
2181 
2182 !--Initialisation
2183  trotter = dtset%recptrott  !--Trotter parameter
2184  tsmear  = dtset%tsmear     !--Temperature
2185  beta    = one/tsmear       !--Inverse of temperature
2186 
2187 !--Rewriting the trotter parameter
2188  dim_trott = max(0,2*trotter-1)
2189  rtrotter  = max(half,real(trotter,dp))
2190 
2191  write (msg,'(2a)')ch10,'==== FIRST CYCLE RECURSION ========================='
2192  call wrtout(std_out,msg,'COLL')
2193 
2194 
2195  ngfftrec = rset%ngfftrec
2196  nfftrec = rset%nfftrec
2197 !------------------------------------------------
2198 !--TRONCATION OF THE BOX: determines new dimensions
2199 !--Now in InitRec
2200 !--------------------------------------------------------
2201 !--DEFINITION PAW VARIABLES COARSE-FINE GRID  TO USE TRANSGRID--INGRID FUNCTIONS
2202 !--Now these variables are defined into gstate by InitRec
2203 
2204 !--------------------------------------------------------
2205 !--COMPUTATION OF THE FOURIER TRANSFORM OF THE GREEN KERNEL (only once)
2206  write (msg,'(a)')' - green kernel calculation -----------------------'
2207  call wrtout(std_out,msg,'COLL')
2208  ABI_MALLOC(rset%ZT_p,(1:2,0: nfftrec-1))
2209  call timab(601,2,tsec)
2210  call green_kernel(rset%ZT_p,rset%inf%rmet,rset%inf%ucvol,rtrotter/beta,rset%mpi,ngfftrec,nfftrec)
2211  call timab(601,1,tsec)
2212  write(msg,'(a,50a)')' ',('-',ii=1,50)
2213  call wrtout(std_out,msg,'COLL')
2214 !!--end computation of the fourier transform of the Green kernel
2215 
2216 !!-----------------------------------
2217 !!--ROUTINE FOR THE CALCULATION OF THE NON-LOCAL PSEUDO
2218 !--Now these variables here by  Init_nlpspRec
2219  call Init_nlpspRec(four*tsmear*rtrotter,psps,rset%nl,rset%inf,rset%ngfftrec,rset%debug)
2220 
2221 !!-----------------------------------
2222 !--Load distribution on procs when GPU are present
2223 #if defined HAVE_GPU_CUDA
2224 
2225 !--Test timing only if exists GPU and they are not equal to the cpus
2226  if(rset%tp == 4) then
2227    parold = rset%par
2228    ii = 0
2229    time_f = zero
2230    time_cu = zero
2231    call random_number(exppot)  !   exppot = one
2232 
2233    if(rset%gpudevice == -1) then
2234 !    --Test CPUS
2235      swt_tm = 0
2236      testpts = min(rset%par%npt, 20)
2237      call timein(tsec2(1),tsec2(2))
2238      if(rset%tronc) then
2239        ABI_MALLOC(exppotloc,(0:nfftrec-1))
2240        do while(ii< testpts)
2241          trasl = -(/1,2,3/)+ngfftrec(:3)/2
2242          call reshape_pot(trasl,dtset%nfft,nfftrec,dtset%ngfft(:3),ngfftrec(:3),&
2243 &         exppot,exppotloc)
2244          call recursion(exppotloc,0,0,0, &
2245 &         aloc, &
2246 &         b2loc, &
2247 &         rho,&
2248 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
2249 &         rset%ZT_p, &
2250 &         dtset%rectolden,dtset%typat, &
2251 &         rset%nl,&
2252 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
2253 &         6,dtset%natom,dm_projec,0)
2254          ii=ii+1
2255        end do
2256        ABI_FREE(exppotloc)
2257      else
2258        do while(ii< testpts)
2259          call recursion(exppot,0,0,0, &
2260 &         aloc, &
2261 &         b2loc, &
2262 &         rho,&
2263 &         0, rset%efermi,tsmear,rtrotter,dim_trott, &
2264 &         rset%ZT_p, &
2265 &         dtset%rectolden,dtset%typat, &
2266 &         rset%nl,&
2267 &         rset%mpi,nfftrec,ngfftrec,rset%inf,&
2268 &         6,dtset%natom,dm_projec,0)
2269          ii=ii+1
2270        end do
2271      end if
2272      call timein(tsec3(1),tsec3(2))
2273      time_f = (tsec3(1)-tsec2(1))/real(testpts,dp)
2274      time_f = time_f*time_f
2275    else
2276 !    --Test GPUS
2277      swt_tm = 1
2278      rset_test = rset
2279      rset_test%GPU%par%npt = max(rset%GPU%nptrec,100)
2280      rset_test%min_nrec = 0
2281      call get_pt0_pt1(dtset%ngfft(:3),dtset%recgratio,0,&
2282 &     rset_test%GPU%par%npt,rset_test%GPU%par)
2283 
2284 
2285      ABI_MALLOC(aloc_cu,(rset_test%GPU%par%npt))
2286      ABI_MALLOC(b2loc_cu,(rset_test%GPU%par%npt))
2287      call timein(tsec2(1),tsec2(2))
2288      call cudarec(rset_test, exppot,aloc_cu,b2loc_cu,&
2289 &     beta,trotter,dtset%rectolden,dtset%recgratio,dtset%ngfft,max_rec)
2290      call timein(tsec3(1),tsec3(2))
2291      ABI_FREE(aloc_cu)
2292      ABI_FREE(b2loc_cu)
2293 
2294      time_cu = (tsec3(1)-tsec2(1))/real(rset_test%GPU%par%npt,dp)
2295      time_cu = time_cu*time_cu
2296    end if
2297 
2298 
2299 !  --Get Total Times
2300    call xmpi_sum(time_f,rset%mpi%comm_bandfft,ierr)
2301    call xmpi_sum(time_cu,rset%mpi%comm_bandfft,ierr)
2302 
2303 !  --Average Total Times
2304    time_f   = sqrt(time_f/real(rset%mpi%nproc-rset%ngpu,dp))
2305    time_cu  = sqrt(time_cu/real(rset%ngpu,dp))
2306    tm_ratio = time_f/time_cu
2307 
2308 
2309    write(msg,'(3(a25,f10.5,a))')&
2310 &   ' Time for cpu recursion ',time_f,ch10,&
2311 &   ' Time for gpu recursion ',time_cu,ch10,&
2312 &   ' Time ratio             ',tm_ratio,ch10
2313    call wrtout(std_out,msg,'COLL')
2314 
2315 
2316 !  tm_ratio =1.20d2! 0.d0! 1.21d0
2317    rset%par = parold
2318 !  --Compute the work-load distribution on devices (gpu,cpu)
2319    if(tm_ratio>1.5d0 .and. time_cu>zero)then
2320      rset%load = 1
2321      call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),tm_ratio,1)
2322    else
2323      rset%gpudevice = -1
2324    end if
2325  end if
2326 
2327 #endif
2328 
2329 
2330 !------------------------------------------------------------
2331 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC
2332 !--Now these variables are defined into gstate by Init_rec
2333 
2334  write (msg,'(2a)')ch10,'==== END FIRST CYCLE RECURSION ====================='
2335  call wrtout(std_out,msg,'COLL')
2336  call timab(601,2,tsec) !!--stop time-counter: initialisation
2337 
2338 end subroutine first_rec

ABINIT/gran_potrec [ Functions ]

[ Top ] [ Functions ]

NAME

 gran_potrec

FUNCTION

 This routine computes the local part of the grand-potential at a point using a path integral,
 in the recursion method.

INPUTS

  an, bn2 : coefficient given by the recursion.
  nrec=order of recursion
  trotter=trotter parameter
  mult=a multiplicator for computing grand-potential (2 for non-spin-polarized system)
  debug_rec=debugging variable
  n_pt_integ=points for computation of path integral
  xmax= maximum point on the x-axis for integration

OUTPUT

  ene_out=grand-potential at the point
  if debug_rec=T then ene_out1,ene_out2,ene_out3,ene_out4 are
  the different path branch contriubutions to the grand-potential.
  In reality it is not the gren potential but the
  grand-potential (omega=-PV) divided by -T

NOTES

  in reality it is not the gren potential but the grand-potential (omega=-PV) divided by -T
  at this time :
       - mult should be not used
       - the routine should be integraly rewrited and use the routine recursion.
       - only modified for p /= 0

SOURCE

1657 subroutine gran_potrec(an,bn2,nrec,trotter,ene_out, mult, &
1658 &                     debug_rec,n_pt_integ,xmax,&
1659 &                     ene_out1,ene_out2,ene_out3,ene_out4)
1660 
1661 !Arguments -------------------------------
1662 !scalars
1663  integer,intent(in) :: n_pt_integ,nrec,trotter
1664  logical,intent(in) :: debug_rec
1665  real(dp), intent(in) :: mult,xmax
1666  real(dp),intent(inout) :: ene_out,ene_out1,ene_out2,ene_out3,ene_out4 !vz_i
1667 !arrays
1668  real(dp), intent(in) :: an(0:nrec),bn2(0:nrec)
1669 
1670 !Local variables-------------------------------
1671 !scalars
1672  integer, parameter :: level = 7
1673  integer, save :: first = 1
1674  integer :: ii,kk,n_pt_integ_path2
1675  real(dp) :: epsilon,step,twotrotter,xmin,dr_step
1676  complex(dpc) :: D,Dnew,Dold,N,Nnew,Nold,dz_path,ene_acc,ene_acc1,ene_acc2
1677  complex(dpc) :: ene_acc3,ene_acc4
1678  complex(dpc) :: z_path,delta_calc
1679  character(len=500) :: message
1680 !arrays
1681  real(dp) :: tsec(2)
1682 
1683 ! *************************************************************************
1684 
1685 
1686  call timab(611,1,tsec)
1687 
1688 !structured debugging if debug_rec=T : print detailled result the first time we enter gran_potrec
1689  if(debug_rec .and. first==1)then
1690    write(message,'(a)')' '
1691    call wrtout(std_out,message,'PERS')
1692    write(message,'(a)')' gran_potrec : enter '
1693    call wrtout(std_out,message,'PERS')
1694    write(message,'(a,i8)')'n_pt_integ ' , n_pt_integ
1695    call wrtout(std_out,message,'COLL')
1696    first=0
1697  end if
1698 
1699  ene_out = zero
1700  ene_acc = czero
1701  ene_acc1 = czero
1702  ene_acc2 = czero
1703  ene_acc3 = czero
1704  ene_acc4 = czero
1705 
1706 
1707 !path parameters
1708 !n_pt_integ = 2500
1709  xmin = -half
1710  step = (xmax-xmin)/real(n_pt_integ,dp)
1711  if(trotter==0)then
1712    twotrotter = one
1713    epsilon = .5d-1
1714  else
1715    twotrotter = two*real(trotter,dp)
1716    epsilon = half*sin( pi/twotrotter)
1717  end if
1718 
1719 !xmin = -abs(xmin)**(1.d0/twotrotter)
1720 
1721 !####################################################################
1722 ![xmax + i*epsilon,xmin + i*epsilon]
1723  dr_step = one/real(n_pt_integ,dp)
1724  dz_path = -cmplx((xmax-xmin)*dr_step,zero,dp)
1725  path1:  do ii = 0,n_pt_integ
1726 !  z_path = cmplx(xmin + real(ii,dp)*(xmax-xmin)*dr_step,epsilon,dp)
1727    z_path = cmplx(xmin,epsilon,dp) - real(ii,dp)*dz_path
1728    Nold = czero
1729    Dold = cone
1730    N = cone
1731    D = z_path - cmplx(an(0),zero,dp)
1732 
1733    do kk=1,nrec
1734      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1735      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1736 
1737      Nold = N
1738      Dold = D
1739      N = Nnew
1740      D = Dnew
1741 
1742      if(kk/=nrec)then
1743        if((bn2(kk+1)<tol14))exit
1744      end if
1745 
1746    end do
1747 
1748 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1749    delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path
1750    if(ii==0.or.ii==n_pt_integ)then
1751      ene_acc = ene_acc + half*delta_calc
1752      if(debug_rec)  ene_acc1 = ene_acc1 + half*delta_calc
1753    else
1754      ene_acc = ene_acc + delta_calc
1755      if(debug_rec)  ene_acc1 = ene_acc1 + delta_calc
1756    end if
1757  end do path1
1758 
1759 !####################################################################
1760 ![xmin + i*epsilon,xmin]
1761  if(epsilon/step>4.d0)then
1762    n_pt_integ_path2 = int(epsilon/step)+1
1763  else
1764    n_pt_integ_path2 = 5
1765  end if
1766  n_pt_integ_path2 = n_pt_integ
1767  dr_step = one/real(n_pt_integ_path2,dp)
1768  dz_path = -cmplx(zero,epsilon*dr_step,dp)
1769  path2:  do ii = 0,n_pt_integ_path2
1770 !  z_path = cmplx(xmin,real(ii,dp)*epsilon*dr_step,dp)
1771    z_path = cmplx(xmin,zero,dp)-dz_path*real(ii,dp)
1772    Nold = czero
1773    Dold = cone
1774    N = cone
1775    D = z_path - cmplx(an(0),zero,dp)
1776 
1777    do kk=1,nrec
1778      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1779      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1780 
1781      Nold = N
1782      Dold = D
1783      N = Nnew
1784      D = Dnew
1785 
1786      if(kk/=nrec)then
1787        if((bn2(kk+1)<tol14))exit
1788      end if
1789 
1790    end do
1791 
1792 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1793    delta_calc = func_rec(z_path,twotrotter)* N/D *dz_path
1794    if(ii==0.or.ii==n_pt_integ_path2)then
1795      ene_acc = ene_acc + half*delta_calc
1796      if(debug_rec) ene_acc3 = ene_acc3 + half*delta_calc
1797    else
1798      ene_acc = ene_acc + delta_calc
1799      if(debug_rec) ene_acc3 = ene_acc3 + delta_calc
1800    end if
1801  end do path2
1802 
1803 
1804 
1805 !####################################################################
1806 ![xmin,0]
1807  if(xmin/=czero)then
1808    dr_step = one/real(n_pt_integ,dp)
1809    dz_path = cmplx(xmin*dr_step,zero,dp)
1810    path3:  do ii = 1,n_pt_integ !the integrand is 0 at 0
1811 !    z_path = cmplx(real(ii,dp)*xmin*dr_step,zero,dp)
1812      z_path = real(ii,dp)*dz_path
1813 
1814      Nold = czero
1815      Dold = cone
1816      N = cone
1817      D = z_path - cmplx(an(0),zero,dp)
1818 
1819      do kk=1,nrec
1820        Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1821        Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1822 
1823        Nold = N
1824        Dold = D
1825        N = Nnew
1826        D = Dnew
1827 
1828        if(kk/=nrec)then
1829          if((bn2(kk+1)<tol14))exit
1830        end if
1831      end do
1832 
1833 !    <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1834      delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path
1835      if(ii==n_pt_integ)then
1836        ene_acc = ene_acc +half*delta_calc
1837        if(debug_rec) ene_acc4 = ene_acc4 + half*delta_calc
1838      else
1839        ene_acc = ene_acc + delta_calc
1840        if(debug_rec) ene_acc4 = ene_acc4 +delta_calc
1841      end if
1842    end do path3
1843  end if
1844 
1845 !####################################################################
1846 ![xmax,xmax+i*epsilon]
1847  dr_step = one/real(n_pt_integ_path2,dp)
1848  dz_path = cmplx(zero,epsilon*dr_step,dp)
1849  path4:  do ii = 0,n_pt_integ_path2
1850 !  z_path = cmplx(xmax,real(ii,dp)*epsilon*dr_step,dp)
1851    z_path = cmplx(xmax,0,dp)+real(ii,dp)*dz_path
1852 
1853    Nold = czero
1854    Dold = cone
1855    N = cone
1856    D = z_path - cmplx(an(0),zero,dp)
1857 
1858    do kk=1,nrec
1859      Nnew = (z_path - cmplx(an(kk),zero,dp))*N - cmplx(bn2(kk),zero,dp)*Nold
1860      Dnew = (z_path - cmplx(an(kk),zero,dp))*D - cmplx(bn2(kk),zero,dp)*Dold
1861 
1862      Nold = N
1863      Dold = D
1864      N = Nnew
1865      D = Dnew
1866 
1867      if(kk/=nrec)then
1868        if((bn2(kk+1)<tol14))exit
1869      end if
1870 
1871    end do
1872 
1873 !  <r|1/(z-e**(-beta/(2p)*(H-mu)))|r> dz
1874    delta_calc = func_rec(z_path,twotrotter) * N/D *dz_path
1875    if(ii==0.or.ii==n_pt_integ_path2)then
1876      ene_acc = ene_acc + half*delta_calc
1877      if(debug_rec) ene_acc2 = ene_acc2 + half*delta_calc
1878    else
1879      ene_acc = ene_acc + delta_calc
1880      if(debug_rec) ene_acc2 = ene_acc2 + delta_calc
1881    end if
1882  end do path4
1883 
1884  ene_out = mult*real(ene_acc*cmplx(zero,-piinv,dp),dp)
1885  if(debug_rec) then
1886    ene_out1 = mult*real(ene_acc1*cmplx(zero,-piinv,dp),dp)
1887    ene_out2 = mult*real(ene_acc2*cmplx(zero,-piinv,dp),dp)
1888    ene_out3 = mult*real(ene_acc3*cmplx(zero,-piinv,dp),dp)
1889    ene_out4 = mult*real(ene_acc4*cmplx(zero,-piinv,dp),dp)
1890  end if
1891 
1892  call timab(611,2,tsec)
1893 
1894  contains
1895 
1896 !func_rec(z_path,twotrotter) = log(cone+z_path**twotrotter)
1897 
1898    function func_rec(z,x)
1899 
1900    complex(dpc) :: func_rec
1901    complex(dpc),intent(in) :: z
1902    real(dp),intent(in) :: x
1903 
1904    func_rec = log(cone+z**x)
1905 
1906  end function func_rec
1907 
1908 end subroutine gran_potrec

ABINIT/green_kernel [ Functions ]

[ Top ] [ Functions ]

NAME

 green_kernel

FUNCTION

 this routine compute the fourrier transform of the Green kernel for the
 recursion method

INPUTS

  inf_rmet=define the  infinitesimal metric : rprimd*(transpose(rprimd)) divided
    by the number of discretisation point
  inf_ucvol=volume of infinitesimal cell
  mult=variance of the Gaussian (=rtrotter/beta)
  mpi_enreg=information about MPI parallelization
  ngfft=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nfft=total number of fft grid points
  debug_rec=debugging variable

OUTPUT

  ZT_p=fourier transforme of the Green kernel

NOTES

  at this time :
       - need a rectangular box

SOURCE

2370 subroutine green_kernel(ZT_p,inf_rmet,inf_ucvol,mult,mpi_enreg,ngfft,nfft)
2371 
2372 !Arguments -------------------------------
2373 !scalars
2374  integer,intent(in) :: nfft
2375  real(dp),intent(in) :: inf_ucvol,mult
2376  type(MPI_type),intent(in) :: mpi_enreg
2377 !arrays
2378  integer,intent(in) :: ngfft(18)
2379  real(dp),intent(in) :: inf_rmet(3,3)
2380  real(dp),intent(out) :: ZT_p(1:2,0:nfft-1)
2381 
2382 !Local variables-------------------------------
2383 !scalars
2384  integer,parameter :: n_green_max=5
2385  integer :: ii,isign,jj,kk,n_green,xx,yy,zz
2386  real(dp) :: acc, norme
2387  character(len=500) :: msg
2388 !arrays
2389  real(dp) :: tsec(2)
2390  real(dp),allocatable :: T_p(:)
2391 
2392 ! *************************************************************************
2393 
2394  call timab(603,1,tsec)
2395 
2396  norme = (mult/pi)**(onehalf)
2397 
2398  ABI_MALLOC(T_p,(0:nfft-1))
2399 
2400 !n_green should be better chosen for non rectangular cell
2401  do xx=1, n_green_max
2402    n_green = xx
2403    if(exp(-mult*dsq_green(xx*ngfft(1),0,0,inf_rmet))<tol14 &
2404 &   .and. exp(-mult*dsq_green(0,xx*ngfft(2),0,inf_rmet))<tol14 &
2405 &   .and. exp(-mult*dsq_green(0,0,xx*ngfft(3),inf_rmet))<tol14 ) exit
2406  end do
2407 
2408  acc = zero
2409  T_p = zero
2410  do kk = 0,ngfft(3)-1
2411    do jj = 0,ngfft(2)-1
2412      do ii = 0,ngfft(1)-1
2413 
2414        do xx=-n_green,n_green-1
2415          do yy=-n_green,n_green-1
2416            do zz=-n_green,n_green-1
2417 
2418              T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)+ &
2419 &             exp(-mult*dsq_green(ii+xx*ngfft(1),jj+yy*ngfft(2),kk+zz*ngfft(3),inf_rmet))
2420 
2421            end do
2422          end do
2423        end do
2424 
2425        T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk) = norme*T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)
2426        acc = acc + inf_ucvol* T_p(ii+ngfft(1)*jj+ngfft(1)*ngfft(2)*kk)
2427 
2428      end do
2429    end do
2430  end do
2431 
2432  T_p(:)= (one/acc)*T_p(:)
2433 
2434 !if(debug_rec)then
2435  write(msg,'(a,d12.3,2(2a,i8),2(2a,3d12.3),2a,d16.6)')&
2436 & ' on the boundary    ', exp(-mult*dsq_green(ngfft(1),0,0,inf_rmet)),ch10, &
2437 & ' no zero            ', count(T_p>tol14),ch10, &
2438 & ' n_green            ', n_green,ch10, &
2439 & ' erreur_n_green     ', exp(-mult*dsq_green(n_green*ngfft(1),0,0,inf_rmet)), &
2440 & exp(-mult*dsq_green(0,n_green*ngfft(2),0,inf_rmet)), &
2441 & exp(-mult*dsq_green(0,0,n_green*ngfft(3),inf_rmet)),ch10,&
2442 & ' erreur_troncat     ', T_p(ngfft(1)/2),  &
2443 & T_p(ngfft(1)*(ngfft(2)/2)), &
2444 & T_P(ngfft(1)*ngfft(2)*(ngfft(3)/2)),ch10, &
2445 & ' erreurT_p          ',abs(acc-1.d0)
2446  call wrtout(std_out,msg,'COLL')
2447 !endif
2448 
2449 
2450  isign = -1
2451  call fourdp(1,ZT_p,T_p,isign,mpi_enreg,nfft,1,ngfft,0)
2452 
2453  ABI_FREE(T_p)
2454 
2455  ZT_p(:,:) = real(nfft,dp)*ZT_p
2456 
2457 
2458  call timab(603,2,tsec)
2459 
2460  contains
2461 
2462    function dsq_green(ii,jj,kk,inf_rmet)
2463 
2464    real(dp) :: dsq_green
2465    integer,intent(in) :: ii,jj,kk
2466    real(dp),intent(in) :: inf_rmet(3,3)
2467    dsq_green= inf_rmet(1,1)*dble(ii**2)&
2468 &   +inf_rmet(2,2)*dble(jj**2)&
2469 &   +inf_rmet(3,3)*dble(kk**2)&
2470 &   +two*(inf_rmet(1,2)*dble(ii*jj)&
2471 &   +inf_rmet(2,3)*dble(jj*kk)&
2472 &   +inf_rmet(3,1)*dble(kk*ii))
2473  end function dsq_green
2474 
2475 end subroutine green_kernel

ABINIT/m_vtorhorec [ Modules ]

[ Top ] [ Modules ]

NAME

  m_vtorhorec

FUNCTION

COPYRIGHT

  Copyright (C) 2008-2024 ABINIT group (SLeroux, MMancini).
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

15 #if defined HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "abi_common.h"
20 
21 module m_vtorhorec
22 
23  use defs_basis
24  use defs_rectypes
25  use m_distribfft
26  use m_xmpi
27  use m_pretty_rec
28  use m_errors
29  use m_abicore
30  use m_per_cond
31  use m_dtset
32 
33  use defs_datatypes,     only : pseudopotential_type
34  use defs_abitypes,      only : MPI_type
35  use m_time,             only : timein, timab
36  use m_rec,              only : Calcnrec, init_nlpsprec, cpu_distribution
37  use m_rec_tools,        only : reshape_pot, trottersum, get_pt0_pt1
38  use m_spacepar,         only : symrhg
39  use m_fourier_interpol, only : transgrid
40  use m_fft,              only : fourdp
41 
42 #ifdef HAVE_GPU_CUDA
43  use m_gpu_toolbox
44  use m_hidecudarec
45  use m_xredistribute
46 #endif
47 
48  implicit none
49 
50  private

ABINIT/nlenergyrec [ Functions ]

[ Top ] [ Functions ]

NAME

 nlenergyrec

FUNCTION

 During recursion, it computes the non-local energy

INPUTS

  rset<recursion_type>=contains all recursion parameters
  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  tsmear=temperature (Hartree)
  trotter=trotter parameter
  tol=tolerance criteria for stopping recursion_nl
  ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec)
  mpi_enreg=information about MPI paralelisation
  rset<recursion_type> contains all parameter of recursion
  typat(natom)=type of pseudo potential associated to any atom
  natom=number of atoms

OUTPUT

  enlx=non-local energy

SIDE EFFECTS

NOTES

SOURCE

1939 subroutine nlenergyrec(rset,enlx,exppot,ngfft,natom,typat,tsmear,trotter,tol)
1940 
1941 !Arguments ------------------------------------
1942 !Scalar
1943  integer , intent(in)  :: natom,trotter
1944  real(dp), intent(in)  :: tsmear,tol
1945  type(recursion_type),intent(in) :: rset
1946  real(dp), intent(out) :: enlx
1947 !Arrays
1948  integer , intent(in)  :: typat(natom),ngfft(18)
1949  real(dp), intent(in)  :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1)
1950 !Local variables-------------------------------
1951  integer :: iatom,jatom
1952  integer :: ii,ipsp,dim_trott
1953  integer :: ierr,me_count
1954  integer :: ilmn,jlmn,ilm,jlm,in,jn,il
1955  character(len=500) :: msg
1956  logical  :: tronc
1957  real(dp) :: rho_nl,normali,mult
1958  type(mpi_type):: mpi_loc
1959 !Arrays
1960  integer  :: gcart_loc(3,natom)
1961  integer  :: ngfftrec(3),trasl(3)
1962  real(dp) :: tsec(2)
1963  real(dp) :: un0(0:rset%nfftrec)
1964  real(dp),pointer :: projec(:,:,:,:,:)
1965  real(dp),allocatable ::  exppotloc(:)
1966  real(dp) :: proj_arr(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1)
1967 
1968 ! *************************************************************************
1969 
1970 
1971  call timab(612,1,tsec) !!--start time-counter: nlenergyrec
1972 
1973  if(rset%debug)then
1974    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : enter'
1975    call wrtout(std_out,msg,'PERS')
1976  end if
1977 
1978  write(msg,'(a)')' -- nlenergyrec -----------------------------------'
1979  call wrtout(std_out,msg,'COLL')
1980 
1981 !--Initialisation variables
1982  enlx = zero
1983  mult = two !--is twice for non-spinned systems
1984  ngfftrec = rset%ngfftrec(:3)
1985  gcart_loc = rset%inf%gcart
1986  mpi_loc = rset%mpi
1987  me_count = 0
1988  dim_trott = max(0,2*trotter-1)
1989  nullify(projec)
1990  ABI_MALLOC(projec,(0:rset%ngfftrec(1)-1,0:rset%ngfftrec(2)-1,0:rset%ngfftrec(3)-1,rset%nl%lmnmax,natom))
1991  projec = zero
1992 
1993  tronc = rset%tronc  !--True if troncation is used
1994  if(tronc)   then
1995    ABI_MALLOC(exppotloc,(0:rset%nfftrec-1))
1996  end if
1997 
1998 
1999 !--LOOP ON ATOMS to create projectors-vector
2000  atomloop1: do iatom = 1, natom
2001    ipsp = typat(iatom)
2002 !  --Aquisition,reshape,translation,rotation of the projectors vector
2003    do ilmn = 1,rset%nl%lmnmax
2004      in = rset%nl%indlmn(3,ilmn,ipsp)
2005 !    --Projectors vector in 3-composant vector
2006      projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1)))
2007 !    --Moving the projectors vector on the center of the grid
2008      do ii=1,3
2009        projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii)
2010      end do
2011    end do
2012 
2013  end do atomloop1
2014 
2015 
2016 !##################################################################
2017 !--LOOP ON ATOMS (MAIN LOOP)
2018  atomloop: do iatom = 1, natom
2019    ipsp = typat(iatom)
2020 
2021 !  --If troncation is present, the considered atom has to be in the
2022 !  center of the grid so atoms, potential and projectors have to be translated
2023    if(tronc) then
2024      trasl = -rset%inf%gcart(:,iatom)+ngfftrec/2
2025 !    --Translation of atoms
2026      do jatom=1,natom
2027        gcart_loc(:,jatom) = rset%inf%gcart(:,jatom)+trasl
2028        gcart_loc(:,jatom) = modulo(gcart_loc(:,jatom),ngfft(:3))
2029 !      --Translation of non-local projectors
2030        do ilmn = 1,rset%nl%lmnmax
2031          projec(:,:,:,ilmn,jatom) = reshape(rset%nl%projec(:,ilmn,typat(jatom)),shape=shape(projec(:,:,:,1,1)))
2032          do ii=1,3
2033            projec(:,:,:,ilmn,jatom) = eoshift(projec(:,:,:,ilmn,jatom),shift=ngfftrec(ii)/2-gcart_loc(ii,jatom),dim=ii)
2034          end do
2035        end do
2036      end do
2037 
2038 !    --Translation of the potential
2039      call reshape_pot(trasl,ngfft(1)*ngfft(2)*ngfft(3),rset%nfftrec,ngfft(:3),ngfftrec,exppot,exppotloc)
2040    end if
2041 
2042 !  --Loop on projectors
2043    projloop: do ilmn = 1,rset%nl%lmnmax
2044      me_count = iatom+ilmn*natom-2 !--counter of the number of iteration
2045 !    --Only the proc me compute
2046      if(mpi_loc%me==mod(me_count,mpi_loc%nproc)) then
2047        ilm = rset%nl%indlmn(4,ilmn,ipsp)
2048        proj_arr = zero
2049        do jlmn = 1,rset%nl%lmnmax
2050          jlm = rset%nl%indlmn(4,jlmn,ipsp)
2051          if(ilm==jlm) then
2052            in = rset%nl%indlmn(3,ilmn,ipsp)
2053            jn = rset%nl%indlmn(3,jlmn,ipsp)
2054            il = rset%nl%indlmn(1,ilmn,ipsp)+1
2055            proj_arr(:,:,:) = proj_arr(:,:,:) + rset%nl%eivec(jn,in,il,ipsp)*projec(:,:,:,jlmn,iatom)
2056 !          write(std_out,*)'l,m,lm,n,n',il-1,rset%nl%indlmn(2,ilmn,ipsp),ilm,in,jn
2057 !          write(std_out,*)'eigevectors',rset%nl%eivec(jn,in,il,ipsp)
2058 
2059          end if
2060        end do
2061 
2062        un0 = pack(proj_arr(:,:,:),mask=.true.)
2063        normali = sum(un0*un0)*rset%inf%ucvol
2064        un0 = (one/sqrt(normali))*un0
2065 
2066        if(tronc)then
2067          call recursion_nl(exppotloc,un0,rho_nl,rset,rset%ngfftrec,&
2068 &         tsmear,trotter,dim_trott,tol,typat,&
2069 &         natom,projec)
2070        else
2071          call recursion_nl(exppot,un0,rho_nl,rset,rset%ngfftrec,&
2072 &         tsmear,trotter,dim_trott,tol,typat,&
2073 &         natom,projec)
2074        end if
2075 
2076        enlx = enlx+mult*rho_nl*rset%nl%eival(in,il,ipsp)*normali
2077      end if
2078 
2079    end do projloop
2080  end do atomloop
2081 
2082 !--Sum the contribution to the non-local energy computed by any procs
2083  call xmpi_sum(enlx,mpi_loc%comm_bandfft,ierr)
2084 
2085  if(associated(projec))  then
2086    ABI_FREE(projec)
2087  end if
2088  if(tronc)  then
2089    ABI_FREE(exppotloc)
2090  end if
2091 
2092  if(rset%debug)then
2093    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' nlenergyrec : exit'
2094    call wrtout(std_out,msg,'PERS')
2095  end if
2096 
2097  call timab(612,2,tsec)  !--stop  time-counter: nlenergyrec
2098 
2099 end subroutine nlenergyrec

ABINIT/recursion [ Functions ]

[ Top ] [ Functions ]

NAME

 recursion

FUNCTION

 This routine computes the recursion coefficients and the corresponding
 continued fraction to get the density at a point from a fixed potential.

INPUTS

  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  coordx, coordy, coordz=coordonnees of the computed point
  nrec=order of recursion
  fermie=fermi energy (Hartree)
  tsmear=temperature (Hartree)
  dim_trott=dimension of the partial fraction decomposition
  rtrotter=trotter parameter (real)
  ZT_p=fourier transform of the Green krenel (computed only once in vtorhorec)
  typat(:)=type of psp associated to any atom
  tol=tolerance criteria for stopping recursion
  debug=debugging variable
  mpi_enreg=information about MPI paralelisation
  nfft=number of points in FFT grid
  ngfft=information about FFT
  metrec<type(metricrec_type)>=information concerning the infinitesimal metrics
  inf_ucvol=infinitesimal unit cell volume
  tim_fourdp=time counter for fourdp
  natom=number of atoms
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)
  tim= 0 if the time spent in the routine is not taken into account,1 otherwise. For example
  when measuring time for loading  balancing, we don't want to add the time spent in this to the
  total time calculation

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator
  an, bn2 : coefficient given by recursion.

SIDE EFFECTS

NOTES

  at this time :
       - exppot should be replaced by ?
       - coord should be replaced by ?
       - need a rectangular box (rmet diagonal matrix)

SOURCE

2526 subroutine recursion(exppot,coordx,coordy,coordz,an,bn2,rho_out, &
2527 &                    nrec,fermie,tsmear,rtrotter,dim_trott, &
2528 &                    ZT_p, tol,typat, &
2529 &                    nlrec,mpi_enreg,&
2530 &                    nfft,ngfft,metrec,&
2531 &                    tim_fourdp,natom,projec,tim)
2532 
2533 
2534  use m_linalg_interfaces
2535 
2536 !Arguments -------------------------------
2537 !scalars
2538  integer,intent(in) :: coordx,coordy,coordz,nfft,nrec,tim
2539  integer,intent(in) :: tim_fourdp,natom,dim_trott
2540  real(dp),intent(in) :: fermie,tol,tsmear,rtrotter
2541  real(dp), intent(out) :: rho_out
2542  type(MPI_type),intent(in) :: mpi_enreg
2543  type(nlpsprec_type),intent(in) :: nlrec
2544  type(metricrec_type),intent(in) :: metrec
2545 !arrays
2546  integer, intent(in) :: ngfft(18)
2547  integer, intent(in) :: typat(natom)
2548  real(dp), intent(in) :: ZT_p(1:2, 0:nfft-1)
2549  real(dp), intent(in) :: exppot(0:nfft-1)
2550  real(dp), intent(in) :: projec(0:,0:,0:,1:,1:)
2551  real(dp), intent(out) :: an(0:nrec),bn2(0:nrec)
2552 !Local variables-------------------------------
2553 !not used, debugging purpose only
2554 !for debugging purpose, detailled printing only once for density and ekin
2555 !scalars
2556  integer, parameter :: level = 7, minrec = 3
2557  integer  :: irec,isign,timab_id,ii
2558  real(dp) :: switchimu,switchu
2559  real(dp) :: bb,beta,mult,prod_b2,error,errold
2560  real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1,exp2
2561  complex(dpc) :: cinv2rtrotter,coeef_mu,facrec0
2562 ! character(len=500) :: msg
2563 !arrays
2564  real(dp) :: tsec(2)
2565  real(dp) :: inf_tr(3)
2566  real(dp) :: Zvtempo(1:2, 0:nfft-1)
2567  real(dp) :: unold(0:nfft-1),vn(0:nfft-1),un(0:nfft-1)
2568  complex(dpc) :: acc_rho(0:nrec)
2569  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
2570  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
2571 
2572 ! *************************************************************************
2573 
2574 !--If count time or not
2575  timab_id = 616; if(tim/=0) timab_id = 606;
2576 
2577  call timab(timab_id,1,tsec)
2578 
2579 !##############################################################
2580 !--Initialisation of metrics
2581  inf_ucvol = metrec%ucvol
2582  inf_tr = metrec%tr
2583  mult = two/inf_ucvol    !non-spined system
2584 
2585  beta = one/tsmear
2586 !--Variables for optimisation
2587  pi_on_rtrotter = pi/rtrotter
2588  twortrotter = two*rtrotter
2589  exp1 = exp((beta*fermie)/(rtrotter))
2590  exp2 = exp(beta*fermie/(twortrotter))
2591  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
2592  coeef_mu = cmplx(one/exp2,zero,dp)
2593 
2594 !--Initialisation of  an,bn,un....
2595  N = czero;  D = cone
2596  facrec0 = cone
2597  Nold = czero; Dold = czero
2598 
2599  an = zero; bn2 = zero;  bn2(0) = one
2600  bb = zero; vn  = zero;  unold  = zero
2601 !--u0 is a Dirac function
2602  un = zero
2603  un(coordx+ngfft(1)*(coordy+ngfft(2)*coordz)) = one/sqrt(inf_ucvol)
2604 
2605 !--Initialisation of accumulated density
2606  acc_rho = czero
2607 !--Initialisation of estimated error
2608  prod_b2 = twortrotter/exp1
2609  errold = zero
2610 
2611 !##############################################################
2612 !--Main loop
2613  maindo : do irec = 0, nrec
2614 
2615 !  --Get an and bn2 coef by the lanczos method
2616 
2617 !  --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un
2618 !  depending on if nl part has to be calculated or not.
2619    vn = exppot * un
2620 
2621 !  --First Non-local psp contribution: (Id+sum_atom int dr1(E(r,r1))vn(r1))
2622 !  --Computation of exp(-beta*V_NL/4*p)*vn
2623    if(nlrec%nlpsp) then
2624      call timab(timab_id,2,tsec)
2625      call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec)
2626      call timab(timab_id,1,tsec)
2627 
2628 !    --Computation of exp(-beta*V/8*p)*vn in nonlocal case
2629      vn = exppot * vn
2630    end if !--End if on nlrec%nlpsp
2631 
2632 !  --Convolution with the Green kernel
2633 !  --FFT of vn
2634    isign = -1
2635    call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,1,ngfft,tim_fourdp)
2636 
2637 !  --F(T)F(vn)
2638    do ii = 0,nfft-1
2639      switchu   = Zvtempo(1,ii)
2640      switchimu = Zvtempo(2,ii)
2641      Zvtempo(1,ii) = switchu*ZT_p(1,ii) - switchimu*ZT_p(2,ii)
2642      Zvtempo(2,ii) = switchu*ZT_p(2,ii) + switchimu*ZT_p(1,ii)
2643    end do
2644 
2645 !  --F^-1(F(T)F(vn))
2646    isign = 1
2647    call fourdp(1,Zvtempo,vn,isign,mpi_enreg,nfft,1,ngfft,tim_fourdp)
2648 
2649 !  --Computation of exp(-beta*V/8*p)*un or exp(-beta*V/4*p)*un
2650 !  depending on if nl part has to be calculated or not.
2651 
2652    vn = inf_ucvol * exppot * vn
2653 
2654 !  --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn
2655    if(nlrec%nlpsp) then
2656      call timab(timab_id,2,tsec)
2657      call vn_nl_rec(vn,natom,typat,ngfft(:3),inf_ucvol,nlrec,projec)
2658      call timab(timab_id,1,tsec)
2659 
2660 !    --Computation of exp(-beta*V/8*p)*vn in nonlocal case
2661      vn = exppot * vn
2662    end if !--End if on nlrec%nlpsp
2663 
2664 !  --Multiplication of a and b2 coef by exp(beta*fermie/(two*rtrotter)) must be done in the continued fraction computation
2665 !  --Computation of a and b2
2666    an(irec) = inf_ucvol*ddot(nfft,vn,1,un,1)
2667 
2668 !  --an must be positive real
2669 !  --We must compute bn2 and prepare for the next iteration
2670    if(irec<nrec)then
2671      do ii = 0,nfft-1
2672        switchu = un(ii)
2673        un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii)
2674        unold(ii) = switchu
2675        bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii)
2676      end do
2677      bb = sqrt(bn2(irec+1))
2678      un = (one/bb)*un
2679    end if
2680 
2681 !  ######################################################
2682 !  --Density computation
2683 !  density computation is done inside the main looping, juste after the calculus of a and b2, in order to make
2684 !  it possible to stop the recursion at the needed accuracy, without doing more recursion loop than needed -
2685 !  further developpement
2686 
2687 !  !--using the property that: sum_i(bi*c)^2|(z-ai*c)=1/c*sum_i(bi)^2|(z/c-ai)
2688 !  !and for c =exp(-beta*fermie/(two*rtrotter)
2689 
2690 
2691    call trottersum(dim_trott,error,prod_b2,pi_on_rtrotter,&
2692 &   facrec0,coeef_mu,exp1,&
2693 &   an(irec),bn2(irec),&
2694 &   N,D,Nold,Dold)
2695 
2696 
2697    if(irec/=nrec .and. irec>=minrec)then
2698      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit
2699    end if
2700    errold = mult*error
2701  end do maindo
2702 !--Accumulated density
2703  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
2704 
2705 
2706  call timab(timab_id,2,tsec)
2707 
2708  end subroutine recursion

ABINIT/recursion_nl [ Functions ]

[ Top ] [ Functions ]

NAME

 recursion_nl

FUNCTION

 Given a $|un>$ vector on the real-space grid this routine calculates
 the density in  $|un>$ by recursion method.

INPUTS

  exppot=exponential of -1/tsmear*vtrial (computed only once in vtorhorec)
  trotter=trotter parameter
  dim_trott=dimension of the partial fraction decomposition
  tsmear=temperature (Hartree)
  tol=tolerance criteria for stopping recursion_nl
  ngfft=information about FFT(dtset%ngfft a priori different from ngfftrec)
  rset<recursion_type> contains all parameter of recursion
  typat(natom)=type of pseudo potential associated to any atom
  natom=number of atoms
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)

OUTPUT

  rho_out=result of the continued fraction multiplied by a multiplicator

SIDE EFFECTS

  un(:,:,:)=initial vector on the grid. it is changed in output

NOTES

  at this time :
       - need a rectangular box (rmet diagonal matrix)

SOURCE

2745 subroutine recursion_nl(exppot,un,rho_out,rset,ngfft, &
2746   &                     tsmear,trotter,dim_trott,tol,typat,&
2747   &                     natom,projec)
2748 
2749 
2750  use m_linalg_interfaces
2751 
2752 !Arguments -------------------------------
2753 !scalars
2754  integer,intent(in) :: trotter,natom,dim_trott
2755  real(dp),intent(in) :: tol,tsmear
2756  type(recursion_type),intent(in) :: rset
2757  real(dp), intent(out) :: rho_out
2758 !arrays
2759  integer,intent(in) ::  typat(natom),ngfft(18)
2760  real(dp),intent(in) :: exppot(0:ngfft(1)*ngfft(2)*ngfft(3)-1)
2761  real(dp),intent(inout) :: un(0:rset%nfftrec-1)
2762  real(dp),pointer :: projec(:,:,:,:,:)
2763 !Local variables-------------------------------
2764 !scalars
2765  integer, parameter ::  minrec = 3
2766  integer  :: irec,isign,ii
2767  real(dp) :: bb,beta,mult,prod_b2,rtrotter
2768  real(dp) :: inf_ucvol,pi_on_rtrotter,twortrotter,exp1
2769  real(dp) :: exp2,error,errold
2770  real(dp) :: switchu,switchimu
2771  complex(dpc) :: facrec0,cinv2rtrotter,coeef_mu
2772  character(len=500) :: msg
2773  type(mpi_type),pointer:: mpi_loc
2774 !arrays
2775  real(dp):: tsec(2)
2776  real(dp):: inf_tr(3)
2777  real(dp):: an(0:rset%min_nrec),bn2(0:rset%min_nrec)
2778  real(dp):: vn(0:rset%nfftrec-1)
2779  real(dp):: unold(0:rset%nfftrec-1)
2780  real(dp):: Zvtempo(1:2,0:rset%nfftrec-1)
2781  complex(dpc) :: acc_rho(0:rset%min_nrec)
2782  complex(dpc) :: D(0:dim_trott),Dold(0:dim_trott)
2783  complex(dpc) :: N(0:dim_trott),Nold(0:dim_trott)
2784 
2785 ! *************************************************************************
2786 
2787  call timab(608,1,tsec) !--start time-counter: recursion_nl
2788  if(rset%debug)then
2789    msg=' '
2790    call wrtout(std_out,msg,'COLL')
2791  end if
2792 
2793 !##############################################################
2794  beta = one/tsmear
2795 
2796 !--Rewriting the trotter parameter
2797  rtrotter  = max(half,real(trotter,dp))
2798 
2799 !--Initialisation of mpi
2800  mpi_loc => rset%mpi
2801 
2802 !--Initialisation of metrics
2803  inf_ucvol = rset%inf%ucvol
2804  inf_tr = rset%inf%tr
2805  mult = one   !--In the case of the calculus of the NL-energy
2806 
2807 !--Initialisation of  an,bn,un....
2808  N = czero;  D = cone
2809  facrec0 = cone
2810  Nold = czero; Dold = czero
2811 
2812  an = zero; bn2 = zero;  bn2(0) = one
2813  bb = zero; vn  = zero;  unold  = zero
2814 
2815 !--Variables for optimisation
2816  pi_on_rtrotter = pi/rtrotter
2817  twortrotter = two*rtrotter
2818  exp1 = exp((beta*rset%efermi)/(rtrotter))
2819  exp2 = exp(beta*rset%efermi/(twortrotter))
2820  cinv2rtrotter = cmplx(one/twortrotter,zero,dp)
2821  coeef_mu = cmplx(one/exp2,zero,dp)
2822 
2823 !--Initialisation of accumulated density
2824  acc_rho = czero
2825 !--Initialisation of estimated error
2826  prod_b2 = twortrotter/exp1
2827  errold = zero
2828 
2829 !##############################################################
2830 !--Main loop
2831  maindo : do irec = 0, rset%min_nrec
2832 !  --Get an and bn2 coef by the lanczos method
2833 
2834 !  --Computation of exp(-beta*V/8*p)*un
2835    vn = exppot * un
2836 
2837 !  --First Non-local psp contribution: (Id+sum_atom E(r,r1))vn
2838    call timab(608,2,tsec)
2839    call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec)
2840    call timab(608,1,tsec)
2841 
2842 !  --Computation of exp(-beta*V/8*p)*un
2843    vn = exppot * vn
2844 
2845 !  --Convolution with the Green kernel
2846 !  --FFT of vn
2847    isign = -1
2848    call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,1,rset%ngfftrec,6)
2849 
2850 !  --F(T)F(vn)
2851    do ii = 0,rset%nfftrec-1
2852      switchu   = Zvtempo(1,ii)
2853      switchimu = Zvtempo(2,ii)
2854      Zvtempo(1,ii) = switchu*rset%ZT_p(1,ii) - switchimu*rset%ZT_p(2,ii)
2855      Zvtempo(2,ii) = switchu*rset%ZT_p(2,ii) + switchimu*rset%ZT_p(1,ii)
2856    end do
2857 
2858 !  --F^-1(F(T)F(vn))
2859    isign = 1
2860    call fourdp(1,Zvtempo,vn,isign,mpi_loc,rset%nfftrec,1,rset%ngfftrec,6)
2861 
2862 !  --Computation of exp(-beta*V/2*p)*vn
2863    vn = inf_ucvol * exppot * vn
2864 
2865 !  --Second Non-local psp contribution: (Id+sum_atom E(r,r1))vn
2866    call timab(608,2,tsec)
2867    call vn_nl_rec(vn,natom,typat,rset%ngfftrec(:3),inf_ucvol,rset%nl,projec)
2868    call timab(608,1,tsec)
2869 
2870 !  --Computation of exp(-beta*V/8*p)*vn
2871    vn = exppot * vn
2872 
2873 
2874 !  --Multiplication of a and b2 coef by exp(beta*fermie/(2.d0*rtrotter)) must be done in the continued fraction computation
2875 !  --Computation of a and b2
2876    an(irec) = inf_ucvol*ddot(rset%nfftrec,vn,1,un,1)      !--an must be positive real
2877 
2878 !  --We must compute bn2 and prepare for the next iteration
2879    if(irec<rset%min_nrec)then
2880      do ii = 0,rset%nfftrec-1
2881        switchu = un(ii)
2882        un(ii) = vn(ii)-an(irec)*un(ii)-bb*unold(ii)
2883        unold(ii) = switchu
2884        bn2(irec+1) = bn2(irec+1)+inf_ucvol*un(ii)*un(ii)
2885      end do
2886      bb = sqrt(bn2(irec+1))
2887      un = (one/bb)*un
2888    end if
2889 
2890 !  ######################################################
2891 !  --Density computation
2892 !  in order to make it possible to stop the recursion_nl at the
2893 !  needed accuracy, without doing more recursion_nl loop than needed further developpement
2894 
2895    call trottersum(dim_trott,error,&
2896 &   prod_b2,pi_on_rtrotter,&
2897 &   facrec0,coeef_mu,exp1,&
2898 &   an(irec),bn2(irec),&
2899 &   N,D,Nold,Dold)
2900 
2901 
2902    if(irec/=rset%min_nrec .and. irec>=minrec)then
2903      if((bn2(irec+1)<tol14).or.(mult*error<tol.and.errold<tol)) exit
2904    end if
2905    errold = mult*error
2906  end do maindo
2907 !--Accumulated density
2908  rho_out = mult*real(cone-sum(N/D)*cinv2rtrotter,dp)
2909 
2910  call timab(608,2,tsec) !--stop time-counter: recursion_nl
2911 
2912 end subroutine recursion_nl

ABINIT/vn_nl_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 vn_nl_rec

FUNCTION

 this routine computes the contribution to the vector vn, during
 recursion, due to the non-local psp.

INPUTS

  vn(:,:,:)=the vector on the real-space grid.
  inf_ucvol=volume of infinitesimal cell
  natom=number of atoms
  typat(natom)=the type of psps associated to the atoms
  ngfftrec(3)=first 3 components of ngfftrec (truncated box, if different from ngfft) for the real-space grid
  nlrec<type(nlpsprec_type)> in recursion_type containing information concerning psp
  projec(ngfftrec(1),ngfftrec(2),ngfftrec(3),lmnmax,natom) is the  vector, on the ngfftrec grid containing
  the non-lacal projector $Y_{lm}(r-R_A)f_{lk}(r-R_A)

OUTPUT

 vn_nl(:,:,:)=the non_local contribution to vn

NOTES

SOURCE

2941 subroutine vn_nl_rec(vn,natom,typat,ngfftrec,inf_ucvol,nlrec,projec)
2942 
2943 
2944  use m_linalg_interfaces
2945 
2946 !Arguments -------------------------------
2947 !scalars
2948  integer,intent(in) :: natom
2949  real(dp),intent(in) :: inf_ucvol
2950  type(nlpsprec_type),intent(in) :: nlrec
2951 !arrays
2952  integer,intent(in) :: ngfftrec(3),typat(natom)
2953  real(dp),intent(in) :: projec(0:,0:,0:,1:,1:)
2954  real(dp),intent(inout):: vn(0:ngfftrec(1)*ngfftrec(2)*ngfftrec(3)-1)
2955 !Local variables-------------------------------
2956 !scalars
2957  integer :: iatom,nfftrec
2958  integer :: jlmn,il,in,jn
2959  integer :: ipsp,ilmn
2960  integer :: npsp,lmnmax
2961  real(dp):: vn_nl_loc
2962 !arrays
2963  real(dp):: vn_nl(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1)
2964  real(dp):: vtempo(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1)
2965  real(dp):: tsec(2)
2966 ! *************************************************************************
2967 
2968  call timab(615,1,tsec)
2969 !--Initialisation
2970 
2971  vn_nl = zero
2972  npsp = nlrec%npsp
2973  lmnmax = nlrec%lmnmax
2974  nfftrec = product(ngfftrec)
2975  vtempo(:,:,:) = reshape(source=vn,shape=ngfftrec(:3))
2976 
2977 !--Sum_iatom \int dr1 E(r-r_a,r1-r_a)vn(r1) *infucvol
2978  do iatom=1,natom !--Loop on atoms
2979    ipsp = typat(natom)
2980 
2981 !  --If psp(typat(iatom)) is local then cycle
2982    if(all(nlrec%pspinfo(:,ipsp)==0))  cycle
2983 
2984 
2985 !  write(std_out,*)'lmnmax',nlrec%lmnmax,lmnmax
2986 
2987    do ilmn = 1, lmnmax
2988      do jlmn = 1,lmnmax
2989        if(nlrec%indlmn(4,ilmn,ipsp)==nlrec%indlmn(4,jlmn,ipsp)) then
2990          il = 1+nlrec%indlmn(1,jlmn,ipsp)
2991          in = nlrec%indlmn(3,ilmn,ipsp)
2992          jn = nlrec%indlmn(3,jlmn,ipsp)
2993          vn_nl_loc = ddot(nfftrec,projec(:,:,:,jlmn,iatom),1,vtempo,1)
2994          vn_nl = vn_nl+projec(:,:,:,ilmn,iatom)*vn_nl_loc*nlrec%mat_exp_psp_nl(in,jn,il,ipsp)
2995        end if
2996      end do
2997    end do
2998  end do !--End loop on atoms
2999  vtempo = vtempo + vn_nl*inf_ucvol
3000 
3001  vn = reshape(source=vtempo,shape=(/nfftrec/))
3002 
3003  call timab(615,2,tsec)
3004 
3005 end subroutine vn_nl_rec

ABINIT/vtorhorec [ Functions ]

[ Top ] [ Functions ]

NAME

 vtorhorec

FUNCTION

 This routine computes the new density from a fixed potential (vtrial)
 using a recursion method

INPUTS

  deltastep= if 0 the iteration step is equal to dtset%nstep
  initialized= if 0 the initialization of the gstate run is not yet finished
  operator (ground-state symmetries)
  dtset <type(dataset_type)>=all input variables for this dataset
  irrzon(nfft**(1-1/nsym),2,(nspden/nsppol)-3*(nspden/4))=irreducible zone data
  nfftf=(effective) number of FFT grid points (for this processor)
  nspden=number of spin-density components
  nsppol=1 for unpolarized, 2 for spin-polarized
  nsym=number of symmetry elements in space group
  phnons(2,nfft**(1-1/nsym),(nspden/nsppol)-3*(nspden/4))=nonsymmorphic translation phases
  vtrial(nfft,nspden)=INPUT Vtrial(r).
  rset <type(recursion_type)> all variables for recursion
  rprimd(3,3)=dimensional primitive translations in real space (bohr)
  gprimd(3,3)=dimensional primitive translations in reciprocal space

OUTPUT

  ek=kinetic energy part of total energy.
  enlx=nonlocal psp + potential Fock ACE part of total energy.
  entropy=entropy due to the occupation number smearing (if metal)
  e_eigenvalues=Sum of the eigenvalues - Band energy (Hartree)
  fermie=fermi energy (Hartree)
  grnl(3*natom)=stores grads of nonlocal energy wrt length scales
   (3x3 tensor) and grads wrt atomic coordinates (3*natom)

SIDE EFFECTS

  rhog(2,nfft)=array for Fourier transform of electron density
  rhor(nfft,nspden)=array for electron density in electrons/bohr**3.
  rset%efermi= fermi energy

NOTES

  at this time :
       - grnl in not implemented
       - symetrie usage not implemented (irrzon not used and nsym should be 1)
       - spin-polarized not implemented (nsppol must be 1, nspden ?)
       - phnons used only in symrhg
       - need a rectangular box (ngfft(1)=ngfft(2)=ngfft(3))

SOURCE

108 subroutine vtorhorec(dtset,&
109 &  ek,enlx,entropy,e_eigenvalues,fermie,&
110 &  grnl,initialized,irrzon,nfftf,phnons,&
111 &  rhog, rhor, vtrial,rset,deltastep,rprimd,gprimd)
112 
113 !Arguments -------------------------------
114 !scalars
115  integer,intent(in) :: initialized
116  integer,intent(in) :: nfftf,deltastep
117  real(dp),intent(out) :: e_eigenvalues,ek,enlx,entropy,fermie
118  type(dataset_type),intent(in) :: dtset
119  type(recursion_type),intent(inout) :: rset
120 !arrays
121  integer, intent(in) :: irrzon(:,:,:)
122  real(dp),intent(in) :: rprimd(3,3),gprimd(3,3)
123  real(dp),intent(in) :: phnons(:,:,:)
124  real(dp),intent(in) :: vtrial(:,:)
125  real(dp),intent(inout) :: rhog(:,:)
126  real(dp),intent(out) :: grnl(:)  !vz_i
127  real(dp),intent(inout) :: rhor(:,:) !vz_i
128 
129 !Local variables-------------------------------
130 !scalars
131  integer :: nfftrec, dim_entro,ii1,jj1,kk1
132  integer :: ierr,ii,ilmn,ipsp,dim_trott
133  integer :: ipoint,ipointlocal,jj,kk,irec
134  integer :: n1,n2,n3,n4,n_pt_integ_entropy
135  integer :: nrec,iatom,min_pt,max_pt,swt_tm
136  integer ::  get_K_S_G
137  integer :: tim_fourdp,trotter
138  real(dp),parameter :: perc_vmin=one
139  real(dp) :: beta,drho,drhomax
140  real(dp) :: entropy1,entropy2,entropy3,entropy4
141  real(dp) :: entropylocal,entropylocal1,entropylocal2
142  real(dp) :: entropylocal3,entropylocal4,gran_pot,gran_pot1,gran_pot2
143  real(dp) :: gran_pot3,gran_pot4,gran_pot_local,gran_pot_local1
144  real(dp) :: gran_pot_local2,gran_pot_local3,gran_pot_local4
145  real(dp) :: inf_ucvol,intrhov,factor
146  real(dp) :: nelect,potmin,rtrotter,toldrho,tolrec,tsmear
147  real(dp) :: xmax,nlpotmin,ratio1,ratio2,ratio4,ratio8
148  type(recparall_type) :: recpar
149  character(len=500) :: msg
150  !arrays
151  integer :: ngfftrec(18),trasl(3)
152  integer,pointer :: gcart_loc(:,:)
153  integer,allocatable :: bufsize(:), bufdispl(:)
154  real(dp) :: tsec(2),tsec2(2)
155  real(dp) :: exppot(0:dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3)-1)
156  real(dp),target :: rholocal(1:rset%par%ntranche)
157  real(dp),target :: alocal(0:rset%min_nrec,1:rset%par%ntranche)
158  real(dp),target :: b2local(0:rset%min_nrec,1:rset%par%ntranche)
159  real(dp),pointer :: rho_wrk(:)
160  real(dp),pointer :: a_wrk(:,:),b2_wrk(:,:)
161  real(dp),allocatable :: rholocal_f(:), rholoc_2(:)
162  real(dp),allocatable :: rhogf(:,:),rhogc(:,:),aloc_copy(:,:),b2loc_copy(:,:)
163  real(dp),allocatable :: gran_pot_v_f(:,:),gran_pot_v_c(:,:),gran_pot_v_2(:,:)
164  real(dp),allocatable :: entropy_v_f(:,:),entropy_v_c(:,:),entropy_v_2(:,:)
165  real(dp),allocatable :: ablocal_1(:,:,:),ablocal_2(:,:,:),ablocal_f(:,:,:)
166  real(dp),allocatable :: exppotloc(:)
167  real(dp),allocatable :: projec(:,:,:,:,:)
168 #if defined HAVE_GPU_CUDA
169  integer :: max_rec
170  integer,allocatable :: vcount_0(:), displs_0(:)
171  integer,allocatable :: vcount_1(:), displs_1(:)
172  real(dp),allocatable,target :: rho_hyb(:)
173  real(dp),allocatable,target :: a_hyb(:,:),b2_hyb(:,:)
174  real(cudap),allocatable :: an_dev(:,:)
175  real(cudap),allocatable :: bn2_dev(:,:)
176 #endif
177 
178 ! *********************************************************************
179  if(rset%debug)then
180    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' vtorhorec : enter '
181    call wrtout(std_out,msg,'PERS')
182  end if
183 
184  call timab(21,1,tsec)
185  call timab(600,1,tsec2)
186 !##################################################################################################
187 !!--Initalization in the FIRST time in VTORHOREC is made in SCFCV by FIRST_REC routine
188 !!--Parameters for the recursion method AND  Initialisation
189 
190  trotter = dtset%recptrott  !--trotter parameter
191  nelect  = dtset%nelect     !--number of electrons
192 
193  toldrho = dtset%rectolden  !--tollerance for density
194  tolrec  = toldrho*1.d-2    !--tollerance for local density
195 
196  tsmear  = dtset%tsmear     !--temperature
197  beta    = one/tsmear       !--inverse of temperature
198 
199  factor = real(dtset%recgratio**3*100*rset%mpi%nproc,dp)/real(nfftf,dp)
200 
201 !--Assignation of the rset variable:
202  nrec       = rset%min_nrec
203  nfftrec    = rset%nfftrec
204  ngfftrec   = rset%ngfftrec
205  inf_ucvol  = rset%inf%ucvol
206 
207  min_pt = rset%par%displs(rset%mpi%me)+1
208  max_pt = min_pt+rset%par%vcount(rset%mpi%me)-1
209 
210 !--In the last self-constistent loop or if density is converged the
211 !thermodynamics quantities are calculated
212  get_K_S_G = 0; if(deltastep==0 .or. rset%quitrec/=0) get_K_S_G = 1;
213 
214 !--Rewriting the trotter parameter
215  rtrotter  = max(half,real(trotter,dp))
216  dim_trott = max(0,2*trotter-1)
217 
218 !--Variables Optimisation
219  ratio1 = beta/rtrotter
220  ratio2 = ratio1/two
221  ratio4 = ratio1/four
222  ratio8 = ratio1/eight
223 
224 !--Integration points for entropy
225  n_pt_integ_entropy = max(25,dtset%recnpath)
226 
227 !-- energies non-local: at day not implemented
228  enlx = zero
229  grnl = zero
230 !jmb
231  ek = zero
232 
233 !--only a copy of ngfft(1:3) and nfft  (to purge!!)
234  n1 = dtset%ngfft(1) ; n2 = dtset%ngfft(2) ; n3 = dtset%ngfft(3)
235  n4 = n3
236 
237 !--time switch to measure gpu-cpu syncrhonisation
238  swt_tm = 0 ; !no gpu initally
239 
240  exppot = zero
241  nullify(gcart_loc)
242 
243  if(dtset%rectesteg==1)then
244 !  --Free electron gas case
245    exppot = one
246  else
247    if(.not.(rset%nl%nlpsp)) then
248 !    --Local case
249 !    --COMPUTATION OF exp( -beta*pot/(4*rtrotter))
250      ABI_MALLOC(gcart_loc,(0,0))
251      gcart_loc = 0
252      exppot = exp( -(ratio4*vtrial(:,1)))
253    else
254 !    --Non-Local case
255 !    --COMPUTATION OF exp(-beta*pot/(8*rtrotter))
256      exppot = exp( -(ratio8*vtrial(:,1)))
257 
258      ABI_MALLOC(gcart_loc,(3,dtset%natom))
259      gcart_loc = rset%inf%gcart
260      ABI_MALLOC(projec,(0:ngfftrec(1)-1,0:ngfftrec(2)-1,0:ngfftrec(3)-1,rset%nl%lmnmax,dtset%natom))
261      projec = zero
262 
263      if(.not.(rset%tronc)) then
264        do iatom =1, dtset%natom
265          ipsp = dtset%typat(iatom)
266          do ilmn = 1,rset%nl%lmnmax
267            projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,ipsp),shape=shape(projec(:,:,:,1,1)))
268            do ii=1,3
269              projec(:,:,:,ilmn,iatom) = cshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii)/2-gcart_loc(ii,iatom),dim=ii)
270            end do
271          end do
272        end do
273      end if
274    end if
275  end if
276 
277 !###################################################################################
278 !MAIN LOOP
279 
280  rholocal = zero; alocal = zero; b2local = zero
281  ipointlocal = 1
282 
283 !--Allocation: if hybrid calculation is done then I have to use
284 !balanced work on devices.
285  nullify(rho_wrk,a_wrk,b2_wrk)
286 
287  if(rset%load == 1)then
288 #ifdef HAVE_GPU_CUDA
289    ABI_MALLOC(rho_hyb,(1:rset%GPU%par%npt))
290    ABI_MALLOC(a_hyb,(0:nrec,1:rset%GPU%par%npt))
291    ABI_MALLOC(b2_hyb,(0:nrec,1:rset%GPU%par%npt))
292    rho_hyb = zero; a_hyb = zero; b2_hyb = zero
293    rho_wrk => rho_hyb
294    a_wrk   => a_hyb
295    b2_wrk  => b2_hyb
296    recpar  = rset%GPU%par
297 #endif
298  else
299    rho_wrk => rholocal
300    a_wrk   => alocal
301    b2_wrk  => b2local
302    recpar  = rset%par
303  end if
304 
305 !#if defined HAVE_GPU_CUDA
306 !if(rset%debug)then
307 !write (std_out,*) 'rset%recGPU%nptrec ',rset%recGPU%nptrec
308 !write (std_out,*) 'rset%gpudevice ',rset%gpudevice
309 !write (std_out,*) 'rset%ngfftrec ',rset%ngfftrec(1:3)
310 !write (std_out,*) 'rset%min_nrec ',rset%min_nrec
311 !write (std_out,*) 'rset%par%ntranche ',rset%par%ntranche
312 !write (std_out,*) 'rset%par%min_pt ',rset%par%min_pt,min_pt
313 !write (std_out,*) 'rset%par%max_pt ',rset%par%max_pt,max_pt
314 !write (std_out,*) 'pt0 ',rset%par%pt0%x,rset%par%pt0%y,rset%par%pt0%z
315 !write (std_out,*) 'pt1 ',rset%par%pt1%x,rset%par%pt1%y,rset%par%pt1%z
316 !end if
317 !#endif
318 
319  if(rset%gpudevice>=0) then
320 #if defined HAVE_GPU_CUDA
321    swt_tm = 1;
322    call timab(607,1,tsec2)
323    ABI_MALLOC(an_dev,(0:recpar%npt-1,0:nrec))
324    ABI_MALLOC(bn2_dev,(0:recpar%npt-1,0:nrec))
325    an_dev = zero
326    bn2_dev = zero; bn2_dev(:,0) = one
327 
328    call cudarec( rset, exppot,an_dev,bn2_dev,&
329 &   beta,trotter,tolrec,dtset%recgratio,dtset%ngfft(:3),max_rec)
330 
331    max_rec = min(max_rec,nrec)
332    a_wrk(0:max_rec,1:recpar%npt)  = transpose(an_dev(0:,0:max_rec))
333    b2_wrk(0:max_rec,1:recpar%npt) = transpose(bn2_dev(0:,0:max_rec))
334 
335    ABI_FREE(an_dev)
336    ABI_FREE(bn2_dev)
337    call timab(607,2,tsec2)
338 
339    ipointlocal = recpar%npt+1
340 
341 !  !DEBUG CUDA
342 !  if(rset%debug)then
343 !  if( rset%mpi%me==0)then
344 !  do ipoint = 1,rset%par%npt,1
345 !  kk=ipoint/(product(dtset%ngfft(:2)))
346 !  jj=ipoint/dtset%ngfft(1)-kk*dtset%ngfft(2)
347 !  ii=ipoint-jj*dtset%ngfft(1)-kk*dtset%ngfft(2)*dtset%ngfft(1)
348 !  write(msg,'(a,4i8,2(a,a9,5f12.6))')&
349 !  & 'pt',ipoint,ii,jj,kk,&
350 !  & ch10,'an-gpu   ',real(alocal(:4,ipoint)),&
351 !  & ch10,'b2n-gpu  ',real(b2local(:4,ipoint))
352 !  call wrtout(std_out,msg,'COLL')
353 !  end do
354 !  endif
355 !  endif
356 !  !ENDDEBUG CUDA
357 #endif
358 
359  else
360    if (.not.(rset%tronc)) then
361      graou1 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
362        do jj = 0,n2-1,dtset%recgratio
363          do ii = 0,n1-1,dtset%recgratio
364            ipoint = ii+(jj+kk*n2)*n1
365 !          --Local position of atoms
366            if (ipoint<recpar%min_pt) cycle
367 !          --Computation done by that proc
368            tim_fourdp=6
369            call recursion(exppot,ii,jj,kk, &
370 &           a_wrk(:,ipointlocal), &
371 &           b2_wrk(:,ipointlocal), &
372 &           rho_wrk(ipointlocal),&
373 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
374 &           rset%ZT_p, &
375 &           tolrec,dtset%typat,rset%nl,&
376 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
377 &           tim_fourdp,dtset%natom,projec,1)
378            ipointlocal = ipointlocal + 1
379 !          write(std_out,*)'ipointlocal',ipoint,ipointlocal,ii,jj,kk
380            call prtwork(dtset%recgratio**3*ipointlocal*100*rset%mpi%nproc/nfftrec)
381            if(ipoint==recpar%max_pt) exit graou1
382          end do
383        end do
384      end do graou1
385    else !--We use a troncation
386      ABI_MALLOC(exppotloc,(0:nfftrec-1))
387      graou2 : do kk = recpar%pt0%z,recpar%pt1%z,dtset%recgratio
388        do jj = 0,n2-1,dtset%recgratio
389          do ii = 0,n1-1,dtset%recgratio
390            ipoint = ii+(jj+kk*n2)*n1
391            if (ipoint<recpar%min_pt) cycle
392 !          computation done by that proc
393            exppotloc = zero
394 !          --Traslation to move position on the ngfftrec grid center
395            trasl = -(/ii,jj,kk/)+ngfftrec(:3)/2
396            if(rset%nl%nlpsp) then
397              do iatom=1,dtset%natom
398 !              --local position of atoms
399                gcart_loc(:,iatom) = rset%inf%gcart(:,iatom)+trasl
400                gcart_loc(:,iatom) = modulo(gcart_loc(:,iatom),(/n1,n2,n3/))
401 !              --Traslation of non-local projectors
402                do ilmn = 1,rset%nl%lmnmax
403                  projec(:,:,:,ilmn,iatom) = reshape(rset%nl%projec(:,ilmn,dtset%typat(iatom)),shape=shape(projec(:,:,:,1,1)))
404                  do ii1=1,3
405                    projec(:,:,:,ilmn,iatom) = eoshift(projec(:,:,:,ilmn,iatom),shift=ngfftrec(ii1)/2-gcart_loc(ii1,iatom),dim=ii1)
406                  end do
407                end do
408              end do
409            end if
410 
411            call reshape_pot(trasl,nfftf,nfftrec,&
412 &           dtset%ngfft(:3),ngfftrec(:3),&
413 &           exppot,exppotloc)
414 
415            tim_fourdp=6
416            call recursion(exppotloc,ngfftrec(1)/2,ngfftrec(2)/2,ngfftrec(3)/2, &
417 &           a_wrk(:,ipointlocal), &
418 &           b2_wrk(:,ipointlocal), &
419 &           rho_wrk(ipointlocal),&
420 &           nrec, rset%efermi,tsmear,rtrotter,dim_trott, &
421 &           rset%ZT_p, &
422 &           tolrec,dtset%typat,rset%nl,&
423 &           rset%mpi,nfftrec,ngfftrec,rset%inf,&
424 &           tim_fourdp,dtset%natom,projec,1)
425            ipointlocal = ipointlocal + 1
426            call prtwork(factor*real(ipointlocal,dp))
427            if(ipoint==recpar%max_pt) exit graou2
428          end do
429        end do
430      end do graou2
431      ABI_FREE(exppotloc)
432    end if
433 
434  end if
435  write(msg,'( a12,i12)')'ipointlocal',ipointlocal
436  call wrtout(std_out,msg,'PERS')
437  call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
438  call xmpi_barrier(rset%mpi%comm_bandfft)
439  call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
440 
441 !!#############################################################
442 !!--ASSIGNATION PARAMETERS TO MENAGE PARALLELISME OF TRANSGRID
443  if((rset%load==1 .or. dtset%recgratio>1))then
444 !  --Bufsize contains the values of the number of points calculated
445 !  by any proc on the coarse grid; bufsize_f on the fine grid
446    call timab(604,1,tsec2) !--start time-counter: transgrid
447    ABI_MALLOC(bufsize,(0:rset%mpi%nproc-1))
448    ABI_MALLOC(bufdispl,(0:rset%mpi%nproc-1))
449    bufsize = 0;
450    bufsize(rset%mpi%me) = rset%par%npt
451    call xmpi_sum(bufsize,rset%mpi%comm_bandfft,ierr)
452 
453    bufdispl(0) = 0;
454    if(rset%mpi%nproc>1) bufdispl(1:) = (/(sum(bufsize(:ii)),ii=0,rset%mpi%nproc-1)/)
455    call timab(604,2,tsec2) !--stop time-counter: transgrid
456  end if
457 !!####################################################################
458 !!--REDISTRIBUTION OF LOAD ON PROCS AFTER RECURSION IF CUDA IS USED
459 #ifdef HAVE_GPU_CUDA
460  if(rset%load==1)then
461    call timab(604,1,tsec2) !--start time-counter: transgrid
462    call xredistribute(rho_hyb,rset%GPU%par%vcount,rset%GPU%par%displs,&
463 &   rholocal,bufsize,bufdispl,rset%mpi%me,&
464 &   rset%mpi%nproc,&
465 &   rset%mpi%comm_bandfft,ierr)
466 
467 
468    ABI_MALLOC(vcount_0,(0:rset%mpi%nproc-1))
469    ABI_MALLOC(displs_0,(0:rset%mpi%nproc-1))
470    ABI_MALLOC(vcount_1,(0:rset%mpi%nproc-1))
471    ABI_MALLOC(displs_1,(0:rset%mpi%nproc-1))
472 
473    vcount_0 = 0
474    vcount_0(rset%mpi%me) = rset%par%npt*(nrec+1)
475    call xmpi_sum(vcount_0,rset%mpi%comm_bandfft,ierr)
476    displs_0 = 0
477    if(rset%mpi%nproc>1) displs_0(1:) = (/(sum(vcount_0(:ii)),ii=0,rset%mpi%nproc-1)/)
478 
479 
480    vcount_1 = 0
481    vcount_1(rset%mpi%me) = rset%GPU%par%npt*(nrec+1)
482    call xmpi_sum(vcount_1,rset%mpi%comm_bandfft,ierr)
483    displs_1 = 0
484    if(rset%mpi%nproc>1) displs_1(1:) = (/(sum(vcount_1(:ii)),ii=0,rset%mpi%nproc-1)/)
485 
486 
487    call xredistribute(a_hyb,vcount_1,displs_1,&
488 &   alocal,vcount_0,displs_0,&
489 &   rset%mpi%me,rset%mpi%nproc,&
490 &   rset%mpi%comm_bandfft,ierr)
491 
492    call xredistribute(b2_hyb,vcount_1,displs_1,&
493 &   b2local,vcount_0,displs_0,&
494 &   rset%mpi%me,rset%mpi%nproc,&
495 &   rset%mpi%comm_bandfft,ierr)
496 
497    nullify(rho_wrk,a_wrk,b2_wrk)
498    ABI_FREE(rho_hyb)
499    ABI_FREE(a_hyb)
500    ABI_FREE(b2_hyb)
501    ABI_FREE(vcount_0)
502    ABI_FREE(displs_0)
503    ABI_FREE(vcount_1)
504    ABI_FREE(displs_1)
505    call timab(604,2,tsec2) !--start time-counter: transgrid
506  end if
507 #endif
508 
509 !#############################################################
510 !--TRANSGRID FOR THE DENSITY RHO AND THE COEFFICIENTS AN AND B2N
511  if (dtset%recgratio>1) then
512 !  --variables allocation and initialisation-------
513    write (msg,'(a)')' - TRANSGRID USING -----'
514    call wrtout(std_out,msg,'COLL')
515    call timab(604,1,tsec2) !--start time-counter: transgrid
516 
517    ABI_MALLOC(rholocal_f,(rset%pawfgr%nfft))
518    ABI_MALLOC(rhogf,(2,rset%pawfgr%nfft))
519    ABI_MALLOC(rhogc,(2,rset%pawfgr%nfftc))
520    ABI_MALLOC(rholoc_2,(1:rset%pawfgr%nfftc))
521    ABI_MALLOC(ablocal_2,(1:rset%pawfgr%nfftc,0:nrec,2))
522    ABI_MALLOC(ablocal_f,(1:rset%pawfgr%nfft,0:nrec,2))
523    ABI_MALLOC(ablocal_1,(1:rset%par%npt,0:nrec,2))
524 
525    call destroy_distribfft(rset%mpi%distribfft)
526    call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%pawfgr%ngfftc(2) ,rset%pawfgr%ngfftc(3))
527    call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,rset%pawfgr%ngfft(2) ,rset%pawfgr%ngfft(3))
528 
529    rholocal_f = zero; ablocal_f = zero; ablocal_2 = zero
530 
531    ablocal_1(:,:,1) = transpose(alocal(:,1:rset%par%npt))
532    ablocal_1(:,:,2) = transpose(b2local(:,1:rset%par%npt))
533 
534    if(get_K_S_G==1 .and. dtset%recgratio>1 ) then
535      ABI_MALLOC(aloc_copy,(0:nrec,1:rset%par%npt))
536      ABI_MALLOC(b2loc_copy,(0:nrec,1:rset%par%npt))
537      aloc_copy = alocal(:,1:rset%par%npt)
538      b2loc_copy = b2local(:,1:rset%par%npt)
539    end if
540 
541    if(rset%mpi%nproc ==1) then
542 !    --SEQUENTIAL CASE--
543      rholoc_2 = rholocal(1:rset%par%npt)
544      ablocal_2 = ablocal_1
545 !    --Transigrid: coarse->fine
546      rhogf = zero; rhogc = zero
547      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
548      do jj1 = 1,2
549        do ipoint = 0,nrec
550          rhogf = zero; rhogc = zero
551          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,&
552 &         ablocal_2(:,ipoint,jj1),ablocal_f(:,ipoint,jj1))
553        end do
554      end do
555 !    --Assignation of the interpolated results on the fine grid--
556      rholocal = rholocal_f
557      alocal = transpose(ablocal_f(:,:,1))
558      b2local = transpose(ablocal_f(:,:,2))
559 
560    else
561 !    --PARALLEL CASE--
562      rholoc_2 = zero
563 !    --Send on all procs rho,an,bn--
564      call xmpi_allgatherv(rholocal(1:rset%par%npt),bufsize(rset%mpi%me),rholoc_2,&
565 &     bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
566      do irec = 0,nrec
567        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,1),bufsize(rset%mpi%me),&
568 &       ablocal_2(:,irec,1),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
569        call xmpi_allgatherv(ablocal_1(1:rset%par%npt,irec,2),bufsize(rset%mpi%me),&
570 &       ablocal_2(:,irec,2),bufsize,bufdispl,rset%mpi%comm_bandfft,ierr)
571      end do
572 
573 
574 !    --Transigrid: coarse->fine on differents procs (with respect
575 !    the number of recursion)
576      rhogf = zero;  rhogc = zero
577      call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,rhogf,rholoc_2,rholocal_f)
578 
579      do irec = 0,2*(nrec+1)-1
580        ii1 = modulo(irec,rset%mpi%nproc)
581        jj1 = 1+modulo(irec,2)
582        kk1 = floor(irec/2.)
583        if(maxval(abs(ablocal_2(:,kk1,jj1))) > tol10 .and. rset%mpi%me == ii1) then
584          rhogf = zero; rhogc = zero
585          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,rset%pawfgr,rhogc,&
586 &         rhogf,ablocal_2(:,kk1,jj1),ablocal_f(:,kk1,jj1))
587        end if
588      end do
589 
590 !    --Recuperation of all interpolated results
591 !    from procs to allprocs
592      call xmpi_sum(ablocal_f,rset%mpi%comm_bandfft,ierr)
593 
594 !    --Assignation the interpolated results on the fine grid
595 !    any procs to obtain the same point as in the standard recursion
596      do ii1 = 0, rset%mpi%nproc-1
597        jj1 = rset%par%displs(ii1)+1
598        if(ii1 == rset%mpi%me) then
599          alocal = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,1))
600          b2local = transpose(ablocal_f(jj1:jj1+rset%par%vcount(ii1)-1,:,2))
601          rholocal = rholocal_f(jj1:jj1+rset%par%vcount(ii1)-1)
602        end if
603      end do
604    end if
605    ABI_FREE(ablocal_f)
606    ABI_FREE(ablocal_2)
607    ABI_FREE(ablocal_1)
608    ABI_FREE(rhogf)
609    ABI_FREE(rholocal_f)
610    ABI_FREE(rhogc)
611    ABI_FREE(rholoc_2)
612 
613    call timab(604,2,tsec2) !--stop time-counter: transgrid
614  else
615    write(msg,'(a)')' - TRANSGRID NOT USED --'
616    call wrtout(std_out,msg,'COLL')
617  end if
618 !!--End transgrid
619 !!###############################################################
620 !###################################
621 !--Fermi energy computation
622 !--find the good mu by imposing the electrons number
623  call fermisolverec(rset%efermi,rholocal,alocal,b2local,rset%debug,&
624 & rset%min_nrec,tsmear,trotter,nelect,tol10,100, &
625 & rset%par%ntranche,rset%mpi,inf_ucvol,& !.False. .and.&
626 & (rset%tp==2 .or. rset%tp==3) .and. trotter>1)
627 
628 !#################################################################
629 !######### ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
630  entropy = zero
631  gran_pot = zero
632  noentropie : if(get_K_S_G==1)then
633    entropy1 = zero; entropy2 = zero ;entropy3 = zero; entropy4 = zero
634    gran_pot1 = zero ; gran_pot2 = zero; gran_pot3 = zero; gran_pot4 = zero
635 
636 !  --Seek for the min of the path integral
637    potmin = zero;  nlpotmin = zero
638    if(dtset%rectesteg/=1) potmin = minval(vtrial(:,1))
639    if(rset%nl%nlpsp)  nlpotmin = minval(rset%nl%eival(:,:,:))
640    xmax = exp(-ratio2*(potmin+nlpotmin-rset%efermi))
641 
642    dim_entro = 0;  if(rset%debug) dim_entro = 4;
643 
644    if(dtset%recgratio>1) then
645 !    --Recgratio>1
646      ABI_MALLOC(rhogf,(2,rset%pawfgr%nfft))
647      ABI_MALLOC(rhogc,(2,rset%pawfgr%nfftc))
648      ABI_MALLOC(entropy_v_f,(rset%pawfgr%nfft,0:4))
649      ABI_MALLOC(entropy_v_c,(rset%pawfgr%nfftc,0:4))
650      ABI_MALLOC(entropy_v_2,(1:rset%par%npt,0:4))
651      ABI_MALLOC(gran_pot_v_f,(rset%pawfgr%nfft,0:4))
652      ABI_MALLOC(gran_pot_v_c,(rset%pawfgr%nfftc,0:4))
653      ABI_MALLOC(gran_pot_v_2,(1:rset%par%npt,0:4))
654 
655      entropy_v_c = zero; entropy_v_f = zero; entropy_v_2 = zero
656      gran_pot_v_c = zero; gran_pot_v_f = zero; gran_pot_v_2 = zero
657 
658 
659      do ipoint = 1,rset%par%npt
660        call entropyrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
661 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
662 &       nrec,trotter,entropy_v_2(ipoint,0),two,&
663 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
664 &       entropy_v_2(ipoint,1),&
665 &       entropy_v_2(ipoint,2),&
666 &       entropy_v_2(ipoint,3),&
667 &       entropy_v_2(ipoint,4))
668 
669        call gran_potrec(exp(rset%efermi*ratio2)*aloc_copy(:,ipoint), &
670 &       exp(rset%efermi*ratio1)*b2loc_copy(:,ipoint), &
671 &       nrec,trotter,gran_pot_v_2(ipoint,0),two,&
672 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
673 &       gran_pot_v_2(ipoint,1),&
674 &       gran_pot_v_2(ipoint,2),&
675 &       gran_pot_v_2(ipoint,3),&
676 &       gran_pot_v_2(ipoint,4))
677      end do
678      ABI_FREE(aloc_copy)
679      ABI_FREE(b2loc_copy)
680 
681      call timab(613+swt_tm,1,tsec2)  !!--start time-counter: sync gpu-cpu
682      call xmpi_barrier(rset%mpi%comm_bandfft)
683      call timab(613+swt_tm,2,tsec2)  !!--stop time-counter: sync gpu-cpu
684 
685      call timab(604,1,tsec2) !--start time-counter: transgrid
686      if(rset%mpi%nproc==1) then
687        entropy_v_c = entropy_v_2
688        gran_pot_v_c = gran_pot_v_2
689      end if
690      do ii1 = 0,dim_entro
691        call xmpi_allgatherv(entropy_v_2(:,ii1),bufsize(rset%mpi%me),&
692 &       entropy_v_c(:,ii1),bufsize,bufdispl,&
693 &       rset%mpi%comm_bandfft,ierr)
694        call xmpi_allgatherv(gran_pot_v_2(:,ii1),bufsize(rset%mpi%me),&
695 &       gran_pot_v_c(:,ii1),bufsize,bufdispl,&
696 &       rset%mpi%comm_bandfft,ierr)
697 
698        if(maxval(abs(entropy_v_c(:,ii1))) > tol10) then
699          rhogf = zero; rhogc = zero
700          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
701 &         rset%pawfgr,rhogc,rhogf,entropy_v_c(:,ii1),entropy_v_f(:,ii1))
702        end if
703        if(maxval(abs(gran_pot_v_c(:,ii1))) >tol10) then
704          rhogf = zero; rhogc = zero
705          call transgrid(1,rset%mpi,dtset%nspden,1,0,0,1,&
706 &         rset%pawfgr,rhogc,rhogf,gran_pot_v_c(:,ii1),gran_pot_v_f(:,ii1))
707        end if
708      end do
709      call timab(604,2,tsec2) !--stop time-counter: transgrid
710 
711      entropy  = sum(entropy_v_f(:,0))
712      gran_pot  = sum(gran_pot_v_f(:,0))
713 
714      if(rset%debug)then
715        entropy1 = sum(entropy_v_f(:,1))
716        entropy2 = sum(entropy_v_f(:,2))
717        entropy3 = sum(entropy_v_f(:,3))
718        entropy4 = sum(entropy_v_f(:,4))
719        gran_pot1 = sum(gran_pot_v_f(:,1))
720        gran_pot2 = sum(gran_pot_v_f(:,2))
721        gran_pot3 = sum(gran_pot_v_f(:,3))
722        gran_pot4 = sum(gran_pot_v_f(:,4))
723      end if
724 
725      ABI_FREE(entropy_v_f)
726      ABI_FREE(entropy_v_c)
727      ABI_FREE(entropy_v_2)
728      ABI_FREE(rhogf)
729      ABI_FREE(rhogc)
730      ABI_FREE(gran_pot_v_f)
731      ABI_FREE(gran_pot_v_c)
732      ABI_FREE(gran_pot_v_2)
733 
734 
735    else
736 !    --Recgratio=1
737      do ipoint = 1,rset%par%ntranche
738        call entropyrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
739 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
740 &       nrec,trotter,entropylocal,two,&
741 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
742 &       entropylocal1,entropylocal2,&
743 &       entropylocal3,entropylocal4)
744        call gran_potrec(exp(rset%efermi*ratio2)*alocal(:,ipoint), &
745 &       exp(rset%efermi*ratio1)*b2local(:,ipoint), &
746 &       nrec,trotter,gran_pot_local,two,& !/ucvol,&
747 &       rset%debug,n_pt_integ_entropy,perc_vmin*xmax,&
748 &       gran_pot_local1,gran_pot_local2,&
749 &       gran_pot_local3,gran_pot_local4)
750 
751        entropy = entropy + entropylocal
752        gran_pot = gran_pot + gran_pot_local
753        if(rset%debug)then
754          entropy1 = entropy1 + entropylocal1
755          entropy2 = entropy2 + entropylocal2
756          entropy3 = entropy3 + entropylocal3
757          entropy4 = entropy4 + entropylocal4
758          gran_pot1 = gran_pot1 + gran_pot_local1
759          gran_pot2 = gran_pot2 + gran_pot_local2
760          gran_pot3 = gran_pot3 + gran_pot_local3
761          gran_pot4 = gran_pot4 + gran_pot_local4
762        end if
763 
764      end do
765 
766      call xmpi_sum(entropy,rset%mpi%comm_bandfft ,ierr)
767      call xmpi_sum(gran_pot,rset%mpi%comm_bandfft ,ierr)
768      if(rset%debug)then
769        call xmpi_sum(entropy1,rset%mpi%comm_bandfft ,ierr)
770        call xmpi_sum(entropy2,rset%mpi%comm_bandfft ,ierr)
771        call xmpi_sum(entropy3,rset%mpi%comm_bandfft ,ierr)
772        call xmpi_sum(entropy4,rset%mpi%comm_bandfft ,ierr)
773        call xmpi_sum(gran_pot1,rset%mpi%comm_bandfft ,ierr)
774        call xmpi_sum(gran_pot2,rset%mpi%comm_bandfft ,ierr)
775        call xmpi_sum(gran_pot3,rset%mpi%comm_bandfft ,ierr)
776        call xmpi_sum(gran_pot4,rset%mpi%comm_bandfft ,ierr)
777      end if
778    end if
779 
780    if(rset%debug)then
781      write(msg,'(2(2a,4(2a,es11.4,a)))')&
782 &     ' --------------------------'        ,ch10, &
783 &     '  entropy, horiz path=',' ',entropy1,ch10, &
784 &     '  entropy, xmax  path=',' ',entropy2,ch10, &
785 &     '  entropy, xmin  path=',' ',entropy3,ch10, &
786 &     '  entropy, zero  path=',' ',entropy4,ch10, &
787 &     ' --------------------------'        ,ch10, &
788 &     ' -omega/T, horiz path=',' ',gran_pot1,ch10, &
789 &     ' -omega/T, xmax  path=',' ',gran_pot2,ch10, &
790 &     ' -omega/T, xmin  path=',' ',gran_pot3,ch10, &
791 &     ' -omega/T, zero  path=',' ',gran_pot4,ch10
792      call wrtout(std_out,msg,'COLL')
793    end if
794 
795    e_eigenvalues=tsmear*(entropy-gran_pot) + rset%efermi*nelect
796 !  --In reality gran_pot is not the gran potential but the
797 !  potential omega=-PV (Landau-potential or grand-potential)
798 !  divided by -T so the internal energy
799 !  U:=e_eigenvalues= TS+omega+muN = ST-T*sum(ln(1-n))+muN =
800 !  T(S-gran_pot)+muN
801 
802 
803    if(rset%nl%nlpsp) then
804      call nlenergyrec(rset,enlx,exppot,dtset%ngfft,dtset%natom,&
805 &     dtset%typat,tsmear,trotter,tolrec)
806    end if
807  end if noentropie
808 !##### END ENTROPY AND GRAN POTENTIAL COMPUTATION  ##################
809 !#################################################################
810 
811 !if(associated(projec))
812  if(rset%nl%nlpsp)  then
813    ABI_FREE(projec)
814  end if
815  if(associated(gcart_loc))  then
816    ABI_FREE(gcart_loc)
817  end if
818  if((dtset%recgratio/=1 .or. rset%load==1))  then
819    ABI_FREE(bufdispl)
820    ABI_FREE(bufsize)
821  end if
822 !------------------------------------------------------------------
823 !--Check if the convergence is reached for rho
824  drho = maxval(abs(rhor(min_pt:max_pt,1)-rholocal(:)))
825  drhomax = drho
826  call xmpi_max(drho,drhomax,rset%mpi%comm_bandfft,ierr)
827 
828 !write(std_out,*)'drhomax,toldrho',drhomax,toldrho
829  if(drhomax<toldrho)then
830    rset%quitrec = rset%quitrec+1
831  else
832    rset%quitrec = 0
833  end if
834 
835 !-------------------------------------------------------------------
836 !--Density on all procs
837  rhor(min_pt:max_pt,1) = rholocal(:)
838  if(rset%mpi%nproc /= 1)then
839    call xmpi_allgatherv(rholocal,rset%par%ntranche,rhor(:,1),&
840 &   rset%par%vcount,rset%par%displs,&
841 &   rset%mpi%comm_band,ierr)
842  end if
843 
844 !--------------------------------------------------------------------
845 !--2nd EKIN CALCULATION: this method is used
846  noekin2 : if(get_K_S_G==1)then
847    intrhov = (inf_ucvol)*sum(rholocal*vtrial(min_pt:max_pt,1))
848    call xmpi_sum(intrhov,rset%mpi%comm_bandfft ,ierr)
849 
850    ek = e_eigenvalues-intrhov-enlx
851 
852 
853    if(rset%debug) then
854      write (msg,'(2a,3f15.10,2a,3f15.10,2a,f15.10,a)') ch10,&
855 &     ' ek,int(rho*V),ek+int(rho*V) ', ek, intrhov,  ek+ intrhov,ch10, &
856 &     ' kT*S, kT*sum(ln(...)), diff ', tsmear*entropy, tsmear*gran_pot, tsmear*(entropy-gran_pot),ch10, &
857 &     ' kT(S-sum(ln(...)))+mu*nelect', tsmear*(entropy-gran_pot)+rset%efermi*nelect,ch10
858      call wrtout(std_out,msg,'COLL')
859    end if
860 
861 
862  end if noekin2
863 !--------------------------------------------------------------------
864  fermie = rset%efermi
865 
866 !--------------------------------------------------------
867 !!--At the first step to find the max number of recursion
868 !!  needed to convergence, then redefine nrec.
869  if(initialized==0 .and. dtset%ntime>0) then
870    call  Calcnrec(rset,b2local)
871  end if
872 
873 !--------------------------------------------------------
874  call destroy_distribfft(rset%mpi%distribfft)
875  call init_distribfft(rset%mpi%distribfft,'c',rset%mpi%nproc_fft,rset%ngfftrec(2),rset%ngfftrec(3))
876  call init_distribfft(rset%mpi%distribfft,'f',rset%mpi%nproc_fft,dtset%ngfft(2),dtset%ngfft(3))
877 
878 !--Printing results
879  write(msg,'(3a,f15.10)')&
880 & ' -- Results: --------------------------------------',ch10,&
881 & ' mu          =',rset%efermi
882  call wrtout(std_out,msg,'COLL')
883  if(get_K_S_G==1)then
884    write(msg,'(a,f15.10,6(2a,f15.10))')&
885 &   ' potmin      =',potmin,ch10,&
886 &   ' <V_eff>     =',intrhov,ch10,&
887 &   ' entropy     =',entropy,ch10,&
888 &   ' -omega/T    =',gran_pot,ch10,&
889 &   ' eigenvalues =',e_eigenvalues,ch10,&
890 &   ' kinetic     =',ek,ch10,&
891 &   ' non-loc ene =',enlx
892    call wrtout(std_out,msg,'COLL')
893  end if
894  write(msg,'(a,50a)')' ',('-',ii=1,50)
895  call wrtout(std_out,msg,'COLL')
896 !write(std_out,*)'is the pressure ',gran_pot*tsmear/(rset%inf%ucvol*real(nfftrec,dp))
897 
898 !--Structured debugging : if rset%debug=T, stop here.
899  if(.false.)then !(rset%debug)
900    call wrtout(std_out,'  rhor ','PERS')
901    write(std_out,*)rhor(:,1)
902    call wrtout(std_out,' ','COLL')
903    write(msg,'(a,2d10.3)')'  temps recursion    ',tsec
904    call wrtout(std_out,msg,'COLL')
905    write(msg,'(a,l1,a)') ' vtorhorec : rset%debug=-',rset%debug,', debugging mode => stop '
906    ABI_ERROR(msg)
907  end if
908 
909  call timab(600,2,tsec2)
910  call timab(21,2,tsec)
911 
912  call symrhg(1,gprimd,irrzon,rset%mpi,nfftf,&
913 & dtset%ngfft(1)*dtset%ngfft(2)*dtset%ngfft(3),dtset%ngfft,dtset%nspden,&
914 & dtset%nsppol,dtset%nsym,phnons,rhog,rhor,rprimd,dtset%symafm,dtset%symrel,dtset%tnons)
915 
916 end subroutine vtorhorec