TABLE OF CONTENTS


ABINIT/getngrec [ Functions ]

[ Top ] [ Functions ]

NAME

 getngrec

FUNCTION

 This routine computes the fft box for the recursion method, accordingly to the troncation radius.
 It is quite similar to getng, but :
     - there is no xboxcut and ecut consistency
     - ngfft (the initial fft box) is the maximum fft box

INPUTS

  ngfft(18)=non truncated fft box
  mgfft=maximum of ngfft(1:3)
  inf_rmet=define the infinitesimal metric : rprimd*(transpose(rprimd)), divided by the number of discretisation point
  recrcut=truncating
  delta=to obtain radius of truncation

OUTPUT

  ngfftrec=truncated fft box
  nfftrec= truncated nfft
  tronc=True if truncation is made

SIDE EFFECTS

SOURCE

 874 subroutine getngrec(ngfft,rmet,ngfftrec,nfftrec,recrcut,delta,tronc)
 875 
 876 implicit none
 877 
 878 !Arguments -------------------------------
 879 !scalars
 880 real(dp),intent(in) :: recrcut,delta
 881 integer,intent(out) :: nfftrec
 882 logical,intent(out) :: tronc
 883 !arrays
 884 integer,intent(in)  :: ngfft(18)
 885 real(dp),intent(in) :: rmet(3,3)
 886 integer,intent(out) :: ngfftrec(18)
 887 
 888 !Local variables-------------------------------
 889 !scalars
 890 integer :: ii,iimin,index,jj,jjmin,kk,kkmin,largest_ngfftrec,maxpow11,maxpow2
 891 integer :: maxpow3,maxpow5,maxpow7,mmsrch,plane
 892 real(dp) :: dsm,dsp,dsqmin,rtroncat
 893 !arrays
 894 integer :: get_ngfftrec(3),maxsrch(3),minsrch(3)
 895 integer,allocatable :: iperm(:),srch(:)
 896 real(dp) :: tsec(2)
 897 real(dp) :: inf_rmet(3,3)
 898 ! *************************************************************************
 899 
 900  call timab(602,1,tsec)
 901 
 902  ngfftrec(:) = ngfft(:)
 903 
 904  if (recrcut>tol14) then  !default value dtset%recrcut = zero means no troncation
 905    rtroncat = recrcut+delta
 906    get_ngfftrec(:)=1
 907    plane = 1
 908 
 909    do ii=1,3
 910      inf_rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp))
 911    end do
 912 
 913 
 914 !  minimum value of ngfftrec
 915    do ii = 1,3
 916      ngfftrec(ii)=floor(2*rtroncat/sqrt(inf_rmet(ii,ii)))+1  !minimum value
 917      if(ngfftrec(ii)>=ngfft(ii))then
 918        ngfftrec(ii)=ngfft(ii)
 919        get_ngfftrec(ii)=0
 920      end if
 921    end do
 922 
 923 
 924    if(sum(get_ngfftrec)/=0)then
 925      largest_ngfftrec=maxval(ngfft(1:3))
 926      maxpow2=int(log(largest_ngfftrec+0.5d0)/log(two))
 927      maxpow3=int(log(largest_ngfftrec+0.5d0)/log(three))
 928      maxpow5=int(log(largest_ngfftrec+0.5d0)/log(five))
 929      maxpow7=0
 930      maxpow11=0
 931      mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1)
 932      ABI_MALLOC(srch,(mmsrch))
 933      ABI_MALLOC(iperm,(mmsrch))
 934 !    Factors of 2
 935      srch(1)=1
 936      do ii=1,maxpow2
 937        srch(ii+1)=srch(ii)*2
 938      end do
 939 !    Factors of 3
 940      index=maxpow2+1
 941      if(maxpow3>0)then
 942        do ii=1,maxpow3
 943          srch(1+ii*index:(ii+1)*index)=3*srch(1+(ii-1)*index:ii*index)
 944        end do
 945      end if
 946 !    Factors of 5
 947      index=(maxpow3+1)*index
 948      if(maxpow5>0)then
 949        do ii=1,maxpow5
 950          srch(1+ii*index:(ii+1)*index)=5*srch(1+(ii-1)*index:ii*index)
 951        end do
 952      end if
 953 !    Factors of 7
 954      index=(maxpow5+1)*index
 955      if(maxpow7>0)then
 956        do ii=1,maxpow7
 957          srch(1+ii*index:(ii+1)*index)=7*srch(1+(ii-1)*index:ii*index)
 958        end do
 959      end if
 960 !    Factors of 11
 961      index=(maxpow7+1)*index
 962      if(maxpow11>0)then
 963        do ii=1,maxpow11
 964          srch(1+ii*index:(ii+1)*index)=11*srch(1+(ii-1)*index:ii*index)
 965        end do
 966      end if
 967 !    srch is the set of allowed ngfftrec values
 968 
 969      call sort_int(mmsrch,srch,iperm)
 970      ABI_FREE(iperm)
 971 
 972      do ii=1,3
 973        if(get_ngfftrec(ii)==1)then
 974          do jj=1,mmsrch
 975            if(srch(jj)>=ngfftrec(ii))then
 976              minsrch(ii)=jj
 977              ngfftrec(ii)=srch(jj)
 978              exit
 979            end if
 980          end do
 981          do jj=minsrch(ii),mmsrch
 982            if(srch(jj)>ngfft(ii))then
 983 !            since ngfftrec(ii)<ngfft(ii) for get_ngfftrec(ii)==1,
 984 !            and srch(mmsrch)maxval(ngfft(1:3)),
 985 !            that will appens in the range minsrch(ii),mmsrch
 986              maxsrch(ii)=jj-1
 987              exit
 988            end if
 989          end do
 990        end if
 991 !      since ngfft(ii) is in srch, we have here srch(maxsrch(ii))=ngfft(ii)
 992 !      minsrch(ii), maxsrch(ii) is the range of index of srch in which we can
 993 !      search ngfftrec(ii)
 994 
 995        if(ngfftrec(ii)>=ngfft(ii))then
 996          ngfftrec(ii)=ngfft(ii)
 997          get_ngfftrec(ii)=0
 998        end if
 999      end do
1000    end if
1001 
1002 !  verify that the entiere truncation sphere is in the fft box ;
1003 !  but only in the dimension in which we do not consider the entiere fft box
1004    do while(sum(get_ngfftrec)/=0)  !again...
1005 
1006 !    determining the minimum distance between 0 and the boundary
1007 !    of the fft box
1008 !    quite similar to the subroutine "bound", but only over the plane which
1009 !    are not the whole fft box
1010      dsqmin=dsq_rec(ngfftrec(1)/2,-ngfftrec(2)/2,-ngfftrec(3)/2,inf_rmet)+0.01d0
1011 
1012      if(get_ngfftrec(1)/=0)then
1013 !      look at +/- g1 planes:
1014        do jj=-ngfftrec(2)/2,ngfftrec(2)/2
1015          do kk=-ngfftrec(3)/2,ngfftrec(3)/2
1016            dsp = dsq_rec(ngfftrec(1)/2, jj, kk,inf_rmet)
1017            dsm = dsq_rec( - ngfftrec(1)/2, jj, kk,inf_rmet)
1018            if (dsp<dsqmin) then
1019              dsqmin = dsp
1020              iimin = ngfftrec(1)/2
1021              jjmin = jj
1022              kkmin = kk
1023              plane=1
1024            end if
1025            if (dsm<dsqmin) then
1026              dsqmin = dsm
1027              iimin =  - ngfftrec(1)/2
1028              jjmin = jj
1029              kkmin = kk
1030              plane=1
1031            end if
1032          end do
1033        end do
1034      end if
1035 
1036      if(get_ngfftrec(2)/=0)then
1037 !      +/- g2 planes:
1038        do ii=-ngfftrec(1)/2,ngfftrec(1)/2
1039          do kk=-ngfftrec(3)/2,ngfftrec(3)/2
1040            dsp = dsq_rec(ii,ngfftrec(2)/2,kk,inf_rmet)
1041            dsm = dsq_rec(ii,-ngfftrec(2)/2,kk,inf_rmet)
1042            if (dsp<dsqmin) then
1043              dsqmin = dsp
1044              iimin = ii
1045              jjmin = ngfftrec(2)/2
1046              kkmin = kk
1047              plane=2
1048            end if
1049            if (dsm<dsqmin) then
1050              dsqmin = dsm
1051              iimin = ii
1052              jjmin =  - ngfftrec(2)/2
1053              kkmin = kk
1054              plane=2
1055            end if
1056          end do
1057        end do
1058      end if
1059 
1060      if(get_ngfftrec(3)/=0)then
1061 !      +/- g3 planes:
1062        do ii=-ngfftrec(1)/2,ngfftrec(1)/2
1063          do jj=-ngfftrec(2)/2,ngfftrec(2)/2
1064            dsp = dsq_rec(ii,jj,ngfftrec(3)/2,inf_rmet)
1065            dsm = dsq_rec(ii,jj,-ngfftrec(3)/2,inf_rmet)
1066            if (dsp<dsqmin) then
1067              dsqmin = dsp
1068              iimin = ii
1069              jjmin = jj
1070              kkmin = ngfftrec(3)/2
1071              plane=3
1072            end if
1073            if (dsm<dsqmin) then
1074              dsqmin = dsm
1075              iimin = ii
1076              jjmin = jj
1077              kkmin =  - ngfftrec(3)/2
1078              plane=3
1079            end if
1080          end do
1081        end do
1082      end if
1083 
1084      if(dsqmin>=rtroncat)then
1085        get_ngfftrec=0
1086        exit
1087      end if
1088 
1089 !    Fix nearest boundary
1090      do ii=minsrch(plane),maxsrch(plane)
1091        if (srch(ii)>=ngfftrec(plane)) then
1092 !        redefine ngfft(plane) to next higher choice
1093          ngfftrec(plane)=srch(ii+1)
1094 !        verify if we cover the whole box
1095          if(ngfftrec(plane)>=ngfft(plane))then
1096            ngfftrec(plane)=ngfft(plane)
1097            get_ngfftrec(plane)=0
1098          end if
1099 !        Exit the loop over ii
1100          exit
1101        end if
1102      end do
1103 
1104    end do
1105 
1106    if (allocated(srch)) then
1107      ABI_FREE(srch)
1108    end if
1109 
1110 !  if(mod(ngfftrec(1),16)/=0) then
1111 !  ngfftrec(1) = ngfftrec(1)+(16-mod(ngfftrec(1),16))
1112 !  ngfftrec(2:3) = ngfftrec(1)
1113 !  endif
1114 
1115    ngfftrec(4)=2*(ngfftrec(1)/2)+1
1116    ngfftrec(5)=2*(ngfftrec(2)/2)+1
1117    ngfftrec(6)=ngfftrec(3)
1118 
1119 !  --algorithm
1120    ngfftrec(7)=ngfft(7)   ! to be improved for a better non-parallel algorithm - here it is automatically 401
1121    ngfftrec(8)=ngfft(8)
1122 
1123  end if
1124 
1125 !--For now, recursion method doesn't use paralelism on FFT - which would require a great number of processors
1126  nfftrec = product(ngfftrec(1:3))
1127  ngfftrec(9:11) = (/0,1,0/)   !--(/ paral, nproc, %me \)
1128  ngfftrec(12:13) = ngfftrec(2:3)   ! n2proc ! n3proc
1129 
1130  tronc  = all(ngfftrec(:3)/=ngfft(:3))
1131  call timab(602,2,tsec)
1132 
1133  contains
1134 
1135    function dsq_rec(ii,jj,kk,inf_rmet)
1136 
1137    real(dp) :: dsq_rec
1138    integer,intent(in) :: ii,jj,kk
1139    real(dp),intent(in) :: inf_rmet(3,3)
1140    dsq_rec=sqrt(&
1141 &   inf_rmet(1,1)*dble(ii**2)&
1142 &   +inf_rmet(2,2)*dble(jj**2)&
1143 &   +inf_rmet(3,3)*dble(kk**2)&
1144 &   +two*(inf_rmet(1,2)*dble(ii*jj)&
1145 &   +inf_rmet(2,3)*dble(jj*kk)&
1146 &   +inf_rmet(3,1)*dble(kk*ii)))
1147  end function dsq_rec
1148 
1149 
1150 end subroutine getngrec

ABINIT/m_rec [ Modules ]

[ Top ] [ Modules ]

NAME

  m_rec

FUNCTION

  This module  provides some functions applied to the
  recursion structured datatype recursion_type.
  It includes also some function used to change some variables
  of recursion_type

COPYRIGHT

 Copyright (C) 2002-2024 ABINIT group (MMancini)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .
 For the initials of contributors, see ~abinit/doc/developers/contributors.txt.

NOTES

 * Routines tagged with "@type_name" are strongly connected to the definition of the data type.
   Strongly connected means that the proper functioning of the implementation relies on the
   assumption that the tagged procedure is consistent with the type declaration.
   Every time a developer changes the structure "type_name" adding new entries, he/she has to make sure
   that all the strongly connected routines are changed accordingly to accomodate the modification of the data type.
   Typical examples of strongly connected routines are creation, destruction or reset methods.

SOURCE

30 #if defined HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include "abi_common.h"
35 
36 module m_rec
37 
38  use defs_basis
39  use defs_rectypes
40  use m_abicore
41  use m_errors
42  use m_xmpi
43  use m_sort
44  use m_dtset
45 
46  use defs_datatypes,    only : pseudopotential_type
47  use defs_abitypes,     only : mpi_type
48  use m_exp_mat,         only : exp_mat
49  use m_numeric_tools,   only : set2unit
50  use m_special_funcs,   only : gamma_function
51  use m_pawfgr,          only : pawfgr_nullify, indgrid, pawfgr_destroy
52  use m_paw_sphharm,     only : initylmr
53  use m_time,            only : timab
54  use m_rec_tools,       only : get_pt0_pt1
55  use m_per_cond,        only : per_cond
56 #ifdef HAVE_GPU_CUDA
57  use m_hidecudarec,     only : InitRecGPU, CleanRecGPU
58 #endif
59 
60 
61  implicit none
62 
63  private ::           &
64    find_maxmin_proc,  &  !--To calculate max and min pt for any cpu
65    H_D_distrib
66 
67  public ::            &
68    InitRec,           &  !--Main creation method.
69    Init_MetricRec,    &  !--To Initalize the inf. metric in recursion
70    Init_nlpspRec,     &  !--Main creation method for non-local part.
71    CleanRec,          &  !--deallocate all pointers.
72    Calcnrec,          &  !--calculates the new min_nrec
73    cpu_distribution      !--Regulates the work load on cpu-gpu
74 CONTAINS  !===========================================================

ABINIT/pspnl_hgh_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 pspnl_hgh_rec

FUNCTION

 This routine computes the matrices S_kk'^{l,A} of the projectors
 (it is the exponential of the overlap matrix). It coorresponds to the matrix:
   $$\left[(U_l)^{-1}*Exp(-temperature D_l )*U_l* (g_l)^{-1} -Identity\right]_kk'
   where (U_l)^-1* D_l* U_l = h^lg_l.
 $g_l = <f^l_k|f^l_{k'}>$ is the overlap matrix between projectors
 and $h^l_{kk'}$ is the strength matrix of the projectors.
 It calulates also the strength eigenvalues and eigenvectors of $h$,
 used in the calculus of non-local energy

INPUTS

  temperature=4*rtrotter/beta=4*rtrotter*tsmear: the effective temp. in  recursion
  psps <type(pseudopotential_type)>=variables related to pseudo-potentials
  debug=debug variable

OUTPUT

  nlrec%mat_exp_psp_nl=the matrix of the exponential of the projectors:
   for any psp, for any angular moment:
   h_ij=strength; g_ij=ovelap => exp(-h.g/temp/4p).g^(-1)
  nlrec%radii=Local radii of nl psp
  nlrec%pspinfo(:,:) for any typat:  (momang,typat)=number of projectors
  nlrec%eival(:,:,:) for any psp, any momang, the eigenvalues of the
    strength matrix H: eival(:,mang,psp)
  nlrec%eivec(:,:,:,:)for any psp, any momang, the eigenvectors of the
    strength matrix H: eivec(:,:,mang,psp)

SIDE EFFECTS

SOURCE

1405 subroutine pspnl_hgh_rec(psps,temperature,nlrec,debug)
1406 
1407  use m_linalg_interfaces
1408  implicit none
1409 
1410 !Arguments -----------------------------------
1411 !scalars
1412  real(dp),intent(in) :: temperature
1413  logical,intent(in) :: debug
1414  type(pseudopotential_type),intent(in) :: psps
1415  type(nlpsprec_type),intent(inout) :: nlrec
1416 !arrays
1417 !Local variables-------------------------------
1418 !scalars
1419  integer,parameter :: maxsize=3
1420  integer,parameter :: lwork=(1+32)*maxsize
1421  integer :: iangol,ipseudo,info,nproj
1422  integer :: g_mat_size,ii,nproj2
1423  real(dp) :: denom_1,denom_2,tot_proj
1424  character(len=500) :: msg
1425 !arrays
1426  integer :: ipvt(1)
1427  !real(dp) :: rwork(2*maxsize)
1428  real(dp) :: h_mat_init(3,3), rework(lwork)
1429  real(dp), allocatable :: g_mat(:,:),h_mat(:,:),eig_val_h(:)
1430  real(dp), allocatable :: identity(:,:),inv_g_mat(:,:),u_mat(:,:)
1431 
1432  complex(dpc),allocatable :: hg_mat(:,:)
1433 ! *************************************************************************
1434 
1435  if(debug)then
1436    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : enter '
1437    call wrtout(std_out,msg,'COLL')
1438  end if
1439 
1440 !--For any pseudo potential:
1441  do ipseudo = 1, psps%npsp !--Loop on the pseudos
1442    write(msg,'(a,80a)')' pseudo file',('-',ii=1,10)
1443    call wrtout(std_out,msg,'COLL')
1444    write(msg,'(a)') psps%filpsp(ipseudo)
1445    call wrtout(std_out,msg,'COLL')
1446 
1447 !  --For any angular moment:
1448    do iangol = 0,psps%mpsang-1 !--Loop on the angular moment
1449 
1450 !    --Local radius
1451      nlrec%radii(iangol+1,ipseudo) = psps%gth_params%psppar(iangol+1,0,ipseudo)
1452 
1453 !    --Strenghts of non-local projectors  (matrix h)
1454 !    --Diagonal part:
1455      h_mat_init = zero
1456      h_mat_init(1,1) = psps%gth_params%psppar(iangol+1,1,ipseudo)
1457      h_mat_init(2,2) = psps%gth_params%psppar(iangol+1,2,ipseudo)
1458      h_mat_init(3,3) = psps%gth_params%psppar(iangol+1,3,ipseudo)
1459 !    --Out-diagonal part
1460 !    --Depending on angular moment the projectors
1461 !    strength is calculated differently
1462      select case(iangol)
1463      case(0)
1464        h_mat_init(1,2) = -half*sqrt(3.d0/5.d0)*h_mat_init(2,2)
1465        h_mat_init(1,3) =  half*sqrt(5.d0/21.d0)*h_mat_init(3,3)
1466        h_mat_init(2,3) = -half*sqrt(100.d0/63.d0)*h_mat_init(3,3)
1467      case(1)
1468        h_mat_init(1,2) = -half*sqrt(5.d0/7.d0)*h_mat_init(2,2)
1469        h_mat_init(1,3) =  sixth*sqrt(35.d0/11.d0)*h_mat_init(3,3)
1470        h_mat_init(2,3) = -14.d0/six/sqrt(11.d0) *h_mat_init(3,3)
1471      case(2)
1472        h_mat_init(1,2) = -half*sqrt(7.d0/9.d0)*h_mat_init(2,2)
1473        h_mat_init(1,3) =  half*sqrt(63.d0/143.d0)*h_mat_init(3,3)
1474        h_mat_init(2,3) = -nine/sqrt(143.d0)*h_mat_init(3,3)
1475      case(3)
1476        h_mat_init(1,2) = zero;  h_mat_init(1,3) = zero;  h_mat_init(2,3) = zero;
1477      case default
1478        write(msg,'(a)')' error angular: moment component'
1479        call wrtout(std_out,msg,'COLL')
1480      end select
1481 
1482 
1483 
1484 !    --Real dimensions of projectors.
1485      g_mat_size = count(abs((/ (h_mat_init(ii,ii),ii=1,3) /))>1.d-8)
1486      nlrec%pspinfo(iangol+1,ipseudo) = g_mat_size
1487      write(msg,'(a,i2,a,i2)')' ang. moment=',iangol,', N projectors=',g_mat_size
1488      call wrtout(std_out,msg,'COLL')
1489      if (g_mat_size>0) then
1490 !      --Identity matrix
1491        ABI_MALLOC(identity,(g_mat_size,g_mat_size))
1492        call set2unit(identity)
1493 !      identity = zero
1494 !      identity(:,1) = one
1495 !      identity(:,:) = cshift(array=identity,shift=(/ (-ii,ii=0,g_mat_size) /), dim=2 )
1496 
1497 
1498 !      ############## CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ##################
1499 !      --Inverse of the matrix h
1500        ABI_MALLOC(eig_val_h,(g_mat_size))
1501        ABI_MALLOC(u_mat,(g_mat_size,g_mat_size))
1502 !      --u-mat will contain the eigenvectors of h_mat_init
1503        u_mat = h_mat_init(:g_mat_size,:g_mat_size)
1504 
1505 !      write(std_out,*)'hmat_init'
1506 !      do ii=1,g_mat_size
1507 !      write(std_out,*)h_mat_init(ii,:)
1508 !      end do
1509        call DSYEV('v','u',g_mat_size,u_mat,g_mat_size,eig_val_h,rework,lwork,info)
1510 
1511 !      --THE DIAGONAL MATRIX IS GIVEN BY D=U^t.H.U
1512 !      (eival=transpose(eivec).h_mat_init.eivec)
1513        write(msg,'(a,3d10.3)')'  eigenvalues=',eig_val_h
1514        call wrtout(std_out,msg,'COLL')
1515 !      write(std_out,*)'autovec';write(std_out,*)u_mat
1516 
1517        nlrec%eival(:g_mat_size,1+iangol,ipseudo) = eig_val_h
1518        nlrec%eivec(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = u_mat
1519        ABI_FREE(eig_val_h)
1520        ABI_FREE(u_mat)
1521 
1522 !      ##########END  CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ##################
1523 
1524        ABI_MALLOC(g_mat,(g_mat_size,g_mat_size))
1525        ABI_MALLOC(inv_g_mat,(g_mat_size,g_mat_size))
1526        ABI_MALLOC(h_mat,(g_mat_size,g_mat_size))
1527        ABI_MALLOC(hg_mat,(g_mat_size,g_mat_size))
1528 
1529        g_mat(:,:) = one
1530        h_mat(:,:) = zero
1531        h_mat(:,:) = h_mat_init(:g_mat_size,:g_mat_size)
1532 
1533 !      -------------------------------------------------------
1534 !      --Matrix  of the overlap between projetors (matrix g)
1535 !      and the h matrix of strength
1536        do nproj = 1,g_mat_size-1
1537          do nproj2 = 1+nproj,g_mat_size
1538            tot_proj = zero
1539 !          --Analytic value of overlap
1540 !          g_ij=Gamma[-1/2+i+j+l]/Sqrt(Gamma[-1/2+i+iangol]*Gamma[-1/2+j+iangol])
1541            call gamma_function(-half+real(nproj+nproj2+iangol,dp),tot_proj)
1542            call gamma_function(-half+real(iangol+2*nproj,dp),denom_1)
1543            call gamma_function(-half+real(iangol+2*nproj2,dp),denom_2)
1544 
1545            g_mat(nproj,nproj2) = tot_proj/sqrt(denom_1*denom_2)
1546            g_mat(nproj2,nproj) = g_mat(nproj,nproj2)
1547 
1548            h_mat(nproj,nproj2) = h_mat_init(nproj,nproj2)
1549            h_mat(nproj2,nproj) = h_mat_init(nproj,nproj2)
1550          end do
1551        end do
1552 
1553 !      --Inverse of the overlap matrix g
1554        inv_g_mat = g_mat
1555        call DGETRF(g_mat_size,g_mat_size,inv_g_mat,g_mat_size,ipvt,info)
1556        call DGETRI(g_mat_size,inv_g_mat,g_mat_size,ipvt,rework,lwork,info)
1557 
1558 
1559 !      -----------------------------------------------------------
1560 !      --Now it calculates the exponential of the matrix h.g
1561        hg_mat = matmul(h_mat,g_mat)
1562 
1563        call exp_mat(hg_mat,g_mat_size,-one/temperature)
1564 
1565 !      --(exp(h.g)-Identity).(g^-1)
1566        hg_mat = hg_mat-identity(:,:)
1567 
1568 
1569 !      --results on output
1570        nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = matmul(real(hg_mat,dp),inv_g_mat)
1571 
1572 !      write(std_out,*) nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo)
1573 
1574        ABI_FREE(g_mat)
1575        ABI_FREE(hg_mat)
1576        ABI_FREE(h_mat)
1577        ABI_FREE(inv_g_mat)
1578        ABI_FREE(identity)
1579      end if
1580 
1581    end do !enddo on angular moment
1582  end do !enddo on pseudos
1583 
1584 
1585  if(debug)then
1586    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : exit '
1587    call wrtout(std_out,msg,'COLL')
1588  end if
1589 
1590 end subroutine pspnl_hgh_rec

ABINIT/pspnl_operat_rec [ Functions ]

[ Top ] [ Functions ]

NAME

 pspnl_operat_rec

FUNCTION

 It calculates the non-local projectors used in recursion for any psp non-local:
 The nl interaction in recursion is $$exp{-V_{NL}/beta}=\sum_A\sum_{lm}
 \sum{ij}Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)D^l_{i,j}Y_{lm}(\hat{r-R_A})f^l_j{r-R_A}$$
 where $D^_{i,j}$ is a matrix  previously (see pspnl_operat_rec).
 In this routine  the projectors $Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)$
 are calculated. So an array of dimensions
 rset%nl%projec(nfftrec,lmnmax,nlrec%npsp)

INPUTS

 metrec<metricrec_type>=contains information concerning metric in
         recursion: grid_step, metric, infinitesimal volume
 ngfftrec(18)=is the ngfft grid (truncated if different from ngfft) of recursion
 debug=debug variable

OUTPUT

 nlrec<nlpsprec_type>%projec= array containig the projectors on the real grid
 nlrec<nlpsprec_type>%intlen= integer linear size of the non-local grid

SIDE EFFECTS

 nlrec<nlpsprec_type> data set of non-local pseudo for recursion
 The better Interaction length (Intlen) is also calculated and printed but
 recursion use intlen=ngfftrec/2

NOTES

SOURCE

1187 subroutine pspnl_operat_rec(nlrec,metrec,ngfftrec,debug)
1188 
1189  implicit none
1190 
1191 !Arguments ------------------------------------
1192 !scalars
1193  logical,intent(in) :: debug
1194  type(metricrec_type),intent(in) ::metrec
1195  type(nlpsprec_type),intent(inout) :: nlrec
1196 !arrays
1197  integer,intent(in) :: ngfftrec(18)
1198 !Local variables-------------------------------
1199  integer :: ii,intlen
1200  integer :: iangol,ipsp,iproj
1201  integer :: mpsang,jj,kk,rr
1202  integer :: nfftrec
1203  integer :: ilmn,il,ilm,in,lmnmax
1204  real(dp) :: raggio,rloc,denom,step
1205  real(dp) :: delta_out,partial,err
1206  character(len=500) :: msg
1207  real(dp) :: part_sum(3)
1208  real(dp) :: ylmr_gr_dm(0,0,0)
1209  real(dp),allocatable :: ylmr(:,:),proj_arr(:,:,:)
1210  real(dp),allocatable :: radloc(:,:),nrm(:)
1211 
1212 ! *************************************************************************
1213 
1214  if(debug)then
1215    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : enter '
1216    call wrtout(std_out,msg,'PERS')
1217  end if
1218 
1219 !#####################################################################
1220 !--CALCULATE THE (SEMI-)MAXIMUM INTERVAL WHERE ALL THE PROJECTORS ARE
1221 !DIFFERENT TO ZERO.
1222 !--For any pseudo potential:
1223  delta_out = zero
1224  step = metrec%tr(1)*half !--Scanning step= grid step/2
1225 
1226 
1227  do ipsp = 1, nlrec%npsp !--Loop on the pseudos
1228 
1229 !  --For any angular moment:
1230    do iangol = 0,maxval(nlrec%indlmn(1,:,ipsp)) !--Loop on the angular moment
1231      rloc = nlrec%radii(iangol+1,ipsp) !--Local radius
1232 
1233 !    --For any projector
1234      do iproj = 1,nlrec%pspinfo(iangol+1,ipsp)
1235 !      --Starting point to searching when the projector goes to zero.
1236 !      this correspond to twice the radius wher the projector has its maximum
1237        raggio = two*sqrt(real(-2+2*iproj+iangol,dp))*rloc
1238 !      --Caculate the gamma func at the denominator
1239        call gamma_function(real(iangol+2*iproj,dp)-half,denom)
1240 !      --Find the zero
1241 !      --The following while cycle should be replaced by a bisection
1242 !      --method. Bucause this is calculated only 1 time it is not very
1243 !      important.
1244        err = one
1245        ii=0
1246 !      tolloc = 1.d0*abs(minval(nlrec%mat_exp_psp_nl(:nlrec%pspinfo(iangol+1,ipsp),:nlrec%pspinfo(iangol+1,ipsp),iangol+1,ipsp)))
1247        do while(abs(err)>1.d-2)
1248          raggio = raggio + step
1249          err = project_prec(raggio,iproj,iangol,rloc)/sqrt(denom)
1250          ii = ii+1
1251        end do
1252        write(std_out,*)'local delta',raggio,ii
1253        delta_out=maxval((/ delta_out,raggio /))
1254      end do !end loop on projectors
1255 
1256    end do !enddo on angular moment
1257  end do !enddo on pseudos
1258 
1259 !--CALCULATE how many grid steps correspond to delta_out
1260  intlen = int(delta_out/metrec%tr(1))
1261 !--I want that intlen is odd
1262  if(mod(intlen,2)==0) intlen = intlen+1
1263 
1264  write(msg,'(2a,i3,a)') ch10,' Interac. length of non-local psp(grid steps)=',intlen,ch10
1265  call wrtout(std_out,msg,'COLL')
1266 !#####################################################################
1267 
1268 !--Initialisation
1269  nfftrec = product(ngfftrec(1:3))
1270  lmnmax = nlrec%lmnmax
1271  intlen = ngfftrec(1)/2
1272  nlrec%intlen = intlen !--Setted in recursion variables
1273 
1274 !#####################################################################
1275 !--CALCULATE E(q,q')
1276 !--Cration of the exponential*projectors*ylm matrix
1277 
1278 !--Initialisation
1279  ABI_MALLOC(nlrec%projec,(nfftrec,lmnmax,nlrec%npsp))
1280  nlrec%projec = zero
1281  ABI_MALLOC(radloc,(3,nfftrec))
1282  radloc = zero
1283  ABI_MALLOC(nrm,(nfftrec))
1284  nrm = zero
1285 
1286 !--Loop on pseudo types
1287  pseudodo: do ipsp = 1, nlrec%npsp
1288 !  --Control if the psp is non-local, else continue
1289    if(all(nlrec%pspinfo(:,ipsp)==0)) cycle
1290 !  --Vector which stores localy the upper part of symmetrical
1291 !  matrix of the exponential of the non-local operator
1292    mpsang = maxval(nlrec%indlmn(1,:,ipsp))+1
1293    ABI_MALLOC(proj_arr,(nfftrec,maxval(nlrec%pspinfo(:,ipsp)),mpsang))
1294    ABI_MALLOC(ylmr,(mpsang*mpsang,nfftrec))
1295    proj_arr = zero
1296    ylmr = zero
1297 
1298 !  !debug
1299 !  write(std_out,*)'mpsang,proj num',mpsang,maxval(nlrec%pspinfo(:,ipsp))
1300 !  !enddebug
1301 
1302 !  --Calculate the projctors
1303    do iangol = 0,mpsang-1
1304      rloc = nlrec%radii(iangol+1,ipsp)
1305      do iproj = 1,nlrec%pspinfo(iangol+1,ipsp)
1306        call gamma_function(real(iangol+2*iproj,dp)-half,denom)
1307        denom = one/sqrt(denom)
1308        do ii = 0,ngfftrec(1)-1 !--3-loop on coordinates
1309          do jj = 0,ngfftrec(2)-1
1310            do kk = 0,ngfftrec(3)-1
1311 !            --Calculate the radii
1312              part_sum(:) = real((/ ii,jj,kk /)-intlen,dp)*(metrec%tr)
1313              rr = 1+ii+(jj+kk*ngfftrec(2))*ngfftrec(3)
1314              radloc(:,rr) = part_sum
1315              nrm(rr) = sqrt(sum(part_sum(:)**two))
1316              partial = project_prec(nrm(rr),iproj,iangol,rloc)*denom
1317              if(abs(partial)>tol12 ) proj_arr(rr,iproj,iangol+1) = partial
1318            end do
1319          end do
1320        end do  !--End 3-loop on coordinates
1321      end do
1322    end do
1323 
1324 
1325 !  -------------------------------------------------------------
1326 !  --Calculate the spherical harmonics (Verified: it works well)
1327    call initylmr(mpsang,1,nfftrec,nrm(:),1,radloc(:,:),ylmr(:,:),ylmr_gr_dm)
1328 !  -------------------------------------------------------------
1329 
1330 
1331    do ilmn = 1,lmnmax
1332      ilm = nlrec%indlmn(4,ilmn,ipsp)
1333      il = nlrec%indlmn(1,ilmn,ipsp)+1
1334      in = nlrec%indlmn(3,ilmn,ipsp)
1335      write(msg,'(2a,i3,2i2)')ch10,'lm,l,n',ilm,il,in
1336      call wrtout(std_out,msg,'COLL')
1337 
1338      nlrec%projec(:,ilmn,ipsp) = ylmr(ilm,:)*proj_arr(:,in,il)
1339    end do
1340 
1341    ABI_FREE(ylmr)
1342    ABI_FREE(proj_arr)
1343  end do pseudodo !--end loop on pseudo types
1344 
1345 
1346  ABI_FREE(radloc)
1347  ABI_FREE(nrm)
1348 
1349  if(debug)then
1350    write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : exit '
1351    call wrtout(std_out,msg,'PERS')
1352  end if
1353 
1354  contains
1355 
1356    function project_prec(raggio,iproj,iangol,rloc)
1357 !--Analytical expression of the projectors in hgh-pspeudopotential
1358 !--The gamma function at denominator is missing
1359    real(dp) :: project_prec
1360    integer,intent(in) :: iproj,iangol
1361    real(dp),intent(in) :: raggio,rloc
1362 
1363    project_prec=sqrt2*(raggio/rloc)**real((iangol+2*(iproj-1)),dp)*&
1364 &   exp(-((raggio/rloc)**two)*half)/rloc**onehalf
1365  end function project_prec
1366 
1367 end subroutine pspnl_operat_rec

m_rec/Calcnrec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Calcnrec

FUNCTION

 Calculate the new min_nrec.

INPUTS

 rset<recursion_type>=Data type concerning recursion
 b2(:,:) recursion coefficients

OUTPUT

 rset%min_nrec is changed

SOURCE

809 subroutine Calcnrec(rset,b2)
810 
811  implicit none
812 
813  !Arguments ------------------------------------
814  ! scalars
815  type(recursion_type),intent(inout) :: rset
816  ! arrays
817  real(dp), intent(in):: b2(0:rset%min_nrec,1:rset%par%ntranche)
818  !Local ----------------------------------------
819  ! scalars
820  integer :: kk,ii,jj,ierr,loc_nrec
821  character(len=500) :: msg
822  ! *********************************************************************
823  ! @recursion_type
824  ! @pawfgr_type
825  kk = 1
826  loc_nrec = rset%min_nrec
827  do ii=1,rset%par%ntranche
828   !--Use to lbound because b2 passed as argument
829   !  doesn't have the same bounds as in the calling
830   !  subroutine, the +1 because b2(lbound,ii)=1.
831   jj = lbound(b2,dim=1)+1
832   do while (b2(jj,ii)>tol10 .and.  jj<=rset%min_nrec-1)
833    jj = jj+1
834    kk = max(jj,kk)
835   end do
836  enddo
837  call xmpi_max(kk,rset%min_nrec,rset%mpi%comm_bandfft,ierr)
838  rset%min_nrec = rset%min_nrec+1-lbound(b2,dim=1)
839 
840  write(msg,'(a,i2.2,a9,i2.2)') ' -- nrec adjustement   nrec=',loc_nrec,' => nrec=',rset%min_nrec
841  call wrtout(std_out,msg,'COLL')
842  write(msg,'(51a)')' ',('-',ii=1,50)
843  call wrtout(std_out,msg,'COLL')
844 
845 end subroutine Calcnrec

m_rec/CleanRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 CleanRec

FUNCTION

 Deallocate the pointers of rset<recursion_type>=Data type concerning recursion.

INPUTS

 rset<recursion_type>=Data type concerning recursion

SIDE EFFECTS

 All pointers are deallocated.

SOURCE

735 subroutine CleanRec(rset)
736 
737  implicit none
738 
739 !Arguments ------------------------------------
740 ! scalars
741  type(recursion_type),intent(inout) :: rset
742 ! arrays
743 ! *********************************************************************
744 
745  ! @recursion_type
746 
747  if (allocated(rset%ZT_p))  then
748    ABI_FREE(rset%ZT_p)
749  end if
750  if (allocated(rset%par%displs))  then
751    ABI_FREE(rset%par%displs)
752  end if
753  if (allocated(rset%par%vcount))  then
754    ABI_FREE(rset%par%vcount)
755  end if
756  if (allocated(rset%nl%mat_exp_psp_nl))  then
757    ABI_FREE(rset%nl%mat_exp_psp_nl)
758  end if
759  if (allocated(rset%nl%eival))  then
760    ABI_FREE(rset%nl%eival)
761  end if
762  if (allocated(rset%nl%eivec))  then
763    ABI_FREE(rset%nl%eivec)
764  end if
765  if (allocated(rset%nl%pspinfo))  then
766    ABI_FREE(rset%nl%pspinfo)
767  end if
768  if (allocated(rset%nl%radii))  then
769    ABI_FREE(rset%nl%radii)
770  end if
771  if (allocated(rset%nl%indlmn))  then
772    ABI_FREE(rset%nl%indlmn)
773  end if
774  if (allocated(rset%nl%projec))  then
775    ABI_FREE(rset%nl%projec)
776  end if
777  if (allocated(rset%inf%gcart))  then
778    ABI_FREE(rset%inf%gcart)
779  end if
780 
781  call pawfgr_destroy(rset%pawfgr)
782 
783  ! No is needed deallocate rset%mpi: it is a copy of mpi_enreg which
784  ! pointers are deallocated in gstate
785 
786 #ifdef HAVE_GPU_CUDA
787   call CleanRecGPU(rset%GPU,rset%load)
788 #endif
789 
790 end subroutine CleanRec

m_rec/cpu_distribution [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 cpu_distribution

FUNCTION

 Calculate the number of point,GPU,for any proc

INPUTS

  ngfft(3)=nuber of point of the grid
  gratio=recgratio ratio between the fine and coarse grid
  beta_coeff=estimated time ratio between CPU_time and GPU_time
  calc_type=if 0 takes the possible max for nptrec (to test the
  completly full graphic card). 1 after test to calculate the min
  possible value for nptrec

OUTPUT

SOURCE

278  subroutine cpu_distribution(gratio,rset,ngfft,beta_coeff,calc_type)
279 
280  implicit none
281 
282 !Arguments ------------------------------------
283  integer,intent(in)  :: gratio,calc_type
284  real(dp),intent(in) :: beta_coeff
285  integer,intent(in)  :: ngfft(3)
286  type(recursion_type),intent(inout),target :: rset
287 !Local ---------------------------
288  integer :: ii,nfft,ierr
289  integer,pointer :: proc_pt_dev(:,:)
290  type(recparall_type),pointer :: recpar
291  character(500) :: msg
292 ! *********************************************************************
293 
294  ! write(std_out,*)'start cpu_distribution'
295 
296  nullify(proc_pt_dev)
297  ABI_MALLOC(proc_pt_dev,(2,0:rset%mpi%nproc-1))
298 
299  nfft = product(ngfft)
300  call H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff)
301 
302  nullify(recpar)
303  if(rset%load == 0)then
304    ABI_MALLOC(rset%par%displs,(0:rset%mpi%nproc-1))
305    ABI_MALLOC(rset%par%vcount,(0:rset%mpi%nproc-1))
306    recpar => rset%par
307 #if defined HAVE_GPU_CUDA
308  else
309    if(rset%tp==4)then
310      if(.not. allocated(rset%GPU%par%displs)) then
311        ABI_MALLOC(rset%GPU%par%displs,(0:rset%mpi%nproc-1))
312        ABI_MALLOC(rset%GPU%par%vcount,(0:rset%mpi%nproc-1))
313      end if
314    endif
315    recpar => rset%GPU%par
316 #endif
317  endif
318 
319  recpar%ntranche = nfft/(rset%mpi%nproc)!equipartitioned point
320 
321  call find_maxmin_proc(recpar,rset%mpi%nproc,&
322 &                      rset%mpi%me,gratio,ngfft,proc_pt_dev)
323 
324  recpar%vcount = 0
325  if(rset%load==0)then
326    recpar%vcount(rset%mpi%me) = recpar%ntranche
327  else
328    recpar%vcount(rset%mpi%me) = recpar%npt
329  endif
330 
331  call xmpi_sum(recpar%vcount,rset%mpi%comm_bandfft,ierr)
332 
333  recpar%displs = 0
334  if(rset%mpi%nproc>1) recpar%displs(1:) = (/(sum(recpar%vcount(:ii)),ii=0,rset%mpi%nproc-2)/)
335 
336  !--INITALIZATION OF CUDA FOR RECURSION
337 #if defined HAVE_GPU_CUDA
338  if(rset%load == 0)   rset%GPU%par = rset%par
339  call InitRecGPU(rset,nfft,gratio,rset%GPU%map(rset%mpi%me),calc_type)
340 #else
341   ierr = calc_type !only of abirule when there is not HAVE_GPU_CUDA
342 #endif
343 
344 
345 ! if(rset%debug ) then
346  write(msg,'(a,i7,2(2a,3i7),8(2a,i7),2(2a,3i7),(2a,e14.6))')&
347    & ' me                 ',  rset%mpi%me,ch10,&
348    & ' ngfft              ',  ngfft(1:3),ch10,&
349    & ' ngfftrec           ',  rset%ngfftrec(1:3),ch10,&
350    & ' load               ',  rset%load,ch10,&
351    & ' ntranche           ',  recpar%ntranche,ch10,&
352    & ' min_pt             ',  recpar%min_pt,ch10,&
353    & ' max_pt             ',  recpar%max_pt,ch10,&
354    & ' rset%mpi%nproc     ',  rset%mpi%nproc,ch10,&
355    & ' rset%mpi%nproc_fft ',  rset%mpi%nproc_fft,ch10,&
356    & ' dtset%ngfft(10)    ',  rset%ngfftrec(10),ch10,&
357    & ' recpar%npt         ',  recpar%npt,ch10,&
358    & ' recpar%pt0         ',  recpar%pt0%x,recpar%pt0%y,recpar%pt0%z,ch10,&
359    & ' recpar%pt1         ',  recpar%pt1%x,recpar%pt1%y,recpar%pt1%z,ch10,&
360    & ' grid step          ',  rset%inf%tr(1)
361  call wrtout(std_out,msg,'PERS')
362 #if defined HAVE_GPU_CUDA
363  write(msg,'(a,i7,2(2a,i7),a)')&
364    & ' rset%ngp           ',  rset%ngpu,ch10,&
365    & ' gpudevice          ',  rset%gpudevice,ch10,&
366    & ' nptrec             ',  rset%GPU%nptrec,ch10
367  call wrtout(std_out,msg,'PERS')
368 #endif
369 !  write(std_out,*)'display',recpar%displs
370 !  write(std_out,*)'vcount',recpar%vcount
371 ! end if
372 
373 
374  nullify(recpar)
375  if(associated(proc_pt_dev))  then
376    ABI_FREE(proc_pt_dev)
377  end if
378 
379 ! write(std_out,*)'exit from cpu_distribution'
380 end subroutine cpu_distribution

m_rec/find_maxmin_proc [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 find_maxmin_proc

FUNCTION

 To calculate max and min pt for any cpu, it is useful for
 recgratio!=1

INPUTS

 nproc = number of procs
 me = identity of the proc
 ngfft(3) = fine grid (corresponds to dtset%ngfft(1:3))
 proc_pt_dev(2,0:nproc-1) which device and how many points
 recpar%npt = number of points computed by the proc me (see side effects)

OUTPUT

 recpar%pt0<type(vec_int)>=Intial point for this proc in x,y,z
 recpar%pt1<type(vec_int)>=Final point for this proc in x,y,z
 recpar%min_pt=Intial point for this proc
 recpar%max_pt=Final point for this proc

SIDE EFFECTS

 recpar%ntranche=number of pts computed by the proc me on the fine grid.


 So when  recgratio!=1, ntranche will  not correspond to the npt!

SOURCE

218 subroutine find_maxmin_proc(recpar,nproc,me,gratio,ngfft,proc_pt_dev)
219 
220  implicit none
221 
222 !Arguments ------------------------------------
223  integer,intent(in)   :: nproc,me,gratio
224  integer,intent(in)   :: ngfft(3)
225  type(recparall_type),intent(inout) :: recpar
226  integer,pointer :: proc_pt_dev(:,:)
227 !Local ---------------------------
228  integer :: pointoncpu
229  integer :: nfft,ntot,ii
230  integer :: inf,sup
231  integer :: proc_limit(0:nproc-1)
232 ! *********************************************************************
233  !  write(std_out,*)'start find_maxmin_proc'
234  recpar%npt = 0
235  nfft = product(ngfft)
236  ntot = nfft/(gratio*gratio*gratio)
237  pointoncpu = ntot/nproc
238 
239  proc_limit = (/(sum(proc_pt_dev(2,:ii)),ii=0,nproc-1)/)
240 
241  if(gratio==1)then
242    recpar%ntranche = proc_limit(me)
243    if(me/=0) recpar%ntranche = recpar%ntranche-proc_limit(me-1)
244  endif
245 
246  inf=0
247  if(me/=0) inf = proc_limit(me-1)
248  sup = proc_limit(me)
249 
250 
251  call get_pt0_pt1(ngfft,gratio,inf,sup,recpar)
252 
253  recpar%npt = sup-inf
254 
255  !write(std_out,*)'exit find_maxmin_proc'
256 end subroutine find_maxmin_proc

m_rec/H_D_distrib [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 H_D_distrib

FUNCTION

 Calculate the number of point,GPU,for any proc

INPUTS

  rset<recursion_type>= recursion variables
  cpu (-1 if there are not gpu)
  nfft=nuber of point of the fine grid
  ngfftrec=nuber of point of one edge of the coarse grid
  gratio=recgratio ratio between the fine and coarse grid
  beta_coeff=estimated time ratio between CPU_time and GPU_time

OUTPUT

  proc_pt_dev(2,0:nproc-1) which device and how many points
  that proc has to compute: proc_pt_dev(1,iproc) which device
  associated to proc i (-1 if none), proc_pt_dev(2,iproc) how
  many points

SOURCE

100 subroutine H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff)
101 
102  implicit none
103 
104 !Arguments ------------------------------------
105  integer, intent(in) :: nfft,gratio
106  real(dp),intent(in) :: beta_coeff
107  integer,pointer :: proc_pt_dev(:,:)
108  type(recursion_type),intent(inout) :: rset
109 !Local ---------------------------
110  integer :: me,icpu,resto,ntot,ngpu
111  integer :: n_per_cpu,n_per_gpu
112  character(500) :: msg
113 #ifdef HAVE_GPU_CUDA
114  integer,pointer :: ndev(:)
115 #else
116  integer :: ndev(0:rset%mpi%nproc-1)
117 #endif
118 ! *********************************************************************
119 
120 
121 #ifdef HAVE_GPU_CUDA
122  ndev => rset%GPU%map
123 #else
124  ndev = -1
125 #endif
126 
127  me = rset%mpi%me
128  ntot = nfft/(gratio*gratio*gratio)
129  ngpu = rset%ngpu
130 
131  !--If sequential code all points are computed by the proc 0
132  if(rset%mpi%nproc ==1) then
133    proc_pt_dev(1,0) = ndev(0)
134    proc_pt_dev(2,0) = ntot
135    return
136  end if
137 
138  !--Number of points for any cpu
139  n_per_cpu = int(int(ntot/(rset%mpi%nproc+ngpu*(beta_coeff-1.d0))))
140  n_per_gpu = int(n_per_cpu*beta_coeff)
141  !write(std_out,*)'n_per_cpu',n_per_cpu
142  !write(std_out,*)'rset%GPU%map',rset%GPU%map
143  do icpu=0,rset%mpi%nproc-1
144    proc_pt_dev(1,icpu) = ndev(icpu)
145    proc_pt_dev(2,icpu) = n_per_cpu
146    if(ndev(icpu)>-1) proc_pt_dev(2,icpu) = n_per_gpu
147  end do
148 
149  !--Distribute the rest
150  resto = ntot-sum(proc_pt_dev(2,:))
151  icpu = 0
152  !write(std_out,*)'rest',resto,ngpu
153  if(resto>0) then
154    if(ngpu/=0) then
155      !--distribute rest only on GPU
156      do while(resto/=0)
157        if(proc_pt_dev(1,icpu)>-1) then
158          proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1
159          resto = resto-1
160        endif
161        icpu = mod(icpu+1,rset%mpi%nproc)
162      enddo
163    else
164      !--distribute rest on all CPU
165      do while(resto/=0)
166        proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1
167        resto = resto-1
168        icpu = mod(icpu+1,rset%mpi%nproc)
169      enddo
170      return
171    endif
172  endif
173 
174  !--Printing GPU and load distribution on procs
175  write(msg,'(3a)')&
176       & ' -Load on procs------------',ch10,&
177       & '   me  device        points'
178  call wrtout(std_out,msg,'COLL')
179  do icpu=0,rset%mpi%nproc-1
180    write(msg,'(i5,i8,i14)') icpu,proc_pt_dev(:,icpu);
181    call wrtout(std_out,msg,'COLL')
182  end do
183 
184 end subroutine H_D_distrib

m_rec/Init_MetricRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Init_MetricRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.
 In particular, the information on the infinitesimal metric.
 Also other variable are initialized

INPUTS

 rmet: metrics
 ucvol=unit cell volume in bohr**3.
 ngfft(1:3)=fine grid used in recursion
 rprimd=Real space PRIMitive translations, Dimensional
 xred=vectors (X) of atom positions in reduced coordinates
 natom=number of atoms
 debug=debug variable

OUTPUT

 metrec <type(metricrec_type)>= infinitesimal metrics used in recursion

SOURCE

572 subroutine Init_MetricRec(metrec,nlpsp,rmet,ucvol,rprimd,xred,ngfft,natom,debug)
573 
574  implicit none
575 !Arguments ------------------------------------
576 !scalars
577  integer, intent(in) ::natom
578  real(dp), intent(in) :: ucvol
579  logical,intent(in) ::nlpsp,debug
580  type(metricrec_type),intent(inout) :: metrec
581 !arrays
582  integer,intent(in) :: ngfft(3)
583  real(dp),intent(in) :: rmet(3,3),rprimd(3,3),xred(3,natom)
584 
585 !Local ---------------------------
586  integer :: ii
587  real(dp) :: xcart(3,natom)
588  character(500) :: msg
589 ! *********************************************************************
590 
591  !--Intialisation of variables concerning the infinitesimal metric
592  do ii=1,3
593    metrec%rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp))
594  end do
595  metrec%ucvol = ucvol/real(product(ngfft(1:3)),dp)
596  metrec%tr(:) = sqrt((/(metrec%rmet(ii,ii),ii=1,3)/)) !grid step
597 
598  !--Initialisation of others variables
599  !--In non-loc-psp case: calculate the position of ions and conversion factor
600  if(nlpsp) then
601    do ii = 1,natom
602      xcart(:,ii) = matmul(rprimd(:,:),xred(:,ii))
603    end do
604    metrec%gcart(:,:) = per_cond(natom,xcart,ngfft(1:3),metrec%tr(:))
605    if(debug) then
606      do ii=1,natom
607        write (msg,'(a,3f8.2)')'xcart=',xcart(:,ii)
608        call wrtout(std_out,msg,'COLL')
609        write (msg,'(a,3i4)')'gcart=',metrec%gcart(:,ii)
610        call wrtout(std_out,msg,'COLL')
611      end do
612    end if
613  end if
614 
615 end subroutine Init_MetricRec

m_rec/Init_nlpspRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 Init_nlpspRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.
 In particular, the non-local part of pseudo-potential.

INPUTS

 tempe=temperature
 psps <type(pseudopotential_type)>=variables related to pseudo-potentials
 metrec <type(metricrec_type)>=infinitesimal metrics used in recursion
 ngfftrec(18)=Number of Grid points for Fast Fourier Transform for
   Recursion (truncated box, if different from ngfft)
 debug=debug variable

SIDE EFFECTS

 nlrec <type(nlpsprec_type)>=pseudo-potentials information for recursion

SOURCE

639 subroutine Init_nlpspRec(tempe,psps,nlrec,metrec,ngfftrec,debug)
640 
641  implicit none
642 !Arguments ------------------------------------
643 ! scalars
644  logical,intent(in) :: debug
645  real(dp), intent(in) :: tempe
646  type(pseudopotential_type),intent(in) ::psps
647  type(metricrec_type),intent(inout) :: metrec
648  type(nlpsprec_type),intent(inout) :: nlrec
649 ! arrays
650  integer,intent(in) :: ngfftrec(18)
651 !Local ---------------------------
652  integer :: ii,jj
653  character(500) :: msg
654 ! *********************************************************************
655  !!--Routine for the calcul of the non-local pseudo
656  if(any(psps%pspcod/=3) .and. nlrec%nlpsp ) then
657    msg = "The non-local part of psp is used in Recursion only for hgh-psp"
658    ABI_WARNING(msg)
659    nlrec%nlpsp = .False.
660    if (allocated(metrec%gcart))  then
661      ABI_FREE(metrec%gcart)
662    end if
663  end if
664 
665  if(any(psps%pspcod==3) .and.  nlrec%nlpsp) then
666 
667   nlrec%nlpsp = .True.
668   nlrec%npsp  = psps%npsp
669   nlrec%lmnmax = count(psps%indlmn(3,:,psps%npsp)/=0)
670   ABI_MALLOC(nlrec%mat_exp_psp_nl,(3,3,psps%mpsang,psps%npsp))
671   ABI_MALLOC(nlrec%eival,(3,psps%mpsang,psps%npsp))
672   ABI_MALLOC(nlrec%eivec,(3,3,psps%mpsang,psps%npsp))
673   ABI_MALLOC(nlrec%pspinfo,(psps%mpsang,psps%npsp))
674   ABI_MALLOC(nlrec%radii,(psps%mpsang,psps%npsp))
675   ABI_MALLOC(nlrec%indlmn,(6,nlrec%lmnmax,psps%npsp))
676   nlrec%indlmn(:,:,:) = psps%indlmn(:,:nlrec%lmnmax,:)
677   nlrec%mat_exp_psp_nl(:,:,:,:) = zero
678   nlrec%eivec(:,:,:,:) = zero
679   nlrec%eival(:,:,:) = zero
680   nlrec%radii(:,:) = zero
681   nlrec%pspinfo(:,:) = 0
682 
683   !--Get the exponential of the strength times the projectors overlap
684   !  of the non-local part of psp(hgh):
685   !  h_ij=strength; g_ij=ovelap => (exp(-h.g/temp/4p)-Identity).g^(-1)
686   !  And the diagonalisation of the projectors and associated eigenvectors
687   call pspnl_hgh_rec(psps,tempe,nlrec,debug)
688 
689   if(debug)then
690    do jj=1,psps%npsp
691     write(msg,'(a)')' Exponential matrices:'
692     call wrtout(std_out,msg,'COLL')
693     do ii=1,psps%mpsang
694      write(msg,'(a,i2,a,3f15.10,a,3f15.10,a,3f15.10)')&
695        &   'angular moment',ii-1,ch10,&
696        &                    nlrec%mat_exp_psp_nl(1,:,ii,jj),ch10,&
697        &                    nlrec%mat_exp_psp_nl(2,:,ii,jj),ch10,&
698        &                    nlrec%mat_exp_psp_nl(3,:,ii,jj)
699      call wrtout(std_out,msg,'COLL')
700     end do
701    end do
702   end if
703 
704   !--Now it calculates the matrix of the exp(V_NL)
705   call pspnl_operat_rec(nlrec,metrec,ngfftrec,debug)
706 
707  else !--Only local pseudo potentials
708   nlrec%nlpsp = .False.
709   nlrec%npsp  = psps%npsp
710   ABI_MALLOC(nlrec%mat_exp_psp_nl,(0,0,0,0))
711   ABI_MALLOC(nlrec%pspinfo,(0,0))
712   ABI_MALLOC(nlrec%radii,(0,0))
713   ABI_MALLOC(nlrec%indlmn,(0,0,0))
714   ABI_MALLOC(nlrec%projec,(0,0,0))
715  endif
716 
717 end subroutine Init_nlpspRec

m_rec/InitRec [ Functions ]

[ Top ] [ m_rec ] [ Functions ]

NAME

 InitRec

FUNCTION

 Initialise the rset<recursion_type>=Data type concerning recursion.

INPUTS

 dtset <type(dataset_type)>=all input variables in this dataset
 mpi_ab <type(mpi_type)=MPI-parallelisation information
 mproj=0 if psp is only local

SIDE EFFECTS

 All pointers set to null().

SOURCE

402 subroutine InitRec(dtset,mpi_ab,rset,rmet,mproj)
403 
404 #ifdef HAVE_GPU_CUDA
405  use m_gpu_detect,only    :get_topo,find_set_gpu
406  use m_hidecudarec,only   :InitRecGPU_0
407 #include "cuda_common.h"
408 #endif
409  implicit none
410 
411 !Arguments ------------------------------------
412 ! scalars
413  integer,intent(in) :: mproj
414  type(dataset_type),intent(in) :: dtset
415  type(MPI_type),intent(in),target :: mpi_ab
416  type(recursion_type),intent(inout) :: rset
417  real(dp),intent(in) :: rmet(3,3)
418 ! arrays
419 !Local ---------------------------
420  integer :: ii
421  real(dp) :: beta,rtrotter
422 #if defined HAVE_GPU_CUDA
423  character(500) :: msg
424 #endif
425 ! *********************************************************************
426  ! @recursion_type
427  ! @pawfgr_type
428 
429  !--Initialisation
430  beta = one/dtset%tsmear  !--Inverse of temperature
431  !--Rewriting the trotter parameter
432  rtrotter  = max(half,real(dtset%recptrott,dp))
433 
434  rset%debug= (dtset%prtvol==-7)
435  rset%quitrec   = 0
436  rset%min_nrec  = dtset%recnrec
437  rset%efermi    = dtset%recefermi !initial guess for fermie
438 
439  rset%nfftrec   = 0
440  rset%ngfftrec  = 0
441 
442  rset%tronc = .False.
443 
444  rset%mpi => mpi_ab
445 
446  !--Are all pseudo-potentials local?
447  rset%nl%nlpsp = (mproj/=0)
448  !--Some initialisation concerning the metrics
449  !  If non-local psps then it allocates the atoms positions
450  !   on the grid
451  if(rset%nl%nlpsp)  then
452   ABI_MALLOC(rset%inf%gcart,(3,dtset%natom))
453  else
454   ABI_MALLOC(rset%inf%gcart,(0,0))
455  end if
456  rset%inf%gcart = 0
457 
458  !----------------------------------------------------------
459  !--TRONCATION OF THE BOX
460  !! determines new dimensions the method is similar to the one used
461  !! in getng  (except that ecut and xboxcutmin give no constraint,
462  !!  and symmetries are not handled)
463 
464  call getngrec(dtset%ngfft,rmet,rset%ngfftrec,rset%nfftrec,dtset%recrcut,0.25d0*sqrt(beta/rtrotter),rset%tronc)
465  !  1/4*sqrt(beta/trotter) for guess - should be modified
466 
467  !------------------------------------------------------------
468  !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC
469  !----------------------------------------------------------
470  !--Paralelism using the band communicator (not used in the recursion)
471  !--Distribution on procs with cuda
472 
473 
474  rset%ngpu = 0       !--Initial guess no GPU at all
475  rset%gpudevice = -1 !--Inital guess no GPU associated
476  rset%load = 0       !--Inital homogeneous work load
477  rset%tp = 0         !--Initial guess 1 cpu, 0 gpu
478 
479 
480 #ifdef HAVE_GPU_CUDA
481  !--Initialise GPU variables for recursion
482  call InitRecGPU_0(rset%GPU,mpi_ab)
483 
484  !--Get the distribution of GPUs on CPUs
485  call find_set_gpu(mpi_ab%nproc,mpi_ab%comm_bandfft,rset%GPU%map,rset%ngpu)
486 
487  !--Get the topology of the machine
488  call get_topo(rset%mpi%nproc,rset%ngpu,rset%tp)
489  if(rset%tp>4)then
490    msg = 'm_rec: number of gpu>number of cpu is not implemented'
491    ABI_ERROR(msg)
492  endif
493 !  rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1
494 !  rset%ngpu = 0; rset%GPU%map=-1
495  !--For the moment cuda doesnt take into account non-local psp
496  if(rset%nl%nlpsp) then
497    rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1
498    rset%GPU%map = -1
499  endif
500 #else
501  if(rset%mpi%nproc>1)rset%tp = 1
502 #endif
503 
504  !--Basic initialization for recursion metric (only needed for printing)
505  do ii=1,3
506    rset%inf%rmet(ii,:) = rmet(ii,:)/(real(dtset%ngfft(1:3)*dtset%ngfft(ii),dp))
507  end do
508  rset%inf%tr(:) = sqrt((/(rset%inf%rmet(ii,ii),ii=1,3)/)) !grid step
509 
510  !--Compute the work loqd distribution on devices (gpu,cpu)
511  call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),1.d0,0)
512 
513  !------------------------------------------------------------
514  !--DEFINITION VARIABLE COARSE-FINE GRID  TO USE TRANSGRID--INGRID FUNCTIONS
515  call pawfgr_nullify(rset%pawfgr)
516  !if coarse grid is used
517  if (dtset%recgratio>1) then
518    !fine grid--
519    rset%pawfgr%mgfft = 0
520    rset%pawfgr%nfft = product(dtset%ngfft(1:3))
521    rset%pawfgr%ngfft(:) = dtset%ngfft(:)
522    rset%pawfgr%ngfft(9:11)=(/0,1,0/)
523    rset%pawfgr%ngfft(12:13)= dtset%ngfft(2:3)
524    !coarse grid--
525    rset%pawfgr%mgfftc = 0
526    rset%pawfgr%ngfftc(:) = rset%pawfgr%ngfft(:)
527    rset%pawfgr%ngfftc(:3) = floor(real(dtset%ngfft(:3)+1,dp)/real(dtset%recgratio,dp))
528    rset%pawfgr%nfftc = product(rset%pawfgr%ngfftc(1:3))
529 
530    rset%pawfgr%usefinegrid = 1
531    ABI_MALLOC(rset%pawfgr%fintocoa,(rset%pawfgr%nfft))
532    ABI_MALLOC(rset%pawfgr%coatofin,(rset%pawfgr%nfftc))
533    call indgrid(rset%pawfgr%coatofin,rset%pawfgr%fintocoa,&
534      rset%pawfgr%nfftc,rset%pawfgr%nfft,&
535      rset%pawfgr%ngfftc,rset%pawfgr%ngfft)
536 
537  else
538    rset%pawfgr%mgfft = 0
539    rset%pawfgr%ngfft = 0
540    rset%pawfgr%mgfftc = 0
541 
542    rset%pawfgr%usefinegrid = 0
543  end if
544 
545 
546 end subroutine InitRec