TABLE OF CONTENTS
- ABINIT/getngrec
- ABINIT/m_rec
- ABINIT/pspnl_hgh_rec
- ABINIT/pspnl_operat_rec
- m_rec/Calcnrec
- m_rec/CleanRec
- m_rec/cpu_distribution
- m_rec/find_maxmin_proc
- m_rec/H_D_distrib
- m_rec/Init_MetricRec
- m_rec/Init_nlpspRec
- m_rec/InitRec
ABINIT/getngrec [ Functions ]
NAME
getngrec
FUNCTION
This routine computes the fft box for the recursion method, accordingly to the troncation radius. It is quite similar to getng, but : - there is no xboxcut and ecut consistency - ngfft (the initial fft box) is the maximum fft box
INPUTS
ngfft(18)=non truncated fft box mgfft=maximum of ngfft(1:3) inf_rmet=define the infinitesimal metric : rprimd*(transpose(rprimd)), divided by the number of discretisation point recrcut=truncating delta=to obtain radius of truncation
OUTPUT
ngfftrec=truncated fft box nfftrec= truncated nfft tronc=True if truncation is made
SIDE EFFECTS
SOURCE
874 subroutine getngrec(ngfft,rmet,ngfftrec,nfftrec,recrcut,delta,tronc) 875 876 implicit none 877 878 !Arguments ------------------------------- 879 !scalars 880 real(dp),intent(in) :: recrcut,delta 881 integer,intent(out) :: nfftrec 882 logical,intent(out) :: tronc 883 !arrays 884 integer,intent(in) :: ngfft(18) 885 real(dp),intent(in) :: rmet(3,3) 886 integer,intent(out) :: ngfftrec(18) 887 888 !Local variables------------------------------- 889 !scalars 890 integer :: ii,iimin,index,jj,jjmin,kk,kkmin,largest_ngfftrec,maxpow11,maxpow2 891 integer :: maxpow3,maxpow5,maxpow7,mmsrch,plane 892 real(dp) :: dsm,dsp,dsqmin,rtroncat 893 !arrays 894 integer :: get_ngfftrec(3),maxsrch(3),minsrch(3) 895 integer,allocatable :: iperm(:),srch(:) 896 real(dp) :: tsec(2) 897 real(dp) :: inf_rmet(3,3) 898 ! ************************************************************************* 899 900 call timab(602,1,tsec) 901 902 ngfftrec(:) = ngfft(:) 903 904 if (recrcut>tol14) then !default value dtset%recrcut = zero means no troncation 905 rtroncat = recrcut+delta 906 get_ngfftrec(:)=1 907 plane = 1 908 909 do ii=1,3 910 inf_rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp)) 911 end do 912 913 914 ! minimum value of ngfftrec 915 do ii = 1,3 916 ngfftrec(ii)=floor(2*rtroncat/sqrt(inf_rmet(ii,ii)))+1 !minimum value 917 if(ngfftrec(ii)>=ngfft(ii))then 918 ngfftrec(ii)=ngfft(ii) 919 get_ngfftrec(ii)=0 920 end if 921 end do 922 923 924 if(sum(get_ngfftrec)/=0)then 925 largest_ngfftrec=maxval(ngfft(1:3)) 926 maxpow2=int(log(largest_ngfftrec+0.5d0)/log(two)) 927 maxpow3=int(log(largest_ngfftrec+0.5d0)/log(three)) 928 maxpow5=int(log(largest_ngfftrec+0.5d0)/log(five)) 929 maxpow7=0 930 maxpow11=0 931 mmsrch=(maxpow2+1)*(maxpow3+1)*(maxpow5+1)*(maxpow7+1)*(maxpow11+1) 932 ABI_MALLOC(srch,(mmsrch)) 933 ABI_MALLOC(iperm,(mmsrch)) 934 ! Factors of 2 935 srch(1)=1 936 do ii=1,maxpow2 937 srch(ii+1)=srch(ii)*2 938 end do 939 ! Factors of 3 940 index=maxpow2+1 941 if(maxpow3>0)then 942 do ii=1,maxpow3 943 srch(1+ii*index:(ii+1)*index)=3*srch(1+(ii-1)*index:ii*index) 944 end do 945 end if 946 ! Factors of 5 947 index=(maxpow3+1)*index 948 if(maxpow5>0)then 949 do ii=1,maxpow5 950 srch(1+ii*index:(ii+1)*index)=5*srch(1+(ii-1)*index:ii*index) 951 end do 952 end if 953 ! Factors of 7 954 index=(maxpow5+1)*index 955 if(maxpow7>0)then 956 do ii=1,maxpow7 957 srch(1+ii*index:(ii+1)*index)=7*srch(1+(ii-1)*index:ii*index) 958 end do 959 end if 960 ! Factors of 11 961 index=(maxpow7+1)*index 962 if(maxpow11>0)then 963 do ii=1,maxpow11 964 srch(1+ii*index:(ii+1)*index)=11*srch(1+(ii-1)*index:ii*index) 965 end do 966 end if 967 ! srch is the set of allowed ngfftrec values 968 969 call sort_int(mmsrch,srch,iperm) 970 ABI_FREE(iperm) 971 972 do ii=1,3 973 if(get_ngfftrec(ii)==1)then 974 do jj=1,mmsrch 975 if(srch(jj)>=ngfftrec(ii))then 976 minsrch(ii)=jj 977 ngfftrec(ii)=srch(jj) 978 exit 979 end if 980 end do 981 do jj=minsrch(ii),mmsrch 982 if(srch(jj)>ngfft(ii))then 983 ! since ngfftrec(ii)<ngfft(ii) for get_ngfftrec(ii)==1, 984 ! and srch(mmsrch)maxval(ngfft(1:3)), 985 ! that will appens in the range minsrch(ii),mmsrch 986 maxsrch(ii)=jj-1 987 exit 988 end if 989 end do 990 end if 991 ! since ngfft(ii) is in srch, we have here srch(maxsrch(ii))=ngfft(ii) 992 ! minsrch(ii), maxsrch(ii) is the range of index of srch in which we can 993 ! search ngfftrec(ii) 994 995 if(ngfftrec(ii)>=ngfft(ii))then 996 ngfftrec(ii)=ngfft(ii) 997 get_ngfftrec(ii)=0 998 end if 999 end do 1000 end if 1001 1002 ! verify that the entiere truncation sphere is in the fft box ; 1003 ! but only in the dimension in which we do not consider the entiere fft box 1004 do while(sum(get_ngfftrec)/=0) !again... 1005 1006 ! determining the minimum distance between 0 and the boundary 1007 ! of the fft box 1008 ! quite similar to the subroutine "bound", but only over the plane which 1009 ! are not the whole fft box 1010 dsqmin=dsq_rec(ngfftrec(1)/2,-ngfftrec(2)/2,-ngfftrec(3)/2,inf_rmet)+0.01d0 1011 1012 if(get_ngfftrec(1)/=0)then 1013 ! look at +/- g1 planes: 1014 do jj=-ngfftrec(2)/2,ngfftrec(2)/2 1015 do kk=-ngfftrec(3)/2,ngfftrec(3)/2 1016 dsp = dsq_rec(ngfftrec(1)/2, jj, kk,inf_rmet) 1017 dsm = dsq_rec( - ngfftrec(1)/2, jj, kk,inf_rmet) 1018 if (dsp<dsqmin) then 1019 dsqmin = dsp 1020 iimin = ngfftrec(1)/2 1021 jjmin = jj 1022 kkmin = kk 1023 plane=1 1024 end if 1025 if (dsm<dsqmin) then 1026 dsqmin = dsm 1027 iimin = - ngfftrec(1)/2 1028 jjmin = jj 1029 kkmin = kk 1030 plane=1 1031 end if 1032 end do 1033 end do 1034 end if 1035 1036 if(get_ngfftrec(2)/=0)then 1037 ! +/- g2 planes: 1038 do ii=-ngfftrec(1)/2,ngfftrec(1)/2 1039 do kk=-ngfftrec(3)/2,ngfftrec(3)/2 1040 dsp = dsq_rec(ii,ngfftrec(2)/2,kk,inf_rmet) 1041 dsm = dsq_rec(ii,-ngfftrec(2)/2,kk,inf_rmet) 1042 if (dsp<dsqmin) then 1043 dsqmin = dsp 1044 iimin = ii 1045 jjmin = ngfftrec(2)/2 1046 kkmin = kk 1047 plane=2 1048 end if 1049 if (dsm<dsqmin) then 1050 dsqmin = dsm 1051 iimin = ii 1052 jjmin = - ngfftrec(2)/2 1053 kkmin = kk 1054 plane=2 1055 end if 1056 end do 1057 end do 1058 end if 1059 1060 if(get_ngfftrec(3)/=0)then 1061 ! +/- g3 planes: 1062 do ii=-ngfftrec(1)/2,ngfftrec(1)/2 1063 do jj=-ngfftrec(2)/2,ngfftrec(2)/2 1064 dsp = dsq_rec(ii,jj,ngfftrec(3)/2,inf_rmet) 1065 dsm = dsq_rec(ii,jj,-ngfftrec(3)/2,inf_rmet) 1066 if (dsp<dsqmin) then 1067 dsqmin = dsp 1068 iimin = ii 1069 jjmin = jj 1070 kkmin = ngfftrec(3)/2 1071 plane=3 1072 end if 1073 if (dsm<dsqmin) then 1074 dsqmin = dsm 1075 iimin = ii 1076 jjmin = jj 1077 kkmin = - ngfftrec(3)/2 1078 plane=3 1079 end if 1080 end do 1081 end do 1082 end if 1083 1084 if(dsqmin>=rtroncat)then 1085 get_ngfftrec=0 1086 exit 1087 end if 1088 1089 ! Fix nearest boundary 1090 do ii=minsrch(plane),maxsrch(plane) 1091 if (srch(ii)>=ngfftrec(plane)) then 1092 ! redefine ngfft(plane) to next higher choice 1093 ngfftrec(plane)=srch(ii+1) 1094 ! verify if we cover the whole box 1095 if(ngfftrec(plane)>=ngfft(plane))then 1096 ngfftrec(plane)=ngfft(plane) 1097 get_ngfftrec(plane)=0 1098 end if 1099 ! Exit the loop over ii 1100 exit 1101 end if 1102 end do 1103 1104 end do 1105 1106 if (allocated(srch)) then 1107 ABI_FREE(srch) 1108 end if 1109 1110 ! if(mod(ngfftrec(1),16)/=0) then 1111 ! ngfftrec(1) = ngfftrec(1)+(16-mod(ngfftrec(1),16)) 1112 ! ngfftrec(2:3) = ngfftrec(1) 1113 ! endif 1114 1115 ngfftrec(4)=2*(ngfftrec(1)/2)+1 1116 ngfftrec(5)=2*(ngfftrec(2)/2)+1 1117 ngfftrec(6)=ngfftrec(3) 1118 1119 ! --algorithm 1120 ngfftrec(7)=ngfft(7) ! to be improved for a better non-parallel algorithm - here it is automatically 401 1121 ngfftrec(8)=ngfft(8) 1122 1123 end if 1124 1125 !--For now, recursion method doesn't use paralelism on FFT - which would require a great number of processors 1126 nfftrec = product(ngfftrec(1:3)) 1127 ngfftrec(9:11) = (/0,1,0/) !--(/ paral, nproc, %me \) 1128 ngfftrec(12:13) = ngfftrec(2:3) ! n2proc ! n3proc 1129 1130 tronc = all(ngfftrec(:3)/=ngfft(:3)) 1131 call timab(602,2,tsec) 1132 1133 contains 1134 1135 function dsq_rec(ii,jj,kk,inf_rmet) 1136 1137 real(dp) :: dsq_rec 1138 integer,intent(in) :: ii,jj,kk 1139 real(dp),intent(in) :: inf_rmet(3,3) 1140 dsq_rec=sqrt(& 1141 & inf_rmet(1,1)*dble(ii**2)& 1142 & +inf_rmet(2,2)*dble(jj**2)& 1143 & +inf_rmet(3,3)*dble(kk**2)& 1144 & +two*(inf_rmet(1,2)*dble(ii*jj)& 1145 & +inf_rmet(2,3)*dble(jj*kk)& 1146 & +inf_rmet(3,1)*dble(kk*ii))) 1147 end function dsq_rec 1148 1149 1150 end subroutine getngrec
ABINIT/m_rec [ Modules ]
NAME
m_rec
FUNCTION
This module provides some functions applied to the recursion structured datatype recursion_type. It includes also some function used to change some variables of recursion_type
COPYRIGHT
Copyright (C) 2002-2024 ABINIT group (MMancini) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt . For the initials of contributors, see ~abinit/doc/developers/contributors.txt.
NOTES
* Routines tagged with "@type_name" are strongly connected to the definition of the data type. Strongly connected means that the proper functioning of the implementation relies on the assumption that the tagged procedure is consistent with the type declaration. Every time a developer changes the structure "type_name" adding new entries, he/she has to make sure that all the strongly connected routines are changed accordingly to accomodate the modification of the data type. Typical examples of strongly connected routines are creation, destruction or reset methods.
SOURCE
30 #if defined HAVE_CONFIG_H 31 #include "config.h" 32 #endif 33 34 #include "abi_common.h" 35 36 module m_rec 37 38 use defs_basis 39 use defs_rectypes 40 use m_abicore 41 use m_errors 42 use m_xmpi 43 use m_sort 44 use m_dtset 45 46 use defs_datatypes, only : pseudopotential_type 47 use defs_abitypes, only : mpi_type 48 use m_exp_mat, only : exp_mat 49 use m_numeric_tools, only : set2unit 50 use m_special_funcs, only : gamma_function 51 use m_pawfgr, only : pawfgr_nullify, indgrid, pawfgr_destroy 52 use m_paw_sphharm, only : initylmr 53 use m_time, only : timab 54 use m_rec_tools, only : get_pt0_pt1 55 use m_per_cond, only : per_cond 56 #ifdef HAVE_GPU_CUDA 57 use m_hidecudarec, only : InitRecGPU, CleanRecGPU 58 #endif 59 60 61 implicit none 62 63 private :: & 64 find_maxmin_proc, & !--To calculate max and min pt for any cpu 65 H_D_distrib 66 67 public :: & 68 InitRec, & !--Main creation method. 69 Init_MetricRec, & !--To Initalize the inf. metric in recursion 70 Init_nlpspRec, & !--Main creation method for non-local part. 71 CleanRec, & !--deallocate all pointers. 72 Calcnrec, & !--calculates the new min_nrec 73 cpu_distribution !--Regulates the work load on cpu-gpu 74 CONTAINS !===========================================================
ABINIT/pspnl_hgh_rec [ Functions ]
NAME
pspnl_hgh_rec
FUNCTION
This routine computes the matrices S_kk'^{l,A} of the projectors (it is the exponential of the overlap matrix). It coorresponds to the matrix: $$\left[(U_l)^{-1}*Exp(-temperature D_l )*U_l* (g_l)^{-1} -Identity\right]_kk' where (U_l)^-1* D_l* U_l = h^lg_l. $g_l = <f^l_k|f^l_{k'}>$ is the overlap matrix between projectors and $h^l_{kk'}$ is the strength matrix of the projectors. It calulates also the strength eigenvalues and eigenvectors of $h$, used in the calculus of non-local energy
INPUTS
temperature=4*rtrotter/beta=4*rtrotter*tsmear: the effective temp. in recursion psps <type(pseudopotential_type)>=variables related to pseudo-potentials debug=debug variable
OUTPUT
nlrec%mat_exp_psp_nl=the matrix of the exponential of the projectors: for any psp, for any angular moment: h_ij=strength; g_ij=ovelap => exp(-h.g/temp/4p).g^(-1) nlrec%radii=Local radii of nl psp nlrec%pspinfo(:,:) for any typat: (momang,typat)=number of projectors nlrec%eival(:,:,:) for any psp, any momang, the eigenvalues of the strength matrix H: eival(:,mang,psp) nlrec%eivec(:,:,:,:)for any psp, any momang, the eigenvectors of the strength matrix H: eivec(:,:,mang,psp)
SIDE EFFECTS
SOURCE
1405 subroutine pspnl_hgh_rec(psps,temperature,nlrec,debug) 1406 1407 use m_linalg_interfaces 1408 implicit none 1409 1410 !Arguments ----------------------------------- 1411 !scalars 1412 real(dp),intent(in) :: temperature 1413 logical,intent(in) :: debug 1414 type(pseudopotential_type),intent(in) :: psps 1415 type(nlpsprec_type),intent(inout) :: nlrec 1416 !arrays 1417 !Local variables------------------------------- 1418 !scalars 1419 integer,parameter :: maxsize=3 1420 integer,parameter :: lwork=(1+32)*maxsize 1421 integer :: iangol,ipseudo,info,nproj 1422 integer :: g_mat_size,ii,nproj2 1423 real(dp) :: denom_1,denom_2,tot_proj 1424 character(len=500) :: msg 1425 !arrays 1426 integer :: ipvt(1) 1427 !real(dp) :: rwork(2*maxsize) 1428 real(dp) :: h_mat_init(3,3), rework(lwork) 1429 real(dp), allocatable :: g_mat(:,:),h_mat(:,:),eig_val_h(:) 1430 real(dp), allocatable :: identity(:,:),inv_g_mat(:,:),u_mat(:,:) 1431 1432 complex(dpc),allocatable :: hg_mat(:,:) 1433 ! ************************************************************************* 1434 1435 if(debug)then 1436 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : enter ' 1437 call wrtout(std_out,msg,'COLL') 1438 end if 1439 1440 !--For any pseudo potential: 1441 do ipseudo = 1, psps%npsp !--Loop on the pseudos 1442 write(msg,'(a,80a)')' pseudo file',('-',ii=1,10) 1443 call wrtout(std_out,msg,'COLL') 1444 write(msg,'(a)') psps%filpsp(ipseudo) 1445 call wrtout(std_out,msg,'COLL') 1446 1447 ! --For any angular moment: 1448 do iangol = 0,psps%mpsang-1 !--Loop on the angular moment 1449 1450 ! --Local radius 1451 nlrec%radii(iangol+1,ipseudo) = psps%gth_params%psppar(iangol+1,0,ipseudo) 1452 1453 ! --Strenghts of non-local projectors (matrix h) 1454 ! --Diagonal part: 1455 h_mat_init = zero 1456 h_mat_init(1,1) = psps%gth_params%psppar(iangol+1,1,ipseudo) 1457 h_mat_init(2,2) = psps%gth_params%psppar(iangol+1,2,ipseudo) 1458 h_mat_init(3,3) = psps%gth_params%psppar(iangol+1,3,ipseudo) 1459 ! --Out-diagonal part 1460 ! --Depending on angular moment the projectors 1461 ! strength is calculated differently 1462 select case(iangol) 1463 case(0) 1464 h_mat_init(1,2) = -half*sqrt(3.d0/5.d0)*h_mat_init(2,2) 1465 h_mat_init(1,3) = half*sqrt(5.d0/21.d0)*h_mat_init(3,3) 1466 h_mat_init(2,3) = -half*sqrt(100.d0/63.d0)*h_mat_init(3,3) 1467 case(1) 1468 h_mat_init(1,2) = -half*sqrt(5.d0/7.d0)*h_mat_init(2,2) 1469 h_mat_init(1,3) = sixth*sqrt(35.d0/11.d0)*h_mat_init(3,3) 1470 h_mat_init(2,3) = -14.d0/six/sqrt(11.d0) *h_mat_init(3,3) 1471 case(2) 1472 h_mat_init(1,2) = -half*sqrt(7.d0/9.d0)*h_mat_init(2,2) 1473 h_mat_init(1,3) = half*sqrt(63.d0/143.d0)*h_mat_init(3,3) 1474 h_mat_init(2,3) = -nine/sqrt(143.d0)*h_mat_init(3,3) 1475 case(3) 1476 h_mat_init(1,2) = zero; h_mat_init(1,3) = zero; h_mat_init(2,3) = zero; 1477 case default 1478 write(msg,'(a)')' error angular: moment component' 1479 call wrtout(std_out,msg,'COLL') 1480 end select 1481 1482 1483 1484 ! --Real dimensions of projectors. 1485 g_mat_size = count(abs((/ (h_mat_init(ii,ii),ii=1,3) /))>1.d-8) 1486 nlrec%pspinfo(iangol+1,ipseudo) = g_mat_size 1487 write(msg,'(a,i2,a,i2)')' ang. moment=',iangol,', N projectors=',g_mat_size 1488 call wrtout(std_out,msg,'COLL') 1489 if (g_mat_size>0) then 1490 ! --Identity matrix 1491 ABI_MALLOC(identity,(g_mat_size,g_mat_size)) 1492 call set2unit(identity) 1493 ! identity = zero 1494 ! identity(:,1) = one 1495 ! identity(:,:) = cshift(array=identity,shift=(/ (-ii,ii=0,g_mat_size) /), dim=2 ) 1496 1497 1498 ! ############## CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ################## 1499 ! --Inverse of the matrix h 1500 ABI_MALLOC(eig_val_h,(g_mat_size)) 1501 ABI_MALLOC(u_mat,(g_mat_size,g_mat_size)) 1502 ! --u-mat will contain the eigenvectors of h_mat_init 1503 u_mat = h_mat_init(:g_mat_size,:g_mat_size) 1504 1505 ! write(std_out,*)'hmat_init' 1506 ! do ii=1,g_mat_size 1507 ! write(std_out,*)h_mat_init(ii,:) 1508 ! end do 1509 call DSYEV('v','u',g_mat_size,u_mat,g_mat_size,eig_val_h,rework,lwork,info) 1510 1511 ! --THE DIAGONAL MATRIX IS GIVEN BY D=U^t.H.U 1512 ! (eival=transpose(eivec).h_mat_init.eivec) 1513 write(msg,'(a,3d10.3)')' eigenvalues=',eig_val_h 1514 call wrtout(std_out,msg,'COLL') 1515 ! write(std_out,*)'autovec';write(std_out,*)u_mat 1516 1517 nlrec%eival(:g_mat_size,1+iangol,ipseudo) = eig_val_h 1518 nlrec%eivec(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = u_mat 1519 ABI_FREE(eig_val_h) 1520 ABI_FREE(u_mat) 1521 1522 ! ##########END CALCULOUS OF THE EIGEN_SPACE OF THE PROJECTORS STRENGTHS ################## 1523 1524 ABI_MALLOC(g_mat,(g_mat_size,g_mat_size)) 1525 ABI_MALLOC(inv_g_mat,(g_mat_size,g_mat_size)) 1526 ABI_MALLOC(h_mat,(g_mat_size,g_mat_size)) 1527 ABI_MALLOC(hg_mat,(g_mat_size,g_mat_size)) 1528 1529 g_mat(:,:) = one 1530 h_mat(:,:) = zero 1531 h_mat(:,:) = h_mat_init(:g_mat_size,:g_mat_size) 1532 1533 ! ------------------------------------------------------- 1534 ! --Matrix of the overlap between projetors (matrix g) 1535 ! and the h matrix of strength 1536 do nproj = 1,g_mat_size-1 1537 do nproj2 = 1+nproj,g_mat_size 1538 tot_proj = zero 1539 ! --Analytic value of overlap 1540 ! g_ij=Gamma[-1/2+i+j+l]/Sqrt(Gamma[-1/2+i+iangol]*Gamma[-1/2+j+iangol]) 1541 call gamma_function(-half+real(nproj+nproj2+iangol,dp),tot_proj) 1542 call gamma_function(-half+real(iangol+2*nproj,dp),denom_1) 1543 call gamma_function(-half+real(iangol+2*nproj2,dp),denom_2) 1544 1545 g_mat(nproj,nproj2) = tot_proj/sqrt(denom_1*denom_2) 1546 g_mat(nproj2,nproj) = g_mat(nproj,nproj2) 1547 1548 h_mat(nproj,nproj2) = h_mat_init(nproj,nproj2) 1549 h_mat(nproj2,nproj) = h_mat_init(nproj,nproj2) 1550 end do 1551 end do 1552 1553 ! --Inverse of the overlap matrix g 1554 inv_g_mat = g_mat 1555 call DGETRF(g_mat_size,g_mat_size,inv_g_mat,g_mat_size,ipvt,info) 1556 call DGETRI(g_mat_size,inv_g_mat,g_mat_size,ipvt,rework,lwork,info) 1557 1558 1559 ! ----------------------------------------------------------- 1560 ! --Now it calculates the exponential of the matrix h.g 1561 hg_mat = matmul(h_mat,g_mat) 1562 1563 call exp_mat(hg_mat,g_mat_size,-one/temperature) 1564 1565 ! --(exp(h.g)-Identity).(g^-1) 1566 hg_mat = hg_mat-identity(:,:) 1567 1568 1569 ! --results on output 1570 nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) = matmul(real(hg_mat,dp),inv_g_mat) 1571 1572 ! write(std_out,*) nlrec%mat_exp_psp_nl(:g_mat_size,:g_mat_size,1+iangol,ipseudo) 1573 1574 ABI_FREE(g_mat) 1575 ABI_FREE(hg_mat) 1576 ABI_FREE(h_mat) 1577 ABI_FREE(inv_g_mat) 1578 ABI_FREE(identity) 1579 end if 1580 1581 end do !enddo on angular moment 1582 end do !enddo on pseudos 1583 1584 1585 if(debug)then 1586 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_hgh_rec : exit ' 1587 call wrtout(std_out,msg,'COLL') 1588 end if 1589 1590 end subroutine pspnl_hgh_rec
ABINIT/pspnl_operat_rec [ Functions ]
NAME
pspnl_operat_rec
FUNCTION
It calculates the non-local projectors used in recursion for any psp non-local: The nl interaction in recursion is $$exp{-V_{NL}/beta}=\sum_A\sum_{lm} \sum{ij}Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)D^l_{i,j}Y_{lm}(\hat{r-R_A})f^l_j{r-R_A}$$ where $D^_{i,j}$ is a matrix previously (see pspnl_operat_rec). In this routine the projectors $Y_{lm}(\hat{r-R_A}')f^l_i(r-R_A)$ are calculated. So an array of dimensions rset%nl%projec(nfftrec,lmnmax,nlrec%npsp)
INPUTS
metrec<metricrec_type>=contains information concerning metric in recursion: grid_step, metric, infinitesimal volume ngfftrec(18)=is the ngfft grid (truncated if different from ngfft) of recursion debug=debug variable
OUTPUT
nlrec<nlpsprec_type>%projec= array containig the projectors on the real grid nlrec<nlpsprec_type>%intlen= integer linear size of the non-local grid
SIDE EFFECTS
nlrec<nlpsprec_type> data set of non-local pseudo for recursion The better Interaction length (Intlen) is also calculated and printed but recursion use intlen=ngfftrec/2
NOTES
SOURCE
1187 subroutine pspnl_operat_rec(nlrec,metrec,ngfftrec,debug) 1188 1189 implicit none 1190 1191 !Arguments ------------------------------------ 1192 !scalars 1193 logical,intent(in) :: debug 1194 type(metricrec_type),intent(in) ::metrec 1195 type(nlpsprec_type),intent(inout) :: nlrec 1196 !arrays 1197 integer,intent(in) :: ngfftrec(18) 1198 !Local variables------------------------------- 1199 integer :: ii,intlen 1200 integer :: iangol,ipsp,iproj 1201 integer :: mpsang,jj,kk,rr 1202 integer :: nfftrec 1203 integer :: ilmn,il,ilm,in,lmnmax 1204 real(dp) :: raggio,rloc,denom,step 1205 real(dp) :: delta_out,partial,err 1206 character(len=500) :: msg 1207 real(dp) :: part_sum(3) 1208 real(dp) :: ylmr_gr_dm(0,0,0) 1209 real(dp),allocatable :: ylmr(:,:),proj_arr(:,:,:) 1210 real(dp),allocatable :: radloc(:,:),nrm(:) 1211 1212 ! ************************************************************************* 1213 1214 if(debug)then 1215 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : enter ' 1216 call wrtout(std_out,msg,'PERS') 1217 end if 1218 1219 !##################################################################### 1220 !--CALCULATE THE (SEMI-)MAXIMUM INTERVAL WHERE ALL THE PROJECTORS ARE 1221 !DIFFERENT TO ZERO. 1222 !--For any pseudo potential: 1223 delta_out = zero 1224 step = metrec%tr(1)*half !--Scanning step= grid step/2 1225 1226 1227 do ipsp = 1, nlrec%npsp !--Loop on the pseudos 1228 1229 ! --For any angular moment: 1230 do iangol = 0,maxval(nlrec%indlmn(1,:,ipsp)) !--Loop on the angular moment 1231 rloc = nlrec%radii(iangol+1,ipsp) !--Local radius 1232 1233 ! --For any projector 1234 do iproj = 1,nlrec%pspinfo(iangol+1,ipsp) 1235 ! --Starting point to searching when the projector goes to zero. 1236 ! this correspond to twice the radius wher the projector has its maximum 1237 raggio = two*sqrt(real(-2+2*iproj+iangol,dp))*rloc 1238 ! --Caculate the gamma func at the denominator 1239 call gamma_function(real(iangol+2*iproj,dp)-half,denom) 1240 ! --Find the zero 1241 ! --The following while cycle should be replaced by a bisection 1242 ! --method. Bucause this is calculated only 1 time it is not very 1243 ! important. 1244 err = one 1245 ii=0 1246 ! tolloc = 1.d0*abs(minval(nlrec%mat_exp_psp_nl(:nlrec%pspinfo(iangol+1,ipsp),:nlrec%pspinfo(iangol+1,ipsp),iangol+1,ipsp))) 1247 do while(abs(err)>1.d-2) 1248 raggio = raggio + step 1249 err = project_prec(raggio,iproj,iangol,rloc)/sqrt(denom) 1250 ii = ii+1 1251 end do 1252 write(std_out,*)'local delta',raggio,ii 1253 delta_out=maxval((/ delta_out,raggio /)) 1254 end do !end loop on projectors 1255 1256 end do !enddo on angular moment 1257 end do !enddo on pseudos 1258 1259 !--CALCULATE how many grid steps correspond to delta_out 1260 intlen = int(delta_out/metrec%tr(1)) 1261 !--I want that intlen is odd 1262 if(mod(intlen,2)==0) intlen = intlen+1 1263 1264 write(msg,'(2a,i3,a)') ch10,' Interac. length of non-local psp(grid steps)=',intlen,ch10 1265 call wrtout(std_out,msg,'COLL') 1266 !##################################################################### 1267 1268 !--Initialisation 1269 nfftrec = product(ngfftrec(1:3)) 1270 lmnmax = nlrec%lmnmax 1271 intlen = ngfftrec(1)/2 1272 nlrec%intlen = intlen !--Setted in recursion variables 1273 1274 !##################################################################### 1275 !--CALCULATE E(q,q') 1276 !--Cration of the exponential*projectors*ylm matrix 1277 1278 !--Initialisation 1279 ABI_MALLOC(nlrec%projec,(nfftrec,lmnmax,nlrec%npsp)) 1280 nlrec%projec = zero 1281 ABI_MALLOC(radloc,(3,nfftrec)) 1282 radloc = zero 1283 ABI_MALLOC(nrm,(nfftrec)) 1284 nrm = zero 1285 1286 !--Loop on pseudo types 1287 pseudodo: do ipsp = 1, nlrec%npsp 1288 ! --Control if the psp is non-local, else continue 1289 if(all(nlrec%pspinfo(:,ipsp)==0)) cycle 1290 ! --Vector which stores localy the upper part of symmetrical 1291 ! matrix of the exponential of the non-local operator 1292 mpsang = maxval(nlrec%indlmn(1,:,ipsp))+1 1293 ABI_MALLOC(proj_arr,(nfftrec,maxval(nlrec%pspinfo(:,ipsp)),mpsang)) 1294 ABI_MALLOC(ylmr,(mpsang*mpsang,nfftrec)) 1295 proj_arr = zero 1296 ylmr = zero 1297 1298 ! !debug 1299 ! write(std_out,*)'mpsang,proj num',mpsang,maxval(nlrec%pspinfo(:,ipsp)) 1300 ! !enddebug 1301 1302 ! --Calculate the projctors 1303 do iangol = 0,mpsang-1 1304 rloc = nlrec%radii(iangol+1,ipsp) 1305 do iproj = 1,nlrec%pspinfo(iangol+1,ipsp) 1306 call gamma_function(real(iangol+2*iproj,dp)-half,denom) 1307 denom = one/sqrt(denom) 1308 do ii = 0,ngfftrec(1)-1 !--3-loop on coordinates 1309 do jj = 0,ngfftrec(2)-1 1310 do kk = 0,ngfftrec(3)-1 1311 ! --Calculate the radii 1312 part_sum(:) = real((/ ii,jj,kk /)-intlen,dp)*(metrec%tr) 1313 rr = 1+ii+(jj+kk*ngfftrec(2))*ngfftrec(3) 1314 radloc(:,rr) = part_sum 1315 nrm(rr) = sqrt(sum(part_sum(:)**two)) 1316 partial = project_prec(nrm(rr),iproj,iangol,rloc)*denom 1317 if(abs(partial)>tol12 ) proj_arr(rr,iproj,iangol+1) = partial 1318 end do 1319 end do 1320 end do !--End 3-loop on coordinates 1321 end do 1322 end do 1323 1324 1325 ! ------------------------------------------------------------- 1326 ! --Calculate the spherical harmonics (Verified: it works well) 1327 call initylmr(mpsang,1,nfftrec,nrm(:),1,radloc(:,:),ylmr(:,:),ylmr_gr_dm) 1328 ! ------------------------------------------------------------- 1329 1330 1331 do ilmn = 1,lmnmax 1332 ilm = nlrec%indlmn(4,ilmn,ipsp) 1333 il = nlrec%indlmn(1,ilmn,ipsp)+1 1334 in = nlrec%indlmn(3,ilmn,ipsp) 1335 write(msg,'(2a,i3,2i2)')ch10,'lm,l,n',ilm,il,in 1336 call wrtout(std_out,msg,'COLL') 1337 1338 nlrec%projec(:,ilmn,ipsp) = ylmr(ilm,:)*proj_arr(:,in,il) 1339 end do 1340 1341 ABI_FREE(ylmr) 1342 ABI_FREE(proj_arr) 1343 end do pseudodo !--end loop on pseudo types 1344 1345 1346 ABI_FREE(radloc) 1347 ABI_FREE(nrm) 1348 1349 if(debug)then 1350 write(msg,'(80a,a,a)') ('=',ii=1,80),ch10,' pspnl_operat_rec : exit ' 1351 call wrtout(std_out,msg,'PERS') 1352 end if 1353 1354 contains 1355 1356 function project_prec(raggio,iproj,iangol,rloc) 1357 !--Analytical expression of the projectors in hgh-pspeudopotential 1358 !--The gamma function at denominator is missing 1359 real(dp) :: project_prec 1360 integer,intent(in) :: iproj,iangol 1361 real(dp),intent(in) :: raggio,rloc 1362 1363 project_prec=sqrt2*(raggio/rloc)**real((iangol+2*(iproj-1)),dp)*& 1364 & exp(-((raggio/rloc)**two)*half)/rloc**onehalf 1365 end function project_prec 1366 1367 end subroutine pspnl_operat_rec
m_rec/Calcnrec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Calcnrec
FUNCTION
Calculate the new min_nrec.
INPUTS
rset<recursion_type>=Data type concerning recursion b2(:,:) recursion coefficients
OUTPUT
rset%min_nrec is changed
SOURCE
809 subroutine Calcnrec(rset,b2) 810 811 implicit none 812 813 !Arguments ------------------------------------ 814 ! scalars 815 type(recursion_type),intent(inout) :: rset 816 ! arrays 817 real(dp), intent(in):: b2(0:rset%min_nrec,1:rset%par%ntranche) 818 !Local ---------------------------------------- 819 ! scalars 820 integer :: kk,ii,jj,ierr,loc_nrec 821 character(len=500) :: msg 822 ! ********************************************************************* 823 ! @recursion_type 824 ! @pawfgr_type 825 kk = 1 826 loc_nrec = rset%min_nrec 827 do ii=1,rset%par%ntranche 828 !--Use to lbound because b2 passed as argument 829 ! doesn't have the same bounds as in the calling 830 ! subroutine, the +1 because b2(lbound,ii)=1. 831 jj = lbound(b2,dim=1)+1 832 do while (b2(jj,ii)>tol10 .and. jj<=rset%min_nrec-1) 833 jj = jj+1 834 kk = max(jj,kk) 835 end do 836 enddo 837 call xmpi_max(kk,rset%min_nrec,rset%mpi%comm_bandfft,ierr) 838 rset%min_nrec = rset%min_nrec+1-lbound(b2,dim=1) 839 840 write(msg,'(a,i2.2,a9,i2.2)') ' -- nrec adjustement nrec=',loc_nrec,' => nrec=',rset%min_nrec 841 call wrtout(std_out,msg,'COLL') 842 write(msg,'(51a)')' ',('-',ii=1,50) 843 call wrtout(std_out,msg,'COLL') 844 845 end subroutine Calcnrec
m_rec/CleanRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
CleanRec
FUNCTION
Deallocate the pointers of rset<recursion_type>=Data type concerning recursion.
INPUTS
rset<recursion_type>=Data type concerning recursion
SIDE EFFECTS
All pointers are deallocated.
SOURCE
735 subroutine CleanRec(rset) 736 737 implicit none 738 739 !Arguments ------------------------------------ 740 ! scalars 741 type(recursion_type),intent(inout) :: rset 742 ! arrays 743 ! ********************************************************************* 744 745 ! @recursion_type 746 747 if (allocated(rset%ZT_p)) then 748 ABI_FREE(rset%ZT_p) 749 end if 750 if (allocated(rset%par%displs)) then 751 ABI_FREE(rset%par%displs) 752 end if 753 if (allocated(rset%par%vcount)) then 754 ABI_FREE(rset%par%vcount) 755 end if 756 if (allocated(rset%nl%mat_exp_psp_nl)) then 757 ABI_FREE(rset%nl%mat_exp_psp_nl) 758 end if 759 if (allocated(rset%nl%eival)) then 760 ABI_FREE(rset%nl%eival) 761 end if 762 if (allocated(rset%nl%eivec)) then 763 ABI_FREE(rset%nl%eivec) 764 end if 765 if (allocated(rset%nl%pspinfo)) then 766 ABI_FREE(rset%nl%pspinfo) 767 end if 768 if (allocated(rset%nl%radii)) then 769 ABI_FREE(rset%nl%radii) 770 end if 771 if (allocated(rset%nl%indlmn)) then 772 ABI_FREE(rset%nl%indlmn) 773 end if 774 if (allocated(rset%nl%projec)) then 775 ABI_FREE(rset%nl%projec) 776 end if 777 if (allocated(rset%inf%gcart)) then 778 ABI_FREE(rset%inf%gcart) 779 end if 780 781 call pawfgr_destroy(rset%pawfgr) 782 783 ! No is needed deallocate rset%mpi: it is a copy of mpi_enreg which 784 ! pointers are deallocated in gstate 785 786 #ifdef HAVE_GPU_CUDA 787 call CleanRecGPU(rset%GPU,rset%load) 788 #endif 789 790 end subroutine CleanRec
m_rec/cpu_distribution [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
cpu_distribution
FUNCTION
Calculate the number of point,GPU,for any proc
INPUTS
ngfft(3)=nuber of point of the grid gratio=recgratio ratio between the fine and coarse grid beta_coeff=estimated time ratio between CPU_time and GPU_time calc_type=if 0 takes the possible max for nptrec (to test the completly full graphic card). 1 after test to calculate the min possible value for nptrec
OUTPUT
SOURCE
278 subroutine cpu_distribution(gratio,rset,ngfft,beta_coeff,calc_type) 279 280 implicit none 281 282 !Arguments ------------------------------------ 283 integer,intent(in) :: gratio,calc_type 284 real(dp),intent(in) :: beta_coeff 285 integer,intent(in) :: ngfft(3) 286 type(recursion_type),intent(inout),target :: rset 287 !Local --------------------------- 288 integer :: ii,nfft,ierr 289 integer,pointer :: proc_pt_dev(:,:) 290 type(recparall_type),pointer :: recpar 291 character(500) :: msg 292 ! ********************************************************************* 293 294 ! write(std_out,*)'start cpu_distribution' 295 296 nullify(proc_pt_dev) 297 ABI_MALLOC(proc_pt_dev,(2,0:rset%mpi%nproc-1)) 298 299 nfft = product(ngfft) 300 call H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff) 301 302 nullify(recpar) 303 if(rset%load == 0)then 304 ABI_MALLOC(rset%par%displs,(0:rset%mpi%nproc-1)) 305 ABI_MALLOC(rset%par%vcount,(0:rset%mpi%nproc-1)) 306 recpar => rset%par 307 #if defined HAVE_GPU_CUDA 308 else 309 if(rset%tp==4)then 310 if(.not. allocated(rset%GPU%par%displs)) then 311 ABI_MALLOC(rset%GPU%par%displs,(0:rset%mpi%nproc-1)) 312 ABI_MALLOC(rset%GPU%par%vcount,(0:rset%mpi%nproc-1)) 313 end if 314 endif 315 recpar => rset%GPU%par 316 #endif 317 endif 318 319 recpar%ntranche = nfft/(rset%mpi%nproc)!equipartitioned point 320 321 call find_maxmin_proc(recpar,rset%mpi%nproc,& 322 & rset%mpi%me,gratio,ngfft,proc_pt_dev) 323 324 recpar%vcount = 0 325 if(rset%load==0)then 326 recpar%vcount(rset%mpi%me) = recpar%ntranche 327 else 328 recpar%vcount(rset%mpi%me) = recpar%npt 329 endif 330 331 call xmpi_sum(recpar%vcount,rset%mpi%comm_bandfft,ierr) 332 333 recpar%displs = 0 334 if(rset%mpi%nproc>1) recpar%displs(1:) = (/(sum(recpar%vcount(:ii)),ii=0,rset%mpi%nproc-2)/) 335 336 !--INITALIZATION OF CUDA FOR RECURSION 337 #if defined HAVE_GPU_CUDA 338 if(rset%load == 0) rset%GPU%par = rset%par 339 call InitRecGPU(rset,nfft,gratio,rset%GPU%map(rset%mpi%me),calc_type) 340 #else 341 ierr = calc_type !only of abirule when there is not HAVE_GPU_CUDA 342 #endif 343 344 345 ! if(rset%debug ) then 346 write(msg,'(a,i7,2(2a,3i7),8(2a,i7),2(2a,3i7),(2a,e14.6))')& 347 & ' me ', rset%mpi%me,ch10,& 348 & ' ngfft ', ngfft(1:3),ch10,& 349 & ' ngfftrec ', rset%ngfftrec(1:3),ch10,& 350 & ' load ', rset%load,ch10,& 351 & ' ntranche ', recpar%ntranche,ch10,& 352 & ' min_pt ', recpar%min_pt,ch10,& 353 & ' max_pt ', recpar%max_pt,ch10,& 354 & ' rset%mpi%nproc ', rset%mpi%nproc,ch10,& 355 & ' rset%mpi%nproc_fft ', rset%mpi%nproc_fft,ch10,& 356 & ' dtset%ngfft(10) ', rset%ngfftrec(10),ch10,& 357 & ' recpar%npt ', recpar%npt,ch10,& 358 & ' recpar%pt0 ', recpar%pt0%x,recpar%pt0%y,recpar%pt0%z,ch10,& 359 & ' recpar%pt1 ', recpar%pt1%x,recpar%pt1%y,recpar%pt1%z,ch10,& 360 & ' grid step ', rset%inf%tr(1) 361 call wrtout(std_out,msg,'PERS') 362 #if defined HAVE_GPU_CUDA 363 write(msg,'(a,i7,2(2a,i7),a)')& 364 & ' rset%ngp ', rset%ngpu,ch10,& 365 & ' gpudevice ', rset%gpudevice,ch10,& 366 & ' nptrec ', rset%GPU%nptrec,ch10 367 call wrtout(std_out,msg,'PERS') 368 #endif 369 ! write(std_out,*)'display',recpar%displs 370 ! write(std_out,*)'vcount',recpar%vcount 371 ! end if 372 373 374 nullify(recpar) 375 if(associated(proc_pt_dev)) then 376 ABI_FREE(proc_pt_dev) 377 end if 378 379 ! write(std_out,*)'exit from cpu_distribution' 380 end subroutine cpu_distribution
m_rec/find_maxmin_proc [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
find_maxmin_proc
FUNCTION
To calculate max and min pt for any cpu, it is useful for recgratio!=1
INPUTS
nproc = number of procs me = identity of the proc ngfft(3) = fine grid (corresponds to dtset%ngfft(1:3)) proc_pt_dev(2,0:nproc-1) which device and how many points recpar%npt = number of points computed by the proc me (see side effects)
OUTPUT
recpar%pt0<type(vec_int)>=Intial point for this proc in x,y,z recpar%pt1<type(vec_int)>=Final point for this proc in x,y,z recpar%min_pt=Intial point for this proc recpar%max_pt=Final point for this proc
SIDE EFFECTS
recpar%ntranche=number of pts computed by the proc me on the fine grid. So when recgratio!=1, ntranche will not correspond to the npt!
SOURCE
218 subroutine find_maxmin_proc(recpar,nproc,me,gratio,ngfft,proc_pt_dev) 219 220 implicit none 221 222 !Arguments ------------------------------------ 223 integer,intent(in) :: nproc,me,gratio 224 integer,intent(in) :: ngfft(3) 225 type(recparall_type),intent(inout) :: recpar 226 integer,pointer :: proc_pt_dev(:,:) 227 !Local --------------------------- 228 integer :: pointoncpu 229 integer :: nfft,ntot,ii 230 integer :: inf,sup 231 integer :: proc_limit(0:nproc-1) 232 ! ********************************************************************* 233 ! write(std_out,*)'start find_maxmin_proc' 234 recpar%npt = 0 235 nfft = product(ngfft) 236 ntot = nfft/(gratio*gratio*gratio) 237 pointoncpu = ntot/nproc 238 239 proc_limit = (/(sum(proc_pt_dev(2,:ii)),ii=0,nproc-1)/) 240 241 if(gratio==1)then 242 recpar%ntranche = proc_limit(me) 243 if(me/=0) recpar%ntranche = recpar%ntranche-proc_limit(me-1) 244 endif 245 246 inf=0 247 if(me/=0) inf = proc_limit(me-1) 248 sup = proc_limit(me) 249 250 251 call get_pt0_pt1(ngfft,gratio,inf,sup,recpar) 252 253 recpar%npt = sup-inf 254 255 !write(std_out,*)'exit find_maxmin_proc' 256 end subroutine find_maxmin_proc
m_rec/H_D_distrib [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
H_D_distrib
FUNCTION
Calculate the number of point,GPU,for any proc
INPUTS
rset<recursion_type>= recursion variables cpu (-1 if there are not gpu) nfft=nuber of point of the fine grid ngfftrec=nuber of point of one edge of the coarse grid gratio=recgratio ratio between the fine and coarse grid beta_coeff=estimated time ratio between CPU_time and GPU_time
OUTPUT
proc_pt_dev(2,0:nproc-1) which device and how many points that proc has to compute: proc_pt_dev(1,iproc) which device associated to proc i (-1 if none), proc_pt_dev(2,iproc) how many points
SOURCE
100 subroutine H_D_distrib(rset,nfft,gratio,proc_pt_dev,beta_coeff) 101 102 implicit none 103 104 !Arguments ------------------------------------ 105 integer, intent(in) :: nfft,gratio 106 real(dp),intent(in) :: beta_coeff 107 integer,pointer :: proc_pt_dev(:,:) 108 type(recursion_type),intent(inout) :: rset 109 !Local --------------------------- 110 integer :: me,icpu,resto,ntot,ngpu 111 integer :: n_per_cpu,n_per_gpu 112 character(500) :: msg 113 #ifdef HAVE_GPU_CUDA 114 integer,pointer :: ndev(:) 115 #else 116 integer :: ndev(0:rset%mpi%nproc-1) 117 #endif 118 ! ********************************************************************* 119 120 121 #ifdef HAVE_GPU_CUDA 122 ndev => rset%GPU%map 123 #else 124 ndev = -1 125 #endif 126 127 me = rset%mpi%me 128 ntot = nfft/(gratio*gratio*gratio) 129 ngpu = rset%ngpu 130 131 !--If sequential code all points are computed by the proc 0 132 if(rset%mpi%nproc ==1) then 133 proc_pt_dev(1,0) = ndev(0) 134 proc_pt_dev(2,0) = ntot 135 return 136 end if 137 138 !--Number of points for any cpu 139 n_per_cpu = int(int(ntot/(rset%mpi%nproc+ngpu*(beta_coeff-1.d0)))) 140 n_per_gpu = int(n_per_cpu*beta_coeff) 141 !write(std_out,*)'n_per_cpu',n_per_cpu 142 !write(std_out,*)'rset%GPU%map',rset%GPU%map 143 do icpu=0,rset%mpi%nproc-1 144 proc_pt_dev(1,icpu) = ndev(icpu) 145 proc_pt_dev(2,icpu) = n_per_cpu 146 if(ndev(icpu)>-1) proc_pt_dev(2,icpu) = n_per_gpu 147 end do 148 149 !--Distribute the rest 150 resto = ntot-sum(proc_pt_dev(2,:)) 151 icpu = 0 152 !write(std_out,*)'rest',resto,ngpu 153 if(resto>0) then 154 if(ngpu/=0) then 155 !--distribute rest only on GPU 156 do while(resto/=0) 157 if(proc_pt_dev(1,icpu)>-1) then 158 proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1 159 resto = resto-1 160 endif 161 icpu = mod(icpu+1,rset%mpi%nproc) 162 enddo 163 else 164 !--distribute rest on all CPU 165 do while(resto/=0) 166 proc_pt_dev(2,icpu) = proc_pt_dev(2,icpu)+1 167 resto = resto-1 168 icpu = mod(icpu+1,rset%mpi%nproc) 169 enddo 170 return 171 endif 172 endif 173 174 !--Printing GPU and load distribution on procs 175 write(msg,'(3a)')& 176 & ' -Load on procs------------',ch10,& 177 & ' me device points' 178 call wrtout(std_out,msg,'COLL') 179 do icpu=0,rset%mpi%nproc-1 180 write(msg,'(i5,i8,i14)') icpu,proc_pt_dev(:,icpu); 181 call wrtout(std_out,msg,'COLL') 182 end do 183 184 end subroutine H_D_distrib
m_rec/Init_MetricRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Init_MetricRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion. In particular, the information on the infinitesimal metric. Also other variable are initialized
INPUTS
rmet: metrics ucvol=unit cell volume in bohr**3. ngfft(1:3)=fine grid used in recursion rprimd=Real space PRIMitive translations, Dimensional xred=vectors (X) of atom positions in reduced coordinates natom=number of atoms debug=debug variable
OUTPUT
metrec <type(metricrec_type)>= infinitesimal metrics used in recursion
SOURCE
572 subroutine Init_MetricRec(metrec,nlpsp,rmet,ucvol,rprimd,xred,ngfft,natom,debug) 573 574 implicit none 575 !Arguments ------------------------------------ 576 !scalars 577 integer, intent(in) ::natom 578 real(dp), intent(in) :: ucvol 579 logical,intent(in) ::nlpsp,debug 580 type(metricrec_type),intent(inout) :: metrec 581 !arrays 582 integer,intent(in) :: ngfft(3) 583 real(dp),intent(in) :: rmet(3,3),rprimd(3,3),xred(3,natom) 584 585 !Local --------------------------- 586 integer :: ii 587 real(dp) :: xcart(3,natom) 588 character(500) :: msg 589 ! ********************************************************************* 590 591 !--Intialisation of variables concerning the infinitesimal metric 592 do ii=1,3 593 metrec%rmet(ii,:) = rmet(ii,:)/(real(ngfft(1:3)*ngfft(ii),dp)) 594 end do 595 metrec%ucvol = ucvol/real(product(ngfft(1:3)),dp) 596 metrec%tr(:) = sqrt((/(metrec%rmet(ii,ii),ii=1,3)/)) !grid step 597 598 !--Initialisation of others variables 599 !--In non-loc-psp case: calculate the position of ions and conversion factor 600 if(nlpsp) then 601 do ii = 1,natom 602 xcart(:,ii) = matmul(rprimd(:,:),xred(:,ii)) 603 end do 604 metrec%gcart(:,:) = per_cond(natom,xcart,ngfft(1:3),metrec%tr(:)) 605 if(debug) then 606 do ii=1,natom 607 write (msg,'(a,3f8.2)')'xcart=',xcart(:,ii) 608 call wrtout(std_out,msg,'COLL') 609 write (msg,'(a,3i4)')'gcart=',metrec%gcart(:,ii) 610 call wrtout(std_out,msg,'COLL') 611 end do 612 end if 613 end if 614 615 end subroutine Init_MetricRec
m_rec/Init_nlpspRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
Init_nlpspRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion. In particular, the non-local part of pseudo-potential.
INPUTS
tempe=temperature psps <type(pseudopotential_type)>=variables related to pseudo-potentials metrec <type(metricrec_type)>=infinitesimal metrics used in recursion ngfftrec(18)=Number of Grid points for Fast Fourier Transform for Recursion (truncated box, if different from ngfft) debug=debug variable
SIDE EFFECTS
nlrec <type(nlpsprec_type)>=pseudo-potentials information for recursion
SOURCE
639 subroutine Init_nlpspRec(tempe,psps,nlrec,metrec,ngfftrec,debug) 640 641 implicit none 642 !Arguments ------------------------------------ 643 ! scalars 644 logical,intent(in) :: debug 645 real(dp), intent(in) :: tempe 646 type(pseudopotential_type),intent(in) ::psps 647 type(metricrec_type),intent(inout) :: metrec 648 type(nlpsprec_type),intent(inout) :: nlrec 649 ! arrays 650 integer,intent(in) :: ngfftrec(18) 651 !Local --------------------------- 652 integer :: ii,jj 653 character(500) :: msg 654 ! ********************************************************************* 655 !!--Routine for the calcul of the non-local pseudo 656 if(any(psps%pspcod/=3) .and. nlrec%nlpsp ) then 657 msg = "The non-local part of psp is used in Recursion only for hgh-psp" 658 ABI_WARNING(msg) 659 nlrec%nlpsp = .False. 660 if (allocated(metrec%gcart)) then 661 ABI_FREE(metrec%gcart) 662 end if 663 end if 664 665 if(any(psps%pspcod==3) .and. nlrec%nlpsp) then 666 667 nlrec%nlpsp = .True. 668 nlrec%npsp = psps%npsp 669 nlrec%lmnmax = count(psps%indlmn(3,:,psps%npsp)/=0) 670 ABI_MALLOC(nlrec%mat_exp_psp_nl,(3,3,psps%mpsang,psps%npsp)) 671 ABI_MALLOC(nlrec%eival,(3,psps%mpsang,psps%npsp)) 672 ABI_MALLOC(nlrec%eivec,(3,3,psps%mpsang,psps%npsp)) 673 ABI_MALLOC(nlrec%pspinfo,(psps%mpsang,psps%npsp)) 674 ABI_MALLOC(nlrec%radii,(psps%mpsang,psps%npsp)) 675 ABI_MALLOC(nlrec%indlmn,(6,nlrec%lmnmax,psps%npsp)) 676 nlrec%indlmn(:,:,:) = psps%indlmn(:,:nlrec%lmnmax,:) 677 nlrec%mat_exp_psp_nl(:,:,:,:) = zero 678 nlrec%eivec(:,:,:,:) = zero 679 nlrec%eival(:,:,:) = zero 680 nlrec%radii(:,:) = zero 681 nlrec%pspinfo(:,:) = 0 682 683 !--Get the exponential of the strength times the projectors overlap 684 ! of the non-local part of psp(hgh): 685 ! h_ij=strength; g_ij=ovelap => (exp(-h.g/temp/4p)-Identity).g^(-1) 686 ! And the diagonalisation of the projectors and associated eigenvectors 687 call pspnl_hgh_rec(psps,tempe,nlrec,debug) 688 689 if(debug)then 690 do jj=1,psps%npsp 691 write(msg,'(a)')' Exponential matrices:' 692 call wrtout(std_out,msg,'COLL') 693 do ii=1,psps%mpsang 694 write(msg,'(a,i2,a,3f15.10,a,3f15.10,a,3f15.10)')& 695 & 'angular moment',ii-1,ch10,& 696 & nlrec%mat_exp_psp_nl(1,:,ii,jj),ch10,& 697 & nlrec%mat_exp_psp_nl(2,:,ii,jj),ch10,& 698 & nlrec%mat_exp_psp_nl(3,:,ii,jj) 699 call wrtout(std_out,msg,'COLL') 700 end do 701 end do 702 end if 703 704 !--Now it calculates the matrix of the exp(V_NL) 705 call pspnl_operat_rec(nlrec,metrec,ngfftrec,debug) 706 707 else !--Only local pseudo potentials 708 nlrec%nlpsp = .False. 709 nlrec%npsp = psps%npsp 710 ABI_MALLOC(nlrec%mat_exp_psp_nl,(0,0,0,0)) 711 ABI_MALLOC(nlrec%pspinfo,(0,0)) 712 ABI_MALLOC(nlrec%radii,(0,0)) 713 ABI_MALLOC(nlrec%indlmn,(0,0,0)) 714 ABI_MALLOC(nlrec%projec,(0,0,0)) 715 endif 716 717 end subroutine Init_nlpspRec
m_rec/InitRec [ Functions ]
[ Top ] [ m_rec ] [ Functions ]
NAME
InitRec
FUNCTION
Initialise the rset<recursion_type>=Data type concerning recursion.
INPUTS
dtset <type(dataset_type)>=all input variables in this dataset mpi_ab <type(mpi_type)=MPI-parallelisation information mproj=0 if psp is only local
SIDE EFFECTS
All pointers set to null().
SOURCE
402 subroutine InitRec(dtset,mpi_ab,rset,rmet,mproj) 403 404 #ifdef HAVE_GPU_CUDA 405 use m_gpu_detect,only :get_topo,find_set_gpu 406 use m_hidecudarec,only :InitRecGPU_0 407 #include "cuda_common.h" 408 #endif 409 implicit none 410 411 !Arguments ------------------------------------ 412 ! scalars 413 integer,intent(in) :: mproj 414 type(dataset_type),intent(in) :: dtset 415 type(MPI_type),intent(in),target :: mpi_ab 416 type(recursion_type),intent(inout) :: rset 417 real(dp),intent(in) :: rmet(3,3) 418 ! arrays 419 !Local --------------------------- 420 integer :: ii 421 real(dp) :: beta,rtrotter 422 #if defined HAVE_GPU_CUDA 423 character(500) :: msg 424 #endif 425 ! ********************************************************************* 426 ! @recursion_type 427 ! @pawfgr_type 428 429 !--Initialisation 430 beta = one/dtset%tsmear !--Inverse of temperature 431 !--Rewriting the trotter parameter 432 rtrotter = max(half,real(dtset%recptrott,dp)) 433 434 rset%debug= (dtset%prtvol==-7) 435 rset%quitrec = 0 436 rset%min_nrec = dtset%recnrec 437 rset%efermi = dtset%recefermi !initial guess for fermie 438 439 rset%nfftrec = 0 440 rset%ngfftrec = 0 441 442 rset%tronc = .False. 443 444 rset%mpi => mpi_ab 445 446 !--Are all pseudo-potentials local? 447 rset%nl%nlpsp = (mproj/=0) 448 !--Some initialisation concerning the metrics 449 ! If non-local psps then it allocates the atoms positions 450 ! on the grid 451 if(rset%nl%nlpsp) then 452 ABI_MALLOC(rset%inf%gcart,(3,dtset%natom)) 453 else 454 ABI_MALLOC(rset%inf%gcart,(0,0)) 455 end if 456 rset%inf%gcart = 0 457 458 !---------------------------------------------------------- 459 !--TRONCATION OF THE BOX 460 !! determines new dimensions the method is similar to the one used 461 !! in getng (except that ecut and xboxcutmin give no constraint, 462 !! and symmetries are not handled) 463 464 call getngrec(dtset%ngfft,rmet,rset%ngfftrec,rset%nfftrec,dtset%recrcut,0.25d0*sqrt(beta/rtrotter),rset%tronc) 465 ! 1/4*sqrt(beta/trotter) for guess - should be modified 466 467 !------------------------------------------------------------ 468 !--DETERMINING WHICH POINT WILL COMPUTE THAT PROC 469 !---------------------------------------------------------- 470 !--Paralelism using the band communicator (not used in the recursion) 471 !--Distribution on procs with cuda 472 473 474 rset%ngpu = 0 !--Initial guess no GPU at all 475 rset%gpudevice = -1 !--Inital guess no GPU associated 476 rset%load = 0 !--Inital homogeneous work load 477 rset%tp = 0 !--Initial guess 1 cpu, 0 gpu 478 479 480 #ifdef HAVE_GPU_CUDA 481 !--Initialise GPU variables for recursion 482 call InitRecGPU_0(rset%GPU,mpi_ab) 483 484 !--Get the distribution of GPUs on CPUs 485 call find_set_gpu(mpi_ab%nproc,mpi_ab%comm_bandfft,rset%GPU%map,rset%ngpu) 486 487 !--Get the topology of the machine 488 call get_topo(rset%mpi%nproc,rset%ngpu,rset%tp) 489 if(rset%tp>4)then 490 msg = 'm_rec: number of gpu>number of cpu is not implemented' 491 ABI_ERROR(msg) 492 endif 493 ! rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1 494 ! rset%ngpu = 0; rset%GPU%map=-1 495 !--For the moment cuda doesnt take into account non-local psp 496 if(rset%nl%nlpsp) then 497 rset%tp = 0;if(rset%mpi%nproc>1)rset%tp = 1 498 rset%GPU%map = -1 499 endif 500 #else 501 if(rset%mpi%nproc>1)rset%tp = 1 502 #endif 503 504 !--Basic initialization for recursion metric (only needed for printing) 505 do ii=1,3 506 rset%inf%rmet(ii,:) = rmet(ii,:)/(real(dtset%ngfft(1:3)*dtset%ngfft(ii),dp)) 507 end do 508 rset%inf%tr(:) = sqrt((/(rset%inf%rmet(ii,ii),ii=1,3)/)) !grid step 509 510 !--Compute the work loqd distribution on devices (gpu,cpu) 511 call cpu_distribution(dtset%recgratio,rset,dtset%ngfft(:3),1.d0,0) 512 513 !------------------------------------------------------------ 514 !--DEFINITION VARIABLE COARSE-FINE GRID TO USE TRANSGRID--INGRID FUNCTIONS 515 call pawfgr_nullify(rset%pawfgr) 516 !if coarse grid is used 517 if (dtset%recgratio>1) then 518 !fine grid-- 519 rset%pawfgr%mgfft = 0 520 rset%pawfgr%nfft = product(dtset%ngfft(1:3)) 521 rset%pawfgr%ngfft(:) = dtset%ngfft(:) 522 rset%pawfgr%ngfft(9:11)=(/0,1,0/) 523 rset%pawfgr%ngfft(12:13)= dtset%ngfft(2:3) 524 !coarse grid-- 525 rset%pawfgr%mgfftc = 0 526 rset%pawfgr%ngfftc(:) = rset%pawfgr%ngfft(:) 527 rset%pawfgr%ngfftc(:3) = floor(real(dtset%ngfft(:3)+1,dp)/real(dtset%recgratio,dp)) 528 rset%pawfgr%nfftc = product(rset%pawfgr%ngfftc(1:3)) 529 530 rset%pawfgr%usefinegrid = 1 531 ABI_MALLOC(rset%pawfgr%fintocoa,(rset%pawfgr%nfft)) 532 ABI_MALLOC(rset%pawfgr%coatofin,(rset%pawfgr%nfftc)) 533 call indgrid(rset%pawfgr%coatofin,rset%pawfgr%fintocoa,& 534 rset%pawfgr%nfftc,rset%pawfgr%nfft,& 535 rset%pawfgr%ngfftc,rset%pawfgr%ngfft) 536 537 else 538 rset%pawfgr%mgfft = 0 539 rset%pawfgr%ngfft = 0 540 rset%pawfgr%mgfftc = 0 541 542 rset%pawfgr%usefinegrid = 0 543 end if 544 545 546 end subroutine InitRec