TABLE OF CONTENTS


ABINIT/m_memeval [ Modules ]

[ Top ] [ Modules ]

NAME

 m_memeval

FUNCTION

  Functions to estimate memory requirements from the calculation parameters.

COPYRIGHT

  Copyright (C) 2008-2024 ABINIT group (XG, DC, DW)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 MODULE m_memeval
23 
24  use defs_basis
25  use m_abicore
26  use m_xmpi
27  use m_errors
28  use m_dtset
29 
30  use defs_datatypes, only : pspheader_type
31  use defs_abitypes,   only : MPI_type
32  use m_geometry,      only : mkradim, mkrdim, xred2xcart, metric
33  use m_symtk,         only : mati3inv, littlegroup_q
34  use m_spgdata,       only : prtspgroup
35  use m_fftcore,       only : getng
36  use m_kg,            only : getmpw
37  use m_libpaw_tools,  only : libpaw_write_comm_set
38 
39  implicit none
40 
41  private

m_memeval/getdim_nloc [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 getdim_nloc

FUNCTION

 Determine the dimensions of arrays that contain
 the definition of non-local projectors : ekb, ffspl, indlmn

INPUTS

  mixalch(npspalch,ntypalch,nimage)=alchemical mixing coefficients
  nimage=number of images
  npsp=number of pseudopotentials
  npspalch=number of pseudopotentials for alchemical purposes
  ntypat=number of types of pseudo atoms
  ntypalch=number of types of alchemical pseudo atoms
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file headers, as well as the psp file names

OUTPUT

  lmnmax=maximum number of l,m,n projectors, not taking into account the spin-orbit
  lmnmaxso=maximum number of l,m,n projectors, taking into account the spin-orbit
  lnmax=maximum number of l,n projectors, not taking into account the spin-orbit
  lnmaxso=maximum number of l,n projectors, taking into account the spin-orbit

SOURCE

2259 subroutine getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,mixalch,nimage,npsp,npspalch,&
2260 & ntypat,ntypalch,pspheads)
2261 
2262 !Arguments ------------------------------------
2263 !scalars
2264  integer,intent(in) :: nimage,npsp,npspalch,ntypalch,ntypat
2265  integer,intent(out) :: lmnmax,lmnmaxso,lnmax,lnmaxso
2266 !arrays
2267  real(dp),intent(in) :: mixalch(npspalch,ntypalch,nimage)
2268  type(pspheader_type),intent(in) :: pspheads(npsp)
2269 
2270 !Local variables-------------------------------
2271 !scalars
2272  integer :: ilang,ipsp,ipspalch,itypalch,itypat,ntyppure
2273 !integer :: llmax
2274  character(len=500) :: msg
2275 !arrays
2276  integer,allocatable :: lmnproj_typat(:),lmnprojso_typat(:),lnproj_typat(:)
2277  integer,allocatable :: lnprojso_typat(:),nproj_typat(:,:),nprojso_typat(:,:)
2278 
2279 ! *************************************************************************
2280 
2281 !write(std_out,*)' getdim_nloc: 'pspheads(1)%nproj(0:3)=',pspheads(1)%nproj(0:3)
2282 
2283  ABI_MALLOC(lmnproj_typat,(ntypat))
2284  ABI_MALLOC(lmnprojso_typat,(ntypat))
2285  ABI_MALLOC(lnproj_typat,(ntypat))
2286  ABI_MALLOC(lnprojso_typat,(ntypat))
2287  ABI_MALLOC(nproj_typat,(0:3,ntypat))
2288  ABI_MALLOC(nprojso_typat,(3,ntypat))
2289  lmnproj_typat(:)=0 ; lmnprojso_typat(:)=0
2290  lnproj_typat(:)=0 ; lnprojso_typat(:)=0
2291  nproj_typat(:,:)=0 ; nprojso_typat(:,:)=0
2292 
2293  ntyppure=ntypat-ntypalch
2294 
2295 !For each type of pseudo atom, compute the number of projectors
2296 !First, pure pseudo atoms
2297  if(ntyppure>0)then
2298    do itypat=1,ntyppure
2299      nproj_typat(0:3,itypat)=pspheads(itypat)%nproj(0:3)
2300      nprojso_typat(:,itypat)=pspheads(itypat)%nprojso(:)
2301    end do
2302  end if
2303 
2304 !Then, alchemical pseudo atoms
2305  if(ntypalch>0)then
2306    do itypat=ntyppure+1,ntypat
2307      itypalch=itypat-ntyppure
2308      do ipsp=ntyppure+1,npsp
2309        ipspalch=ipsp-ntyppure
2310 !      If there is some mixing, must accumulate the projectors
2311        if(sum(abs(mixalch(ipspalch,itypalch,:)))>tol10)then
2312          nproj_typat(0:3,itypat)=nproj_typat(0:3,itypat)+pspheads(ipsp)%nproj(0:3)
2313          nprojso_typat(:,itypat)=nprojso_typat(:,itypat)+pspheads(ipsp)%nprojso(:)
2314        end if
2315      end do
2316    end do
2317  end if
2318 
2319 !Now that the number of projectors is known, accumulate the dimensions
2320  do itypat=1,ntypat
2321    do ilang=0,3
2322      lnproj_typat(itypat)=lnproj_typat(itypat)+nproj_typat(ilang,itypat)
2323      lmnproj_typat(itypat)=lmnproj_typat(itypat)+nproj_typat(ilang,itypat)*(2*ilang+1)
2324    end do
2325    lnprojso_typat(itypat)=lnproj_typat(itypat)
2326    lmnprojso_typat(itypat)=lmnproj_typat(itypat)
2327    do ilang=1,3
2328      lnprojso_typat(itypat)=lnprojso_typat(itypat)+nprojso_typat(ilang,itypat)
2329      lmnprojso_typat(itypat)=lmnprojso_typat(itypat)+nprojso_typat(ilang,itypat)*(2*ilang+1)
2330    end do
2331  end do
2332 
2333 !Compute the maximal bounds, at least equal to 1, even for local psps
2334  lmnmax=1;lmnmaxso=1;lnmax=1;lnmaxso=1
2335  do itypat=1,ntypat
2336    lmnmax  =max(lmnmax  ,lmnproj_typat  (itypat))
2337    lmnmaxso=max(lmnmaxso,lmnprojso_typat(itypat))
2338    lnmax   =max(lnmax   ,lnproj_typat   (itypat))
2339    lnmaxso =max(lnmaxso ,lnprojso_typat (itypat))
2340  end do
2341 !The initial coding (below) was not totally portable (MT 110215)
2342 !lmnmax=max(maxval(lmnproj_typat(1:ntypat)),1)
2343 !lmnmaxso=max(maxval(lmnprojso_typat(1:ntypat)),1)
2344 !lnmax=max(maxval(lnproj_typat(1:ntypat)),1)
2345 !lnmaxso=max(maxval(lnprojso_typat(1:ntypat)),1)
2346 
2347  if(maxval(lmnproj_typat(1:ntypat))==0)then
2348    write(msg, '(3a)' )&
2349     'Despite there is only a local part to pseudopotential(s),',ch10,&
2350     'lmnmax and lnmax are set to 1.'
2351    ABI_COMMENT(msg)
2352  end if
2353 
2354 !XG040806 : These lines make modifications of lnmax and lmnmax
2355 !that are unjustified in many cases, according to the many tests cases
2356 !where they produce a changes, while the test case was working properly.
2357 !One should understand better the needs, and code more appropriate changes ...
2358 !lnmax/lmnmax has to be bigger than 1+lmax (for compatibility reasons)
2359 !llmax=maxval(pspheads(1:ntypat)%lmax)+1 ! And this line might have trouble with HP compiler
2360 !if (lnmax   <llmax) lnmax=llmax
2361 !if (lnmaxso <llmax) lnmaxso=llmax
2362 !if (lmnmax  <llmax) lmnmax=llmax
2363 !if (lmnmaxso<llmax) lmnmaxso=llmax
2364 
2365  write(msg, '(a,a,i4,a,i4,3a,i4,a,i4,a)' ) ch10,&
2366  ' getdim_nloc: deduce lmnmax  =',lmnmax,', lnmax  =',lnmax,',',ch10,&
2367  '                      lmnmaxso=',lmnmaxso,', lnmaxso=',lnmaxso,'.'
2368  call wrtout(std_out,msg)
2369 
2370  ABI_FREE(lmnproj_typat)
2371  ABI_FREE(lmnprojso_typat)
2372  ABI_FREE(lnproj_typat)
2373  ABI_FREE(lnprojso_typat)
2374  ABI_FREE(nproj_typat)
2375  ABI_FREE(nprojso_typat)
2376 
2377 end subroutine getdim_nloc

m_memeval/memana [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memana

FUNCTION

 Analysis of the memory and disk space needed for the job,
 thanks to the data computed in the calling routine: for each
 array, the number of blocks of size mpw or nfft bytes, and the
 additional memory occupation;
 the list of arrays that are used for each chain.

 According to the value of the option variable,
 the routine will eventually try to allocate this amount of memory,
 and if it fails, estimate the maximum value nfft compatible with
 the available memory.

INPUTS

  cadd(marrays)= count of bytes needed in addition of cmpw, cfftc and cfft.
  cfft(marrays) =for each array, count of blocks of size nfft bytes (coarse grid, if PAW)
  cfftf(marrays)=for each array, count of blocks of size nfft bytes (fine grid, if PAW)
  chain(marrays,nchain)=logical variable, that informs whether an array
    belongs to a given chain.
  cmpw(marrays)=for each array, count of blocks of size mpw bytes.
  dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
  iout=unit number for output of formatted data.
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  marrays=maximal number of arrays (or group of arrays) to be monitored.
  mbcg=number of MB needed for the cg array.
  mbdiskpd=number of MB needed to store a density or potential file on disk
  mbdiskwf=number of MB needed to store a wavefunction file on disk
  mbf_fftgr=number of MB needed for the f_fftgr array.
  mbgylm=number of MB needed for the pawfgrtab%gylm array (paw only)
  mffmem =governs the number of FFT arrays which are fit in core memory
  mpw   =maximum number of planewaves in basis sphere (large number)
  natom =number of atoms in unit cell
  nchain=number of chains to be used in the estimation of memory.
  nfft =(effective) number of FFT grid points (for one processor) (coarse grid, if PAW)
  nfftf=(effective) number of FFT grid points (for one processor) (fine grid, if PAW)
  occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  prtvol=control print volume

OUTPUT

  (only writing)

SOURCE

1369 subroutine memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,&
1370 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
1371 & mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol)
1372 
1373 !Arguments ------------------------------------
1374 !scalars
1375  integer,intent(in) :: iout,iprcel,iscf,marrays,mffmem,mpw,natom,nchain
1376  integer,intent(in) :: nfft,nfftf,occopt,option,prtvol
1377  real(dp),intent(in) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
1378 !arrays
1379  integer,intent(in) :: dttyp(marrays)
1380  logical,intent(in) :: chain(marrays,nchain)
1381  real(dp),intent(in) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays)
1382 
1383 !Local variables-------------------------------
1384 !scalars
1385  integer :: biggest,ichain,ier,ier1,ier2,ier3,ier4,ier5,ier6,ier7,ier8,ii
1386 !integer :: jj,kk
1387  integer :: mu,nmbytes,nquarter_mbytes,quit
1388  real(dp) :: mbbigarr,mbbiggest
1389  character(len=500) :: msg
1390 !arrays
1391  real(dp),allocatable :: bigarray(:,:),bigarray1(:,:),bigarray2(:,:)
1392  real(dp),allocatable :: bigarray3(:,:),bigarray4(:,:),bigarray5(:,:)
1393  real(dp),allocatable :: bigarray6(:,:),bigarray7(:,:),bigarray8(:,:)
1394  real(dp),allocatable :: cdpadd(:),cdpfft(:),cdpfftf(:),cdpmpw(:)
1395  real(dp),allocatable :: cintfft(:),cintfftf(:),cintmpw(:),cintadd(:)
1396  real(dp),allocatable :: mbdpadd(:),mbdpfft(:),mbdpfftf(:)
1397  real(dp),allocatable :: mbdpmpw(:),mbintadd(:),mbintfft(:),mbintfftf(:)
1398  real(dp),allocatable :: mbintmpw(:),mbother(:),mbtot(:)
1399 
1400 ! **************************************************************************
1401 
1402 !write(std_out,*)' memana : nchain=',nchain
1403 
1404  ABI_MALLOC(cdpfftf,(nchain))
1405  ABI_MALLOC(cdpfft,(nchain))
1406  ABI_MALLOC(cdpmpw,(nchain))
1407  ABI_MALLOC(cintfftf,(nchain))
1408  ABI_MALLOC(cintfft,(nchain))
1409  ABI_MALLOC(cintmpw,(nchain))
1410  ABI_MALLOC(cdpadd,(nchain))
1411  ABI_MALLOC(cintadd,(nchain))
1412  ABI_MALLOC(mbdpadd,(nchain))
1413  ABI_MALLOC(mbdpfftf,(nchain))
1414  ABI_MALLOC(mbdpfft,(nchain))
1415  ABI_MALLOC(mbdpmpw,(nchain))
1416  ABI_MALLOC(mbintadd,(nchain))
1417  ABI_MALLOC(mbintfftf,(nchain))
1418  ABI_MALLOC(mbintfft,(nchain))
1419  ABI_MALLOC(mbintmpw,(nchain))
1420  ABI_MALLOC(mbother,(nchain))
1421  ABI_MALLOC(mbtot,(nchain))
1422 
1423  biggest=0
1424  mbbiggest=0.0_dp
1425 
1426 !For each chain, compute the number of bytes
1427  do ichain=1,nchain
1428 
1429 !  First, the number of integer or real(dp), fft, mpw or add blocks
1430    cdpmpw(ichain) =sum(cmpw(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
1431    cintmpw(ichain)=sum(cmpw(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
1432    cdpfftf(ichain) =sum(cfftf(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
1433    cintfftf(ichain)=sum(cfftf(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
1434    cdpfft(ichain) =sum(cfft(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
1435    cintfft(ichain)=sum(cfft(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
1436    cdpadd(ichain) =sum(cadd(:),MASK=(dttyp(:)==8).and.chain(:,ichain))
1437    cintadd(ichain)=sum(cadd(:),MASK=(dttyp(:)==4).and.chain(:,ichain))
1438 
1439 !  Compute the corresponding number of Mbytes
1440    mbdpmpw(ichain) =8*cdpmpw(ichain) *dble(mpw) /1024._dp**2
1441    mbintmpw(ichain)=4*cintmpw(ichain)*dble(mpw) /1024._dp**2
1442    mbdpfftf(ichain) =8*cdpfftf(ichain) *dble(nfftf)/1024._dp**2
1443    mbintfftf(ichain)=4*cintfftf(ichain)*dble(nfftf)/1024._dp**2
1444    mbdpfft(ichain) =8*cdpfft(ichain) *dble(nfft)/1024._dp**2
1445    mbintfft(ichain)=4*cintfft(ichain)*dble(nfft)/1024._dp**2
1446    mbdpadd(ichain) =8*cdpadd(ichain)              /1024._dp**2
1447    mbintadd(ichain)=4*cintadd(ichain)             /1024._dp**2
1448    mbother(ichain) =dble(231+6*natom)/1024._dp
1449    if(3<=occopt .and. occopt<=8)mbother(ichain)=dble(991+natom)/1024._dp
1450 
1451 !  Compute the total number of Mbytes
1452    mbtot(ichain)=mbdpmpw(ichain)+mbintmpw(ichain)&
1453 &   +mbdpfftf(ichain)+mbintfftf(ichain)&
1454 &   +mbdpfft(ichain)+mbintfft(ichain)&
1455 &   +mbdpadd(ichain)+mbintadd(ichain)+mbother(ichain)
1456 
1457 !  Select the biggest chain
1458    if(mbtot(ichain)>mbbiggest)then
1459      mbbiggest=mbtot(ichain)
1460      biggest=ichain
1461    end if
1462  end do
1463 !When iprcel<20, the biggest chains cannot be number 8 or 9 ...
1464  if(modulo(iprcel,100)<20 .and. (biggest==8 .or. biggest==9))then
1465    write(msg,'(a,a,a,a,i3,a,a,a)') ch10,&
1466 &   ' memana: BUG -',ch10,&
1467 &   '  The biggest chain is number',biggest,' while iprcel==20.',ch10,&
1468 &   '  This is not allowed.'
1469    call wrtout(std_out,msg)
1470  end if
1471 
1472  write(msg, '(a,f11.3,a)' ) &
1473 & 'P This job should need less than                 ',&
1474 & mbbiggest+tol10,' Mbytes of memory. '
1475  call wrtout(std_out,msg)
1476  call wrtout(iout,msg)
1477 
1478  if(prtvol>=10)then
1479    if(biggest==1)write(msg,'(a)')'P Max. in main chain + fourwf.f '
1480    if(biggest==2)write(msg,'(a)')'P Max. in main chain + nonlop.f + opernl.f '
1481    if(biggest==3)write(msg,'(a)')'P Max. in XC chain '
1482    if(biggest==4)write(msg,'(a)')'P Max. in mkrho chain '
1483    if(biggest==5)write(msg,'(a)')'P Max. in fourdp chain '
1484    if(biggest==6)write(msg,'(a)')'P Max. in parallel k-point chain '
1485    if(biggest==7)write(msg,'(a)')'P Max. in newvtr chain '
1486    if(biggest==8)write(msg,'(a)')'P Max. in suscep chain '
1487    if(biggest==9)write(msg,'(a)')'P Max. in dielmt chain '
1488    if(biggest==10)write(msg,'(a)')'P Max. in tddft chain '
1489    call wrtout(iout,msg)
1490 
1491    write(msg, '(a,i13,a,f11.3,a)' )&
1492 &   'P',nint(cintmpw(biggest)),' blocks of mpw  integer numbers, for',&
1493 &   mbintmpw(biggest)+tol10,' Mbytes. '
1494    call wrtout(iout,msg)
1495    write(msg, '(a,i13,a,f11.3,a)' )&
1496 &   'P',nint(cdpmpw(biggest)),' blocks of mpw  real(dp)  numbers, for',&
1497 &   mbdpmpw(biggest)+tol10,' Mbytes. '
1498    call wrtout(iout,msg)
1499    if (nfft==nfftf) then
1500      if(mbintfft(biggest)+mbintfftf(biggest)>0.001)then
1501        write(msg, '(a,i13,a,f11.3,a)' )&
1502 &       'P',nint(cintfft(biggest)+cintfftf(biggest)),' blocks of nfft integer numbers, for',&
1503 &       mbintfft(biggest)+mbintfftf(biggest)+tol10,' Mbytes. '
1504        call wrtout(iout,msg)
1505      end if
1506      write(msg, '(a,i13,a,f11.3,a)' )&
1507 &     'P',nint(cdpfft(biggest)+cdpfftf(biggest)),' blocks of nfft real(dp)  numbers, for',&
1508 &     mbdpfft(biggest)+mbdpfftf(biggest)+tol10,' Mbytes. '
1509      call wrtout(iout,msg)
1510    else
1511      if(mbintfftf(biggest)>0.001)then
1512        write(msg, '(a,i13,a,f11.3,a)' )&
1513 &       'P',nint(cintfftf(biggest)),' blocks of nfft (fine grid) integer numbers, for',&
1514 &       mbintfftf(biggest)+tol10,' Mbytes. '
1515        call wrtout(iout,msg)
1516      end if
1517      write(msg, '(a,i13,a,f11.3,a)' )&
1518 &     'P',nint(cdpfftf(biggest)),' blocks of nfft (fine grid) real(dp)  numbers, for',&
1519 &     mbdpfftf(biggest)+tol10,' Mbytes. '
1520      call wrtout(iout,msg)
1521      if(mbintfft(biggest)>0.001)then
1522        write(msg, '(a,i13,a,f11.3,a)' )&
1523 &       'P',nint(cintfft(biggest)),' blocks of nfft (coarse grid) integer numbers, for',&
1524 &       mbintfft(biggest)+tol10,' Mbytes. '
1525        call wrtout(iout,msg)
1526      end if
1527      write(msg, '(a,i13,a,f11.3,a)' )&
1528 &     'P',nint(cdpfft(biggest)),' blocks of nfft (coarse grid) real(dp)  numbers, for',&
1529 &     mbdpfft(biggest)+tol10,' Mbytes. '
1530      call wrtout(iout,msg)
1531    end if
1532    if(mbintadd(biggest)>0.001)then
1533      write(msg, '(a,13x,a,f11.3,a)' )'P',' Additional     integer numbers, for',mbintadd(biggest)+tol10,' Mbytes. '
1534      call wrtout(iout,msg)
1535    end if
1536    write(msg, '(a,13x,a,f11.3,a)' )'P',' Additional     real(dp)  numbers, for',mbdpadd(biggest)+tol10,' Mbytes. '
1537    call wrtout(iout,msg)
1538    write(msg, '(a,13x,a,f11.3,a)' )'P',' With residue estimated to be       ',mbother(biggest)+tol10,' Mbytes. '
1539    call wrtout(iout,msg)
1540    write(msg, '(a)' )'P'
1541    call wrtout(iout,msg)
1542    write(msg, '(a)' )'P Comparison of the memory needs of different chains'
1543    call wrtout(iout,msg)
1544 
1545    write(msg, '(a,f11.3,a)' )'P Main chain + fourwf.f           ',mbtot(1)+tol10,' Mbytes. '
1546    call wrtout(iout,msg)
1547    write(msg, '(a,f11.3,a)' )'P Main chain + nonlop.f + opernl.f',mbtot(2)+tol10,' Mbytes. '
1548    call wrtout(iout,msg)
1549 
1550 !  The next chains are not defined in the RF case.
1551    if(nchain>2)then
1552      write(msg, '(a,f11.3,a)' )'P XC chain                        ',mbtot(3)+tol10,' Mbytes. '
1553      call wrtout(iout,msg)
1554      write(msg, '(a,f11.3,a)' )&
1555 &     'P mkrho chain                     ',mbtot(4)+tol10,' Mbytes. '
1556      call wrtout(iout,msg)
1557      write(msg, '(a,f11.3,a)' )&
1558 &     'P fourdp chain                    ',mbtot(5)+tol10,' Mbytes. '
1559      call wrtout(iout,msg)
1560      if(xmpi_paral==1)then
1561        write(msg, '(a,f11.3,a)' )&
1562 &       '- parallel k-point chain          ',mbtot(6)+tol10,' Mbytes. '
1563        call wrtout(iout,msg)
1564      end if
1565      write(msg, '(a,f11.3,a)' )&
1566 &     'P newvtr chain                    ',mbtot(7)+tol10,' Mbytes. '
1567      call wrtout(iout,msg)
1568      if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
1569        write(msg, '(a,f11.3,a)' )&
1570 &       'P suscep chain                    ',mbtot(8)+tol10,' Mbytes. '
1571        call wrtout(iout,msg)
1572        write(msg, '(a,f11.3,a)' )&
1573 &       'P dielmt chain                    ',mbtot(9)+tol10,' Mbytes. '
1574        call wrtout(iout,msg)
1575      end if
1576      if(iscf==-1)then
1577        write(msg, '(a,f11.3,a)' )&
1578 &       'P tddft  chain                    ',mbtot(10)+tol10,' Mbytes. '
1579      end if
1580    end if ! nchain>2
1581 
1582  end if
1583 
1584 !--------------------------------------------------------------------
1585 
1586  write(msg, '(a)' ) '  Rough estimation (10% accuracy) of disk space for files :'
1587  call wrtout(iout,msg)
1588  call wrtout(std_out,msg)
1589 
1590  write(msg, '(a,f11.3,a,a,f11.3,a)' ) &
1591 & '_ WF disk file :',mbdiskwf+tol10,' Mbytes ;',&
1592 & ' DEN or POT disk file :',mbdiskpd+tol10,' Mbytes.'
1593  call wrtout(iout,msg)
1594  call wrtout(std_out,msg)
1595 
1596  if(mffmem==0 .and. iscf>0)then
1597    if(iscf==1)then
1598      write(msg, '(a,a,a)' )&
1599 &     '  mffmem==0, iscf==1 => use of 1 FFT temporary disk file,',ch10,&
1600 &     '                       5 times bigger than a DEN file.'
1601    else if(iscf==2.or.iscf==12)then
1602      write(msg, '(a,a,a)' )&
1603 &     '  mffmem==0, iscf==2 => use of 1 FFT temporary disk file,',ch10,&
1604 &     '                       3 times bigger than a DEN file.'
1605    else if(iscf==3.or.iscf==13)then
1606      write(msg, '(a,a,a)' )&
1607 &     '  mffmem==0, iscf==3 => use of 1 FFT temporary disk file,',ch10,&
1608 &     '                       4 times bigger than a DEN file.'
1609    else if(iscf==4.or.iscf==14)then
1610      write(msg, '(a,a,a)' )&
1611 &     '  mffmem==0, iscf==4 => use of 1 FFT temporary disk file,',ch10,&
1612 &     '                       6 times bigger than a DEN file.'
1613    else if(iscf==5)then
1614      write(msg, '(a,a,a)' )&
1615 &     '  mffmem==0, iscf==5 => use of 1 FFT temporary disk file,',ch10,&
1616 &     '                       10 times bigger than a DEN file.'
1617    else if(iscf==6)then
1618      write(msg, '(a,a,a)' )&
1619 &     '  mffmem==0, iscf==6 => use of 1 FFT temporary disk file,',ch10,&
1620 &     '                       10 times bigger than a DEN file.'
1621    else if(iscf==7.or.iscf==17)then
1622      write(msg, '(a,a,a)' )&
1623 &     '  mffmem==0, iscf==7 => use of 1 FFT temporary disk file,',ch10,&
1624 &     '                       (2+2*npulayit) times bigger than a DEN file.'
1625    end if
1626    call wrtout(iout,msg)
1627    call wrtout(std_out,msg)
1628  end if
1629 
1630 !Temporary msg - estimation of PAW specific data has to be done...
1631 !Have to add the usepaw argument to use this.
1632 !if (usepaw==1) then
1633 !write(msg,'(5a)') '  WARNING: You are using PAW formalism;',ch10,&
1634 !&       '           Above estimations do not take PAW',ch10,&
1635 !&       '           specific data into account !'
1636 !call wrtout(iout,msg)
1637 !call wrtout(std_out,msg)
1638 !end if
1639 
1640  write(msg,'(80a,a)') ('=',mu=1,80),ch10
1641  call wrtout(iout,msg)
1642  call wrtout(std_out,msg)
1643 
1644 !--------------------------------------------------------------------
1645 !Here, each processor must test its memory, so use
1646 !the PERS mode for error msgs, followed by synchronisation
1647 
1648  mbbigarr=max(mbf_fftgr,mbcg,mbgylm)
1649  if(mbbigarr==mbcg) then
1650    write(msg, '(a,f12.4,a)' ) ' Biggest array : cg(disk), with',mbcg+tol10,' MBytes.'
1651  else if (mbbigarr==mbf_fftgr) then
1652    write(msg, '(a,f12.4,a)' ) ' Biggest array : f_fftgr(disk), with',mbf_fftgr+tol10,' MBytes.'
1653  else if (mbbigarr==mbgylm)then
1654    write(msg, '(a,f12.4,a)' ) ' Biggest array : pawfgrtab%gylm(gr), with',mbgylm+tol10,' MBytes.'
1655  end if
1656  call wrtout(std_out,msg)
1657 
1658 !if (mpi_enreg%my_nimage>1) then
1659 !write(msg, '(a,f12.4,a)' ) &
1660 !&   ' These estimations take the distribution over replicas (images) of the cell into account.'
1661 !call wrtout(std_out,msg)
1662 !end if
1663 
1664  quit=0
1665 
1666  if(option>=1)then
1667 
1668 !  Test the ability to allocate the biggest array
1669    nquarter_mbytes=4.0_dp*mbbigarr+1.0_dp
1670    ABI_STAT_MALLOC(bigarray,(32*1024,nquarter_mbytes), ier)
1671    if(ier/=0)then
1672      write(msg,'(a,f11.3,a,a,a,a,a,a,a)')&
1673 &     'Test failed to allocate an array of',mbbigarr,' Mbytes',ch10,&
1674 &     'It is not worth to continue ',ch10,&
1675 &     'Action: modify input variable to fit the available memory,',ch10,&
1676 &     'increase limit on maximal array size or set mem_test to 0 to disable this test.'
1677      call wrtout(std_out,msg,'PERS')
1678      if(option==1)then
1679        ABI_ERROR_CLASS(msg, "MemanaError")
1680      else
1681        ABI_WARNING(msg)
1682        quit=1
1683      end if
1684    end if
1685    if(quit==0)then
1686      write(msg,'(a,f11.3,a)')' memana : allocated an array of',mbbigarr+tol10,' Mbytes, for testing purposes. '
1687      call wrtout(std_out,msg)
1688    end if
1689 
1690    ABI_SFREE(bigarray)
1691 
1692 !  Test the ability to allocate the needed total memory : use 8 segments,
1693 !  hoping that the maximal segment size is not so much smaller than the
1694 !  total memory
1695    nquarter_mbytes=0.5_dp*mbbiggest+1.0_dp
1696    ABI_STAT_MALLOC(bigarray1,(32*1024,nquarter_mbytes), ier1)
1697    ABI_STAT_MALLOC(bigarray2,(32*1024,nquarter_mbytes), ier2)
1698    ABI_STAT_MALLOC(bigarray3,(32*1024,nquarter_mbytes), ier3)
1699    ABI_STAT_MALLOC(bigarray4,(32*1024,nquarter_mbytes), ier4)
1700    ABI_STAT_MALLOC(bigarray5,(32*1024,nquarter_mbytes), ier5)
1701    ABI_STAT_MALLOC(bigarray6,(32*1024,nquarter_mbytes), ier6)
1702    ABI_STAT_MALLOC(bigarray7,(32*1024,nquarter_mbytes), ier7)
1703    ABI_STAT_MALLOC(bigarray8,(32*1024,nquarter_mbytes), ier8)
1704 
1705    if(ier1/=0 .or. ier2/=0 .or. ier3/=0 .or. ier4/=0 .or. ier5/=0 .or. ier6/=0 .or. ier7/=0 .or. ier8/=0) then
1706      write(msg,'(a,f11.3,a,a,a,a,a,a,a)')&
1707 &     'Test failed to allocate ',mbbiggest,' Mbytes',ch10,&
1708 &     'It is not worth to continue ',ch10,&
1709 &     'Action: modify input variables or submission parameters to fit the available memory,',ch10,&
1710 &     'increase limit on available memory or set mem_test to 0 to disable this test.'
1711      if(option==1)then
1712        ABI_ERROR_CLASS(msg, "MemanaError")
1713      else
1714        ABI_WARNING(msg)
1715        quit=1
1716      end if
1717    end if
1718 
1719    if(quit==0)then
1720      write(msg,'(a,f11.3,a,a,a)')&
1721 &     ' memana: allocated ',mbbiggest,'Mbytes, for testing purposes. ',ch10,&
1722 &     ' The job will continue.'
1723      call wrtout(std_out,msg)
1724    end if
1725    ABI_SFREE(bigarray1)
1726    ABI_SFREE(bigarray2)
1727    ABI_SFREE(bigarray3)
1728    ABI_SFREE(bigarray4)
1729    ABI_SFREE(bigarray5)
1730    ABI_SFREE(bigarray6)
1731    ABI_SFREE(bigarray7)
1732    ABI_SFREE(bigarray8)
1733 
1734  end if
1735 
1736 !--------------------------------------------------------------------
1737 
1738  if(option==2 .and. quit==1 )then
1739 
1740 !  Estimation of the available memory
1741 !
1742 !  A quarter of Mbyte is 256*1024/8 real(dp) numbers,
1743 !  that is 32*1024 dp numbers.
1744 !  One begins with the allocation of 4 Mbytes. If successful,
1745 !  one increases that number, until the allocation is not successfull
1746 !  any more. Unfortunately, on a P6 with the pghpf compiler, the
1747 !  allocate instruction generate a core dump, instead of returning
1748 !  an error code, so that this part of code has been made optional.
1749 
1750    nquarter_mbytes=16
1751    nmbytes=nquarter_mbytes/4.0_dp
1752 
1753 !  With an increase ratio of 1.25_dp (see below), ii=5 leads to 9 MB,
1754 !  ii=10 leads to 28 MB, ii=15 leads to 85 MB, ii=18 leads to 165 MB,
1755 !  ii=30 is over 2 GB
1756    do ii=1,30
1757      ABI_STAT_MALLOC(bigarray,(32*1024,nquarter_mbytes), ier)
1758      if(ier/=0)then
1759        write(msg,'(a,i0,a)')' memana : failed to allocate ',nmbytes,' Mbytes'
1760        call wrtout(std_out,msg,'PERS')
1761        exit
1762      end if
1763      write(msg,'(a,i0,a)')' memana : succeeded to allocate ',nmbytes,' Mbytes'
1764      call wrtout(std_out,msg,'PERS')
1765 !    Here really test the space
1766 !    do kk=1,nquarter_mbytes
1767 !    do jj=1,32*1024,37
1768 !    bigarray(jj,kk)=0.0_dp
1769 !    end do
1770 !    write(std_out,*)' memana : wrote ',kk,' quarter of mbytes'
1771 !    end do
1772      ABI_FREE(bigarray)
1773      nquarter_mbytes=dble(nquarter_mbytes)*1.25_dp
1774      nmbytes=nquarter_mbytes/4.0_dp
1775    end do
1776    ABI_SFREE(bigarray)
1777 
1778    ABI_ERROR_CLASS("in memana with option==2 .and. quit==1", "MemanaError")
1779  end if !  End the test of the available memory
1780 
1781 !--------------------------------------------------------------------
1782 
1783  ABI_FREE(cdpfftf)
1784  ABI_FREE(cdpfft)
1785  ABI_FREE(cdpmpw)
1786  ABI_FREE(cintfftf)
1787  ABI_FREE(cintfft)
1788  ABI_FREE(cintmpw)
1789  ABI_FREE(cdpadd)
1790  ABI_FREE(cintadd)
1791  ABI_FREE(mbdpadd)
1792  ABI_FREE(mbdpfftf)
1793  ABI_FREE(mbdpfft)
1794  ABI_FREE(mbdpmpw)
1795  ABI_FREE(mbintadd)
1796  ABI_FREE(mbintfftf)
1797  ABI_FREE(mbintfft)
1798  ABI_FREE(mbintmpw)
1799  ABI_FREE(mbother)
1800  ABI_FREE(mbtot)
1801 
1802 end subroutine memana

m_memeval/memorf [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memorf

FUNCTION

 Estimation of the memory needed for a response-function job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  cplex=1 or 2, indicate whether the den and pot functions are real or complex
  getcell=if non-zero, the values of acell and rprim are taken from
   the output of another dataset
  idtset=number of the current dataset
  intxc=control xc quadrature
  iout=unit number for output of formatted data.
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  jdtset=index of the current dataset
  lmnmax=max. number of (l,m,n) components over all type of psps
  lnmax =max. number of (l,n)   components over all type of psps
  mband =maximum number of bands
  mffmem =governs the number of FFT arrays which are fit in core memory
  mgfft =maximum single fft dimension
  mkmems=number of k points which can fit in memory; set to 0 if use disk
    the three values correspond to mkmem, mkqmem and mk1mem
  mpi_enreg=information about MPI parallelization
  mpssang is 1+maximum angular momentum for nonlocal pseudopotential
  mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential
  mpw   =maximum number of planewaves in basis sphere (large number)
  mqgrid=maximum dimension of grid of q values for psp representations
  natom =number of atoms in unit cell
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  nfft  =(effective) number of FFT grid points (for one processor)
  ngfft(18)=contain all needed information about 3D FFT, see ~abinit/doc/variables/vargs.htm#ngfft
  nkpt  =number of k points
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  nspden=number of spin-density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=number of channels for spin-polarization (1 or 2)
  nsym  =number of symmetry elements in space group
  ntypat=number of types of atoms
  n1xccc=dimension of xccc1d ; 0 if no XC core correction is used
  occopt=option for occupation numbers. If 3<=occopt<=7, varying occupation
  optddk=1 if ddk is computed during run
  optphon=1 if phonons are computed during run
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  optstrs=1 if strain perturbation is computing during run
  prtvol=control print volume
  useylm=governs the way the nonlocal operator is to be applied:
         1=using Ylm, 0=using Legendre polynomials
  gpu_option= GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)
  xclevel= level of the XC functional

OUTPUT

  (only writing)

NOTES

 for the estimation, it is only taken into account those
 arrays that have some probability of being larger than 1000*8 bytes :
 - All the arrays that have large numbers as one of their dimensions
 (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),n1xccc
                                      or a constant larger than 1000)
 - All the arrays that have a product of two moderately large numbers
 (potential size above 30  : mband, mgfft, mkmems, natom, nkpt, nsym,
  or a constant larger than 30)
 After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added
 to take into account the static arrays declared
 in rhotoxc and daughter routines (at maximum 22*1000 dp numbers),
 as well as other arrays like
 character(len=500) :: msg (present in about 100 routines), or the different
 arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f
 In the case 3<=occopt<=7 this amount is increased by 760 Kbytes
 to take into account the arrays smdfun, occfun, entfun, workfun and xgrid,
 declared in getnel

 The current version takes into account only :
 1) and 2) the "main chain" in its two slightly different versions :
 driver - respfn - dfpt_looppert - dfpt_scfcv - dfpt_vtorho - dfpt_vtowfk -
     dfpt_cgwf - getghc - fourwf or (nonlop+opernl)

 Also, it is assumed that the potentials are non-local, even if there
     are local ! It would be necessary to update this routine
     now that the beginning of psp files is read before
     the present call (XG 980502)

 Some BIG approximations, not present in the GS corresponding routine
  have been done : nsym=nsym1, nkpt=nkpt_rbz, mpw=mpw1 ...

SOURCE

1901 subroutine memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,&
1902 & iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft,&
1903 & mkmems,mpi_enreg,mpsang,mpssoang,mpw,mqgrid,&
1904 & natom,nband,nfft,ngfft,&
1905 & nkpt,nloalg,nspden,nspinor,nsppol,nsym,ntypat,&
1906 & occopt,optddk,optphon,option,optstrs,prtvol,useylm,gpu_option,xclevel)
1907 
1908 !Arguments ------------------------------------
1909 !scalars
1910  integer,intent(in) :: cplex,getcell,idtset,intxc,iout,iprcel,iscf
1911  integer,intent(in) :: jdtset,lmnmax,lnmax,mband,mffmem,mgfft,mpsang
1912  integer,intent(in) :: mpssoang,mpw,mqgrid,n1xccc,natom,nfft,nkpt
1913  integer,intent(in) :: nspden,nspinor,nsppol,nsym,ntypat,occopt
1914  integer,intent(in) :: optddk,option,optphon,optstrs,prtvol,useylm
1915  integer,intent(in) :: gpu_option,xclevel
1916  type(MPI_type),intent(in) :: mpi_enreg
1917 !arrays
1918  integer,intent(in) :: mkmems(3),nband(nkpt*nsppol),ngfft(18)
1919  integer,intent(in) :: nloalg(3)
1920 
1921 !Local variables-------------------------------
1922 !marrays= maximal number of arrays to be monitored (or group of arrays)
1923 !cmpw(marrays)=count of blocks of size mpw bytes
1924 !cfft(marrays)=number of blocks of size nfft bytes
1925 !cadd(marrays)=additional storage needed (in bytes)
1926 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
1927 !nchain= number of different chains of routines
1928 !chain(marrays,nchain)=different chains of routines
1929 !scalars
1930  integer,parameter :: marrays=150,nchain=2
1931  integer :: fftalgb,matblk,maxmkmem,mincat,mk1mem,mkmem,mkqmem,mu,n_fftgr
1932  integer :: narr_fourdp,ngrad,nprocwf
1933  integer :: my_natom
1934  real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
1935  character(len=500) :: msg
1936  character(len=1) :: firstchar
1937 !arrays
1938  integer :: dttyp(marrays)
1939  real(dp) :: cadd(marrays),cfft(marrays),cmpw(marrays)
1940  real(dp),allocatable :: cfft_dum(:)
1941  logical :: chain(marrays,nchain)
1942 
1943 ! **************************************************************************
1944 
1945  if(option<0 .or. option>2)then
1946    write(msg, '(a,i0,a)')'option= ',option,' while the only allowed values are 0, 1, or 2.'
1947    ABI_BUG(msg)
1948  end if
1949 
1950  firstchar=' ';if (gpu_option/=ABI_GPU_DISABLED) firstchar='_'
1951  cmpw(:)=zero ; cfft(:)=zero ; cadd(:)=zero
1952  dttyp(:)=0
1953 
1954  call wrtout(std_out,' memorf : analysis of memory needs ')
1955 
1956  if(jdtset>=100)then
1957    write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
1958    ' Values of the parameters that define the memory need for DATASET',jdtset,' (RF).'
1959  else if(jdtset/=0)then
1960    write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
1961    ' Values of the parameters that define the memory need for DATASET',jdtset,' (RF).'
1962  else
1963    write(msg,'(80a,a,a,a)')('=',mu=1,80),ch10,&
1964    ' Values of the parameters that define the memory need of the present run',' (RF).'
1965  end if
1966  call wrtout(iout,msg)
1967  call wrtout(std_out,msg)
1968 
1969  mkmem=mkmems(1)
1970  mkqmem=mkmems(2)
1971  mk1mem=mkmems(3)
1972  my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom
1973 
1974  write(msg,'( 4(a,i8),a,4(a,i8) )' ) &
1975 & '     intxc =',intxc   ,'      iscf =',iscf,&
1976 & '    lmnmax =',lmnmax  ,'     lnmax =',lnmax,ch10,&
1977 & '     mgfft =',mgfft,'  mpssoang =',mpssoang,&
1978 & '    mqgrid =',mqgrid,'     natom =',natom
1979  call wrtout(iout,msg)
1980  call wrtout(std_out,msg)
1981 
1982  write(msg,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) &
1983 & '  nloc_mem =',nloalg(2)*(nloalg(3)+1),'    nspden =',nspden ,&
1984 & '   nspinor =',nspinor,'    nsppol =',nsppol ,ch10,&
1985 & '      nsym =',nsym,'    n1xccc =',n1xccc ,&
1986 & '    ntypat =',ntypat,'    occopt =',occopt ,ch10,&
1987 & '   xclevel =',xclevel
1988  call wrtout(iout,msg)
1989  call wrtout(std_out,msg)
1990 
1991  write(msg,'(4(3(a,i12),a))') &
1992 & '-    mband =',mband  ,'        mffmem =',mffmem,&
1993 & '         mkmem =',mkmem  ,ch10,&
1994 & '-   mkqmem =',mkqmem ,'        mk1mem =',mk1mem,&
1995 & '           mpw =',mpw  ,ch10,&
1996 & '      nfft =',nfft ,'          nkpt =',nkpt
1997  call wrtout(iout,msg)
1998  call wrtout(std_out,msg)
1999 
2000  if (my_natom/=natom)then
2001    write(msg,'(a,i10)') 'Pmy_natom=',my_natom
2002    call wrtout(iout,msg)
2003    call wrtout(std_out,msg)
2004  end if
2005 
2006  write(msg,'(80a)') ('=',mu=1,80)
2007  call wrtout(iout,msg)
2008  call wrtout(std_out,msg)
2009 
2010  if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then
2011    write(msg,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,&
2012 &   ' memorf : COMMENT -',ch10,&
2013 &   '  The determination of memory needs at this stage is meaningless,',ch10,&
2014 &   '  since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,&
2015 &   '  The following numbers are obtained by supposing that acell and rprim',ch10,&
2016 &   '  are NOT taken from a previous dataset. You cannot rely on them.',ch10
2017    call wrtout(iout,msg)
2018    call wrtout(std_out,msg)
2019  end if
2020 
2021  n_fftgr=1
2022  if(iscf==1)            n_fftgr=5
2023  if(iscf==2.or.iscf==3) n_fftgr=4
2024  if(iscf==5.or.iscf==6) n_fftgr=10
2025 
2026 !work1 and work2 in fourdp : take into account approximately fftalgb
2027  fftalgb=mod(ngfft(7),100)/10
2028  if(fftalgb==0)narr_fourdp=2*2
2029  if(fftalgb==1)narr_fourdp=2
2030 
2031  ngrad=1
2032  if(xclevel==2)ngrad=2
2033 
2034 !(0)                     in main, driver, and respfn -------------------
2035 !indsym (respfn)
2036  cadd(1)=4*nsym*natom          ; dttyp(1)=4
2037 !rhor,rhog (respfn)
2038  cfft(2)=nspden+2              ; dttyp(2)=8
2039 !occ (driver), doccde (respfn)
2040  cadd(3)=2*mband*nkpt*nsppol   ; dttyp(3)=8
2041 !qgrid,vlspl,ffspl (driver)
2042  cadd(4)=mqgrid*(1+2*ntypat*(1+lnmax))   &
2043 & ; dttyp(4)=8
2044 !xccc1d (driver)
2045  cadd(5)=n1xccc*6*ntypat       ; dttyp(5)=8
2046 !vtrial (respfn)
2047  cfft(6)=nspden                ; dttyp(6)=8
2048 !kxc (respfn)
2049  cfft(7)=2*nspden-1            ; dttyp(7)=8
2050 
2051 !(1-2)                   in dfpt_looppert --------------------------------------
2052 !ph1d
2053  cadd(11)=2*3*(2*mgfft+1)*natom ; dttyp(11)=8
2054 !vpsp1
2055  cfft(12)=cplex                ; dttyp(12)=8
2056 !indsy1  assume that nsym=nsym1
2057  cadd(13)=4*nsym*natom         ; dttyp(13)=4
2058 !irrzonr1 and phnons1  assume that nsym=nsym1
2059  if(nsym/=1)then
2060    cfft(14)=(2+(nspden/4))*((nspden/nsppol)-3*nspden/3)     ; dttyp(14)=4
2061    cfft(15)=2*((nspden/nsppol)-3*nspden/3)                  ; dttyp(15)=8
2062  end if
2063 !doccde_rbz, eigen0, eigenq, occ_rbz, docckqde, occkq, resid
2064 !assume than nkpt=nkpt_rbz
2065  cadd(16)=7*mband*nkpt*nsppol  ; dttyp(16)=8
2066 !kg
2067  cmpw(18)=3*mkmem              ; dttyp(18)=4
2068 !cg
2069  cmpw(19)=2*nspinor*mband*mkmem*nsppol  ; dttyp(19)=8
2070 !kg1
2071  cmpw(21)=3*mk1mem             ; dttyp(21)=4
2072 !cgq
2073  cmpw(22)=2*nspinor*mband*mkqmem*nsppol  ; dttyp(22)=8
2074 !cg1
2075  cmpw(23)=2*nspinor*mband*mk1mem*nsppol  ; dttyp(23)=8
2076 !rhor1,rhog1
2077  cfft(24)=cplex*nspden+2       ; dttyp(24)=8
2078 !eigen1
2079 !assume than nkpt=nkpt_rbz
2080  cadd(25)=2*mband*mband*nkpt*nsppol      ; dttyp(25)=8
2081 !ylm
2082  cmpw(26)=mkmem*mpsang*mpsang*useylm     ; dttyp(26)=8
2083 
2084 !(3)                     in dfpt_scfcv --------------------------------------
2085 
2086 !vhartr1,vtrial1,vxc
2087  cfft(31)=cplex+cplex*nspden+nspden      ; dttyp(31)=8
2088  if(iscf>0)then
2089 !  f_fftgr
2090    cfft(32)=cplex*nspden*n_fftgr*mffmem    ; dttyp(32)=8
2091  end if
2092 
2093 !(4)                   in dfpt_vtorho----------------------------------------
2094 
2095 !proc_distrb
2096  cadd(41)=nkpt*mband*nsppol    ; dttyp(41)=4
2097 !kg_k,kg1_k
2098  cmpw(42)=6                    ; dttyp(42)=4
2099 !rhoaug1, vlocal, vlocal1
2100  cfft(43)=2*cplex+1            ; dttyp(43)=8
2101  cadd(43)=(2*cplex+1)*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
2102 
2103  if(mkqmem==0)then
2104 !  cgq_disk
2105    cmpw(45)=2*nspinor*mband      ; dttyp(45)=8
2106  end if
2107 !doccde_k,doccde_kq,eig0_k, ..., eig1_k, rocceig
2108  cadd(47)=(14+3*mband)*mband   ; dttyp(47)=8
2109 !ylm_k,ylm1_k
2110  cmpw(49)=2*mpsang*mpsang*useylm  ; dttyp(49)=8
2111 
2112 !(5)                     in dfpt_vtowfk --------------------------------------
2113 
2114 !dkinpw,kinpw1
2115  cmpw(51)=2                    ; dttyp(51)=8
2116 !ffnlk,ffnl1,ffnlkq
2117  cmpw(52)=2*(ntypat+2)*lmnmax  ; dttyp(52)=8
2118 !ghc,gvnlxc,gvnlx1
2119  cmpw(53)=6*nspinor            ; dttyp(53)=8
2120 !ph3d
2121  matblk=NLO_MINCAT
2122  if(nloalg(2)<=0)matblk=natom
2123  cmpw(54)=2*matblk             ; dttyp(54)=8
2124 !wfraug,wfraug1,rhoaug
2125  cfft(55)=5                    ; dttyp(55)=8
2126  cadd(55)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
2127 !cwavef,cwave0,cwave1
2128  cmpw(56)=6*nspinor            ; dttyp(56)=8
2129 
2130 !(6)                     in dfpt_cgwf ----------------------------------------
2131 
2132 !gh1, gh_direc, gvnlx_direc, conjgr, direc, vresid, cwaveq
2133  cmpw(61)=14*nspinor            ; dttyp(61)=8
2134 
2135 !(9a)                    in getghc and fourwf----------------------------
2136 
2137 !work (in getghc)
2138  cfft(91)=2                    ; dttyp(91)=8
2139  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
2140 !work1 (in fourwf)
2141  cfft(92)=2                    ; dttyp(92)=8
2142  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
2143 
2144 !(9b)                    in getghc, nonlop and opernl--------------------
2145  mincat=min(NLO_MINCAT,natom-ntypat+1)
2146  if (useylm==0) then                          ! ===== nonlop_pl
2147 !  gxa  (in nonlop)
2148    cadd(94)=2*20*mincat*2       ; dttyp(94)=8
2149 !  dgxdt  (in nonlop)
2150    cadd(95)=2*3*20*mincat*2    ; dttyp(95)=8
2151 !  dgxds  (in nonlop)
2152    cadd(96)=2*56*mincat*2      ; dttyp(96)=8
2153 !  teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3)
2154 !  kpgx, ffkg
2155 !  here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not
2156 !  considering the stress, since it will be called outside of the main chain
2157    cadd(97)=NLO_MBLKPW*40        ; dttyp(97)=8
2158 !  kpg if nloalg(3)=1
2159    cadd(98)=3*mpw*nloalg(3)     ; dttyp(98)=8
2160  else                                        ! ===== nonlop_ylm
2161 !  gx + gxfac
2162    cadd(94)=2* 2*mpw*lmnmax*mincat    ; dttyp(94)=8
2163 !  dgxdt + dgxdtfac + d2gxdt
2164    if (optddk>0.and.optphon==0.and.optstrs==0) cadd(95)=2*2*mpw*lmnmax*mincat
2165    if (optphon>0) cadd(95)=12*2*mpw*lmnmax*mincat
2166    if (optstrs>0) cadd(95)=72*2*mpw*lmnmax*mincat
2167    dttyp(95)=8
2168 !  kpg
2169    cadd(96)=2*3*mpw       ; dttyp(96)=8
2170    if (optphon>0) cadd(96)=cadd(96)+2*6*mpw
2171 !  miscelaneous: indlmn_typ, ffnl_typ
2172    cadd(97)=lmnmax*(6+mpw*(2+optstrs)); dttyp(97)=8
2173 !  opernla_ylm: scalar,scali,scalarr,scalari
2174    cadd(98)=2*mpw+2*mpw
2175    if (optddk>0.and.optstrs==0) cadd(98)=cadd(98)+2*mpw
2176    if (optstrs>0) cadd(98)=cadd(98)+9*2*mpw
2177    dttyp(98)=8
2178  end if
2179 
2180 !--------------------------------------------------------------------------
2181 
2182  chain(:,:)=.true.
2183 
2184 !Define the main chain version a (fourwf)
2185  chain(93:100,1)=.false.
2186 
2187 !Define the main chain version b (nonlop+opernl)
2188  chain(91:92,2)=.false.
2189 
2190 !The memory needed for each chain has been computed
2191 !-------------------------------------------------------------------------
2192 !Still need some auxiliary data : estimate the disk space
2193 !or the maximum segment size.
2194 
2195 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors
2196 !in the WF group. For the time being, nprocwf=1
2197  nprocwf=mpi_enreg%nproc_fft
2198 
2199  mbdiskwf=(8*2*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp
2200  mbdiskpd=(8*nfft*nsppol)/1024._dp**2 + 0.002_dp
2201 
2202 !Determine the largest array out of cg,cg1,cgq, cg_disk or f_fftgr (f_fftgr_disk)
2203  if(mkmem==0 .and. mk1mem==0 .and. mkqmem==0)then
2204    mbcg=(8*2*mpw*nspinor*mband)/1024._dp**2 + 0.002_dp
2205  else
2206    maxmkmem=maxval(mkmems(:))
2207    mbcg=(8*2*mpw*nspinor*mband*maxmkmem*nsppol)/1024._dp**2 + 0.002_dp
2208  end if
2209  if(mffmem==0)then
2210    mbf_fftgr=(8*cplex*nfft*n_fftgr)/1024._dp**2 + 0.002_dp
2211  else
2212    mbf_fftgr=(8*cplex*nfft*n_fftgr*nspden*mffmem)/1024._dp**2 + 0.002_dp
2213  end if
2214 
2215 !---------------------------------------------------------------------
2216 !Now, analyze the data
2217 
2218 !DEBUG
2219 !write(std_out,*)' memorf : nchain=',nchain
2220 !ENDDEBUG
2221 
2222  ABI_MALLOC(cfft_dum,(marrays))
2223  cfft_dum=zero
2224  mbgylm=zero
2225  call memana(cadd,cfft,cfft_dum,chain,cmpw,dttyp,iout,iprcel,iscf,&
2226 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
2227 & mpw,natom,nchain,nfft,nfft,occopt,option,prtvol)
2228  ABI_FREE(cfft_dum)
2229 
2230 end subroutine memorf

m_memeval/memory [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memory

FUNCTION

 Estimation of the memory needed for a ground-state job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  extrapwf=flag controlling the extrapolation of wave functions during MD or relaxation
  getcell=if non-zero, the values of acell and rprim are taken from
   the output of another dataset
  idtset=number of the current dataset
  icoulomb=0 for periodic Fourier calculation of Hartree potential; 1 for isolated system using Poisson solver.
  intxc=control xc quadrature
  ionmov=control force calculations
  iout=unit number for output of formatted data.
  densfor_pred=govern the choice of density prediction and/or forces correction
  iprcel=govern the choice of preconditioner for the SCF cycle
  iscf=governs the choice of SCF algorithm, or non-SCF calculation.
  jdtset=index of the current dataset
  lmnmax=max. number of (l,m,n) components over all type of psps
  lnmax =max. number of (l,n)   components over all type of psps
  mband =maximum number of bands
  mffmem =governs the number of FFT arrays which are fit in core memory
  mgfftf =maximum single fft dimension (fine grid, if PAW)
  mgfft  =maximum single fft dimension (coarse grid, if PAW)
  mgfftdiel =maximum single fft dimension for susceptibility and dielectric
   matrices.
  mkmem =maximum number of k points which can fit in core memory
  mpi_enreg=information about MPI parallelization
  mpssang is 1+maximum angular momentum for nonlocal pseudopotential
  mpssoang is 1+maximum (spin*angular momentum) for nonlocal pseudopotential
  mpw   =maximum number of planewaves in basis sphere (large number)
  mqgrid_ff=dimension of q (or G) grid for nl form factors (array ffspl)
  mqgrid_vl=dimension of q (or G) grid for Vloc (array vlspl)
  natom =number of atoms in unit cell
  nband(nkpt*nsppol)=number of bands at each k point, for each polarization
  nfftf =number of fft grid points for density        (fine grid, if PAW)
  nfft  =number of fft grid points for wavefunctions  (coarse grid, if PAW)
  nfftdiel  =maximum number of fft grid points for susceptibility
    and dielectric matrices
  ngfftf(18)=contain all needed information about 3D FFT (fine grid, if PAW)
  ngfft(18) =contain all needed information about 3D FFT (coarse grid, if PAW)
  ngfftdiel(18)=contain all needed information about 3D FFT, dielectric case,
                 see ~abinit/doc/variables/vargs.htm#ngfft
    for susceptibility and dielectric matrices
  nimage=number of images (replicas) of the cell
  nkpt  =number of k points
  npsp=number of different pseudopotentials
  npwdiel=number of plane wave for susceptibility and dielectric matrix
  npulayit=number of iterations used in Pulay SCF mixing
  nloalg(3)=governs the choice of the algorithm for non-local operator.
  nspden=number of spin-density components
  nspinor=number of spinorial components of the wavefunctions
  nsppol=number of channels for spin-polarization (1 or 2)
  nsym  =number of symmetry elements in space group
  ntypat =number of types of atoms
  n1xccc=dimension of xccc1d ; 0 if no XC core correction is used
  occopt=option for occupation numbers. If 3<=occopt<=8, varying occupation
  optforces=1 if forces are computed during run
  option : if 0 , no test of available memory
           if 1 , the routine tries to allocate the estimated memory, for testing
                    purposes, and if a failure occurs, the routine stops.
           if 2 , like 1, but before stopping, the routine will provide
                    an estimation of the available memory.
  optstress=1 if stresses are computed during run
  pawcpxocc=2 if PAW occupancies (rhoij) are complex
  pawmixdg=1 if mixing (in PAW) is done on the fine grid
  pawnhatxc=1 if nhat PAW density has to be analytically included in XC
  pawspnorb=1 when spin-orbit is activated within PAW
  pawstgylm=1 if g_l(r).Y_lm(r) factors are stored in memory (PAW)
  prtvol=control print volume
  pspheads(npsp)=<type pspheader_type>all the important information from the header
  tfkinfun=flag controling the use of Thomas-Fermi algorithme (without WF)
  typat(natom)=type of each atom
  ucvol= unit cell volume
  usepaw= 0 for non paw calculation; =1 for paw calculation
  useylm=governs the way the nonlocal operator is to be applied:
         1=using Ylm, 0=using Legendre polynomials
  gpu_option= GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)
  xclevel=XC functional level

OUTPUT

  (only writing)

NOTES

 for the estimation, it is only taken into account those
 arrays that have some probability of being larger than 1000*8 bytes :
 - All the arrays that have large numbers as one of their dimensions
 (mqgrid, mpw, nfft, ngfft(4)*ngfft(5)*ngfft(6),
                     ngfftdiel(4)*ngfftdiel(5)*ngfftdiel(6), n1xccc
                                      or a constant larger than 1000)
 - All the arrays that have a product of two moderately large numbers
 (potential size above 30  : mband, mgfft, mkmem, natom, nkpt, nsym,
  or a constant larger than 30)
 After this estimation, an amount of (176 + 55 + 6*natom) Kbytes is added
 to take into account the static arrays declared
 in rhotoxc and daughter routines (at maximum 22*1000 dp numbers),
 as well as other arrays like
 character(len=500) :: message (present in about 100 routines), or the different
 arrays allocated in move.f, brdmin.f, gstate.f (xf array) or pspini.f
 In the case 3<=occopt<=8 this amount is increased by 760 Kbytes
 to take into account the arrays smdfun, occfun, entfun, workfun and xgrid,
 declared in getnel.

 The current version takes into account
 1) and 2) the "main chain" in its two slightly different versions :
 driver - gstate - (move or brdmin) - scfcv - vtorho - vtowfk -
 3) the xc chain :
 driver - gstate - (move or brdmin) - scfcv - (vresfo) - rhotoxc - xcden
 4) the mkrho chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - mkrho
 5) the fourdp chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho
         ( + ftofr - fourdp - symrhg )
 6) the parallel k-point chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - MPI_ALLREDUCE
 7) the newvtr chain :
 driver - gstate - (move or brdmin) - scfcv - newvtr
 8) the susceptibility chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - suscep - suskmm
 9) the dielectric chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - dielmt
 10) the tddft chain :
 driver - gstate - (move or brdmin) - scfcv - vtorho - tddft

 It is valid for all values of iscf, but not for nstep=0 (when the chain
 goes through energy instead of vtorho).

 Also, it is assumed that the potentials are non-local, even if there
 are local ! It would be necessary to update this routine
 now that the beginning of psp files is read before the present call (XG 980502)

 One might also estimate if there must be a chain arriving at:
  strnps , mkffnl, mkcore, mklocl, mkrho, prcpot, irrzg, initro, clnup1.
 This is because there are allocated arrays in these routines.

SOURCE

 473 subroutine memory(n1xccc,extrapwf,getcell,idtset,icoulomb,intxc,ionmov,iout,densfor_pred,iprcel,&
 474 & iscf,jdtset,lmnmax,lnmax,&
 475 & mband,mffmem,mgfft,mgfftdiel,mgfftf,mkmem,mpi_enreg,mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,&
 476 & natom,nband,nfft,nfftdiel,nfftf,ngfft,ngfftdiel,ngfftf,nimage,&
 477 & nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat,&
 478 & occopt,optforces,option,optstress,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,&
 479 & prtvol,pspheads,qphon,tfkinfunc,typat,ucvol,usepaw,useylm,gpu_option,xclevel)
 480 
 481 !Arguments ------------------------------------
 482 !scalars
 483  integer,intent(in) :: extrapwf,getcell,icoulomb,idtset,intxc,ionmov,iout,densfor_pred
 484  integer,intent(in) :: iprcel,iscf,jdtset,lmnmax,lnmax,mband,mffmem,mgfft
 485  integer,intent(in) :: mgfftdiel,mgfftf,mkmem,mpsang,mpssoang,mpw,mqgrid_ff
 486  integer,intent(in) :: mqgrid_vl,n1xccc,natom,nfft,nfftdiel,nfftf,nimage,nkpt,npsp
 487  integer,intent(in) :: npulayit,npwdiel,nspden,nspinor,nsppol,nsym,ntypat
 488  integer,intent(in) :: occopt,optforces,option,optstress
 489  integer,intent(in) :: pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm
 490  integer,intent(in) :: prtvol,tfkinfunc,usepaw,useylm,gpu_option,xclevel
 491  real(dp) :: ucvol
 492  type(MPI_type),intent(in) :: mpi_enreg
 493 !arrays
 494  integer,intent(in) :: nband(nkpt*nsppol),ngfft(18),ngfftdiel(18),ngfftf(18)
 495  integer,intent(in) :: nloalg(3),typat(natom)
 496  real(dp),intent(in) :: qphon(3)
 497  type(pspheader_type) :: pspheads(npsp)
 498 
 499 !Local variables-------------------------------
 500 !marrays=maximal number of arrays to be monitored (or group of arrays)
 501 !cmpw(marrays)=count of blocks of size mpw bytes
 502 !cfft(marrays) =count of blocks of size nfft bytes (coarse grid, if PAW)
 503 !cfftf(marrays)=count of blocks of size nfft bytes (fine grid, if PAW)
 504 !cadd(marrays)=count of additional storage needed (in bytes)
 505 !dttyp(marrays)=datatype of the array : 4 for integers, 8 for real(dp)
 506 !nchain=number of different chains of routines
 507 !chain(marrays,nchain)=different chains of routines
 508  ! The cfoo arrays are used to store the allocated memory in the different
 509  ! routines of the program. Each stack of the program can allocate some
 510  ! memory and the amount is estimated and stored in cfoo(i). The lower i,
 511  ! the higher routine. cfft is memory used by FFT handling, cmpw for
 512  ! plane waves storage and cadd for miscellaneous memory occupation.
 513  ! The unit is the multiplier of the size of nfft for cfft, the multiplier
 514  ! of mpw for cmpw and the actually allocated memory for cadd.
 515  ! This array stores the size of each chunk of memory (8 for double
 516  ! floating point precision, 4 for integers and so on).
 517  ! This array defines if the chain defined above allocate or not the
 518  ! memory (depending on options).
 519 !scalars
 520  integer,parameter :: marrays=150,nchain=10
 521  integer :: fftalgb,histsz,ii,iscf10,jj,l_max,l_size_max,matblk,mblk,mincat,mu
 522  integer :: my_natom,n_fftgr,narr_fourdp,nbnd_in_blk,ndiel4,ndiel456,ndiel5,ndiel6
 523  integer :: ngrad,nprocwf,nspgrad,qphase_rhoij,rhoij_nspden
 524  real(dp) :: mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm
 525  character(len=500) :: msg
 526 ! character(len=1) :: firstchar
 527 !arrays
 528  integer :: dttyp(marrays),nattyp(ntypat)
 529  integer,allocatable :: basis_size(:),l_size(:),lmn2_size(:),lmn_size(:)
 530  integer,allocatable :: mesh_size(:),my_nattyp(:),pawver(:),shape_type(:)
 531  real(dp) :: cadd(marrays),cfft(marrays),cfftf(marrays),cmpw(marrays)
 532  real(dp),allocatable :: rshp(:)
 533  logical :: chain(marrays,nchain)
 534 
 535 ! **************************************************************************
 536 
 537  if(option<0 .or. option>2)then
 538    write(msg,'(A,I0,A)')'option=',option,' while the only allowed values are 0, 1, or 2.'
 539    ABI_BUG(msg)
 540  end if
 541 
 542 !firstchar=' ';if (gpu_option/=0) firstchar='_'
 543  cmpw(:)=zero ; cfft(:)=zero ; cfftf(:)=zero ; cadd(:)=zero
 544  dttyp(:)=0
 545 
 546  my_natom=natom;if (mpi_enreg%nproc_atom>1) my_natom=mpi_enreg%my_natom
 547 
 548  call wrtout(std_out,'memory: analysis of memory needs ')
 549 
 550  if(jdtset>=100)then
 551    write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
 552     ' Values of the parameters that define the memory need for DATASET',jdtset,'.'
 553  else if(jdtset/=0)then
 554    write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
 555     ' Values of the parameters that define the memory need for DATASET',jdtset,'.'
 556  else
 557    write(msg,'(80a,a,a)')('=',mu=1,80),ch10,&
 558     ' Values of the parameters that define the memory need of the present run '
 559  end if
 560  call wrtout(iout,msg)
 561  call wrtout(std_out,msg)
 562 
 563  write(msg,'( 4(a,i8),a,4(a,i8) )' ) &
 564 & '     intxc =',intxc   ,'    ionmov =',ionmov,&
 565 & '      iscf =',iscf    ,'    lmnmax =',lmnmax,ch10,&
 566 & '     lnmax =',lnmax   ,'     mgfft =',mgfft,&
 567 & '  mpssoang =',mpssoang,'    mqgrid =',mqgrid_vl
 568  call wrtout(iout,msg)
 569  call wrtout(std_out,msg)
 570 
 571  write(msg,'( 4(a,i8),a,4(a,i8),a,4(a,i8) )' ) &
 572 & '     natom =',natom  ,'  nloc_mem =',nloalg(2)*(nloalg(3)+1),&
 573 & '    nspden =',nspden ,'   nspinor =',nspinor,ch10,&
 574 & '    nsppol =',nsppol ,'      nsym =',nsym,&
 575 & '    n1xccc =',n1xccc ,'    ntypat =',ntypat,ch10,&
 576 & '    occopt =',occopt ,'   xclevel =',xclevel
 577  call wrtout(iout,msg)
 578  call wrtout(std_out,msg)
 579 
 580  write(msg,'(4(3(a,i12),a))') &
 581 & '-    mband =',mband  ,'        mffmem =',mffmem,&
 582 & '         mkmem =',mkmem  ,ch10,&
 583 & '       mpw =',mpw    ,'          nfft =',nfft ,&
 584 & '          nkpt =',nkpt
 585  call wrtout(iout,msg)
 586  call wrtout(std_out,msg)
 587 
 588  if (my_natom/=natom)then
 589    write(msg,'(a,i10)') 'Pmy_natom=',my_natom
 590    call wrtout(iout,msg)
 591    call wrtout(std_out,msg)
 592  end if
 593 
 594 !Additional information if imgmov is activated (use of replicas of the cell)
 595  if (nimage>1) then
 596    write(msg,'(1(a,i10))' ) '  nimage =',nimage
 597    call wrtout(iout,msg)
 598    call wrtout(std_out,msg)
 599  end if
 600 
 601 !Additional information on FFT grids if PAW
 602  if (usepaw==1) then
 603    write(msg, '(a,a,a,i10,a,i10)' )&
 604 &   ' PAW method is used; the additional fine FFT grid is defined by:',ch10,&
 605 &   '   mgfftf=',mgfftf,'    nfftf =',nfftf
 606    call wrtout(iout,msg)
 607    call wrtout(std_out,msg)
 608  end if
 609 
 610 !Additional information if GPU
 611  if (gpu_option/=ABI_GPU_DISABLED) then
 612 !  write(msg, '(a)' )' GPU method is used'
 613 !  call wrtout(iout,msg)
 614 !  call wrtout(std_out,msg)
 615  end if
 616 
 617 !Additional information needed for the susceptibility and dielectric matrices
 618  if((modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70) .or. iscf==-1)then
 619 
 620 !  Compute the number of bands in blocks (nbnd_in_blk) from mband (see suskmm.f)
 621 !  Consider that if the number of bands is large, there are at most 8 blocks
 622    if(mband>=48)then
 623      mblk=8
 624      nbnd_in_blk=(mband-1)/mblk+1
 625 !    If the number of bands is medium, place 6 bands per block
 626    else if(mband>=12)then
 627      nbnd_in_blk=6
 628 !    Otherwise, must have at least 2 blocks
 629    else
 630      mblk=2
 631      nbnd_in_blk=(mband-1)/mblk+1
 632    end if
 633 
 634    write(msg, '(a,a,a,i10,a,i6,a,i10,a,i10)' )&
 635 &   ' For the susceptibility and dielectric matrices, or tddft :',ch10,&
 636 &   '   mgfft =',mgfftdiel,'  nbnd_in_blk=',nbnd_in_blk,'    nfft =',nfftdiel,&
 637 &   '     npw =',npwdiel
 638    call wrtout(iout,msg)
 639    call wrtout(std_out,msg)
 640    ndiel4=ngfftdiel(4) ; ndiel5=ngfftdiel(5) ; ndiel6=ngfftdiel(6)
 641    ndiel456=ndiel4*ndiel5*ndiel6
 642  else
 643 !  To be sure of initialisation.
 644    ndiel456 = 1
 645  end if
 646 
 647  write(msg,'(80a)') ('=',mu=1,80)
 648  call wrtout(iout,msg)
 649  call wrtout(std_out,msg)
 650 
 651  if(getcell>0 .or. (getcell<0 .and. idtset+getcell>0) )then
 652    write(msg,'(a,a,a,a,a,a,i3,a,i3,a,a,a,a,a,a)' )ch10,&
 653 &   ' memory : COMMENT -',ch10,&
 654 &   '  The determination of memory needs at this stage is meaningless,',ch10,&
 655 &   '  since getcell = ',getcell,' is non-zero, while idtset=',idtset,'.',ch10,&
 656 &   '  The following numbers are obtained by supposing that acell and rprim',ch10,&
 657 &   '  are NOT taken from a previous dataset. You cannot rely on them.',ch10
 658    call wrtout(iout,msg)
 659    call wrtout(std_out,msg)
 660  end if
 661 
 662 !Compute number of atoms per type for current proc
 663  nattyp(:)=0
 664  do ii=1,natom
 665    nattyp(typat(ii))=nattyp(typat(ii))+1
 666  end do
 667 
 668 !PAW: store useful dims
 669  if (usepaw==1) then
 670    ABI_MALLOC(basis_size,(npsp))
 671    ABI_MALLOC(l_size,(npsp))
 672    ABI_MALLOC(lmn_size,(npsp))
 673    ABI_MALLOC(lmn2_size,(npsp))
 674    ABI_MALLOC(mesh_size,(npsp))
 675    ABI_MALLOC(shape_type,(npsp))
 676    ABI_MALLOC(pawver,(npsp))
 677    ABI_MALLOC(rshp,(npsp))
 678    do ii=1,npsp
 679      basis_size(ii)=pspheads(ii)%pawheader%basis_size
 680      mesh_size(ii)=pspheads(ii)%pawheader%mesh_size
 681      l_size(ii)=pspheads(ii)%pawheader%l_size
 682      lmn_size(ii)=pspheads(ii)%pawheader%lmn_size
 683      lmn2_size(ii)=lmn_size(ii)*(lmn_size(ii)+1)/2
 684      pawver(ii)=pspheads(ii)%pawheader%pawver
 685      rshp(ii)=pspheads(ii)%pawheader%rshp
 686      shape_type(ii)=pspheads(ii)%pawheader%shape_type
 687    end do
 688    l_max=maxval(pspheads(:)%lmax)
 689    l_size_max=maxval(pspheads(:)%pawheader%l_size)
 690    rhoij_nspden=nspden;if (pawspnorb>0) rhoij_nspden=4
 691    ABI_MALLOC(my_nattyp,(ntypat))
 692    if ((mpi_enreg%nproc_atom<=1).or.(.not.associated(mpi_enreg%my_atmtab))) then
 693      my_nattyp=nattyp
 694    else
 695      my_nattyp=0
 696      do ii=1,my_natom
 697        jj=typat(mpi_enreg%my_atmtab(ii))
 698        my_nattyp(jj)=my_nattyp(jj)+1
 699      end do
 700    end if
 701    qphase_rhoij=merge(2,1,any(qphon(:)>tol8))
 702  else
 703 !  Do the allocation to avoid uninitialised variables.
 704    ABI_MALLOC(my_nattyp,(1))
 705    ABI_MALLOC(basis_size,(1))
 706    ABI_MALLOC(l_size,(1))
 707    ABI_MALLOC(lmn_size,(1))
 708    ABI_MALLOC(lmn2_size,(1))
 709    ABI_MALLOC(mesh_size,(1))
 710    ABI_MALLOC(shape_type,(1))
 711    ABI_MALLOC(pawver,(1))
 712    ABI_MALLOC(rshp,(1))
 713    rhoij_nspden=nspden
 714    l_size_max=1
 715    l_max=1
 716    qphase_rhoij=1
 717  end if
 718 
 719  n_fftgr=1;iscf10=mod(iscf,10)
 720  if(iscf10==1)              n_fftgr=5
 721  if(iscf10==2)              n_fftgr=3
 722  if(iscf10==3)              n_fftgr=4
 723  if(iscf10==4)              n_fftgr=6
 724  if(iscf10==5.or.iscf10==6) n_fftgr=10
 725  if(iscf10==7)              n_fftgr=2+2*npulayit
 726 
 727 !work1 and work2 in fourdp : take into account approximately fftalgb
 728  fftalgb=mod(ngfft(7),100)/10
 729  if(fftalgb==0)narr_fourdp=2*2
 730  if(fftalgb==1)narr_fourdp=2
 731 
 732  ngrad=1;if(xclevel==2.or.tfkinfunc>10)ngrad=2
 733 
 734 !(1)                     in main, driver, gstate and brdmin ----------------
 735 !in move, nothing interesting is allocated.
 736 !kg (gstate)
 737  cmpw(1)=3*mkmem               ; dttyp(1)=4
 738 !indsym (gstate)
 739  cadd(3)=4*nsym*natom          ; dttyp(3)=4
 740 !irrzon  (gstate)
 741  if(nsym/=1)then
 742    cfft(4)=2*((nspden/nsppol)-3*(nspden/4))    ; dttyp(4)=4
 743  end if
 744 !ylm (gstate)
 745  cmpw(5)=mkmem*mpsang*mpsang*useylm ; dttyp(5)=8
 746 !
 747 !rhor,rhog (gstate)
 748  cfftf(5)=nspden+2              ; dttyp(5)=8
 749 !cg (gstate)
 750  cmpw(6)=2*nspinor*mband*mkmem*nsppol  ; dttyp(6)=8
 751 !eigen,resid,occ (occ is initialized in abinit, and not in driver)
 752  cadd(7)=3*mband*nkpt*nsppol   ; dttyp(7)=8
 753 !qgrid_vl,qgrid_ff,vlspl,ffspl
 754  cadd(8)=mqgrid_vl*(1+2*ntypat)   &
 755 & +mqgrid_ff*(1+2*ntypat*lnmax)   &
 756 & ; dttyp(8)=8
 757 !ph1d (actually allocated in scfcv !!)
 758  cadd(9)=2*3*(2*mgfft+1)*natom ; dttyp(9)=8
 759  cadd(9)=cadd(9)+2*3*(2*mgfftf+1)*natom*usepaw  !Additional ph1df for PAW
 760 !phnons (in gstate)
 761  if(nsym/=1)then
 762    cfft(10)=2*((nspden/nsppol)-3*(nspden/4))    ; dttyp(10)=8
 763  end if
 764 !xccc1d (in driver)
 765  cadd(11)=n1xccc*6*ntypat      ; dttyp(11)=8
 766 
 767 !hessin in brdmin
 768  if(ionmov==2)then
 769    cadd(15)=3*natom*3*natom      ; dttyp(15)=8
 770  end if
 771 
 772 !Additional PAW arrays
 773 !PAW datasets (pawtab)
 774  if (usepaw==1) then
 775    dttyp(16)=8 ; dttyp(17)=4
 776    do ii=1,npsp
 777      cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii)   !phi,tphi
 778      cadd(16)=cadd(16)+2*mesh_size(ii)*basis_size(ii)&  !phiphj,tphiphj
 779 &    *(basis_size(ii)+1)/2
 780      cadd(16)=cadd(16)+mesh_size(ii)*l_size(ii)         !shapefunc
 781      cadd(16)=cadd(16)+lmn2_size(ii)*l_size(ii)**2      !qijl
 782      cadd(16)=cadd(16)+l_size(ii)*5                     !gnorm,shape_a,shape_q
 783      cadd(16)=cadd(16)+lmn2_size(ii)*(4+lmn2_size(ii))  !eijkl,dltij,dij0,rhoij0,sij
 784      cadd(17)=cadd(17)+lmn2_size(ii)*8                  !indklmn
 785      cadd(16)=cadd(16)+mesh_size(ii)*5                  !coreden,tcoreden,rad,radfact,simfact
 786      if (shape_type(ii)==-1) cadd(16)=cadd(16)+4*mesh_size(ii)*l_size(ii)  !dshpfunc
 787      cadd(16)=cadd(16)+mqgrid_vl*2                      !tncorespl
 788      if (pawver(ii)>=4) cadd(16)=cadd(16)+mqgrid_vl*2   !tnvalespl
 789    end do
 790 !  additional arrays
 791    cadd(16)=cadd(16)+l_size_max*2*l_max*nsym                 !zarot
 792    cadd(16)=cadd(16)+(2*l_max-1)**2*l_max**2*(l_max**2+1)/2  !realgnt
 793    cadd(17)=cadd(17)+nfft+nfftf                              ! fintocoa,coatofin
 794    do ii=1,ntypat
 795      cadd(16)=cadd(16)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc ! Rhoij and related data
 796      cadd(17)=cadd(17)+my_nattyp(ii)*(2+lmn2_size(ii))    ! (rhoijselect, ...)
 797    end do
 798    !PAW:cprj
 799    do ii=1,ntypat
 800      cadd(16)=cadd(16)+2*nattyp(ii)*nkpt*nspinor*mband*nsppol*lmn_size(ii)/max(mpi_enreg%nproc_band,1)
 801    end do
 802  end if
 803 
 804 !SCF history (if selected)
 805  if (abs(densfor_pred)==5.or.abs(densfor_pred)==6) then          ! scf_history...
 806    histsz=2
 807    cfftf(18)=nspden*(histsz+1)+1      ; dttyp(18)=8  ! %deltarhor, %atmrho_last, %rhor_last
 808    cadd(19)=3*natom*2*histsz          ; dttyp(19)=8  ! %xreddiff,xred_last
 809    dttyp(20)=4
 810    if (usepaw==1) then
 811      do ii=1,ntypat
 812        cadd(19)=cadd(19)+histsz*2*my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*qphase_rhoij*pawcpxocc ! %pawrhoij()%rhoijp
 813        cadd(20)=cadd(20)+histsz*2*my_nattyp(ii)*(2+lmn2_size(ii))*nspden ! %pawrhoij()%rhoijselect
 814      end do
 815    end if
 816    if (extrapwf>0) then
 817      cadd(19)=cadd(19)+histsz*2*nspinor*mband*mkmem*nsppol  ; dttyp(19)=8  ! %cg
 818    end if
 819  end if
 820 
 821 !(2)                     in scfcv----------------------------------------
 822 
 823 !vhartr,vpsp,vtrial,vxc
 824  cfftf(21)=2+2*nspden           ; dttyp(21)=8
 825 !kxc
 826  if (abs(densfor_pred)>0.and.iscf>=10) then
 827    cfftf(21)=cfftf(21)+3*nspden
 828    if (densfor_pred<0.and.xclevel==2) cfftf(21)=cfftf(21)+20*nspden
 829  end if
 830  if(iscf>0)then
 831 !  f_fftgr
 832    if (pawmixdg==1) then
 833      cfftf(22)=nspden*n_fftgr*mffmem; dttyp(22)=8
 834    else
 835      cfft(22)=nspden*n_fftgr*mffmem; dttyp(22)=8
 836    end if
 837  end if
 838  if( iscf>0 .and. (modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70))then
 839 !  dielinv, susmat
 840    cadd(23)=4*(npwdiel*min(nspden,2))**2; dttyp(23)=8
 841  end if
 842 !Kernel of Poisson's solver
 843  if (icoulomb == 1) then
 844    cadd(24) = ngfft(4)*ngfft(5)*ngfft(6) ; dttyp(24) = 8
 845  end if
 846  if( (iscf>0 .and. modulo(iprcel,100)>=20 .and. modulo(iprcel,100)<70) .or. iscf==-1 )then
 847 !  kg_diel
 848    cadd(27)=3*npwdiel             ; dttyp(27)=4
 849    if(nsym/=1)then
 850 !    irrzondiel
 851      cadd(27)=cadd(27)+2*nfftdiel*(nspden/nsppol)
 852 !    phnonsdiel
 853      cadd(28)=2*nfftdiel*(nspden/nsppol)   ; dttyp(28)=8
 854    end if
 855  end if
 856  if(n1xccc/=0)then
 857 !  xccc3d
 858    cfftf(29)=1                    ; dttyp(29)=8
 859  end if
 860 
 861 !Additional PAW arrays
 862  dttyp(25)=8 ; dttyp(26)=4
 863  if (usepaw==1) then
 864    do ii=1,ntypat
 865      jj=(1+int(nfftf*four_pi*rshp(ii)**3/(three*ucvol)))        ! pawfgrtab
 866      cadd(26)=cadd(26)+my_nattyp(ii)*jj                         !   %ifftsph
 867      cadd(25)=cadd(25)+my_nattyp(ii)*jj*(1-pawstgylm)*3         !   %rfgd (if pawstgylm=0)
 868      cadd(25)=cadd(25)+my_nattyp(ii)*jj*pawstgylm*l_size(ii)**2 !   %gylm (if pawstgylm=1)
 869      if (optforces==1) cadd(25)=cadd(25)+my_nattyp(ii)*jj&      !   %gylmgr,%rfgd (if pawstgylm=1)
 870 &    *pawstgylm*(3*l_size(ii)**2+3*optstress)
 871      cadd(26)=cadd(26)+my_nattyp(ii)*l_size(ii)**2/32           ! lmselect  !now a boolean
 872      cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*nspinor**3   ! dij
 873      if (iscf>0) then
 874        cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc                ! rhoijres
 875        cadd(25)=cadd(25)+my_nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc*n_fftgr*mffmem ! f_paw
 876      end if
 877    end do
 878 !   cadd(25)=cadd(25)+(1+3*pawnhatxc*(ngrad/2))*nspden*nfftf       !nhat,nhatgr
 879    cfftf(29)=cfftf(29)+(1+3*pawnhatxc*(ngrad/2))*nspden       !nhat,nhatgr
 880  end if
 881 
 882 !(3)                     in rhotoxc, xcden -------------------------------
 883 
 884  if(xclevel/=0)then
 885    if(n1xccc/=0)then
 886 !    rhocorval
 887      cfftf(31)=nspden               ; dttyp(31)=8
 888    end if
 889 !  dnexcdn, rhonow
 890    nspgrad=nspden*ngrad
 891    if(nspden==2 .and. ngrad==2)nspgrad=5
 892    cfftf(32)=nspden*ngrad*ngrad+nspgrad  ; dttyp(32)=8
 893    if(intxc==1 .or. ngrad==2)then
 894 !    wkcmpx,work in xcden +work1,work2 in fourdp
 895      cfftf(33)=3+narr_fourdp        ; dttyp(33)=8
 896      cadd(33)=narr_fourdp*(ngfftf(4)*ngfftf(5)*ngfftf(6)-nfftf)
 897    end if
 898    if(ngrad==2)then
 899 !    workgr in xcden
 900      cfftf(34)=2                    ; dttyp(34)=8
 901    end if
 902  end if
 903  if(iscf>0)then
 904 !  In this case, rhotoxc is called from rhotov also,
 905 !  for which vresid was allocated in scfcv
 906 !  vresid
 907    cfftf(35)=nspden               ; dttyp(35)=8
 908  end if
 909 !Poisson's solver with zero padding
 910  if (icoulomb == 1) then
 911    cfft(36) = 8                   ; dttyp(36) = 8
 912    cadd(36) = ngfft(4) * ngfft(5) * ngfft(6) - nfft
 913  end if
 914 
 915 !Note : in hartre, called by rhotoxc, one uses
 916 !2 dp arrays of total size 3*nfft,
 917 !and 2 arrays of total size 4*n4*n5*n6 for fourdp
 918 !This will be smaller than the total use for symrhg
 919 
 920 !(4)                     in newvtr/newrho --------------------------------------
 921 
 922  if(iscf>0)then
 923 !  vresid (allocated in scfcv) and vrespc
 924    if (pawmixdg==1) then
 925      cfftf(41)=2*nspden             ; dttyp(41)=8
 926    else
 927      cfft(41)=2*nspden             ; dttyp(41)=8
 928    end if
 929    if(mffmem==0)then
 930 !    f_fftgr_disk
 931      if (pawmixdg==1) then
 932        cfftf(42)=nspden*n_fftgr       ; dttyp(42)=8
 933      else
 934        cfft(42)=nspden*n_fftgr       ; dttyp(42)=8
 935      end if
 936 !    f_paw_disk
 937      if (usepaw==1) then
 938        dttyp(43)=8
 939        do ii=1,ntypat
 940          cadd(43)=cadd(43)+my_nattyp(ii)*lmn2_size(ii)*nspden*n_fftgr
 941        end do
 942      end if
 943    end if
 944 !  rhoupdn, n(v)resid0, vtrialg, rhog2, magng
 945    if (pawmixdg==1) then
 946      cfftf(43)=2*nspden       ; dttyp(43)=8
 947    else
 948      cfft(43)=2*nspden       ; dttyp(43)=8
 949      if (nspden>1) cfftf(43)=2*(nspden-1)
 950    end if
 951  end if
 952 
 953 !(5-6)                   in vtorho-----------------------------------------
 954 
 955 !Note : (5) is for the arrays inside the spin and k-point loop
 956 !they belong to the main chain
 957 !(6) is for the arrays after the spin and k-point loop
 958 !(6a) is for the arrays after that loop, for the parallel k-point chain
 959 !(6b) is for the arrays in mkrho, for the mkrho chain
 960 !(6c) is for the arrays in symrhg, for the fourdp chain
 961 !(6d) is for the arrays in suscep, for the suscep chain, see (10)
 962 !(6e) is for the arrays in dielmt, for the dielmt chain, see (11)
 963 !(6f) is for the arrays in pawmkrhoij
 964 
 965 !eknlk, enlxnk, grnlnk
 966  cadd(51)=(11+3*natom)*mband*nkpt*nsppol &
 967 & ; dttyp(51)=8
 968 !kg_k
 969  cmpw(52)=3                    ; dttyp(52)=4
 970 !rhoaug,vlocal
 971  cfft(53)=2                    ; dttyp(53)=8
 972  cadd(53)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
 973 !rhowfr,rhowfg
 974  cfft(53)=cfft(53)+2+nspden
 975  if(mkmem==0)then
 976 !  cg_disk
 977    cmpw(54)=2*nspinor*mband      ; dttyp(54)=8
 978  end if
 979 !eig_k, ek_k, enlx_k, grnl_k, occ_k, resid_k
 980  cadd(56)=(14+3*natom)*mband   ; dttyp(56)=8
 981 !ylm_k
 982  cmpw(57)=mpsang*mpsang*useylm ; dttyp(57)=8
 983 !!PAW:cprj
 984 ! if (usepaw==1) then
 985 !   dttyp(58)=8
 986 !   do ii=1,ntypat
 987 !     cadd(58)=cadd(58)+2*nattyp(ii)*nkpt*nspinor*mband*nsppol*lmn_size(ii)/max(mpi_enreg%nproc_band,1)
 988 !   end do
 989 ! end if
 990 
 991 !(6)                     in vtorho----------------------------------------
 992 
 993 !doccde
 994  cadd(60)=mband*nkpt*nsppol    ; dttyp(60)=8
 995 
 996 !(6a)                    in vtorho----------------------------------------
 997  if(xmpi_paral==1)then
 998 !  Parallel case
 999 !  buffer1
1000 !  buffer2
1001    if(occopt>=3 .and. occopt <=8) then
1002      dttyp(61)=8
1003      if(nsppol*nfft >= (13+3*natom)*mband*nkpt*nspden)then
1004        cfft(61)=2*nspden
1005      else
1006        cadd(61)=(13+3*natom)*mband*nkpt*nspden
1007      end if
1008    else
1009      cfft(61)=2*nspden             ; dttyp(61)=8
1010      cadd(61)=9+3*natom+2+2*mband*nkpt*nspden
1011    end if
1012  end if
1013 
1014 
1015 !(6b)                    in mkrho, called by vtorho--------------------------
1016  if(occopt>=3 .and. occopt <=8)then
1017    if(mkmem==0)then
1018 !    cg_disk
1019      cmpw(62)=2*nspinor*mband      ; dttyp(62)=8
1020    end if
1021 !  cwavef
1022    cmpw(65)=2*nspinor            ; dttyp(65)=8
1023 
1024 !  rhoaug, wfraug, work1 in fourwf
1025    cfft(66)=5                    ; dttyp(66)=8
1026    cadd(66)=5*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1027  end if
1028 
1029 !(6c)                    in symrhg, called by vtorho--------------------------
1030  if(iscf>0)then
1031    cfft(67)=narr_fourdp          ; dttyp(67)=8
1032    cadd(67)=narr_fourdp*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1033    if(nsym>1)then
1034 !    work1  in symrhg
1035      cfft(68)=2                    ; dttyp(68)=8
1036      cadd(68)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1037    end if
1038  end if
1039 
1040 
1041 !(6d) and (6e)           in suscep and dielmt, called by vtorho,
1042 !see (10) and (11) -------------------------------
1043 
1044 !(6f)  in pawmkrhoij or pawrhoij_symrhoij called by pawmkrho, called by vtorho--------
1045 !only when paralellim over atoms is activated
1046  dttyp(63)=8
1047  if((usepaw==1) .and. ((iscf>0) .or. (iscf == -3) .and. mpi_enreg%nproc_atom>1 ))then
1048    do ii=1,ntypat
1049      cadd(63)=cadd(63)+nattyp(ii)*lmn2_size(ii)*rhoij_nspden*pawcpxocc*qphase_rhoij ! Rhoij_gather and related data
1050      cadd(63)=cadd(63)+nattyp(ii)*(2+lmn2_size(ii)) ! Rhoij_gather (rhoijselect, ...)
1051    end do
1052  end if
1053 
1054 !(7)                     in vtowfk----------------------------------------
1055 
1056 !evec
1057  cadd(71)=2*mband*mband        ; dttyp(71)=8
1058 !subham, subvnlx(if not PAW or if usefock_ACE)
1059  cadd(72)=(1+usepaw)*mband*(mband+1)    ; dttyp(72)=8
1060 !gkpsq
1061  cmpw(73)=1                    ; dttyp(73)=8
1062 !ffnl
1063  cmpw(74)=2*ntypat*lmnmax      ; dttyp(74)=8
1064 !ph3d
1065  matblk=min(NLO_MINCAT,maxval(nattyp))
1066  if(nloalg(2)<=0)matblk=natom
1067  cmpw(75)=2*matblk             ; dttyp(75)=8
1068 !gsc(if PAW)
1069 ! cmpw(76)=2*mband*nspinor*usepaw          ; dttyp(76)=8
1070 !Note : matvnl and mat1 do not belong to a chain defined until now
1071 !
1072  if(occopt<3 .and. iscf>0)then
1073 !  cwavef
1074    cmpw(77)=2*nspinor            ; dttyp(77)=8
1075 !  wfraug
1076    cfft(78)=2                    ; dttyp(78)=8
1077    cadd(78)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1078 !  work1 in fourwf
1079    cfft(79)=2                    ; dttyp(79)=8
1080    cadd(79)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1081  end if
1082 
1083 
1084 !(8)                     in cgwf_cprj-------------------------------------
1085 
1086 !conjgr, direc, direc_tmp, gvnlx
1087  cmpw(81)=2*4*nspinor          ; dttyp(81)=8
1088 ! cwavef_r,direc_r
1089  cfft(82)=2*2*nspinor          ; dttyp(82)=8
1090 
1091 !(8)                     in cgwf------------------------------------------
1092 
1093 !!conjgr, cwavef, direc, gh_direc, gvnlx_direc
1094 ! cmpw(81)=2*5*nspinor          ; dttyp(81)=8
1095 !!ghc,gvnlxc
1096 ! cmpw(82)=2*2*nspinor          ; dttyp(82)=8
1097 !!PAW: scwavef,direc_tmp,ghc_all
1098 ! cmpw(83)=2*(2+mband)*nspinor*usepaw  ; dttyp(83)=8
1099 
1100 
1101 !(9a)                    in getghc and fourwf----------------------------
1102 
1103 !work (in getghc)
1104  cfft(91)=2                    ; dttyp(91)=8
1105  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1106 !work1 (in fourwf)
1107  cfft(92)=2                    ; dttyp(92)=8
1108  cadd(92)=2*(ngfft(4)*ngfft(5)*ngfft(6)-nfft)
1109 
1110 !(9b)                    in getghc, nonlop and opernl--------------------
1111  mincat=min(NLO_MINCAT,natom-ntypat+1)
1112  if (useylm==0) then                          ! ===== nonlop_pl
1113 !  gxa  (in nonlop)
1114    cadd(94)=2*20*mincat*2       ; dttyp(94)=8
1115 !  dgxdt  (in nonlop)            !MT20072002: not allocated in getghc !!
1116    if (optforces==1) then
1117      cadd(95)=2*3*20*mincat*2    ; dttyp(95)=8
1118    end if
1119 !  teffv (in opernl4 - no distinction is made for opernl, opernl2 or opernl3)
1120 !  kpgx, ffkg
1121 !  here, evaluate an upper value, with nproj=2, p,d and f orbitals, but not
1122 !  considering the stress, since it will be called outside of the main chain
1123    cadd(97)=NLO_MBLKPW*40        ; dttyp(97)=8
1124 !  kpg if nloalg(3)=1
1125    cadd(98)=3*mpw*nloalg(3)      ; dttyp(98)=8
1126  else                                        ! ===== nonlop_ylm
1127 !  gx + gxfac + gxfac_sij
1128 !   cadd(94)=2*lmnmax*mincat*(mpw+1+usepaw)    ; dttyp(94)=8
1129    cmpw(94)=2*lmnmax*mincat                   ; dttyp(94)=8
1130    cadd(99)=2*lmnmax*mincat*(1+usepaw)        ; dttyp(99)=8
1131 !  kpg
1132    cmpw(95)=3             ; dttyp(95)=8
1133 !  indlmn_typ, ffnl_typ
1134    cadd(96)=lmnmax*6; dttyp(96)=4
1135 !  ffnl_typ
1136    cmpw(97)=lmnmax; dttyp(97)=8
1137 !  opernla_ylm: scalar,scali
1138    cmpw(98)=2; dttyp(98)=8
1139  end if
1140 
1141 !(10)                    in suscep and suskmm ----------------------------
1142 
1143  if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
1144 !  Variables allocated in suscep
1145    if(mkmem==0)then
1146 !    cg_disk
1147      cmpw(101)=2*mband             ; dttyp(101)=8
1148    end if
1149    if(occopt>=3)then
1150 !    drhode
1151      cadd(103)=2*npwdiel*nsppol    ; dttyp(103)=8
1152    end if
1153 !  rhoextrap (always included, although it appears only when extrap==1)
1154    cadd(104)=ndiel456            ; dttyp(104)=8
1155 
1156 !  Variables allocated in suskmm
1157 !  cwavef
1158    cmpw(106)=2                   ; dttyp(106)=8
1159 !  rhoaug, wfraug
1160    cadd(107)=3*ndiel456          ; dttyp(107)=8
1161 !  wfprod
1162    cadd(108)=2*npwdiel           ; dttyp(108)=8
1163 !  wfrspa1, wfrspa2
1164    cadd(109)=4*ndiel456*nbnd_in_blk ; dttyp(109)=8
1165 
1166  end if
1167 
1168 !(11)                    in dielmt ---------------------------------------
1169 
1170  if(modulo(iprcel,100)>=20.and.modulo(iprcel,100)<70)then
1171 !  dielh,dielvec,eig_diel,zhpev1,zhpev2
1172    cadd(111)=3*npwdiel*npwdiel                   &
1173 &   +9*npwdiel           ; dttyp(111)=8
1174  end if
1175 
1176 !(12)                    in tddft  ---------------------------------------
1177 
1178  if(iscf==-1)then
1179    if(mkmem/=0)then
1180 !    cg_disk
1181      cmpw(121)=2*mband            ; dttyp(121)=8
1182    end if
1183 !  cwavef
1184    cmpw(124)=2*mband             ; dttyp(124)=8
1185 !  rhoaug,wfraug,wfrspa
1186    cadd(125)=(2+mband)*ndiel456  ; dttyp(125)=8
1187  end if
1188 
1189 !--------------------------------------------------------------------------
1190 
1191  chain(:,:)=.true.
1192 
1193 !Define the main chain version a (fourwf)
1194  chain(31:50,1)=.false.
1195  chain(60:70,1)=.false.
1196  chain(77:80,1)=.false.
1197  chain(93:100,1)=.false.
1198  chain(101:marrays,1)=.false.
1199 
1200 !Define the main chain version b (nonlop+opernl)
1201  chain(31:50,2)=.false.
1202  chain(60:70,2)=.false.
1203  chain(77:80,2)=.false.
1204  chain(91:92,2)=.false.
1205  chain(101:marrays,2)=.false.
1206 
1207 !Define the XC chain ( 31:40 belong only to this chain)
1208  chain(41:marrays,3)=.false.
1209 
1210 !Define the mkrho chain ( 62:66 and 76:77 belong only to this chain)
1211 !is it sure that they have to be summed ?)
1212  chain(31:50,4)=.false.
1213  chain(51:59,4)=.false.
1214  chain(61   ,4)=.false.
1215  chain(67:70,4)=.false.
1216  chain(71:marrays,4)=.false.
1217  chain(77:80,4)=.true.
1218 
1219 !Define the fourdp chain ( 67:70 belong only to this chain)
1220  chain(31:50,5)=.false.
1221  chain(51:66,5)=.false.
1222  chain(60   ,5)=.true.
1223  chain(71:marrays,5)=.false.
1224 
1225 !Define the parallel k-point chain ( 61 belong only to this chain )
1226  chain(31:50,6)=.false.
1227  chain(51:59,6)=.false.
1228  chain(62:70,6)=.false.
1229  chain(71:marrays,6)=.false.
1230 
1231 !Define the newvtr chain ( 41:50 belong only to this chain)
1232  chain(31:40,7)=.false.
1233  chain(51:marrays,7)=.false.
1234 
1235 !Define the suscep chain ( 101:110 belong only to this chain)
1236  chain(31:marrays,8)=.false.
1237  chain(60    ,8)=.true.
1238  chain(101:110,8)=.true.
1239 
1240 !Define the dielmt chain ( 111:120 belong only to this chain)
1241  chain(31:marrays,9)=.false.
1242  chain(60    ,9)=.true.
1243  chain(111:120,9)=.true.
1244 
1245 !Define the tddft chain ( 121:130 belong only to this chain)
1246  chain(31:marrays,10)=.false.
1247  chain(60    ,10)=.true.
1248  chain(121:130,10)=.true.
1249 
1250 !The memory needed for each chain has been computed
1251 !-------------------------------------------------------------------------
1252 !Still need some auxiliary data : estimate the disk space
1253 !or the maximum segment size.
1254 
1255 !XG030513 : MPIWF need to multiply mbdiskwf by the number of processors
1256 !in the WF group. For the time being, nprocwf=1
1257  nprocwf=mpi_enreg%nproc_fft
1258 
1259  mbdiskwf=(8*two*mpw*nprocwf*sum(nband(1:nkpt*nsppol)))/1024._dp**2 + 0.002_dp
1260  mbdiskpd=(8*nfftf*nsppol)/1024._dp**2 + 0.002_dp
1261 
1262 !Determine the largest array out of cg (cg_disk), f_fftgr (f_fftgr_disk), or pawfgrtab%gylm
1263  if(mkmem==0)then
1264    mbcg=(8*2*mpw*mband)/1024._dp**2 + 0.002_dp
1265  else
1266    mbcg=(8*2*mpw*mband*mkmem*nsppol)/1024._dp**2 + 0.002_dp
1267  end if
1268  if(mffmem==0)then
1269    if (pawmixdg==1) then
1270      mbf_fftgr=(8*nfftf*n_fftgr)/1024._dp**2 + 0.002_dp
1271    else
1272      mbf_fftgr=(8*nfft*n_fftgr)/1024._dp**2 + 0.002_dp
1273    end if
1274  else
1275    if (pawmixdg==1) then
1276      mbf_fftgr=(8*nfftf*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp
1277    else
1278      mbf_fftgr=(8*nfft*n_fftgr*nsppol*mffmem)/1024._dp**2 + 0.002_dp
1279    end if
1280  end if
1281  if(usepaw==1)then
1282    mbgylm=0
1283    do ii=1,ntypat                                        ! pawfgrtab
1284      jj=(1+int(nfftf*four_pi/(three*ucvol)*rshp(ii)**3))
1285      mbgylm=mbgylm+my_nattyp(ii)*jj &
1286 &     *( l_size(ii)**2*pawstgylm &                              !   %gylm   (if pawstgylm=1)
1287 &    +3*max((optforces+1)/2,optstress)*l_size(ii)**2*pawstgylm& !   %gylmgr (if pawstgylm=1)
1288 &    +3*optstress*pawstgylm&                                    !   %rfgd   (if pawstgylm=1)
1289 &    +3*(1-pawstgylm) )                                         !   %rfgd   (if pawstgylm=0)
1290    end do
1291    mbgylm=8*mbgylm/1024._dp**2 + 0.002_dp
1292  else
1293    mbgylm=0
1294  end if
1295 
1296 !-------------------------------------------------------------------------
1297  ABI_FREE(my_nattyp)
1298  ABI_FREE(basis_size)
1299  ABI_FREE(l_size)
1300  ABI_FREE(lmn_size)
1301  ABI_FREE(lmn2_size)
1302  ABI_FREE(mesh_size)
1303  ABI_FREE(pawver)
1304  ABI_FREE(shape_type)
1305  ABI_FREE(rshp)
1306 
1307 !---------------------------------------------------------------------
1308 !Now, analyze the data
1309 
1310  call memana(cadd,cfft,cfftf,chain,cmpw,dttyp,iout,iprcel,iscf,&
1311 & marrays,mbcg,mbdiskpd,mbdiskwf,mbf_fftgr,mbgylm,mffmem,&
1312 & mpw,natom,nchain,nfft,nfftf,occopt,option,prtvol)
1313 
1314 end subroutine memory

m_memeval/memory_eval [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 memory_eval

FUNCTION

 Big loop on the datasets:
 - for each of the datasets, write one line about the crystallographic data
 - compute the memory needs for this data set.

INPUTS

  dtsets(0:ndtset_alloc)=<type datafiles_type>contains all input variables
  iout=unit number of output file
  mpi_enregs=information about MPI parallelization
  ndtset= number of datasets to be read; if 0, no multi-dataset mode
  ndtset_alloc=number of datasets, corrected for allocation of at least
      one data set.
  npsp=number of pseudopotentials
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file header, as well as the psp file name

OUTPUT

   printing only

SOURCE

 75 subroutine memory_eval(dtsets,iout,mpi_enregs,ndtset,ndtset_alloc,npsp,pspheads)
 76 
 77 !Arguments ------------------------------------
 78 !scalars
 79  integer,intent(in) :: iout,ndtset,ndtset_alloc,npsp
 80  type(MPI_type),intent(inout) :: mpi_enregs(0:ndtset_alloc)
 81 !arrays
 82  type(dataset_type),intent(inout) :: dtsets(0:ndtset_alloc)
 83  type(pspheader_type),intent(in) :: pspheads(npsp)
 84 
 85 !Local variables -------------------------------
 86 !scalars
 87  integer :: cplex,exchn2n3d,extrapwf,getcell,idtset,ii,intxc,densfor_pred,iprcel
 88  integer :: iscf,isym,jdtset,lmnmax,mem_test
 89  integer :: lmnmax_eff,lmnmaxso,lnmax,lnmax_eff,lnmaxso,mband
 90  integer :: me_fft,mffmem,mgfftdiel,mgfftf,mkmem,mpsang,mpspso
 91  integer :: mpssoang,mpw,mqgrid,mqgriddg,mqgrid_ff,mqgrid_vl,n1xccc,natom
 92  integer :: nfftdiel,nfftf,nkpt,nproc_fft,nptsgvec,npulayit,npwdiel,nspden,nspinor
 93  integer :: nsppol,nsym,ntypat,occopt,optddk,optforces,optphon,optstress
 94  integer :: optstrs,paral_fft,pawcpxocc,pawmixdg,pawnhatxc,pawspnorb,pawstgylm,prtvol,ptgroupma,response
 95  integer :: spgroup,timrev,usepaw,useylm,gpu_option,xclevel
 96  real(dp) :: diecut,dilatmx,ecut,ecut_eff,ecutdg_eff,ecutsus,ucvol
 97 !arrays
 98  integer :: bravais(11),mkmems(3),ngfftdiel(18)
 99  integer :: ngfftf(18),nloalg(3)
100  integer,allocatable :: nband(:),symq(:,:,:),symrec(:,:,:),symrel(:,:,:)
101  real(dp),parameter :: k0(3)=(/zero,zero,zero/)
102  real(dp) :: genafm(3),gmet(3,3),gprimd(3,3),kpt_diel(3),qphon(3),rmet(3,3),rprimd(3,3)
103 
104 !*************************************************************************
105 
106  do idtset=1,ndtset_alloc
107    if(mpi_enregs(idtset)%me<0) cycle
108    call abi_io_redirect(new_io_comm=mpi_enregs(idtset)%comm_world)
109    call libpaw_write_comm_set(mpi_enregs(idtset)%comm_world)
110 
111 !  Initialisations
112    bravais(:)=dtsets(idtset)%bravais(:)
113    exchn2n3d=dtsets(idtset)%exchn2n3d
114    extrapwf=dtsets(idtset)%extrapwf
115    genafm(:) =dtsets(idtset)%genafm(:)
116    getcell=dtsets(idtset)%getcell
117    intxc=dtsets(idtset)%intxc
118    densfor_pred=dtsets(idtset)%densfor_pred
119    iprcel=dtsets(idtset)%iprcel
120    iscf=dtsets(idtset)%iscf
121    jdtset=dtsets(idtset)%jdtset ; if(ndtset==0)jdtset=0
122    me_fft=mpi_enregs(idtset)%me_fft
123    mffmem=dtsets(idtset)%mffmem
124    mpw=dtsets(idtset)%mpw
125    mqgrid=dtsets(idtset)%mqgrid
126    mqgriddg=dtsets(idtset)%mqgriddg
127    natom=dtsets(idtset)%natom
128    nkpt  =dtsets(idtset)%nkpt
129    nloalg(:)=dtsets(idtset)%nloalg(:)
130    nproc_fft=mpi_enregs(idtset)%nproc_fft
131    npulayit=dtsets(idtset)%npulayit
132    nspden=dtsets(idtset)%nspden
133    nspinor=dtsets(idtset)%nspinor
134    nsppol=dtsets(idtset)%nsppol
135    nsym     =dtsets(idtset)%nsym
136    ntypat=dtsets(idtset)%ntypat
137    occopt=dtsets(idtset)%occopt
138    optforces=dtsets(idtset)%optforces
139    paral_fft=mpi_enregs(idtset)%paral_kgb
140    pawcpxocc=dtsets(idtset)%pawcpxocc
141    pawmixdg=dtsets(idtset)%pawmixdg
142    pawnhatxc=dtsets(idtset)%pawnhatxc
143    pawspnorb=dtsets(idtset)%pawspnorb
144    pawstgylm=dtsets(idtset)%pawstgylm
145    prtvol=dtsets(idtset)%prtvol
146    ptgroupma =dtsets(idtset)%ptgroupma
147    qphon(:)=dtsets(idtset)%qptn(:)
148    spgroup   =dtsets(idtset)%spgroup
149    usepaw=dtsets(idtset)%usepaw
150    useylm=dtsets(idtset)%useylm
151    gpu_option=dtsets(idtset)%gpu_option
152    xclevel=dtsets(idtset)%xclevel
153 
154    ABI_MALLOC(symrel,(3,3,nsym))
155    symrel(:,:,1:nsym)=dtsets(idtset)%symrel(:,:,1:nsym)
156 
157 !  Space group output
158    call prtspgroup(bravais,genafm,std_out,jdtset,ptgroupma,spgroup)
159    call prtspgroup(bravais,genafm,iout,jdtset,ptgroupma,spgroup)
160 
161    if (dtsets(idtset)%toldff>tol16.and.optforces==0) optforces=1
162    if (dtsets(idtset)%tolrff>tol16.and.optforces==0) optforces=1
163    if (dtsets(idtset)%ionmov>tol16.and.optforces==0) optforces=1
164    if (dtsets(idtset)%imgmov>tol16.and.optforces==0) optforces=1
165    optstress=dtsets(idtset)%optstress
166    optddk=0;optphon=0;optstrs=0
167    if (dtsets(idtset)%rfddk>0.or.dtsets(idtset)%rf2_dkdk>0.or.dtsets(idtset)%rf2_dkde>0) optddk=1
168    if (dtsets(idtset)%rfelfd>0.or.dtsets(idtset)%d3e_pert1_elfd>0.or.&
169 &   dtsets(idtset)%d3e_pert2_elfd>0.or.dtsets(idtset)%d3e_pert3_elfd>0) optddk=1
170    if (dtsets(idtset)%rfphon>0.or.dtsets(idtset)%d3e_pert1_phon>0.or.&
171 &   dtsets(idtset)%d3e_pert2_phon>0.or.dtsets(idtset)%d3e_pert3_phon>0) optphon=1
172    if (dtsets(idtset)%rfstrs>0) optstrs=1
173 
174    ABI_MALLOC(nband,(nkpt*nsppol))
175    nband(1:nkpt*nsppol)=dtsets(idtset)%nband(1:nkpt*nsppol)
176    mband=maxval(nband(1:nkpt*nsppol))
177    dtsets(idtset)%mband=mband
178 
179 !  mpsang=max(maxval(pspheads(1:npsp)%lmax)+1,1) ! Likely problems with the HP compiler
180 !  n1xccc=maxval(pspheads(1:npsp)%xccc)
181    mpsang=1
182    n1xccc=pspheads(1)%xccc
183    do ii=1,npsp
184      mpsang=max(pspheads(ii)%lmax+1,mpsang)
185      n1xccc=max(pspheads(ii)%xccc,n1xccc)
186    end do
187 
188 !  Determine the maximum number of projectors, for the set of pseudo atom
189    call getdim_nloc(lmnmax,lmnmaxso,lnmax,lnmaxso,dtsets(idtset)%mixalch_orig,&
190 &   dtsets(idtset)%nimage,npsp,dtsets(idtset)%npspalch,ntypat,dtsets(idtset)%ntypalch,pspheads)
191 
192 !  Treatment of the effect of using a spin-orbit part
193 !  Warning: mpspso is different for each dataset; not relevant for PAW
194    mpspso=1
195    if (dtsets(idtset)%usepaw==0) then
196      do ii=1,npsp
197        if(nspinor/=1)then
198          if(pspheads(ii)%pspso/=0)then
199            if(dtsets(idtset)%so_psp(ii)/=0)then
200              mpspso=2
201            end if
202          end if
203        end if
204      end do
205    end if
206 !  In case of no spin-orbit
207    if(mpspso==1)then
208      mpssoang=mpsang ; lmnmax_eff =lmnmax; lnmax_eff =lnmax
209    else ! spin-orbit will be used
210      mpssoang=2*mpsang-1 ; lmnmax_eff =lmnmaxso ; lnmax_eff =lnmaxso
211    end if
212 !  lmnmax is not used if the Ylm are not used
213    if (useylm==0) lmnmax_eff =lnmax_eff
214 
215    ecut     =dtsets(idtset)%ecut
216    dilatmx  =dtsets(idtset)%dilatmx
217    ecut_eff=ecut*dilatmx**2
218    ecutdg_eff=dtsets(idtset)%pawecutdg*dtsets(idtset)%dilatmx**2
219 
220 !  Compute mgfft,mpw,nfft for this data set
221    call mkrdim(dtsets(idtset)%acell_orig(1:3,1),dtsets(idtset)%rprim_orig(1:3,1:3,1),rprimd)
222    call metric(gmet,gprimd,-1,rmet,rprimd,ucvol)
223 
224    if (usepaw==0) then
225      mgfftf=dtsets(idtset)%mgfft;nfftf=dtsets(idtset)%nfft;ngfftf(:)=dtsets(idtset)%ngfft(:)
226    else
227      mgfftf=dtsets(idtset)%mgfftdg;nfftf=dtsets(idtset)%nfftdg;ngfftf(:)=dtsets(idtset)%ngfftdg(:)
228    end if
229    response=0
230    if(dtsets(idtset)%rfddk/=0  .or. dtsets(idtset)%rf2_dkdk/=0 .or. dtsets(idtset)%rf2_dkde/=0 .or. &
231 &   dtsets(idtset)%rfphon/=0 .or. dtsets(idtset)%rfelfd/=0 .or. &
232 &   dtsets(idtset)%rfstrs/=0 .or. dtsets(idtset)%rfuser/=0 .or. &
233 &   dtsets(idtset)%rfmagn/=0    ) response=1
234 
235 !  Compute mgfftdiel,npwdiel,nfftdiel for this data set
236    if((modulo(iprcel,100)>=20 .and.modulo(iprcel,100) < 71).or. iscf==-1)then
237 !    Get diecut, and the fft grid to be used for the susceptibility computation
238      diecut=abs(dtsets(idtset)%diecut)
239      if( dtsets(idtset)%diecut < zero )then
240        ecutsus=ecut
241      else
242        ecutsus= ( sqrt(ecut) *0.5_dp + sqrt(diecut) *0.25_dp )**2
243      end if
244 !    Beware, for the dielectric matrix fftalg=ngfftdiel(7) is default here
245      ngfftdiel(1:3)=0 ; ngfftdiel(7)=101 ; ngfftdiel(8:18)=dtsets(idtset)%ngfft(8:18)
246      if(iscf==-1)ngfftdiel(7)=102
247      ecut_eff=ecutsus*dilatmx**2
248      call getng(dtsets(idtset)%boxcutmin,dtsets(idtset)%chksymtnons,&
249 &     ecut_eff,gmet,k0,me_fft,mgfftdiel,nfftdiel,&
250 &     ngfftdiel,nproc_fft,nsym,paral_fft,symrel,dtsets(idtset)%tnons,&
251 &     gpu_option=dtsets(idtset)%gpu_option)
252 !    Compute the size of the dielectric matrix : npwdiel
253      kpt_diel(1:3)=(/ 0.0_dp, 0.0_dp, 0.0_dp /)
254      ecut_eff=diecut*dilatmx**2
255      call getmpw(ecut_eff,exchn2n3d,gmet,(/1/),kpt_diel,mpi_enregs(idtset),npwdiel,1)
256    else
257      npwdiel=1 ; mgfftdiel=1 ; nfftdiel=1 ; ngfftdiel(1:8)=1
258    end if
259 
260 !  Special treatment for the value of mqgrid to be fed in memory.F90
261 
262    nptsgvec         = 200 ! At present, this has to be chosen once and for all ...
263    if ( dtsets(idtset)%usewvl == 0) then
264      call setmqgrid(mqgrid,mqgriddg,ecut_eff,ecutdg_eff,gprimd,nptsgvec,usepaw)
265    else
266      call setmqgrid(mqgrid,mqgriddg,one,one,gprimd,nptsgvec,usepaw)
267    end if
268    mqgrid_ff=mqgrid
269    if (usepaw==0) mqgrid_vl=mqgrid
270    if (usepaw==1) mqgrid_vl=mqgriddg
271 
272 !  Compute the memory needs for this data set.
273    if(response==0)then
274 
275      if (dtsets(idtset)%usewvl == 0) then
276        mkmem=dtsets(idtset)%mkmem
277        mband=maxval(dtsets(idtset)%nband(1:nkpt*nsppol))
278 
279        ! Don't perform memory tests if MBPT.
280        mem_test = dtsets(idtset)%mem_test
281        if (any(dtsets(idtset)%optdriver == [RUNL_SIGMA, RUNL_SCREENING, RUNL_BSE, RUNL_EPH, RUNL_GWR])) mem_test = 0
282 
283        call memory(n1xccc,extrapwf,getcell,idtset,dtsets(idtset)%icoulomb,&
284 &       intxc,dtsets(idtset)%ionmov,iout,densfor_pred,&
285 &       iprcel,iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,mgfftdiel,mgfftf,mkmem,&
286 &       mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,mqgrid_vl,natom,nband,dtsets(idtset)%nfft,nfftdiel,nfftf,&
287 &       dtsets(idtset)%ngfft,ngfftdiel,ngfftf,dtsets(idtset)%nimage,nkpt,nloalg,npsp,npulayit,npwdiel,nspden,nspinor,&
288 &       nsppol,nsym,ntypat,occopt,optforces,mem_test,optstress,pawcpxocc,pawmixdg,&
289 &       pawnhatxc,pawspnorb,pawstgylm,prtvol,pspheads,qphon,dtsets(idtset)%tfkinfunc,&
290 &       dtsets(idtset)%typat,ucvol,usepaw,useylm,gpu_option,xclevel)
291      else if( dtsets(idtset)%usepaw==0) then
292        if (mpi_enregs(idtset)%me == 0) then
293          call wvl_memory(dtsets(idtset), idtset, mpi_enregs(idtset), npsp, 1, pspheads)
294        end if
295      end if
296 
297    else
298 !    Compute the value of cplex, for which one needs symrec
299      ABI_MALLOC(symq,(4,2,nsym))
300      ABI_MALLOC(symrec,(3,3,nsym))
301      do isym=1,nsym
302        call mati3inv(symrel(:,:,isym),symrec(:,:,isym))
303      end do
304      call littlegroup_q(nsym,qphon,symq,symrec,dtsets(idtset)%symafm,timrev)
305      cplex=2-timrev
306      ABI_FREE(symq)
307      ABI_FREE(symrec)
308      mkmems(1)=dtsets(idtset)%mkmem
309      mkmems(2)=dtsets(idtset)%mkqmem
310      mkmems(3)=dtsets(idtset)%mk1mem
311 
312      mem_test = dtsets(idtset)%mem_test
313 
314      call memorf(cplex,n1xccc,getcell,idtset,intxc,iout,iprcel,&
315 &     iscf,jdtset,lmnmax_eff,lnmax_eff,mband,mffmem,dtsets(idtset)%mgfft,&
316 &     mkmems,mpi_enregs(idtset),mpsang,mpssoang,mpw,mqgrid_ff,natom,nband,dtsets(idtset)%nfft,&
317 &     dtsets(idtset)%ngfft,nkpt,nloalg,nspden,nspinor,nsppol,nsym,&
318 &     ntypat,occopt,optddk,optphon,mem_test,optstrs,prtvol,useylm,gpu_option,xclevel)
319    end if
320 
321 !  Deallocate temporary arrays (when they will really be temporary !)
322    ABI_FREE(nband)
323    ABI_FREE(symrel)
324 
325  end do ! idtset
326 
327 end subroutine memory_eval

m_memeval/setmqgrid [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

  setmqgrid

FUNCTION

  Sets the number of points needed to represent the pseudopotentials in
  reciprocal space for a specified resolution.

INPUTS

  ecut=cutoff energy for the wavefunctions
  ecutdg=cutoff energy for the fine grid in case usepaw==1
  gprimd=primitive translation vectors for reciprocal space
  nptsgvec=number of points along the smallest primitive translation vector
    of the reciprocal space
  usepaw=1 if PAW is used, 0 otherwise

OUTPUT

SOURCE

2400 subroutine setmqgrid(mqgrid,mqgriddg,ecut,ecutdg,gprimd,nptsgvec,usepaw)
2401 
2402 !Arguments ------------------------------------
2403  integer , intent(inout)  :: mqgrid,mqgriddg
2404  integer , intent(in)  :: nptsgvec,usepaw
2405  real(dp), intent(in) :: ecut,ecutdg
2406  real(dp), intent(in) :: gprimd(3,3)
2407 
2408 !Local variables-------------------------------
2409  integer :: mqgrid2,mqgriddg2
2410  real(dp) :: gmax,gmaxdg,gvecnorm
2411  character(len=500) :: msg
2412 
2413 ! *************************************************************************
2414 
2415  gvecnorm=sqrt(min(dot_product(gprimd(:,1),gprimd(:,1)), &
2416 & dot_product(gprimd(:,2),gprimd(:,2)), &
2417 & dot_product(gprimd(:,3),gprimd(:,3))))
2418  gmax=one/(sqrt2*pi)*sqrt(ecut)
2419 
2420  if (mqgrid == 0) then
2421    mqgrid2=ceiling(gmax/gvecnorm*nptsgvec)
2422    mqgrid=max(mqgrid2,3001)
2423    write(msg, '(5a,i0,a)' )&
2424 &   'The number of points "mqgrid" in reciprocal space used for the',ch10,&
2425 &   'description of the pseudopotentials has been set automatically',ch10,&
2426 &   'by abinit to: ',mqgrid,'.'
2427    !ABI_COMMENT(msg)
2428  else
2429    mqgrid2=ceiling(gmax/gvecnorm*nptsgvec)
2430    if (mqgrid2>mqgrid) then
2431      write(msg, '(3a,i8,3a,i8,3a)' )&
2432 &     'The number of points "mqgrid" in reciprocal space used for the',ch10,&
2433 &     'description of the pseudopotentials is : ',mqgrid,'.',ch10,&
2434 &     'It would be better to increase it to at least ',mqgrid2,', or',ch10,&
2435 &     'let abinit choose it automatically by setting mqgrid = 0.'
2436      ABI_WARNING(msg)
2437    end if
2438  end if
2439 
2440  if (usepaw==1) then
2441    if(ecutdg<tol6)then
2442      write(msg,'(a)')'The value of (paw)ecutdg is zero or negative, which is forbidden.'
2443      ABI_ERROR(msg)
2444    end if
2445    gmaxdg=one/(sqrt2*pi)*sqrt(ecutdg)
2446    if (mqgriddg == 0) then
2447      mqgriddg2=ceiling(gmaxdg/gvecnorm*nptsgvec)
2448      mqgriddg=max(mqgriddg2,3001)
2449      write(msg, '(5a,i0,a)' )&
2450 &     'The number of points "mqgriddg" in reciprocal space used for the',ch10,&
2451 &     'description of the pseudopotentials has been set automatically',ch10,&
2452 &     'by abinit to: ',mqgriddg,'.'
2453      !ABI_COMMENT(msg)
2454    else
2455      mqgriddg2=ceiling(gmax/gvecnorm*nptsgvec)
2456      if (mqgriddg2>mqgriddg) then
2457        write(msg, '(3a,i8,3a,i8,3a)' )&
2458 &       'The number of points "mqgriddg" in reciprocal space used for the',ch10,&
2459 &       'description of the pseudopotentials (fine grid) is :',mqgriddg,'.',ch10,&
2460 &       'It would be better to increase it to at least ',mqgriddg2,', or',ch10,&
2461 &       'let abinit choose it automatically by setting mqgrid = 0.'
2462        ABI_WARNING(msg)
2463      end if
2464    end if
2465  end if
2466 
2467 end subroutine setmqgrid

m_memeval/wvl_memory [ Functions ]

[ Top ] [ m_memeval ] [ Functions ]

NAME

 wvl_memory

FUNCTION

 Estimation of the memory needed for waelet based computation job.
 According to the value of the option variable,
 might also try to allocate this amount of memory, and if it fails,
 might estimate the available memory.

INPUTS

  dtset=<type datafiles_type>contains all input variables.
  idtset=number of the current dataset
  mpi_enreg=information about MPI parallelization
  npsp=number of pseudopotentials
  option: if 0, no test of available memory
          if 1, the routine tries to allocate the estimated memory, for testing
                purposes, and if a failure occurs, the routine stops.
          if 2, like 1, but before stopping, the routine will provide
                an estimation of the available memory.
  pspheads(npsp)=<type pspheader_type>all the important information from the
   pseudopotential file header, as well as the psp file name

OUTPUT

  (only writing)

NOTES

 The estimator is the one provided by BigDFT.

SOURCE

2501 subroutine wvl_memory(dtset, idtset, mpi_enreg, npsp, option, pspheads)
2502 
2503  use defs_wvltypes
2504  use m_abi2big, only : wvl_setBoxGeometry
2505  use m_wvl_descr_psp,    only : wvl_descr_free, wvl_descr_atoms_set
2506 
2507 #if defined HAVE_BIGDFT
2508  use BigDFT_API, only: MemoryEstimator, createWavefunctionsDescriptors, deallocate_lr, &
2509       & atomic_info, memory_estimation
2510 #endif
2511 
2512 !Arguments ------------------------------------
2513   !scalars
2514   integer,intent(in) :: idtset, npsp, option
2515   type(dataset_type),intent(in) :: dtset
2516   type(MPI_type),intent(in) :: mpi_enreg
2517   !arrays
2518   type(pspheader_type),intent(in) :: pspheads(npsp)
2519 
2520 !Local variables-------------------------------
2521 #if defined HAVE_BIGDFT
2522   !scalars
2523   integer :: ityp, i, mu, nstates, me, nproc, comm
2524   character(len=500) :: msg
2525   real(dp) :: ehomo, radfine
2526   type(wvl_internal_type) :: wvl
2527   type(memory_estimation) :: peakmem
2528   !arrays
2529   real(dp) :: acell(3), rprimd(3,3), rprim(3,3)
2530   real(dp), allocatable :: radii_cf(:,:)
2531   real(dp), allocatable :: xred(:,:), xcart(:,:)
2532 #endif
2533 
2534 ! **************************************************************************
2535 
2536 #if defined HAVE_BIGDFT
2537 
2538  comm=mpi_enreg%comm_wvl
2539  me=xmpi_comm_rank(comm)
2540  nproc=xmpi_comm_size(comm)
2541 
2542  if(option<0 .or. option>2)then
2543    write(msg, '(A,A,A,A,I0,A)') ch10,&
2544 &   ' wvl_memory : BUG -',ch10,&
2545 &   '  option=',option,' while the only allowed values are 0, 1, or 2.'
2546    call wrtout(std_out,msg)
2547  end if
2548 
2549  wvl%paw%usepaw=0 !no PAW here
2550  nullify(wvl%rholoc%d)
2551  nullify(wvl%rholoc%msz)
2552  nullify(wvl%rholoc%rad)
2553  nullify(wvl%rholoc%radius)
2554  nullify(wvl%paw%spsi)
2555  nullify(wvl%paw%indlmn)
2556  nullify(wvl%paw%spsi)
2557  nullify(wvl%paw%indlmn)
2558 
2559  write(msg,*)' wvl_memory : analysis of memory needs '
2560  call wrtout(std_out,msg)
2561 
2562  if(idtset>=100)then
2563    write(msg,'(80a,a,a,i5,a)')('=',mu=1,80),ch10,&
2564 &   ' Values of the parameters that define the memory need for DATASET', idtset,&
2565 &   ' (WVL).'
2566  else if(idtset/=0)then
2567    write(msg,'(80a,a,a,i3,a)')('=',mu=1,80),ch10,&
2568 &   ' Values of the parameters that define the memory need for DATASET', idtset,&
2569 &   ' (WVL).'
2570  else
2571    write(msg,'(80a,a,a,a)')('=',mu=1,80),ch10,&
2572 &   ' Values of the parameters that define the memory need of the present run',&
2573 &   ' (WVL).'
2574  end if
2575  call wrtout(ab_out,msg)
2576  call wrtout(std_out,msg)
2577 
2578  write(msg,'( a,f7.3,a,i7,2(a,F7.3),a,a,f7.3,a,i7 )' ) &
2579 & '  wvl_hgrid =', dtset%wvl_hgrid , '   nwfshist =', dtset%nwfshist, &
2580 & ' wvl_crmult =', dtset%wvl_crmult, ' wvl_frmult =', dtset%wvl_frmult, ch10,&
2581 & '  tl_radius =', dtset%tl_radius , '  tl_nprccg =', dtset%tl_nprccg
2582  call wrtout(ab_out,msg)
2583  call wrtout(std_out,msg)
2584 
2585  if (dtset%nsppol == 2) then
2586    nstates = dtset%nelect
2587  else
2588    nstates = dtset%mband
2589  end if
2590  write(msg,'(4(a,i7))')&
2591 & '      natom =', dtset%natom, '     ntypat =', dtset%ntypat, &
2592 & '    nstates =', nstates,     '     nsppol =', dtset%nsppol
2593  call wrtout(ab_out,msg)
2594  call wrtout(std_out,msg)
2595 
2596  write(msg,'(80a)') ('=',mu=1,80)
2597  call wrtout(ab_out,msg)
2598  call wrtout(std_out,msg)
2599 
2600 !First, use eleconf to get radii_cf().
2601  ABI_MALLOC(radii_cf,(npsp, 3))
2602  do ityp = 1, npsp, 1
2603    call atomic_info(int(pspheads(ityp)%znuclpsp), int(pspheads(ityp)%zionpsp), ehomo = ehomo)
2604 
2605 !  new method for assigning the radii
2606    radii_cf(ityp, 1) = one / sqrt(abs(two * ehomo))
2607    radfine = 100.d0
2608    do i = 0, 4, 1
2609      if (pspheads(ityp)%GTHradii(i) /= zero) then
2610        radfine = min(radfine, pspheads(ityp)%GTHradii(i))
2611      end if
2612    end do
2613    radii_cf(ityp,2) = radfine
2614  end do
2615 
2616 !Compute the shifted positions and acell
2617  acell = dtset%acell_orig(1:3,1)
2618  call wvl_descr_atoms_set(acell, dtset%icoulomb, dtset%natom, dtset%ntypat, dtset%typat, wvl)
2619  ABI_MALLOC(xred,(3, dtset%natom))
2620  xred = dtset%xred_orig(:,:,1)
2621  rprimd = dtset%rprimd_orig(1:3,1:3,1)
2622  wvl%h(:) = dtset%wvl_hgrid
2623  call wvl_setBoxGeometry(1, radii_cf, rprimd, xred, &
2624 & wvl, dtset%wvl_crmult, dtset%wvl_frmult)
2625 !Compute acell and rprim from rprimd
2626  call mkradim(acell,rprim,rprimd)
2627  ABI_MALLOC(xcart,(3, dtset%natom))
2628  call xred2xcart(dtset%natom, rprimd, xcart, xred)
2629  call createWavefunctionsDescriptors(me, wvl%h(1), wvl%h(2), wvl%h(3), &
2630 & wvl%atoms, xcart, radii_cf, dtset%wvl_crmult, dtset%wvl_frmult, wvl%Glr)
2631  call MemoryEstimator(nproc, dtset%nwfshist, wvl%Glr, &
2632 & dtset%mband, dtset%nspinor, dtset%nkpt, 0, dtset%nsppol, &
2633 & 0, dtset%iscf, peakmem)
2634 
2635  call deallocate_lr(wvl%Glr)
2636  call wvl_descr_free(wvl)
2637  ABI_FREE(radii_cf)
2638  ABI_FREE(xred)
2639  ABI_FREE(xcart)
2640 
2641  write(msg,'(80a,a)') ('=',mu=1,80), ch10
2642  call wrtout(ab_out,msg)
2643  call wrtout(std_out,msg)
2644 
2645 #else
2646  BIGDFT_NOTENABLED_ERROR()
2647  if (.false.) write(std_out,*) idtset,npsp,option,dtset%nstep,mpi_enreg%nproc,pspheads(1)%zionpsp
2648 #endif
2649 
2650 end subroutine wvl_memory