TABLE OF CONTENTS
- ABINIT/m_gwls_hamiltonian
- m_gwls_hamiltonian/build_H
- m_gwls_hamiltonian/build_vxc
- m_gwls_hamiltonian/destroy_H
- m_gwls_hamiltonian/dft_xc_energy
- m_gwls_hamiltonian/DistributeValenceKernel
- m_gwls_hamiltonian/DistributeValenceWavefunctions
- m_gwls_hamiltonian/exchange
- m_gwls_hamiltonian/g_to_r
- m_gwls_hamiltonian/gr_to_g
- m_gwls_hamiltonian/Hpsik
- m_gwls_hamiltonian/Hpsikc
- m_gwls_hamiltonian/kbkb_to_kb
- m_gwls_hamiltonian/pc_k_valence_kernel
- m_gwls_hamiltonian/precondition
- m_gwls_hamiltonian/precondition_cplx
- m_gwls_hamiltonian/set_precondition
- m_gwls_hamiltonian/sqrt_vc_k
- m_gwls_hamiltonian/unset_precondition
- m_gwls_hamiltonian/wf_block_distribute
ABINIT/m_gwls_hamiltonian [ Modules ]
NAME
m_gwls_hamiltonian
FUNCTION
COPYRIGHT
Copyright (C) 2009-2024 ABINIT group (JLJ, BR, MC) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 23 module m_gwls_hamiltonian 24 25 ! local modules 26 use m_gwls_utility 27 use m_gwls_wf 28 use m_dtset 29 30 ! abinit modules 31 use m_bandfft_kpt 32 use m_cgtools 33 use defs_basis 34 use m_abicore 35 use m_xmpi 36 use m_pawang 37 use m_errors 38 use m_ab7_mixing 39 use m_mpinfo 40 use m_crystal 41 42 use defs_abitypes, only : MPI_type 43 use m_io_tools, only : get_unit 44 use m_hamiltonian, only : gs_hamiltonian_type, copy_hamiltonian 45 use m_pawcprj, only : pawcprj_type 46 use m_vcoul, only : vcoul_t 47 use m_gsphere, only : gsphere_t 48 use m_bz_mesh, only : kmesh_t, find_qmesh 49 use m_fft, only : fftpac, fourwf 50 use m_getghc, only : getghc 51 use m_io_kss, only : make_gvec_kss 52 53 implicit none 54 save 55 private
m_gwls_hamiltonian/build_H [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
build_H
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1508 subroutine build_H(dtset2,mpi_enreg2,cpopt2,cg2,gs_hamk2,kg_k2,kinpw2) 1509 1510 !use m_bandfft_kpt 1511 use m_cgtools 1512 use m_wfutils 1513 1514 !Arguments of gw_sternheimer, reveived as argument by build_H------------------------- 1515 type(dataset_type), intent(in) :: dtset2 1516 type(MPI_type), intent(in) :: mpi_enreg2 1517 type(gs_hamiltonian_type), intent(inout) :: gs_hamk2 1518 1519 integer, intent(in) :: cpopt2 1520 integer, intent(in) :: kg_k2(3,gs_hamk2%npw_k) 1521 !Since mcg = npw_k*nband when there is only gamma, the next line works. If there is not only Gamma, mcg is the proper size of cg. 1522 real(dp), intent(in) :: cg2(2,dtset2%mpw*dtset2%nspinor*dtset2%mband*dtset2%mkmem*dtset2%nsppol) 1523 real(dp), intent(in) :: kinpw2(gs_hamk2%npw_k) 1524 !Local variables : most of them are in the module for now. 1525 real(dp) :: commutation_error 1526 integer :: io_unit 1527 integer :: io_unit_debug 1528 integer :: ierr 1529 integer :: cplx 1530 integer :: j, k 1531 integer :: dimph3d 1532 integer :: mb 1533 1534 integer :: mpi_communicator 1535 1536 character(128):: filename_debug 1537 1538 1539 real(dp), allocatable :: wfk_tmp1(:,:) ,wfk_tmp2(:,:) 1540 1541 ! ************************************************************************* 1542 1543 ! Hartree-Fock cannot be used with GWLS. 1544 if(dtset2%usefock==1 .and. associated(gs_hamk2%fockcommon)) then 1545 ABI_ERROR(' build_H : Hartree-Fock option can not be used with optdriver==66 (GWLS calculations).') 1546 end if 1547 1548 !First we copy the data structure types 1549 dtset = dtset2%copy() 1550 1551 call copy_mpi_enreg(mpi_enreg2,mpi_enreg) 1552 1553 call copy_hamiltonian(gs_hamk,gs_hamk2) 1554 1555 !Then we copy the standard types 1556 cpopt = cpopt2 1557 npw_k = gs_hamk2%npw_k 1558 1559 ABI_MALLOC(cg,(2,dtset%mpw*dtset%nspinor*dtset%mband*dtset%mkmem*dtset%nsppol)) 1560 cg = cg2 1561 1562 ABI_MALLOC(vlocal,(gs_hamk2%n4,gs_hamk2%n5,gs_hamk2%n6,gs_hamk2%nvloc)) 1563 vlocal = gs_hamk2%vlocal 1564 gs_hamk%vlocal => vlocal 1565 1566 ABI_MALLOC(kg_k,(3,npw_k)) 1567 kg_k = kg_k2 1568 ABI_MALLOC(kinpw,(npw_k)) 1569 kinpw = kinpw2 1570 1571 dimffnl=0; if (blocksize<=1) dimffnl=size(gs_hamk2%ffnl_k,2) 1572 ABI_MALLOC(ffnl,(npw_k,dimffnl,gs_hamk%lmnmax,gs_hamk%ntypat)) 1573 dimph3d=0; if (blocksize<=1) dimph3d=gs_hamk2%matblk 1574 ABI_MALLOC(ph3d,(2,npw_k,dimph3d)) 1575 1576 !Initializing variables from dataset 1577 nfft = dtset%nfft 1578 nline = dtset%nline 1579 tolwfr = dtset%tolwfr 1580 ngfft = dtset%ngfft 1581 mcg = dtset%mpw*dtset%nspinor*dtset%mband*dtset%mkmem*dtset%nsppol 1582 nspinor=dtset%nspinor 1583 n1=ngfft(1) 1584 n2=ngfft(2) 1585 n3=ngfft(3) 1586 n4=ngfft(4) 1587 n5=ngfft(5) 1588 n6=ngfft(6) 1589 mgfft=maxval(ngfft(1:3)) 1590 nbandv = int(dtset%nelect)/2 1591 ABI_MALLOC(istwfk,(dtset%nkpt)) 1592 istwfk(:)=dtset%istwfk 1593 1594 !Initializing variables from gs_hamk 1595 ucvol = gs_hamk%ucvol 1596 ABI_MALLOC(gbound,(2*mgfft+8,2)) 1597 !gbound = gs_hamk%gbound_k !Must be done later for bandft paralelism 1598 1599 !Parameters which need to be set by hand for now... 1600 weight = 1 ! The weight of the k-pts, which sum to 1. 1601 tim_fourwf = 0 1602 ckpt = 1 ! The kpt of the wf to FFT. We only consider gamma point for now. 1603 1604 ! ndat = 1 used to be hard coded, which works fine for FFT only parallelism. 1605 ndat = 1 ! # of FFT to do in parallel in fourwf 1606 1607 eshift = 0.0 ! For PAW, opt_sij=-1 gives <G|H-lambda.S|C> in ghc instead of <G|H|C> 1608 sij_opt = 0 ! Option in PAW to tell getghc what to compute... 1609 tim_getghc = 0 ! timing code of the calling subroutine(can be set to 0 if not attributed) 1610 type_calc = 0 ! option governing which part of Hamitonian is to be applied: 0: whole Hamiltonian 1611 nband = dtset%mband 1612 ispden = 1 !When required, the spin index to be used. We don't support spin polarized calculations yet... 1613 1614 1615 1616 !======================================================================================================================== 1617 ! Trying to implement band parallelism (Bruno, 14/10/2013) 1618 !------------------------------------------------------------------------------------------------------------------------ 1619 ! 1620 ! Band parallelism is described in the article on the LOBPCG method by F. Bottin et al, 1621 ! "Large scale ab initio calculations based on three levels of parallelisation", Computational Materials Science 42 (2008) 329-336 1622 ! 1623 ! The article describes the ABINIT implementation specifically, suggesting that the information there is directly related 1624 ! to the code. 1625 ! 1626 ! The main points in order to understand band + FFT parallelism are: 1627 ! 1628 ! 1) The processors are arranged in a 2D Cartesian MPI topology, or dimensions M x P, with 1629 ! M = nproc_bands 1630 ! P = nproc_fft 1631 ! There are of course M x P processors 1632 ! 1633 ! 2) the wavefunction coefficients (namely the array cg) are distributed on these processors. Unfortunately, 1634 ! two different distribution schemes are necessary: one for linear algebra (matrix products, etc), and one 1635 ! consistent with the Goedecker FFTs. The code must thus change the data distribution back and forth. 1636 ! 1637 ! 3) the "linear algebra" distribution describes the array cg directly. The G vectors are distributed among all 1638 ! M x P processors, such that npw = npw_tot / (nproc_bands x nproc_fft). Every processor has information about 1639 ! all bands. Schematically: 1640 ! 1641 ! 1642 ! <- P = nproc_fft -> 1643 ! ---------------------------------- 1644 ! | n | n | n | n | ( for all band indices n) 1645 ! ^ | G_11 | ... | ... | G_P1 | 1646 ! | ---------------------------------- 1647 ! M = nproc_bands | n | n | n | n | 1648 ! | | G_1. | ... | ... | G_P. | 1649 ! v ---------------------------------- 1650 ! | n | n | n | n | 1651 ! | G_1M | ... | ... | G_PM | 1652 ! ---------------------------------- 1653 ! 1654 ! 1655 ! the LOBPCG algorithm acts on blocks of bands. Thus, although each processor has information about all bands, 1656 ! we must conceptually view the bands grouped in blocks of size M, as {c_{1}(G),...,c_{M}(G)}, 1657 ! {c_{M+1}(G)...,c_{2M}(G)}, ... 1658 ! 1659 ! With this conceptual grouping, for a given block each processor has M x NG/(M x P) coefficients, where NG is the 1660 ! total number of G-vectors. 1661 ! 1662 ! 4) The distribution of information above is not appropriate for parallel FFTs. The data for one block is thus 1663 ! transposed and takes the form 1664 ! 1665 ! <- P = nproc_fft -> 1666 ! ---------------------------------- 1667 ! | 1 | 1 | ... | 1 | 1668 ! ^ | G_1 | G_2 | ... | G_P | 1669 ! | ---------------------------------- 1670 ! M = nproc_bands | .. | ... | ... | ... | 1671 ! | | G_1 | G_2 | ... | G_P | 1672 ! v ---------------------------------- 1673 ! | M | M | ... | M | 1674 ! | G_1 | G_2 | ... | G_P | 1675 ! ---------------------------------- 1676 ! 1677 ! where the set of G vectors G_i = (G_{i1}, G_{i2}, ..., G_{iM}). Thus, a given processor has information about a single 1678 ! band, but more G vectors. Each processor has 1679 ! NG/(M x P) x M = NG/P coefficients, just like before. 1680 ! Also, it is clear that the information is only communicated in the columns of the diagram above, avoiding 1681 ! "all processors to all processors" communication. 1682 ! 1683 ! The data is now distributed properly to do parallel FFTs! Each row in the diagram above corresponds to FFTs done 1684 ! in parallel over nproc_fft processors, on a given band. There are M rows running thus in parallel! 1685 ! 1686 ! The underlying ABINIT routines are equiped to handle FFT parallelism, not band distributed parallelism. 1687 ! prep_getghc.F90 and lobpgcwf.F90 show how to rearange information to be able to apply basic ABINIT routines. 1688 ! 1689 ! The code below is inspired / guessed from lobpcgwf and prep_getghc. WE ASSUME THERE IS NO SPINORS! CODE SHOULD 1690 ! BE HEAVILY REVIEWED for k-points and spinors, if and when they come to be useful. 1691 ! 1692 !======================================================================================================================== 1693 1694 ! variables for band parallelism, following the language in lobpcgwf 1695 nspinor = 1 ! how many spinors are present. Hardcoded to 1 for now. 1696 blocksize = mpi_enreg%nproc_band*mpi_enreg%bandpp ! how many bands treated in a block; # of FFT done in parallel in getghc 1697 nbdblock = nband/blocksize ! how many blocks of bands are there 1698 ikpt_this_proc = 1 ! Assuming only 1 kpoint, for molecules. 1699 ! This will have to be reviewed for crystals! 1700 1701 npw_kb = npw_k*blocksize 1702 npw_g = gs_hamk2%npw_fft_k 1703 1704 if (blocksize>1) then 1705 kg_k_gather => bandfft_kpt(ikpt_this_proc)%kg_k_gather 1706 ffnl_gather => bandfft_kpt(ikpt_this_proc)%ffnl_gather 1707 ph3d_gather => bandfft_kpt(ikpt_this_proc)%ph3d_gather 1708 kinpw_gather => bandfft_kpt(ikpt_this_proc)%kinpw_gather 1709 else 1710 ffnl = gs_hamk2%ffnl_k 1711 ph3d = gs_hamk2%ph3d_k 1712 kg_k_gather => kg_k 1713 ffnl_gather => ffnl 1714 ph3d_gather => ph3d 1715 kinpw_gather => kinpw 1716 endif 1717 call gs_hamk%load_k(kinpw_k=kinpw_gather,kg_k=kg_k_gather,ffnl_k=ffnl_gather,ph3d_k=ph3d_gather) 1718 1719 gbound = gs_hamk%gbound_k 1720 1721 !Set up wfk routines 1722 call set_wf(ucvol/nfft,n4,n5,n6,npw_k,blocksize,npw_g,mpi_enreg%comm_bandfft,mpi_enreg%comm_fft,mpi_enreg%comm_band) 1723 1724 ! prepare the valence wavefunctions and projection operator to work in band+FFT parallel 1725 call DistributeValenceWavefunctions() 1726 call DistributeValenceKernel() 1727 1728 cplx = 2 ! wavefunctions have complex coefficients 1729 1730 ! Initialize the dummy variables 1731 ABI_MALLOC(conjgrprj,(0,0)) 1732 ABI_MALLOC(dummy2,(0,0)) 1733 ABI_MALLOC(dummy3,(0,0,0)) 1734 1735 !Initialisation of the total counter for iterations of SQMR : 1736 ktot = 0 1737 1738 !Allocations of working arrays for the module 1739 !- for pc_k function 1740 ABI_MALLOC(scprod2,(2,nband)) 1741 1742 !- for precondition function (and set_precondition subroutine) 1743 ABI_MALLOC(pcon,(npw_g)) 1744 pcon = one 1745 1746 !- for (private) working wf 1747 ABI_MALLOC(psik1,(2,npw_k)) 1748 ABI_MALLOC(psik2,(2,npw_k)) 1749 ABI_MALLOC(psik3,(2,npw_k)) 1750 ABI_MALLOC(psik4,(2,npw_k)) 1751 ABI_MALLOC(psikb1,(2,npw_kb)) 1752 ABI_MALLOC(psikb2,(2,npw_kb)) 1753 ABI_MALLOC(psikb3,(2,npw_kb)) 1754 ABI_MALLOC(psikb4,(2,npw_kb)) 1755 ABI_MALLOC(psig1,(2,npw_g)) 1756 ABI_MALLOC(psig2,(2,npw_g)) 1757 ABI_MALLOC(psig3,(2,npw_g)) 1758 ABI_MALLOC(psig4,(2,npw_g)) 1759 ABI_MALLOC(psir1,(2,n4,n5,n6)) 1760 ABI_MALLOC(psir2,(2,n4,n5,n6)) 1761 ABI_MALLOC(psir3,(2,n4,n5,n6)) 1762 ABI_MALLOC(psidg,(2,nfft)) 1763 ABI_MALLOC(denpot,(2*n4,n5,n6)) 1764 1765 psir1 = zero 1766 psir2 = zero 1767 psir3 = zero 1768 denpot = zero 1769 1770 !Construct the vector of eigenvalues and write then to std output 1771 ABI_MALLOC(eig,(nband)) 1772 eig=zero 1773 1774 write(std_out,*) ch10,"Eigenvalues computation check, routine build_H:",ch10 1775 io_unit_debug = get_unit() 1776 write(filename_debug,'(A,I0.4,A)') "DEBUG_PROC=",mpi_enreg%me,".dat" 1777 1778 1779 1780 mpi_communicator = mpi_enreg%comm_bandfft 1781 1782 open(file=filename_debug,status=files_status_old,unit=io_unit_debug) 1783 1784 write(io_unit_debug,'(A)') " Parameters:" 1785 write(io_unit_debug,'(A,I5)') " nband = ", nband 1786 write(io_unit_debug,'(A,I5)') " blocksize = ", blocksize 1787 write(io_unit_debug,'(A,I5)') " npw_k = ", npw_k 1788 write(io_unit_debug,'(A,I5)') " nbdblock = ", nbdblock 1789 1790 ! temporary wavefunction array, for data in the "linear algebra" distribution 1791 ABI_MALLOC( wfk_tmp1, (2,npw_k)) 1792 ABI_MALLOC( wfk_tmp2, (2,npw_k)) 1793 1794 do n=1, nband 1795 ! Extract i^t/h wavefunction 1796 wfk_tmp1(:,1:npw_k) = cg(:,(n-1)*npw_k+1:n*npw_k) 1797 1798 ! Are the wavefunctions normalized? 1799 !tmpc = cg_zdotc(npw_k,wfk_tmp1,wfk_tmp1) 1800 !call xmpi_sum(tmpc,mpi_enreg%comm_bandfft,ierr) ! sum on all processors 1801 1802 ! DEBUGGING CODE 1803 write(std_out,'(A,I5,A,2F24.16)') "band ", n, ", <psi_n | psi_n > =", norm_k(wfk_tmp1) 1804 write(io_unit_debug,'(A,I5,A,2F24.16)') "band ", n, ", <psi_n | psi_n > =", norm_k(wfk_tmp1) 1805 flush(io_unit_debug) 1806 end do 1807 1808 1809 ! loop on blocks of bands. All bands in a given block will be done in parallel! 1810 1811 ! loop on block of bands 1812 do iblock = 1, nbdblock 1813 1814 ! loop on states for this block 1815 do iband = 1, blocksize 1816 1817 n = (iblock-1)*blocksize+iband 1818 1819 psikb1(:,(iband-1)*npw_k+1:iband*npw_k) = cg(:,(n-1)*npw_k+1:n*npw_k) 1820 1821 end do 1822 1823 ! change configuration of the data 1824 call wf_block_distribute(psikb1, psig1,1) ! LA -> FFT 1825 1826 ! Apply hamiltonian on wavefunction. In bandFFT parallel, Hpsik calls prep_getghc, which knows how to transform the 1827 ! distribution of the data from "linear algebra"-like to "FFT"-like. The output is *probably* in the "linear algebra"-like 1828 ! distribution. 1829 call Hpsik(psig2,psig1) 1830 1831 ! change configuration of the data 1832 call wf_block_distribute(psikb2, psig2,2) ! LA -> FFT 1833 1834 ! extract the wavefunctions band by band 1835 do iband=1, blocksize 1836 1837 n = blocksize*(iblock-1) + iband ! band index 1838 1839 wfk_tmp1(:,1:npw_k) = psikb1(:,(iband-1)*npw_k+1:iband*npw_k) 1840 wfk_tmp2(:,1:npw_k) = psikb2(:,(iband-1)*npw_k+1:iband*npw_k) 1841 1842 tmpc = cg_zdotc(npw_k,wfk_tmp1,wfk_tmp2) 1843 1844 call xmpi_sum(tmpc,mpi_enreg%comm_bandfft,ierr) ! sum on all processors 1845 eig(n) = tmpc(1) 1846 1847 write(std_out,'(A,I5,A,F24.16,A)') "(build_H) band ", n, ", eig =", eig(n), " Ha." 1848 1849 ! DEBUGGING CODE 1850 write(io_unit_debug,'(A,I5,A,F24.16,A)') "band ", n, ", eig =", eig(n), " Ha." 1851 flush(io_unit_debug) 1852 flush(io_unit_debug) 1853 end do 1854 1855 end do 1856 1857 ABI_FREE( wfk_tmp1 ) 1858 ABI_FREE( wfk_tmp2 ) 1859 close(io_unit_debug) 1860 1861 1862 1863 ! Finishing the construction of vxc (transcribing it from the double real grid (for the density) 1864 ! to the single real grid (for the wfs). 1865 ! Assummes we only need one spin component; one transcription per spin being needed. 1866 if(allocated(vxc_dg)) then 1867 ABI_MALLOC(vxc,(n4,n5,n6,dtset%nspden)) 1868 vxc = zero 1869 call fftpac(ispden,mpi_enreg,dtset%nspden,n1,n2,n3,n4,n5,n6,dtset%ngfft,vxc_dg(:,ispden),vxc(:,:,:,ispden),2) 1870 end if 1871 1872 timrev = 1 !Assumes istwfk *1. 1:time reversal symmetry NOT present | 2:" " " present 1873 ABI_MALLOC(title,(dtset%ntypat)) 1874 do i=1,dtset%ntypat 1875 title(i) = "Bloup" ! The clean way would be to get the psps structure in this module 1876 ! (build_vxc is called from a place in GS calculations where it is available; 1877 ! should be the easiest way). For now, this allows the code to run. 1878 end do 1879 call crystal_init(dtset%amu_orig(:,1),Cryst,dtset%spgroup,dtset%natom,dtset%npsp,& 1880 & dtset%ntypat,dtset%nsym,dtset%rprimd_orig(:,:,1),dtset%typat,& 1881 & dtset%xred_orig(:,:,1),dtset%ziontypat,dtset%znucl,timrev,.false.,.false.,title,& 1882 & dtset%symrel,dtset%tnons,dtset%symafm) 1883 ABI_FREE(title) 1884 call Cryst%print() 1885 1886 !TODO : Should be put in a separate build_vc constructor, and should be called right after build_H in the context of optdriver 66. 1887 if(dtset%optdriver==66) then 1888 1889 !Set up of the k-points and tables in the whole BZ 1890 call Kmesh%init(Cryst,dtset%nkpt,dtset%kptns,dtset%kptopt,wrap_1zone=.false.) 1891 call Kmesh%print("K-mesh for the wavefunctions",std_out) 1892 call find_qmesh(Qmesh,Cryst,Kmesh) 1893 call Qmesh%print("Q-mesh for the screening function",std_out) 1894 1895 1896 !------------------------------ 1897 !Building the vc_sqrt structure 1898 !------------------------------ 1899 ! The sqrt_vc code relies on a different parallelism scheme than Vanilla ABINIT. 1900 ! The Gsphere object must be built on a single processor having all the G-vectors 1901 ! available. 1902 1903 !gsph_init need gvec built according to the KSS convention, so that kg_k is not suitable (not properly sorted). 1904 npw_serial=npw_k 1905 call xmpi_sum(npw_serial,mpi_enreg%comm_bandfft,ierr) 1906 1907 ecut_eff = dtset%ecut*(dtset%dilatmx)**2 1908 call make_gvec_kss(dtset%nkpt,dtset%kptns,ecut_eff,dtset%symmorphi,dtset%nsym,dtset%symrel,dtset%tnons,Cryst%gprimd,& 1909 & dtset%prtvol,npw_serial,gvec,ierr) 1910 1911 call Gsphere%init(Cryst,npw_serial,gvec=gvec) 1912 1913 call Gsphere%print() 1914 1915 call Vcp%init(Gsphere,Cryst,Qmesh,Kmesh,dtset%rcut,dtset%gw_icutcoul,dtset%vcutgeo,dtset%ecutsigx,npw_serial,& 1916 dtset%nkpt,dtset%kptns,mpi_enreg%comm_world) 1917 1918 ! Since Vcp%vc_sqrt is sorted according to the KSS convention for G vectors 1919 ! BUT will be applied to GS wavefunctions (where G vectors are sorted otherwise) 1920 ! It is necessary to construct a vc_sqrt vector with the GS sorting. 1921 ! Moreover, if we are in parallel (over band / FFTs), Vcp%vc_sqrt is the serial version 1922 ! and need to be distributed according to the GS scheme (Linear Algebra configuration). 1923 ABI_MALLOC(vc_sqrt,(npw_k)) 1924 vc_sqrt=zero 1925 k=0 1926 do i=1,npw_k 1927 do j=1,npw_serial 1928 if(all(kg_k(:,i)==gvec(:,j))) k=j 1929 end do 1930 vc_sqrt(i)=Vcp%vc_sqrt(k,1) 1931 end do 1932 1933 end if 1934 1935 !-------------------------------------------------------------------------------- 1936 ! Security check : The eigenstates of the projector and the hamiltonian need to 1937 ! agree down to the precision requested (tolwfr). Otherwise, SQMR is doomed. 1938 ! Now that the density is read and the eigenstates are calculated in the GW run, 1939 ! it is less useful. A test on tolwfr could be sufficient. 1940 ! 1941 ! This remains a good tool to debug band+fft parallelism 1942 !-------------------------------------------------------------------------------- 1943 1944 ! only write on the head node! 1945 if (mpi_enreg%me == 0) then 1946 io_unit = get_unit() 1947 open(file='build_H.log',status=files_status_old,unit=io_unit) 1948 write(io_unit,10) '#----------------------------------------------------------------------------' 1949 write(io_unit,10) '#' 1950 write(io_unit,10) '# This file presents the results of a small test to check ' 1951 write(io_unit,10) '# how well the Hamiltonian commutes with the projection ' 1952 write(io_unit,10) '# operator. ' 1953 write(io_unit,10) '#' 1954 write(io_unit,10) '# Definitions:' 1955 write(io_unit,10) '#' 1956 write(io_unit,10) '# P : projections on conduction states' 1957 write(io_unit,10) '# H : Hamiltonian operator' 1958 write(io_unit,10) '# | psi > : eigenstate' 1959 write(io_unit,10) '#' 1960 flush(io_unit) 1961 end if 1962 1963 psikb1 = zero 1964 1965 ! sum all valence states, on copy in every band block. 1966 do i=1,nbandv 1967 1968 do mb = 1, blocksize 1969 1970 psikb1(:,(mb-1)*npw_k+1:mb*npw_k) = psikb1(:,(mb-1)*npw_k+1:mb*npw_k) + cg(:,(i-1)*npw_k+1:i*npw_k) 1971 1972 end do 1973 1974 end do 1975 1976 ! normalize to one! Only sum the fist band block 1977 tmpc = cg_zdotc(npw_k,psikb1,psikb1) 1978 call xmpi_sum(tmpc,mpi_communicator ,ierr) ! sum on all processors 1979 psikb1 = psikb1/sqrt(tmpc(1)) 1980 1981 1982 ! change data distribution 1983 call wf_block_distribute(psikb1, psig1,1) ! LA -> FFT 1984 1985 ! Apply P.H operator 1986 call Hpsik(psig2 ,psig1) 1987 call pc_k_valence_kernel(psig2) 1988 1989 ! Apply H.P operator 1990 call pc_k_valence_kernel(psig1) 1991 call Hpsik(psig1) 1992 1993 ! compute error 1994 psig3 = psig1 - psig2 ! explicitly write the difference in an array 1995 tmpc = cg_zdotc(npw_g,psig3,psig3) 1996 commutation_error = tmpc(1) 1997 1998 call xmpi_sum(commutation_error , mpi_enreg%comm_fft, ierr) ! sum on all processors working on FFT! 1999 2000 ! only write on the head node! 2001 if (mpi_enreg%me == 0) then 2002 write(io_unit,20) ' || (PH -HP) |b> ||^2 = ',commutation_error 2003 write(io_unit,20) ' tolwfr = ',tolwfr 2004 end if 2005 2006 if(commutation_error > tolwfr) then 2007 !write(io_unit,10) '# || (PH -HP) |b> ||^2 > tolwfr ==> Decision taken exit!' 2008 2009 if (mpi_enreg%me == 0) write(io_unit,10) '# || (PH -HP) |b> ||^2 > tolwfr ==> This must be fixed!' 2010 2011 write(std_out,20) "WARNING-build_H: The tolerance tolwfr=",tolwfr 2012 write(std_out,20) " is smaller than the commutation error ||(PH-HP)|b>||^2=",commutation_error 2013 write(std_out,10) " Either set tolwfr to a less stringent value in this calculation" 2014 write(std_out,10) " or to a more stringent value in the wavefunction calculation." 2015 end if 2016 2017 if (mpi_enreg%me == 0) close(io_unit) 2018 2019 10 format(A) 2020 20 format(A,ES12.3) 2021 end subroutine build_H
m_gwls_hamiltonian/build_vxc [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
build_vxc
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1335 subroutine build_vxc(vxc2,nfft2,nspden2) 1336 !Only transcribe the argument vxc2 in the module; the change from dg to sg is done in build_H (and stored in vxc), since the 1337 !arguments of fftpac are built in build_H. 1338 1339 !We need the dimensions of vxc since they don't exist yet in the module; build_vxc being called before build_H. 1340 integer, intent(in) :: nfft2, nspden2 1341 real(dp), intent(in) :: vxc2(nfft2,nspden2) 1342 1343 ! ************************************************************************* 1344 1345 ABI_MALLOC(vxc_dg,(nfft2,nspden2)) 1346 vxc_dg = vxc2 1347 1348 end subroutine build_vxc
m_gwls_hamiltonian/destroy_H [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
destroy_H
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1364 subroutine destroy_H 1365 1366 call dtset%free() 1367 call gs_hamk%free() 1368 1369 call cryst%free() 1370 call Kmesh%free() 1371 call Qmesh%free() 1372 call Gsphere%free() 1373 call Vcp%free() 1374 1375 call bandfft_kpt_destroy_array(bandfft_kpt,mpi_enreg) 1376 call destroy_mpi_enreg(mpi_enreg) 1377 1378 !NOTE : the syntax if(allocated(a)) ABI_FREE(a) result in an error if "a" is not allocated; since the macro replace 1379 !ABI_MALLOC by more than one line of text, the second lines and up get outside the if... if() then syntax is equired. 1380 if(allocated(cg)) then 1381 ABI_FREE(cg) 1382 end if 1383 if(allocated(gbound)) then 1384 ABI_FREE(gbound) 1385 end if 1386 if(allocated(kg_k)) then 1387 ABI_FREE(kg_k) 1388 end if 1389 if(allocated(ffnl)) then 1390 ABI_FREE(ffnl) 1391 end if 1392 if(allocated(ph3d)) then 1393 ABI_FREE(ph3d) 1394 end if 1395 if(allocated(kinpw)) then 1396 ABI_FREE(kinpw) 1397 end if 1398 if(allocated(vxc)) then 1399 ABI_FREE(vxc) 1400 end if 1401 if(allocated(vlocal)) then 1402 ABI_FREE(vlocal) 1403 end if 1404 if(allocated(conjgrprj)) then 1405 ABI_FREE(conjgrprj) 1406 end if 1407 if(allocated(istwfk)) then 1408 ABI_FREE(istwfk) 1409 end if 1410 if(allocated(dummy2)) then 1411 ABI_FREE(dummy2) 1412 end if 1413 if(allocated(dummy3)) then 1414 ABI_FREE(dummy3) 1415 end if 1416 if(allocated(eig)) then 1417 ABI_FREE(eig) 1418 end if 1419 if(allocated(scprod2)) then 1420 ABI_FREE(scprod2) 1421 end if 1422 if(allocated(pcon)) then 1423 ABI_FREE(pcon) 1424 end if 1425 if(allocated(psik1)) then 1426 ABI_FREE(psik1) 1427 end if 1428 if(allocated(psik2)) then 1429 ABI_FREE(psik2) 1430 end if 1431 if(allocated(psik3)) then 1432 ABI_FREE(psik3) 1433 end if 1434 if(allocated(psik4)) then 1435 ABI_FREE(psik4) 1436 end if 1437 if(allocated(psikb1)) then 1438 ABI_FREE(psikb1) 1439 end if 1440 if(allocated(psikb2)) then 1441 ABI_FREE(psikb2) 1442 end if 1443 if(allocated(psikb3)) then 1444 ABI_FREE(psikb3) 1445 end if 1446 if(allocated(psikb4)) then 1447 ABI_FREE(psikb4) 1448 end if 1449 if(allocated(psig1)) then 1450 ABI_FREE(psig1) 1451 end if 1452 if(allocated(psig2)) then 1453 ABI_FREE(psig2) 1454 end if 1455 if(allocated(psig3)) then 1456 ABI_FREE(psig3) 1457 end if 1458 if(allocated(psig4)) then 1459 ABI_FREE(psig4) 1460 end if 1461 if(allocated(psir1)) then 1462 ABI_FREE(psir1) 1463 end if 1464 if(allocated(psir2)) then 1465 ABI_FREE(psir2) 1466 end if 1467 if(allocated(psir3)) then 1468 ABI_FREE(psir3) 1469 end if 1470 if(allocated(psidg)) then 1471 ABI_FREE(psidg) 1472 end if 1473 if(allocated(vxc_dg)) then 1474 ABI_FREE(vxc_dg) 1475 end if 1476 if(allocated(denpot)) then 1477 ABI_FREE(denpot) 1478 end if 1479 if(allocated(kernel_wavefunctions_FFT)) then 1480 ABI_FREE(kernel_wavefunctions_FFT) 1481 end if 1482 if(allocated(valence_wavefunctions_FFT)) then 1483 ABI_FREE(valence_wavefunctions_FFT) 1484 end if 1485 if(associated(gvec)) then 1486 ABI_FREE(gvec) 1487 end if 1488 if(allocated(vc_sqrt)) then 1489 ABI_FREE(vc_sqrt) 1490 end if 1491 1492 end subroutine destroy_H
m_gwls_hamiltonian/dft_xc_energy [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
dft_xc_energy
FUNCTION
.
INPUTS
OUTPUT
SOURCE
695 function dft_xc_energy(e) 696 697 real(dp) :: dft_xc_energy 698 integer, intent(in) :: e 699 700 701 integer :: cplex, option, ierr 702 real(dp), allocatable :: psik_e(:,:) !Working array to store the wavefunction 703 real(dp), allocatable :: psik_e_alltoall(:,:) !Working array to store the wavefunction 704 705 real(dp), allocatable :: psik_out(:,:) !Working array to store the wavefunction 706 707 real(dp) :: tmpc(2) 708 integer :: mpi_communicator 709 real(dp) :: dft_xc_energy_tmp 710 711 ! ************************************************************************* 712 713 !-------------------------------------------------------------------------------- 714 ! The goal of this routine is to compute the xc energy using 715 ! the DFT Vxc array. 716 !-------------------------------------------------------------------------------- 717 718 ! Parallel-MPI code. This is inspired from code within the getghc routine, which 719 ! applies a complex potential to a wavefunction. 720 721 722 cplex = 1 ! real potential 723 option = 2 ! multiply wavefunction by potential 724 nspinor= 1 725 ! Allocate wavefunction arrays which contains the coefficients of the e-state 726 ABI_MALLOC(psik_e, (2,npw_kb)) 727 ABI_MALLOC(psik_e_alltoall,(2,npw_g)) 728 ABI_MALLOC(psik_out, (3,npw_g)) 729 730 731 ! Only fetch the e-state, setting other states in block to zero! 732 psik_e(:,:) = zero 733 psik_e(:,1:npw_k) = cg(:,(e-1)*npw_k+1:e*npw_k) 734 psik_out(:,:) = zero 735 736 737 ! change the configuration of the data 738 call wf_block_distribute(psik_e, psik_e_alltoall,1) ! LA -> FFT 739 740 741 ! Call fourwf to generate the product, in k-space 742 ! Computation: 743 ! psik_e_alltoall(k) -> psi(r) 744 ! res(r) = (vxc(r) x psi(r)) 745 ! psik_out(k) <- res(r) 746 ! psir3 is a dummy, not used here. 747 748 call fourwf(cplex,vxc(:,:,:,ispden),psik_e_alltoall,psik_out,psir3,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,& 749 & mpi_enreg,1,ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight) 750 751 752 tmpc = cg_zdotc(npw_g, psik_e_alltoall,psik_out) 753 754 mpi_communicator = mpi_enreg%comm_fft 755 call xmpi_sum(tmpc,mpi_communicator, ierr) ! sum on all processors working on FFT! 756 757 dft_xc_energy_tmp = tmpc(1) 758 759 mpi_communicator = mpi_enreg%comm_band 760 call xmpi_sum(dft_xc_energy_tmp,mpi_communicator, ierr) ! sum on all processors working on FFT! 761 762 dft_xc_energy = dft_xc_energy_tmp 763 764 ABI_FREE(psik_e) 765 ABI_FREE(psik_e_alltoall) 766 ABI_FREE(psik_out) 767 768 end function dft_xc_energy
m_gwls_hamiltonian/DistributeValenceKernel [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
DistributeValenceKernel
FUNCTION
.
INPUTS
OUTPUT
SOURCE
276 subroutine DistributeValenceKernel() 277 !-------------------------------------------------------------------------------- 278 ! 279 ! This subroutine distributes, once and for all, the kernel of the static 280 ! SQMR operator. 281 ! 282 ! In this first, quick and dirty implementation, we simply distribute 283 ! ALL the valence bands on ALL the FFT groups. This is not efficient, but 284 ! kernel projections are not a bottleneck of the computation. 285 ! 286 ! A better (forthcoming) algorithm would only distribute the actual kernel, 287 ! not all valence bands. 288 !-------------------------------------------------------------------------------- 289 290 integer :: mb, n 291 292 real(dp), allocatable :: psik_n(:,:) !wavefunctions in LA format 293 real(dp), allocatable :: psik_n_alltoall(:,:) !wavefunctions in FFT format 294 295 ! ************************************************************************* 296 297 298 !=================================================================== 299 ! Allocate the global array which will contain the valence states 300 !=================================================================== 301 ABI_MALLOC( kernel_wavefunctions_FFT, (2,npw_g, nband)) 302 303 kernel_wavefunctions_FFT = zero 304 305 !==================================================== 306 ! Allocate working arrays 307 !==================================================== 308 309 ABI_MALLOC(psik_n, (2,npw_kb)) 310 ABI_MALLOC(psik_n_alltoall,(2,npw_g)) 311 312 313 ! loop on all valence states 314 do n = 1, nband 315 316 ! Copy multiple instances of this valence state in the array; 317 ! this way, each FFT group will have ALL the valence states! 318 do mb = 1, blocksize 319 psik_n(:,(mb-1)*npw_k+1:mb*npw_k) = cg(:,(n-1)*npw_k+1:n*npw_k) 320 end do 321 322 ! change configuration of the data 323 call wf_block_distribute(psik_n, psik_n_alltoall,1) ! LA -> FFT 324 325 ! copy data in global array 326 kernel_wavefunctions_FFT(:,:,n) = psik_n_alltoall(:,:) 327 328 end do 329 330 !==================================================== 331 ! cleanup 332 !==================================================== 333 334 ABI_FREE(psik_n) 335 ABI_FREE(psik_n_alltoall) 336 337 end subroutine DistributeValenceKernel
m_gwls_hamiltonian/DistributeValenceWavefunctions [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
DistributeValenceWavefunctions
FUNCTION
.
INPUTS
OUTPUT
SOURCE
193 subroutine DistributeValenceWavefunctions() 194 !-------------------------------------------------------------------------------- 195 ! 196 ! This subroutine distributes, once and for all, the valence wavefunctions to 197 ! the FFT configuration. They will thus be ready to be used within the 198 ! susceptibility operator. 199 ! 200 !-------------------------------------------------------------------------------- 201 integer :: iblk, mb, v 202 203 real(dp), allocatable :: psik_v(:,:) !wavefunctions in LA format 204 real(dp), allocatable :: psik_v_alltoall(:,:) !wavefunctions in FFT format 205 206 ! ************************************************************************* 207 208 209 210 !=================================================================== 211 ! Allocate the global array which will contain the valence states 212 !=================================================================== 213 ABI_MALLOC( valence_wavefunctions_FFT, (2,npw_g,nbdblock)) 214 215 valence_wavefunctions_FFT = zero 216 217 218 !==================================================== 219 ! Allocate working arrays 220 !==================================================== 221 222 ABI_MALLOC(psik_v, (2,npw_kb)) 223 ABI_MALLOC(psik_v_alltoall,(2,npw_g)) 224 225 226 227 ! loop on all blocks of states, 228 do iblk = 1, nbdblock 229 230 ! loop on valence states for this block; if the state is conduction, fill with zeros 231 do mb = 1, blocksize 232 233 v = (iblk-1)*blocksize+mb 234 235 if (v <= nbandv) then 236 psik_v(:,(mb-1)*npw_k+1:mb*npw_k) = cg(:,(v-1)*npw_k+1:v*npw_k) 237 else 238 psik_v(:,(mb-1)*npw_k+1:mb*npw_k) = zero 239 end if 240 241 end do 242 243 ! change configuration of the data 244 call wf_block_distribute(psik_v, psik_v_alltoall,1) ! LA -> FFT 245 246 ! copy data in global array 247 248 valence_wavefunctions_FFT(:,:,iblk) = psik_v_alltoall(:,:) 249 250 end do 251 252 !==================================================== 253 ! cleanup 254 !==================================================== 255 256 ABI_FREE(psik_v) 257 ABI_FREE(psik_v_alltoall) 258 259 260 end subroutine DistributeValenceWavefunctions
m_gwls_hamiltonian/exchange [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
exchange
FUNCTION
.
INPUTS
OUTPUT
SOURCE
558 function exchange(e, Lbasis_lanczos) 559 560 !use m_bandfft_kpt 561 use m_cgtools 562 !================================================================================ 563 ! This subroutine computes the exchange energy in band+FFT parallel 564 ! 565 !================================================================================ 566 real(dp) :: exchange 567 568 integer, intent(in) :: e 569 570 ! If these arguments are provided, the exchange energy is to be projected on this subspace 571 complex(dpc), optional, intent(in) :: Lbasis_lanczos(:,:) ! complex array which contains the Lanczos basis 572 573 real(dp), allocatable :: psik_e(:,:) !Working array to store the wavefunction 574 575 real(dp), allocatable :: psik_v(:,:) !Working array to store the wavefunction 576 577 real(dp), allocatable :: psik_out(:,:) !Working array to store the wavefunction 578 579 580 integer :: iblk, mb 581 582 integer :: l, lmax 583 584 real(dp) :: tmpc(2) 585 586 logical :: project 587 588 ! ************************************************************************* 589 590 !-------------------------------------------------------------------------------- 591 ! Determine if the exhcange energy must be projected on the Lanczos basis 592 ! a truncated Coulomb potential 593 !-------------------------------------------------------------------------------- 594 595 project = .false. 596 if (present(Lbasis_lanczos)) then 597 project = .true. 598 lmax = size(Lbasis_lanczos, 2) 599 end if 600 601 !-------------------------------------------------------------------------------- 602 ! The goal of this routine is to compute the exchange energy using 603 ! a truncated Coulomb potential 604 !-------------------------------------------------------------------------------- 605 606 exchange = 0.0_dp 607 608 !==================================================== 609 ! build the block of wavefunctions which will 610 ! contain copies the e-state 611 !==================================================== 612 613 ABI_MALLOC(psik_e,(2,npw_kb)) 614 615 ! fill psik_e with as many copies of the e-state as there are band processors; 616 ! that way, upon LA -> FFT, each row of fft processors will have the e-state 617 do v = 1, blocksize 618 psik_e(:,(v-1)*npw_k+1:v*npw_k) = cg(:,(e-1)*npw_k+1:e*npw_k) 619 end do 620 621 !==================================================== 622 ! Allocate the block of wavefunctions which will 623 ! contain the valence states 624 !==================================================== 625 626 ABI_MALLOC(psik_v, (2,npw_kb)) 627 ABI_MALLOC(psik_out, (2,npw_kb)) 628 629 ! loop on all blocks of states, 630 do iblk = 1, nbdblock 631 632 ! loop on valence states for this block; if the state is conduction, fill with zeros 633 do mb = 1, blocksize 634 635 v = (iblk-1)*blocksize+mb 636 637 if (v <= nbandv) then 638 psik_v(:,(mb-1)*npw_k+1:mb*npw_k) = cg(:,(v-1)*npw_k+1:v*npw_k) 639 else 640 psik_v(:,(mb-1)*npw_k+1:mb*npw_k) = zero 641 end if 642 643 end do 644 645 call kbkb_to_kb(psik_out,psik_v,psik_e) 646 647 ! apply Coulomb potential, and take norm: cumulate the exchange energy 648 do mb = 1, blocksize 649 650 psik1 = psik_out(:,(mb-1)*npw_k+1:mb*npw_k) 651 call sqrt_vc_k(psik1) 652 653 if (project) then 654 ! project on the Lanczos basis 655 do l = 1, lmax 656 psik2(1,:) = dble(Lbasis_lanczos(:,l)) 657 psik2(2,:) = dimag(Lbasis_lanczos(:,l)) 658 tmpc = scprod_k(psik2,psik1) 659 exchange = exchange - (tmpc(1)**2+tmpc(2)**2) 660 end do 661 else 662 ! compute the "exact" exchange energy 663 exchange = exchange - norm_k(psik1)**2 664 end if 665 666 end do 667 668 end do 669 670 ABI_FREE(psik_e) 671 672 ABI_FREE(psik_v) 673 674 ABI_FREE(psik_out) 675 676 end function exchange
m_gwls_hamiltonian/g_to_r [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
g_to_r
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1175 subroutine g_to_r(psi_out,psi_in) 1176 1177 real(dp), intent(out) :: psi_out(2,n4,n5,n6) 1178 real(dp), intent(in) :: psi_in(2,npw_g) 1179 integer :: option, cplex 1180 integer :: nproc_fft, me_fft, nd3 !, ierr 1181 1182 ! ************************************************************************* 1183 1184 option = 0 ! fft wavefunction to real space 1185 cplex = 2 ! complex potential 1186 1187 psig4 = psi_in 1188 psi_out = zero 1189 call fourwf(cplex,dummy3,psig4,dummy2,psi_out,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,mpi_enreg, & 1190 1,ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight) 1191 psi_out = psi_out/sqrt(ucvol) 1192 1193 !! This comes from prep_fourwf 1194 !nproc_fft=mpi_enreg%nproc_fft 1195 !if (nproc_fft>1) then 1196 ! me_fft=mpi_enreg%me_fft 1197 ! if (me_fft>0) then 1198 ! nd3=(ngfft(3)-1)/nproc_fft+1 1199 ! psi_out(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=psi_out(:,:,:,1:nd3) 1200 ! psi_out(:,:,:,1:nd3)=zero 1201 ! end if 1202 ! call xmpi_sum(psi_out,mpi_enreg%comm_fft,ierr) 1203 !end if 1204 1205 !Instead of communications the whole real space vector on all comm_fft CPUs, 1206 !it is possible to let each CPU keep it's part only and communicate only the sums. 1207 !Then, however, we need to clean the trash in the part of the real space 1208 !vector that's not attributed to the given comm_fft CPU. 1209 nproc_fft=mpi_enreg%nproc_fft 1210 if (nproc_fft>1) then 1211 me_fft=mpi_enreg%me_fft 1212 if (me_fft>0) then 1213 nd3=(ngfft(3)-1)/nproc_fft+1 1214 psi_out(:,:,:,me_fft*nd3+1:me_fft*nd3+nd3)=psi_out(:,:,:,1:nd3) 1215 psi_out(:,:,:,1:nd3)=zero 1216 end if 1217 !!! call xmpi_sum(psi_out,mpi_enreg%comm_fft,ierr) 1218 end if 1219 1220 end subroutine g_to_r
m_gwls_hamiltonian/gr_to_g [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
gr_to_g
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1236 subroutine gr_to_g(psig_out,psir_in,psig_in) 1237 1238 real(dp), intent(in) :: psir_in(2,n4,n5,n6) 1239 real(dp), intent(in), optional :: psig_in(2,npw_g) 1240 real(dp), intent(out) :: psig_out(2,npw_g) 1241 1242 integer :: i1, i2, i3 1243 integer :: cplex, option 1244 1245 ! ************************************************************************* 1246 1247 cplex = 2 ! complex potential 1248 option= 2 ! multiply wavefunction by potential 1249 1250 if(.not. present(psig_in)) then 1251 psig4(:,:) = psig_out(:,:) 1252 else 1253 psig4(:,:) = psig_in(:,:) 1254 end if 1255 1256 do i3=1,n6 1257 do i2=1,n5 1258 do i1=1,n4 1259 denpot(2*i1-1,i2,i3)= psir_in(1,i1,i2,i3) 1260 denpot(2*i1 ,i2,i3)= psir_in(2,i1,i2,i3) 1261 end do 1262 end do 1263 end do 1264 1265 call fourwf( cplex, & ! complex potential 1266 denpot, & ! real space wavefunction, in denpot format 1267 psig4, & ! fourier space wavefunction 1268 psig_out, & ! result, in FFT configuration 1269 psir3,gbound,gbound,istwfk(ckpt),kg_k_gather,kg_k_gather,mgfft,mpi_enreg,1, & ! Various other arguments 1270 ngfft,npw_g,npw_g,n4,n5,n6,option,tim_fourwf,weight,weight) 1271 1272 end subroutine gr_to_g
m_gwls_hamiltonian/Hpsik [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
Hpsik
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1000 subroutine Hpsik(psi_out,psi_in,cte) 1001 1002 !External variables 1003 real(dp), intent(inout) :: psi_out(2,npw_g) 1004 real(dp), intent(inout), optional :: psi_in(2,npw_g) 1005 real(dp), intent(in), optional :: cte 1006 1007 ! ************************************************************************* 1008 1009 !psikb1 is allocated in init_hamiltonian and destroyed in destroy_hamiltonian, to avoid doing it at each application of 1010 !the hamiltonian... 1011 1012 !We are keeping track of how the module treat each argument of the hamiltonian (subroutine getghc) here. 1013 !Legend : T = transmitted from 79_seqpar_mpi/vtowfk.F90 through the call gwls_sternheimer 1014 ! I = input argument (allocated and filled) 1015 ! O = output argument (allocated) 1016 ! D = dummy argument (declared but not allocated. Read/Write attempts should trigger a segfault.) 1017 !call getghc(cpopt, T 1018 !psi, I 1019 !conjgrprj, D 1020 !Hpsik, O 1021 !gsc, D (output for PAW : <G|S|C>) 1022 !gs_hamk, T 1023 !gvnlxc, D (<G|Vnonlocal+VFockACE|C>) 1024 !eshift, I (<G|H-eshift.S|C>) 1025 !mpi_enreg, T 1026 !ndat, Fixed to 1 (# of FFTs to do in //) 1027 !dtset%prtvol, T 1028 !sij_opt, Fixed to 0 (PAW dependant : 0-><G|H|C> ; 1-><G|H|C> & <G|S|C> ; -1-><G|H-cte.S|C>) 1029 !tim_getghc, Fixed to 0 (identity of the timer of the calling subroutine. 1:cgwf 5:lobpcg) 1030 !type_calc) Fixed to 0 (0:whole H N:some part only) 1031 1032 ! It is necessary to pass this optional argument to getghc if paral_kgb=1, or else the code exits cleanly with a "BUG" message... 1033 ! It will be important to properly understand what this means when we start using kpoints... 1034 1035 if(present(psi_in)) then 1036 1037 call getghc(cpopt,psi_in,conjgrprj,psi_out,dummy2,gs_hamk,psig4,eshift,& 1038 & mpi_enreg,ndat,dtset%prtvol,sij_opt,tim_getghc,type_calc) 1039 1040 else 1041 psig3 = psi_out 1042 1043 call getghc(cpopt,psig3,conjgrprj,psi_out,dummy2,gs_hamk,psig4,eshift,& 1044 & mpi_enreg,ndat,dtset%prtvol,sij_opt,tim_getghc,type_calc) 1045 1046 end if 1047 if(present(cte)) then 1048 if(present(psi_in)) then 1049 psi_out = psi_out - cte*psi_in 1050 else 1051 psi_out = psi_out - cte*psig3 1052 end if 1053 end if 1054 1055 end subroutine Hpsik
m_gwls_hamiltonian/Hpsikc [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
Hpsikc
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1071 subroutine Hpsikc(psi_out,psi_in,cte) 1072 1073 !External variables 1074 complex(dpc), intent(out) :: psi_out(npw_g) 1075 complex(dpc), intent(in) :: psi_in(npw_g) 1076 complex(dpc), intent(in), optional :: cte 1077 1078 ! ************************************************************************* 1079 1080 1081 psig1(1,:) = dble(psi_in) 1082 psig1(2,:) = dimag(psi_in) 1083 1084 call Hpsik(psig1) 1085 1086 psi_out = dcmplx(psig1(1,:),psig1(2,:)) 1087 1088 if(present(cte)) psi_out = psi_out - cte*psi_in 1089 end subroutine Hpsikc
m_gwls_hamiltonian/kbkb_to_kb [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
kbkb_to_kb
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1288 subroutine kbkb_to_kb(psik_out,psik_in_1,psik_in_2) 1289 !---------------------------------------------------------------------------------------------------- 1290 ! This function computes the direct product of two wavefunctions in real space, 1291 ! psi_out(r) = psi_in_1^*(r)*psi_in_2(r) (without complex conjugating any of the 2 wavefunctions) 1292 ! and returns the result in fourier space, psi_out(k). 1293 ! 1294 ! The two input wavefunctions are in k space. 1295 ! 1296 ! 1297 !---------------------------------------------------------------------------------------------------- 1298 real(dp), intent(out) :: psik_out(2,npw_kb) 1299 real(dp), intent(inout) :: psik_in_1(2,npw_kb), psik_in_2(2,npw_kb) 1300 1301 ! ************************************************************************* 1302 1303 ! change configuration of the data : LA -> FFT 1304 call wf_block_distribute(psik_in_1, psig1,1) 1305 call wf_block_distribute(psik_in_2, psig2,1) 1306 1307 ! Fourier transform the first input 1308 call g_to_r(psir1, psig1) 1309 1310 ! Complex conjugate in real space the first input 1311 psir1(2,:,:,:) = -psir1(2,:,:,:) 1312 1313 ! Fourrier transform the second input, multiply component-wise 1314 call gr_to_g(psig2,psir1) 1315 1316 ! change configuration of the output : FFT -> LA 1317 call wf_block_distribute(psik_out, psig2,2) 1318 1319 end subroutine kbkb_to_kb
m_gwls_hamiltonian/pc_k_valence_kernel [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
pc_k_valence_kernel
FUNCTION
.
INPUTS
OUTPUT
SOURCE
353 subroutine pc_k_valence_kernel(psi_inout,n) 354 355 !================================================================================ 356 ! This routine projects out of the valence kernel. It assumes the input/output 357 ! array is distributed in the FFT configuration, and that the global 358 ! array containing the kernel (defined in this module) is already prepared 359 ! and ready to be used. 360 !================================================================================ 361 362 real(dp), intent(inout) :: psi_inout(2,npw_g) 363 integer , intent(in), optional :: n 364 365 366 real(dp), allocatable :: psi_projected(:,:) 367 368 real(dp) :: tmpc(2) 369 370 integer :: v, n_max 371 372 integer :: mpi_communicator, ierr 373 374 ! ************************************************************************* 375 376 377 ABI_MALLOC( psi_projected, (2,npw_g)) 378 379 mpi_communicator = mpi_enreg%comm_fft 380 381 if (present(n)) then 382 n_max = n 383 else 384 n_max = nbandv 385 end if 386 387 !==================================================== 388 ! Compute projection 389 !==================================================== 390 391 psi_projected(:,:) = zero 392 393 do v = 1, n_max 394 395 ! compute overlap of kernel member with function 396 tmpc = cg_zdotc(npw_g ,kernel_wavefunctions_FFT(:,:,v),psi_inout(:,:)) 397 398 ! Communicate results 399 call xmpi_sum(tmpc, mpi_communicator, ierr) ! sum on all processors working on FFT! 400 401 ! add overlap with array to project 402 psi_projected(1,:) = psi_projected(1,:) + (tmpc(1)*kernel_wavefunctions_FFT(1,:,v)-tmpc(2)*kernel_wavefunctions_FFT(2,:,v)) 403 psi_projected(2,:) = psi_projected(2,:) + (tmpc(1)*kernel_wavefunctions_FFT(2,:,v)+tmpc(2)*kernel_wavefunctions_FFT(1,:,v)) 404 405 !psi_inout(1,:) = psi_inout(1,:) -( tmpc(1)*kernel_wavefunctions_FFT(1,:,v)-tmpc(2)*kernel_wavefunctions_FFT(2,:,v) ) 406 !psi_inout(2,:) = psi_inout(2,:) -( tmpc(1)*kernel_wavefunctions_FFT(2,:,v)+tmpc(2)*kernel_wavefunctions_FFT(1,:,v) ) 407 408 409 end do 410 411 if (present(n)) then 412 psi_inout(:,:) = psi_projected(:,:) 413 else 414 psi_inout(:,:) = psi_inout(:,:) - psi_projected(:,:) 415 end if 416 417 418 419 ABI_FREE( psi_projected) 420 421 end subroutine pc_k_valence_kernel
m_gwls_hamiltonian/precondition [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
precondition
FUNCTION
.
INPUTS
OUTPUT
SOURCE
916 subroutine precondition(psi_out,psi_in) 917 918 real(dp), intent(out) :: psi_out(2,npw_g) 919 real(dp), intent(in) :: psi_in(2,npw_g) 920 921 ! ************************************************************************* 922 923 do i=1,npw_g 924 psi_out(:,i) = psi_in(:,i)*pcon(i) 925 end do 926 927 end subroutine precondition
m_gwls_hamiltonian/precondition_cplx [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
precondition_cplx
FUNCTION
.
INPUTS
OUTPUT
SOURCE
943 subroutine precondition_cplx(psi_out,psi_in) 944 945 complex(dpc), intent(out) :: psi_out(npw_g) 946 complex(dpc), intent(in) :: psi_in(npw_g) 947 948 ! ************************************************************************* 949 950 do i=1,npw_g 951 psi_out(i) = psi_in(i)*pcon(i) 952 end do 953 end subroutine precondition_cplx
m_gwls_hamiltonian/set_precondition [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
set_precondition
FUNCTION
.
INPUTS
OUTPUT
SOURCE
784 subroutine set_precondition(lambda,omega) 785 !-------------------------------------------------------------------------------- 786 ! This subroutine preconditions the problem 787 ! 788 ! A x = b, 789 ! 790 ! with: 791 ! 792 ! omega lambda Operator 793 ! ------------------------------------------------------ 794 ! absent absent A = (H - lambda_0) (value of lambda_0 not important) 795 ! present present A = (H - lambda)^2 + omega^2 796 ! other cases not implemented 797 ! 798 ! 799 ! In the above, b = psik. 800 ! 801 !-------------------------------------------------------------------------------- 802 803 ! TODO : 804 ! - eliminate the 2 "if(kinpw(i) < huge(zero)*1.0d-11)" 805 ! since ecutsm = 0.0 always (check if that's true in this gw_sternheimer subroutine). 806 807 real(dp), intent(in), optional :: lambda, omega 808 809 real(dp) :: poly, x 810 logical :: omega_imaginary 811 812 813 !integer,save :: counter = 0 814 !integer :: io_unit 815 !character(18):: filename 816 !logical :: file_exists 817 818 ! ************************************************************************* 819 820 821 if (present(lambda) .and. present(omega)) then 822 omega_imaginary =.true. 823 else 824 omega_imaginary =.false. 825 end if 826 827 !io_unit = get_unit() 828 !filename = "PRECONDITIONER.log" 829 !inquire(file=filename,exist=file_exists) 830 831 !if (file_exists) then 832 ! open(io_unit,file=filename,position='append',status=files_status_old) 833 !else 834 ! open(io_unit,file=filename,status=files_status_new) 835 ! write(io_unit,10) "#=======================================================================================" 836 ! write(io_unit,10) "# " 837 ! write(io_unit,10) "# This file contains information regarding the preconditioning scheme for SQMR. " 838 ! write(io_unit,10) "# " 839 ! write(io_unit,10) "#=======================================================================================" 840 !end if 841 842 !counter = counter + 1 843 !write(io_unit,10) "# " 844 !write(io_unit,15) "# Call #:", counter 845 !if (present(lambda) .and. present(omega)) then 846 ! write(io_unit,10) "# lambda and omega are present: imaginary frequency case" 847 !else 848 ! write(io_unit,10) "# lambda and omega are absent: omega = 0 case" 849 !end if 850 851 !do i=1,npw_k 852 853 do i = 1, npw_g 854 855 if(omega_imaginary) then 856 x = (kinpw_gather(i)-lambda)**2 + omega**2 857 else 858 x = kinpw_gather(i) 859 end if 860 861 if(x < huge(zero)*1.0d-11) then 862 poly = 27.0 + x*(18.0 + x*(12.0 + 8.0*x)) 863 pcon(i) = poly/(poly + 16.0*(x**4)) 864 !pcon(i) = 1.0/(1.0+x) !I don't know why, it gives better results for Silane than the above polynomial. 865 else 866 pcon(i) = zero 867 end if 868 end do 869 870 !write(io_unit,30) " prec( 1: 10) = ",pcon(1:10) 871 !write(io_unit,30) " prec(npw_k-10:npw_k) = ",pcon(npw_k-10:npw_k) 872 873 !close(io_unit) 874 875 !10 format(A) 876 !15 format(A,I8) 877 !30 format(A,1000(ES24.12)) 878 879 end subroutine set_precondition
m_gwls_hamiltonian/sqrt_vc_k [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
sqrt_vc_k
FUNCTION
.
INPUTS
OUTPUT
SOURCE
969 subroutine sqrt_vc_k(psi_inout) 970 971 !External variables 972 real(dp), intent(inout) :: psi_inout(2,npw_k) 973 974 !Internal variable 975 complex(dpc) :: c 976 977 ! ************************************************************************* 978 979 do i=1,npw_k 980 c = vc_sqrt(i) * cmplx(psi_inout(1,i),psi_inout(2,i),dpc) 981 psi_inout(1,i) = dble (c) 982 psi_inout(2,i) = dimag(c) 983 end do 984 end subroutine sqrt_vc_k
m_gwls_hamiltonian/unset_precondition [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
unset_precondition
FUNCTION
.
INPUTS
OUTPUT
SOURCE
895 subroutine unset_precondition() 896 897 ! ************************************************************************* 898 899 pcon = one 900 end subroutine unset_precondition
m_gwls_hamiltonian/wf_block_distribute [ Functions ]
[ Top ] [ m_gwls_hamiltonian ] [ Functions ]
NAME
wf_block_distribute
FUNCTION
.
INPUTS
OUTPUT
SOURCE
437 subroutine wf_block_distribute(psik, psik_alltoall, direction) 438 439 !================================================================================ 440 ! 441 ! This subroutine distributes a block of wavefunctions in the "linear algebra" 442 ! configuration, to a configuration appropriate to perform FFT (or apply Hamiltonian). 443 ! 444 ! The code below is inspired by the subroutine prep_getghc, which rearranges 445 ! data over MPI processors prior to applying the Hamiltonian. 446 ! 447 ! input: 448 ! npw_kb : dimension of wavefunctions in the "linear algebra" configuration, 449 ! which means the G-vectors are distributed over all 450 ! processors, and every processor has information about all the bands. 451 ! 452 ! 453 ! npw_g : dimension of wavefunctions in the "FFT" configuration, 454 ! which means the G-vectors are distributed along rows of the 455 ! MPI topology, and a given row only has one band. 456 ! 457 ! direction : Are we going from LA to FFT, or from FFT to LA? 458 ! 1 : LA -> FFT 459 ! 2 : LA <- FFT 460 ! 461 ! input/output: 462 ! psik : block of wavefunctions, in LA configuration 463 ! psik_alltoall : wavefunction, in FFT configuration 464 !================================================================================ 465 466 integer , intent(in) :: direction ! flag which determines the direction of the transfer 467 real(dp), intent(inout) :: psik(2,npw_kb) ! block of wavefunctions, in "linear algebra" configuration 468 real(dp), intent(inout) :: psik_alltoall(2,npw_g) ! wavefunction in "FFT" configuration; a single band, but more G-vectors 469 470 471 integer :: ier, spaceComm 472 473 integer,allocatable :: rdisplsloc(:) 474 integer,allocatable :: recvcountsloc(:) 475 integer,allocatable :: sdisplsloc(:) 476 integer,allocatable :: sendcountsloc(:) 477 478 integer :: nproc_band, bandpp 479 480 integer,pointer :: rdispls(:) 481 integer,pointer :: recvcounts(:) 482 integer,pointer :: sdispls(:) 483 integer,pointer :: sendcounts(:) 484 485 ! ************************************************************************* 486 487 488 ! extract information in order to perform MPI communication. 489 ! This code comes from prep_getghc 490 nproc_band = mpi_enreg%nproc_band 491 bandpp = mpi_enreg%bandpp 492 493 if(mpi_enreg%nproc_band*mpi_enreg%bandpp > 1) then 494 495 spaceComm=mpi_enreg%comm_fft 496 if(mpi_enreg%paral_kgb==1) spaceComm=mpi_enreg%comm_band 497 498 ABI_MALLOC(sendcountsloc,(nproc_band)) 499 ABI_MALLOC(sdisplsloc ,(nproc_band)) 500 ABI_MALLOC(recvcountsloc,(nproc_band)) 501 ABI_MALLOC(rdisplsloc ,(nproc_band)) 502 503 recvcounts =>bandfft_kpt(ikpt_this_proc)%recvcounts(:) 504 sendcounts =>bandfft_kpt(ikpt_this_proc)%sendcounts(:) 505 rdispls =>bandfft_kpt(ikpt_this_proc)%rdispls (:) 506 sdispls =>bandfft_kpt(ikpt_this_proc)%sdispls (:) 507 508 recvcountsloc(:)= recvcounts(:)*2*nspinor*bandpp 509 rdisplsloc(:) = rdispls(:)*2*nspinor*bandpp 510 sendcountsloc(:)= sendcounts(:)*2*nspinor 511 sdisplsloc(:) = sdispls(:)*2*nspinor 512 513 ! use MPI to communicate information! 514 if (direction == 1) then 515 ! LA -> FFT 516 call xmpi_alltoallv(psik, sendcountsloc, sdisplsloc, psik_alltoall, recvcountsloc,rdisplsloc, spaceComm, ier) 517 518 else if (direction == 2) then 519 ! FFT -> LA 520 521 call xmpi_alltoallv(psik_alltoall,recvcountsloc,rdisplsloc, psik, sendcountsloc,sdisplsloc,spaceComm,ier) 522 523 end if 524 525 ABI_FREE(sendcountsloc) 526 ABI_FREE(sdisplsloc ) 527 ABI_FREE(recvcountsloc) 528 ABI_FREE(rdisplsloc ) 529 530 else 531 532 if(direction == 1) psik_alltoall = psik 533 534 if(direction == 2) psik = psik_alltoall 535 536 end if 537 538 end subroutine wf_block_distribute