TABLE OF CONTENTS


ABINIT/m_gemm_nonlop [ Modules ]

[ Top ] [ Modules ]

NAME

 m_gemm_nonlop

FUNCTION

  This module provides functions to compute the nonlocal operator by means of the BLAS GEMM
  routine. By treating ndat simultaneous wavefunctions, it is able to exploit BLAS3 routines,
  which leads to excellent CPU efficiency and OpenMP scalability.

COPYRIGHT

 Copyright (C) 2014-2022 ABINIT group (AL)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

18 ! TODO list :
19 ! Don't allocate the full nkpt structures, only those that are treated by this proc: use same init as in m_bandfft_kpt
20 ! support more options (forces & stresses mostly)
21 ! Support RF/other computations (only GS right now)
22 ! handle the case where nloalg(2) < 0, ie no precomputation of ph3d
23 ! more systematic checking of the workflow (right now, only works if init/make/gemm/destroy, no multiple makes, etc)
24 ! Avoid allocating the complex matrix when istwfk > 1
25 ! Merge with chebfi's invovl
26 
27 
28 #if defined HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "abi_common.h"
33 
34 module m_gemm_nonlop
35 
36  use defs_basis
37  use m_errors
38  use m_abicore
39  use m_xmpi
40  use m_abi_linalg
41 
42  use defs_abitypes, only : MPI_type
43  use m_opernlc_ylm, only : opernlc_ylm
44  use m_pawcprj, only : pawcprj_type
45  use m_geometry, only : strconv
46  use m_kg, only : mkkpg
47 
48  implicit none
49 
50  private
51 
52  ! Use these routines in order: first call init, then call make_gemm_nonlop for each k point,
53  ! then call gemm_nonlop to do the actual computation, and call destroy when done. See gstate and vtorho.
54  public :: init_gemm_nonlop
55  public :: destroy_gemm_nonlop
56  public :: make_gemm_nonlop
57  public :: gemm_nonlop

m_gemm_nonlop/destroy_gemm_nonlop [ Functions ]

[ Top ] [ m_gemm_nonlop ] [ Functions ]

NAME

 destroy_gemm_nonlop

FUNCTION

 Destruction of the gemm_nonlop_kpt array

INPUTS

 nkpt= number of k-points

SOURCE

146  subroutine destroy_gemm_nonlop(nkpt)
147 
148   integer,intent(in) :: nkpt
149   integer :: ikpt
150 
151 ! *************************************************************************
152 
153 ! TODO add cycling if kpt parallelism
154   do ikpt = 1,nkpt
155     call free_gemm_nonlop_ikpt(ikpt)
156   end do
157 
158  ABI_FREE(gemm_nonlop_kpt)
159 
160  end subroutine destroy_gemm_nonlop

m_gemm_nonlop/free_gemm_nonlop_ikpt [ Functions ]

[ Top ] [ m_gemm_nonlop ] [ Functions ]

NAME

 free_destroy_gemm_nonlop_ikpt

FUNCTION

 Release memory for one kpt value of the gemm_nonlop_kpt array

INPUTS

 ikpt= index of gemm_nonlop_kptto be released

SOURCE

176  subroutine free_gemm_nonlop_ikpt(ikpt)
177 
178   integer,intent(in) :: ikpt
179 
180 ! *************************************************************************
181 
182  if(gemm_nonlop_kpt(ikpt)%nprojs /= -1) then
183    if (allocated(gemm_nonlop_kpt(ikpt)%projs)) then
184      ABI_FREE(gemm_nonlop_kpt(ikpt)%projs)
185    end if
186    if (allocated(gemm_nonlop_kpt(ikpt)%projs_r)) then
187      ABI_FREE(gemm_nonlop_kpt(ikpt)%projs_r)
188    end if
189    if (allocated(gemm_nonlop_kpt(ikpt)%projs_i)) then
190    ABI_FREE(gemm_nonlop_kpt(ikpt)%projs_i)
191    end if
192    gemm_nonlop_kpt(ikpt)%nprojs = -1
193    if(gemm_nonlop_kpt(ikpt)%ngrads /= -1) then
194      if (allocated(gemm_nonlop_kpt(ikpt)%dprojs)) then
195        ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs)
196      end if
197      if (allocated(gemm_nonlop_kpt(ikpt)%dprojs_r)) then
198        ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs_r)
199      end if
200      if (allocated(gemm_nonlop_kpt(ikpt)%dprojs_i)) then
201        ABI_FREE(gemm_nonlop_kpt(ikpt)%dprojs_i)
202      end if
203      gemm_nonlop_kpt(ikpt)%ngrads = -1
204    end if
205  end if
206 
207  end subroutine free_gemm_nonlop_ikpt

m_gemm_nonlop/gemm_nonlop [ Functions ]

[ Top ] [ m_gemm_nonlop ] [ Functions ]

NAME

 gemm_nonlop

FUNCTION

 Replacement of nonlop. same prototype as nonlop although not all options are implemented.

INPUTS

SOURCE

480  subroutine gemm_nonlop(atindx1,choice,cpopt,cprjin,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,&
481 &                 enl,enlout,ffnlin,ffnlout,gmet,gprimd,idir,indlmn,istwf_k,&
482 &                 kgin,kgout,kpgin,kpgout,kptin,kptout,lambda,lmnmax,matblk,mgfft,&
483 &                 mpi_enreg,mpsang,mpssoang,natom,nattyp,ndat,ngfft,nkpgin,nkpgout,nloalg,&
484 &                 nnlout,npwin,npwout,nspinor,nspinortot,ntypat,only_SO,paw_opt,phkxredin,&
485 &                 phkxredout,ph1d,ph3din,ph3dout,signs,sij,svectout,&
486 &                 tim_nonlop,ucvol,useylm,vectin,vectout,&
487 &                 use_gpu_cuda)
488 
489   !Arguments ------------------------------------
490   !scalars
491   integer,intent(in) :: choice,cpopt,dimenl1,dimenl2,dimekbq,dimffnlin,dimffnlout,idir
492   integer,intent(in) :: istwf_k,lmnmax,matblk,mgfft,mpsang,mpssoang,natom,ndat,nkpgin
493   integer,intent(in) :: nkpgout,nnlout,npwin,npwout,nspinor,nspinortot,ntypat,only_SO
494   integer,intent(in) :: paw_opt,signs,tim_nonlop,useylm
495   integer,optional,intent(in) :: use_gpu_cuda
496   real(dp),intent(in) :: lambda(ndat),ucvol
497   type(MPI_type),intent(in) :: mpi_enreg
498   !arrays
499   integer,intent(in) :: atindx1(natom),indlmn(6,lmnmax,ntypat),kgin(3,npwin)
500   integer,intent(in) :: kgout(3,npwout),nattyp(ntypat),ngfft(18),nloalg(3)
501   real(dp),intent(in) :: enl(dimenl1,dimenl2,nspinortot**2,dimekbq)
502   real(dp),intent(in) :: ffnlin(npwin,dimffnlin,lmnmax,ntypat)
503   real(dp),intent(in) :: ffnlout(npwout,dimffnlout,lmnmax,ntypat),gmet(3,3)
504   real(dp),intent(in) :: gprimd(3,3),kpgin(npwin,nkpgin*useylm)
505   real(dp),intent(in) :: kpgout(npwout,nkpgout*useylm),kptin(3),kptout(3)
506   real(dp),intent(in) :: phkxredin(2,natom),phkxredout(2,natom),ph1d(2,*)
507   real(dp),intent(in) :: sij(dimenl1,ntypat*((paw_opt+1)/3))
508   real(dp),intent(inout) :: ph3din(2,npwin,matblk),ph3dout(2,npwout,matblk)
509   real(dp),intent(inout) :: vectin(2,npwin*nspinor*ndat)
510   real(dp),intent(inout) :: enlout(nnlout*ndat)
511   real(dp),intent(out) :: svectout(2,npwout*nspinor*(paw_opt/3)*ndat)
512   real(dp),intent(inout) :: vectout(2,npwout*nspinor*ndat) !vz_i
513   type(pawcprj_type),intent(inout) :: cprjin(natom,nspinor*((cpopt+5)/5)*ndat)
514 
515   ! locals
516   integer :: ii, ia, idat, igrad, nprojs, ngrads, shift, iatom, nlmn, ierr, ibeg, iend
517   integer :: cplex, cplex_enl, cplex_fac, proj_shift, grad_shift
518   integer :: enlout_shift, force_shift, nnlout_test
519   integer :: iatm, ndgxdt, ndgxdtfac, nd2gxdt, nd2gxdtfac, optder, itypat, ilmn
520   integer :: cplex_dgxdt(1), cplex_d2gxdt(1)
521   real(dp) :: esum
522   real(dp) :: work(6)
523   real(dp) :: dgxdt_dum_in(1,1,1,1,1), dgxdt_dum_out(1,1,1,1,1),dgxdt_dum_out2(1,1,1,1,1)
524   real(dp) :: d2gxdt_dum_in(1,1,1,1,1), d2gxdt_dum_out(1,1,1,1,1),d2gxdt_dum_out2(1,1,1,1,1)
525   real(dp), allocatable :: enlk(:),sij_typ(:)
526   real(dp), allocatable :: projections(:,:,:), s_projections(:,:,:), vnl_projections(:,:,:)
527   real(dp), allocatable :: dprojections(:,:,:), temp_realvec(:)
528 
529 ! *************************************************************************
530 
531   ! We keep the same interface as nonlop, but we don't use many of those
532   ABI_UNUSED((/ffnlin,ffnlout,gmet,kpgin,kpgout/))
533   ABI_UNUSED((/ph1d(1,1),ph3din,ph3dout/))
534   ABI_UNUSED((/phkxredin,phkxredout,ucvol/))
535   ABI_UNUSED((/mgfft,mpsang,mpssoang/))
536   ABI_UNUSED((/kptin,kptout/))
537   ABI_UNUSED((/idir,nloalg,ngfft,kgin,kgout,ngfft,only_SO,tim_nonlop,use_gpu_cuda/))
538 
539   ! Check supported options
540   if (.not.gemm_nonlop_use_gemm) then
541     ABI_BUG('computation not prepared for gemm_nonlop use!')
542   end if
543   if ( (choice>1.and.choice/=7.and.signs==2) .or. &
544 &      (choice>3.and.choice/=7.and.choice/=23.and.signs==1) .or. &
545 &      (useylm/=1) ) then
546     ABI_BUG('gemm_nonlop option not supported!')
547   end if
548   if (signs==1) then
549     nnlout_test=0
550     if (choice==1) nnlout_test=1
551     if (choice==2) nnlout_test=3*natom
552     if (choice==3) nnlout_test=6
553     if (choice==23) nnlout_test=6+3*natom
554     if (nnlout<nnlout_test) then
555       ABI_BUG('wrong nnlout size!')
556     end if
557   end if
558 
559   cplex=2;if (istwf_k>1) cplex=1
560   cplex_enl=1;if (paw_opt>0) cplex_enl=2*dimenl1/(lmnmax*(lmnmax+1)) ! is enl complex?
561   cplex_fac=max(cplex,dimekbq)
562   if ((nspinortot==2.or.cplex_enl==2).and.paw_opt>0.and.choice/=7) cplex_fac=2 ! is vnl_projections complex?
563 
564   nprojs = gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%nprojs
565   ngrads = gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%ngrads
566   if(choice==1) ngrads=0
567   ABI_CHECK(ngrads>=3.or.choice/=2 ,"Bad allocation in gemm_nonlop (2)!")
568   ABI_CHECK(ngrads>=6.or.choice/=3 ,"Bad allocation in gemm_nonlop (3)!")
569   ABI_CHECK(ngrads>=9.or.choice/=23,"Bad allocation in gemm_nonlop (23)!")
570 
571   ! These will store the non-local factors for vectin, svectout and vectout respectively
572   ABI_MALLOC(projections,(cplex, nprojs,nspinor*ndat))
573   ABI_MALLOC(s_projections,(cplex, nprojs,nspinor*ndat))
574   ABI_MALLOC(vnl_projections,(cplex_fac, nprojs,nspinor*ndat))
575   projections = zero
576   s_projections = zero
577   vnl_projections = zero
578   if (signs==1.and.ngrads>0) then
579     ABI_MALLOC(dprojections,(cplex, ngrads*nprojs,nspinor*ndat))
580     dprojections = zero
581   end if
582 
583   if(nprojs == 0) then
584     ! TODO check if this is correct
585     if(signs == 1) then
586       enlout=zero
587       return
588     end if
589     if(signs == 2) then
590       vectout = zero
591       if(paw_opt>0) svectout = vectin
592       return
593     end if
594   end if
595 
596   if(cpopt >= 2) then
597     ! retrieve from cprjin
598     do idat=1, ndat*nspinor
599       shift = 0
600       do iatom = 1, natom
601         nlmn = cprjin(iatom, idat)%nlmn
602         projections(1:cplex, shift+1:shift+nlmn, idat) = cprjin(iatom, idat)%cp(1:cplex, 1:nlmn)
603         shift = shift + nlmn
604       end do
605     end do
606     if(cpopt==4.and.allocated(dprojections)) then
607       ABI_CHECK(cprjin(1,1)%ncpgr>=ngrads,"cprjin%ncpgr not correct! (1)")
608       do idat=1, ndat*nspinor
609         shift = 0
610         do iatom = 1, natom
611           nlmn  = cprjin(iatom, idat)%nlmn
612           do igrad=1,ngrads
613             dprojections(1:cplex, shift+1:shift+nlmn, idat) = &
614 &                   cprjin(iatom, idat)%dcp(1:cplex,igrad,1:ilmn)
615             shift = shift + nlmn
616           end do
617         end do
618       end do
619     end if
620   else
621     ! opernla
622     if(cplex == 2) then
623       call abi_zgemm_2r('C', 'N', nprojs, ndat*nspinor, npwin, cone, &
624 &                gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwin,&
625 &                vectin, npwin, czero, projections, nprojs)
626       if(signs==1.and.ngrads>0) then
627         call abi_zgemm_2r('C', 'N', ngrads*nprojs, ndat*nspinor, npwin, cone, &
628                  gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs, npwin,&
629                  vectin, npwin, czero, dprojections, ngrads*nprojs)
630       end if
631     else
632       ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat))
633       ! only compute real part of projections = P^* psi => projections_r = P_r^T psi_r + P_i^T psi_i
634       temp_realvec(1:npwin*nspinor*ndat) = vectin(1,1:npwin*nspinor*ndat)
635       if(istwf_k == 2 .and. mpi_enreg%me_g0 == 1) then
636         do idat=1, ndat*nspinor
637           temp_realvec(1+(idat-1)*npwin) = temp_realvec(1+(idat-1)*npwin)/2
638         end do
639       end if
640       call DGEMM('T', 'N', nprojs, ndat*nspinor, npwin, one, &
641 &                gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwin, &
642 &                temp_realvec, npwin, zero, projections, nprojs)
643       if(signs==1.and.ngrads>0) then
644         call DGEMM('T', 'N', ngrads*nprojs, ndat*nspinor, npwin, one, &
645 &                  gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs_r, npwin, &
646 &                  temp_realvec, npwin, zero, dprojections, ngrads*nprojs)
647       end if
648       temp_realvec(1:npwin*nspinor*ndat) = vectin(2,1:npwin*nspinor*ndat)
649       if(istwf_k == 2 .and. mpi_enreg%me_g0 == 1) then
650         do idat=1, ndat*nspinor
651           temp_realvec(1+(idat-1)*npwin) = zero
652         end do
653       end if
654       call DGEMM('T', 'N', nprojs, ndat*nspinor, npwin, one, &
655 &                gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwin, &
656 &                temp_realvec, npwin, one , projections, nprojs)
657       projections = projections * 2
658       if(signs==1.and.ngrads>0) then
659         call DGEMM('T', 'N', ngrads*nprojs, ndat*nspinor, npwin, one, &
660 &                  gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%dprojs_i, npwin, &
661 &                  temp_realvec, npwin, one , dprojections, ngrads*nprojs)
662         dprojections = dprojections * 2
663       end if
664       ABI_FREE(temp_realvec)
665     end if
666     call xmpi_sum(projections,mpi_enreg%comm_fft,ierr)
667     if (choice>1) then
668       call xmpi_sum(dprojections,mpi_enreg%comm_fft,ierr)
669     end if
670 
671     if(cpopt >= 0) then
672       ! store in cprjin
673       do idat=1, ndat*nspinor
674         shift = 0
675         do iatom = 1, natom
676           nlmn = cprjin(iatom, idat)%nlmn
677           cprjin(iatom, idat)%cp(1:cplex, 1:nlmn) = projections(1:cplex, shift+1:shift+nlmn, idat)
678           shift = shift + nlmn
679         end do
680       end do
681       if(cpopt==3) then
682         ABI_CHECK(cprjin(1,1)%ncpgr>=ngrads,"cprjin%ncpgr not correct! (2)")
683         shift = 0
684         do iatom = 1, natom
685           nlmn = cprjin(iatom, idat)%nlmn
686           do igrad=1,ngrads
687             cprjin(iatom, idat)%dcp(1:cplex,igrad,1:nlmn) = &
688 &              dprojections(1:cplex, shift+1:shift+nlmn, idat)
689             shift = shift + nlmn
690           end do
691         end do
692       end if
693     end if
694   end if
695 
696   if(choice > 0) then
697 
698     if(choice /= 7) then
699       ! opernlc
700       iatm = 0
701       ndgxdt = 0
702       ndgxdtfac = 0
703       nd2gxdt = 0
704       nd2gxdtfac = 0
705       optder = 0
706 
707       shift = 0
708       ABI_MALLOC(sij_typ,(((paw_opt+1)/3)*lmnmax*(lmnmax+1)/2))
709       do itypat=1, ntypat
710         nlmn=count(indlmn(3,:,itypat)>0)
711         if (paw_opt>=2) then
712           if (cplex_enl==1) then
713             do ilmn=1,nlmn*(nlmn+1)/2
714               sij_typ(ilmn)=sij(ilmn,itypat)
715             end do
716           else
717             do ilmn=1,nlmn*(nlmn+1)/2
718               sij_typ(ilmn)=sij(2*ilmn-1,itypat)
719             end do
720           end if
721         end if
722 
723         ibeg = shift+1
724         iend = shift+nattyp(itypat)*nlmn
725 
726         do idat = 1,ndat
727           call opernlc_ylm(atindx1,cplex,cplex_dgxdt,cplex_d2gxdt,cplex_enl,cplex_fac,dgxdt_dum_in,dgxdt_dum_out,dgxdt_dum_out2,&
728 &         d2gxdt_dum_in,d2gxdt_dum_out,d2gxdt_dum_out2,dimenl1,dimenl2,dimekbq,enl,&
729 &         projections(:, ibeg:iend, 1+nspinor*(idat-1):nspinor*idat),&
730 &         vnl_projections(:, ibeg:iend,1+nspinor*(idat-1):nspinor*idat),&
731 &         s_projections(:, ibeg:iend,1+nspinor*(idat-1):nspinor*idat),&
732 &         iatm,indlmn(:,:,itypat),itypat,lambda(idat),mpi_enreg,natom,ndgxdt,ndgxdtfac,nd2gxdt,nd2gxdtfac,&
733 &         nattyp(itypat),nlmn,nspinor,nspinortot,optder,paw_opt,sij_typ)
734         end do
735 
736         shift = shift + nattyp(itypat)*nlmn
737         iatm = iatm+nattyp(itypat)
738       end do
739       ABI_FREE(sij_typ)
740     else
741       s_projections = projections
742     end if
743 
744     ! opernlb (only choice=1)
745     if(signs==2) then
746       if(paw_opt == 3 .or. paw_opt == 4) then
747         ! Get svectout from s_projections
748         if(cplex == 2) then
749           call abi_zgemm_2r('N', 'N', npwout, ndat*nspinor, nprojs, cone, &
750 &                        gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwout, &
751 &                        s_projections, nprojs, czero, svectout, npwout)
752         else
753           ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat))
754           call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, &
755 &                    gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwout, &
756 &                    s_projections, nprojs, zero, temp_realvec, npwout)
757           svectout(1,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat)
758           call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, &
759 &                    gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwout,&
760 &                    s_projections, nprojs, zero, temp_realvec, npwout)
761           svectout(2,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat)
762           ABI_FREE(temp_realvec)
763         end if
764         if(choice /= 7) svectout = svectout + vectin ! TODO understand this
765       end if
766       if(paw_opt == 0 .or. paw_opt == 1 .or. paw_opt == 4) then
767         ! Get vectout from vnl_projections
768         if(cplex_fac == 2) then
769           call abi_zgemm_2r('N', 'N', npwout, ndat*nspinor, nprojs, cone, &
770 &                        gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs, npwout, &
771 &                        vnl_projections, nprojs, czero, vectout, npwout)
772         else
773           ABI_MALLOC(temp_realvec,(MAX(npwout,npwin)*nspinor*ndat))
774           call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, &
775 &                    gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_r, npwout, &
776 &                    vnl_projections, nprojs, zero, temp_realvec, npwout)
777           vectout(1,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat)
778           call DGEMM('N', 'N', npwout, ndat*nspinor, nprojs, one, &
779 &                    gemm_nonlop_kpt(gemm_nonlop_ikpt_this_proc_being_treated)%projs_i, npwout, &
780 &                    vnl_projections, nprojs, zero, temp_realvec, npwout)
781           vectout(2,1:npwout*nspinor*ndat) = temp_realvec(1:npwout*nspinor*ndat)
782           ABI_FREE(temp_realvec)
783         end if
784       end if
785     end if ! opernlb
786 
787     ! opernld
788     if(signs==1) then
789       enlout=zero
790       if(choice==1.or.choice==3.or.choice==23) then
791         ABI_MALLOC(enlk,(ndat))
792         enlk=zero
793         do idat=1,ndat*nspinor
794           proj_shift=0
795           do itypat=1, ntypat
796             nlmn=count(indlmn(3,:,itypat)>0)
797             do ia=1,nattyp(itypat)
798               !Following loops are a [D][Z]DOT
799               esum=zero
800               do ilmn=1,nlmn
801                 do ii=1,cplex
802                   esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) &
803 &                           *projections    (ii,proj_shift+ilmn,idat)
804                 end do
805               end do
806               proj_shift=proj_shift+nlmn
807               enlk(idat) = enlk(idat) + esum
808             end do
809           end do
810         end do
811         if (choice==1) enlout(1:ndat)=enlk(1:ndat)
812       end if ! choice=1/3/23
813       if(choice==2.or.choice==3.or.choice==23) then
814         do idat=1,ndat*nspinor
815           proj_shift=0 ; grad_shift=0
816           enlout_shift=(idat-1)*nnlout
817           force_shift=merge(6,0,choice==23)
818           do itypat=1, ntypat
819             nlmn=count(indlmn(3,:,itypat)>0)
820             do ia=1,nattyp(itypat)
821               if (choice==3.or.choice==23) then
822                 do igrad=1,6
823                   !Following loops are a [D][Z]DOT
824                   esum=zero
825                   do ilmn=1,nlmn
826                     do ii=1,cplex
827                       esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) &
828 &                               *dprojections   (ii,grad_shift+ilmn,idat)
829                     end do
830                   end do
831                   grad_shift=grad_shift+nlmn
832                   enlout(enlout_shift+igrad)=enlout(enlout_shift+igrad) + two*esum
833                 end do
834               end if
835               if (choice==2.or.choice==23) then
836                 do igrad=1,3
837                   !Following loops are a [D][Z]DOT
838                   esum=zero
839                   do ilmn=1,nlmn
840                     do ii=1,cplex
841                       esum=esum +vnl_projections(ii,proj_shift+ilmn,idat) &
842 &                               *dprojections   (ii,grad_shift+ilmn,idat)
843                     end do
844                   end do
845                   grad_shift=grad_shift+nlmn
846                   enlout(enlout_shift+force_shift+igrad)= &
847 &                               enlout(enlout_shift+force_shift+igrad) + two*esum
848                 end do
849                 force_shift=force_shift+3
850               end if
851               proj_shift=proj_shift+nlmn
852             end do
853           end do
854         end do
855       end if ! choice=2, 3 or 23
856 
857     end if !opernld
858 
859   end if ! choice>0
860 
861 ! Reduction in case of parallelism
862   if (signs==1.and.mpi_enreg%paral_spinor==1) then
863     if (size(enlout)>0) then
864       call xmpi_sum(enlout,mpi_enreg%comm_spinor,ierr)
865     end if
866     if (choice==3.or.choice==23) then
867       call xmpi_sum(enlk,mpi_enreg%comm_spinor,ierr)
868     end if
869   end if
870 
871 ! Derivatives wrt strain
872 !  - Convert from reduced to cartesian coordinates
873 !  - Substract volume contribution
874  if ((choice==3.or.choice==23).and.signs==1.and.paw_opt<=3) then
875    do idat=1,ndat
876      enlout_shift=(idat-1)*nnlout
877      call strconv(enlout(enlout_shift+1:enlout_shift+6),gprimd,work)
878      enlout(enlout_shift+1:enlout_shift+3)=(work(1:3)-enlk(idat))
879      enlout(enlout_shift+4:enlout_shift+6)= work(4:6)
880    end do
881  end if
882 
883 ! Release memory
884   ABI_FREE(projections)
885   ABI_FREE(s_projections)
886   ABI_FREE(vnl_projections)
887   if (allocated(dprojections)) then
888     ABI_FREE(dprojections)
889   end if
890   if (allocated(enlk)) then
891     ABI_FREE(enlk)
892   end if
893 
894  end subroutine gemm_nonlop
895 !***
896 
897 !----------------------------------------------------------------------
898 
899 end module m_gemm_nonlop

m_gemm_nonlop/gemm_nonlop_type [ Types ]

[ Top ] [ m_gemm_nonlop ] [ Types ]

NAME

 gemm_nonlop_type

FUNCTION

 Contains information needed to apply the nonlocal operator

SOURCE

70  type,public :: gemm_nonlop_type
71 
72    integer :: nprojs
73    integer :: ngrads
74 
75    real(dp), allocatable :: projs(:, :, :)
76    ! (2, npw, nprojs)
77    real(dp), allocatable :: projs_r(:, :, :)
78    ! (1, npw, nprojs)
79    real(dp), allocatable :: projs_i(:, :, :)
80    ! (1, npw, nprojs)
81 
82    real(dp), allocatable :: dprojs(:, :, :)
83    ! (2, npw, nprojs*ngrads)
84    real(dp), allocatable :: dprojs_r(:, :, :)
85    ! (1, npw, nprojs*ngrads)
86    real(dp), allocatable :: dprojs_i(:, :, :)
87    ! (1, npw, nprojs*ngrads)
88 
89  end type gemm_nonlop_type

m_gemm_nonlop/init_gemm_nonlop [ Functions ]

[ Top ] [ m_gemm_nonlop ] [ Functions ]

NAME

 init_gemm_nonlop

FUNCTION

 Initalization of the gemm_nonlop_kpt array

INPUTS

 nkpt= number of k-points

SOURCE

116  subroutine init_gemm_nonlop(nkpt)
117 
118   integer,intent(in) :: nkpt
119   integer :: ikpt
120 
121 ! *************************************************************************
122 
123   ! TODO only allocate the number of kpt treated by this proc
124   ABI_MALLOC(gemm_nonlop_kpt, (nkpt))
125   do ikpt=1,nkpt
126     gemm_nonlop_kpt(ikpt)%nprojs = -1
127     gemm_nonlop_kpt(ikpt)%ngrads = -1
128   end do
129 
130  end subroutine init_gemm_nonlop

m_gemm_nonlop/make_gemm_nonlop [ Functions ]

[ Top ] [ m_gemm_nonlop ] [ Functions ]

NAME

 make_gemm_nonlop

FUNCTION

 Build the gemm_nonlop array

INPUTS

SOURCE

223  subroutine make_gemm_nonlop(ikpt,npw,lmnmax,ntypat,indlmn,nattyp,istwf_k,ucvol,ffnl_k, &
224 &                            ph3d_k,kpt_k,kg_k,kpg_k, &
225 &                            compute_grad_strain,compute_grad_atom) ! Optional parameters
226 
227   integer, intent(in) :: ikpt
228   integer, intent(in) :: npw, lmnmax,ntypat
229   integer, intent(in) :: indlmn(:,:,:), kg_k(:,:)
230   integer, intent(in) :: nattyp(ntypat)
231   integer, intent(in) :: istwf_k
232   logical, intent(in), optional :: compute_grad_strain,compute_grad_atom
233   real(dp), intent(in) :: ucvol
234   real(dp), intent(in) :: ffnl_k(:,:,:,:)
235   real(dp), intent(in) :: ph3d_k(:,:,:)
236   real(dp), intent(in) :: kpt_k(:)
237   real(dp), intent(in), target :: kpg_k(:,:)
238 
239   integer :: nprojs,ndprojs,ngrads
240 
241   integer,parameter :: alpha(6)=(/1,2,3,3,3,2/),beta(6)=(/1,2,3,2,1,1/)
242   integer :: itypat, ilmn, nlmn, ia, iaph3d, igrad, shift, shift_grad
243   integer :: il, ipw, idir, idir1, idir2, nkpg_local
244   logical :: parity,my_compute_grad_strain,my_compute_grad_atom
245   real(dp),allocatable :: atom_projs(:,:,:), atom_dprojs(:,:,:,:), temp(:)
246   real(dp),pointer :: kpg(:,:)
247 
248 ! *************************************************************************
249 
250   my_compute_grad_strain=.false. ; if (present(compute_grad_strain)) my_compute_grad_strain=compute_grad_strain
251   my_compute_grad_atom=.false. ; if (present(compute_grad_atom)) my_compute_grad_atom=compute_grad_atom
252   ABI_CHECK(size(ph3d_k)>0,'nloalg(2)<0 not compatible with use_gemm_nonlop=1!')
253 !  ABI_CHECK((.not.my_compute_grad_strain).or.size(kpg_k)>0,"kpg_k should be allocated to compute gradients!")
254 !  ABI_CHECK((.not.my_compute_grad_atom).or.size(kpg_k)>0,"kpg_k should be allocated to compute gradients!")
255 
256   iaph3d = 1
257 
258   ABI_MALLOC(atom_projs, (2, npw, lmnmax))
259   if (my_compute_grad_strain) then
260     ndprojs = 3
261     ABI_MALLOC(atom_dprojs, (2, npw, ndprojs, lmnmax))
262   else
263     ndprojs = 0
264   end if
265 
266   ABI_MALLOC(temp, (npw))
267 
268   call free_gemm_nonlop_ikpt(ikpt)
269 
270   ! build nprojs, ngrads
271   nprojs = 0 ; ngrads = 0
272   do itypat=1,ntypat
273     nprojs = nprojs + count(indlmn(3,:,itypat)>0)*nattyp(itypat)
274   end do
275   if (my_compute_grad_strain) ngrads=ngrads+6
276   if (my_compute_grad_atom) ngrads=ngrads+3
277   if (nprojs>0) gemm_nonlop_kpt(ikpt)%nprojs = nprojs
278   if (ngrads>0) gemm_nonlop_kpt(ikpt)%ngrads = ngrads
279 
280   if(istwf_k <= 1) then
281     ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs, (2, npw, nprojs))
282     gemm_nonlop_kpt(ikpt)%projs = zero
283     if(ngrads>0) then
284       ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs, (2, npw, nprojs*ngrads))
285       gemm_nonlop_kpt(ikpt)%dprojs = zero
286     end if
287   else
288     ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs_r, (1, npw, nprojs))
289     ABI_MALLOC(gemm_nonlop_kpt(ikpt)%projs_i, (1, npw, nprojs))
290     gemm_nonlop_kpt(ikpt)%projs_r = zero
291     gemm_nonlop_kpt(ikpt)%projs_i = zero
292     if(ngrads>0) then
293       ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs_r, (1, npw, nprojs*ngrads))
294       ABI_MALLOC(gemm_nonlop_kpt(ikpt)%dprojs_i, (1, npw, nprojs*ngrads))
295       gemm_nonlop_kpt(ikpt)%dprojs_r = zero
296       gemm_nonlop_kpt(ikpt)%dprojs_i = zero
297     end if
298   end if
299 
300   ! Compute (k+G) vectors if needed
301   nkpg_local=0
302   if ((my_compute_grad_strain.or.my_compute_grad_atom).and.size(kpg_k)==0) then
303     nkpg_local=3
304     ABI_MALLOC(kpg,(npw,nkpg_local))
305     call mkkpg(kg_k,kpg,kpt_k,nkpg_local,npw)
306   else
307     kpg => kpg_k
308   end if
309 
310   shift = 0 ; shift_grad = 0
311   do itypat = 1, ntypat
312     nlmn = count(indlmn(3,:,itypat)>0)
313 
314     do ia = 1, nattyp(itypat)
315 
316       !! build atom_projs, from opernlb
317       !! P = 4pi/sqrt(ucvol)* conj(diag(ph3d)) * ffnl * diag(parity), with parity = (-i)^l
318 
319       ! start from 4pi/sqrt(ucvol)*ffnl
320       ! atom_projs(1, :, 1:nlmn) = four_pi/sqrt(ham%ucvol) * ham%ffnl_k(:, 1, 1:nlmn)
321       ! TODO vectorize (DCOPY with stride)
322       atom_projs(:,:,:) = zero
323       do ipw=1, npw
324         atom_projs(1,ipw, 1:nlmn) = four_pi/sqrt(ucvol) * ffnl_k(ipw, 1, 1:nlmn, itypat)
325       end do
326       if (ndprojs>0) atom_dprojs(:,:,:,:) = zero
327       if (my_compute_grad_strain) then
328         do ipw=1, npw
329           atom_dprojs(1,ipw, 1:3, 1:nlmn) = four_pi/sqrt(ucvol) * ffnl_k(ipw, 2:4, 1:nlmn, itypat)
330         end do
331       end if
332 
333       ! multiply by (-i)^l
334       do ilmn=1,nlmn
335         il=mod(indlmn(1,ilmn, itypat),4);
336         parity=(mod(il,2)==0)
337         if (il>1) then
338           ! multiply by -1
339           atom_projs(:,:,ilmn) = -atom_projs(:,:,ilmn)
340           if (ndprojs>0) atom_dprojs(:,:,:,ilmn) = -atom_dprojs(:,:,:,ilmn)
341         end if
342         if(.not. parity) then
343           ! multiply by -i
344           temp = atom_projs(2,:,ilmn)
345           atom_projs(2,:,ilmn) = -atom_projs(1,:,ilmn)
346           atom_projs(1,:,ilmn) =  temp
347           if (ndprojs>0) then
348             do idir=1,ndprojs
349               temp = atom_dprojs(2,:,idir,ilmn)
350               atom_dprojs(2,:,idir,ilmn) = -atom_dprojs(1,:,idir,ilmn)
351               atom_dprojs(1,:,idir,ilmn) =  temp
352             end do
353           end if
354         end if
355       end do
356 
357       ! multiply by conj(ph3d)
358       do ilmn=1,nlmn
359         temp = atom_projs(1, :, ilmn)
360         atom_projs(1, :, ilmn) = atom_projs(1, :, ilmn) * ph3d_k(1, :, iaph3d) &
361 &                              + atom_projs(2, :, ilmn) * ph3d_k(2, :, iaph3d)
362         atom_projs(2, :, ilmn) = atom_projs(2, :, ilmn) * ph3d_k(1, :, iaph3d) &
363 &                              - temp                   * ph3d_k(2, :, iaph3d)
364       end do
365       if (ndprojs>0) then
366         do ilmn=1,nlmn
367           do idir=1,ndprojs
368             temp = atom_dprojs(1, :, idir,ilmn)
369             atom_dprojs(1, :, idir,ilmn) = atom_dprojs(1, :, idir,ilmn) * ph3d_k(1, :, iaph3d) &
370 &                                        + atom_dprojs(2, :, idir,ilmn) * ph3d_k(2, :, iaph3d)
371             atom_dprojs(2, :, idir,ilmn) = atom_dprojs(2, :, idir,ilmn) * ph3d_k(1, :, iaph3d) &
372 &                                        - temp                         * ph3d_k(2, :, iaph3d)
373           end do
374         end do
375       end if
376 
377       !! atom_projs is built, copy to projs / dprojs
378 
379       if(istwf_k <= 1) then
380         gemm_nonlop_kpt(ikpt)%projs(1:2, :, shift+1:shift+nlmn) = atom_projs(:, :, 1:nlmn)
381         if(ngrads>0) then
382           igrad=0
383           if(my_compute_grad_strain) then
384             do idir=1,6
385               idir1=alpha(idir);idir2=beta(idir)
386               do ilmn=1,nlmn
387                 do ipw=1,npw
388                   gemm_nonlop_kpt(ikpt)%dprojs(1:2, ipw, shift_grad+nlmn*igrad+ilmn) = &
389 &                  -half*(atom_dprojs(1:2, ipw, idir1, ilmn)*kpg(ipw,idir2) &
390 &                        +atom_dprojs(1:2, ipw, idir2, ilmn)*kpg(ipw,idir1))
391                 end do
392               end do
393               igrad=igrad+1
394             end do
395           end if
396           if(my_compute_grad_atom) then
397             do idir=1,3
398               do ilmn=1,nlmn
399                 do ipw=1,npw
400                   gemm_nonlop_kpt(ikpt)%dprojs(1, ipw, shift_grad+nlmn*igrad+ilmn) = &
401 &                  +atom_projs(2, ipw, ilmn)*kpg(ipw,idir)*two_pi
402                   gemm_nonlop_kpt(ikpt)%dprojs(2, ipw, shift_grad+nlmn*igrad+ilmn) = &
403 &                  -atom_projs(1, ipw, ilmn)*kpg(ipw,idir)*two_pi
404                 end do
405               end do
406               igrad=igrad+1
407             end do
408           end if
409         end if
410 
411       else ! istwf_k>1
412         gemm_nonlop_kpt(ikpt)%projs_r(1, :, shift+1:shift+nlmn) = atom_projs(1, :, 1:nlmn)
413         gemm_nonlop_kpt(ikpt)%projs_i(1, :, shift+1:shift+nlmn) = atom_projs(2, :, 1:nlmn)
414         if(ngrads>0) then
415           igrad=0
416           if(my_compute_grad_strain) then
417             do idir=1,6
418               idir1=alpha(idir);idir2=beta(idir)
419               do ilmn=1,nlmn
420                 do ipw=1,npw
421                   gemm_nonlop_kpt(ikpt)%dprojs_r(1, ipw, shift_grad+nlmn*igrad+ilmn) = &
422 &                  -half*(atom_dprojs(1, ipw, idir1, ilmn)*kpg(ipw,idir2) &
423 &                        +atom_dprojs(1, ipw, idir2, ilmn)*kpg(ipw,idir1))
424 
425                   gemm_nonlop_kpt(ikpt)%dprojs_i(1, ipw, shift_grad+nlmn*igrad+ilmn) = &
426 &                  -half*(atom_dprojs(2, ipw, idir1, ilmn)*kpg(ipw,idir2) &
427 &                        +atom_dprojs(2, ipw, idir2, ilmn)*kpg(ipw,idir1))
428                 end do
429               end do
430               igrad=igrad+1
431             end do
432           end if
433           if(my_compute_grad_atom) then
434             do idir=1,3
435               do ilmn=1,nlmn
436                 do ipw=1,npw
437                   gemm_nonlop_kpt(ikpt)%dprojs_r(1, ipw, shift_grad+nlmn*igrad+ilmn) = &
438 &                  +atom_projs(2, ipw, ilmn)*kpg(ipw,idir)*two_pi
439                   gemm_nonlop_kpt(ikpt)%dprojs_i(1, ipw, shift_grad+nlmn*igrad+ilmn) = &
440 &                  -atom_projs(1, ipw, ilmn)*kpg(ipw,idir)*two_pi
441                 end do
442               end do
443               igrad=igrad+1
444             end do
445           end if
446         end if
447       end if ! istwf_k
448 
449       shift = shift + nlmn
450       shift_grad = shift_grad + ngrads*nlmn
451       iaph3d = iaph3d + 1
452     end do
453   end do
454 
455   ABI_FREE(atom_projs)
456   ABI_FREE(temp)
457   if (allocated(atom_dprojs)) then
458     ABI_FREE(atom_dprojs)
459   end if
460   if (nkpg_local>0) then
461     ABI_FREE(kpg)
462   end if
463 
464  end subroutine make_gemm_nonlop