TABLE OF CONTENTS


ABINIT/m_abi_linalg [ Modules ]

[ Top ] [ Modules ]

NAME

  m_abi_linalg

FUNCTION

  management of Linear Algebra wrappers routines
  with support of different external library (scalapack, elpa, plasma, magma, ... )

COPYRIGHT

  Copyright (C) 2012-2024 ABINIT group (LNguyen,FDahm,MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~ABINIT/Infos/copyright
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

17 #if defined HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20 
21 #include "abi_common.h"
22 
23 module m_abi_linalg
24 
25  use defs_basis
26  use m_errors
27  use m_abicore
28  use m_xmpi
29  use m_xomp
30  use m_slk
31  use, intrinsic :: iso_c_binding
32 !#ifdef HAVE_LINALG_ELPA
33 ! use m_elpa
34 !#endif
35 #ifdef HAVE_LINALG_PLASMA
36  use plasma, except_dp => dp, except_sp => sp
37 #endif
38 
39 #if defined HAVE_GPU
40  use m_gpu_toolbox
41 #endif
42 
43 #if defined HAVE_YAKL
44  use gator_mod, only: gator_allocate, gator_deallocate
45 #endif
46 
47 #if defined HAVE_MPI1
48  include 'mpif.h'
49 #endif
50 
51 #if defined HAVE_MPI2
52  use mpi
53 #endif
54 
55  use m_time,  only : timab
56 
57  implicit none
58 
59  private

m_abi_linalg/abi_linalg_finalize [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

 abi_linalg_finalize

FUNCTION

INPUTS

SOURCE

834  subroutine abi_linalg_finalize(gpu_option)
835 
836 !Arguments ------------------------------------
837  integer, intent(in) :: gpu_option
838 !Local variables ------------------------------
839 #ifdef HAVE_LINALG_PLASMA
840  integer :: info
841 #endif
842 
843 !******************************************************************
844 
845  if (.not.abi_linalg_in_use) return
846 
847  eigen_s_maxsize = 0
848  eigen_d_maxsize = 0
849  eigen_c_maxsize = 0
850  eigen_z_maxsize = 0
851  eigen_s_lwork   = 0
852  eigen_d_lwork   = 0
853  eigen_c_lwork   = 0
854  eigen_z_lwork   = 0
855  eigen_c_lrwork  = 0
856  eigen_z_lrwork  = 0
857  eigen_liwork    = 0
858 
859  lapack_full_storage=.False.
860  lapack_packed_storage=.False.
861  lapack_single_precision=.False.
862  lapack_double_precision=.False.
863  lapack_divide_conquer=.false.
864 
865 #ifdef HAVE_LINALG_SCALAPACK
866  if (ABI_LINALG_SCALAPACK_ISON) then
867    call slk_processor%free()
868    call xmpi_comm_free(slk_communicator)
869    call xmpi_comm_free(slk_complement_communicator)
870    slk_communicator=xmpi_comm_null
871    slk_complement_communicator=xmpi_comm_null
872    slk_minsize=1
873  end if
874 #endif
875 
876 !#ifdef HAVE_LINALG_ELPA
877 ! call elpa_func_uninit()
878 !#endif
879 
880 #ifdef HAVE_LINALG_PLASMA
881    call PLASMA_Finalize(info)
882    ABI_LINALG_PLASMA_ISON=.False.
883  end if
884 #endif
885 
886 #ifdef HAVE_LINALG_MAGMA
887 #ifdef HAVE_LINALG_MAGMA_15
888  if (ABI_LINALG_MAGMA_ISON) then
889    call magmaf_finalize()
890  end if
891 #endif
892 #endif
893 
894 #ifdef HAVE_GPU
895  if (gpu_option/=ABI_GPU_DISABLED) then
896    call abi_gpu_work_finalize()
897    call gpu_linalg_shutdown()
898  end if
899  abi_linalg_gpu_mode = ABI_GPU_DISABLED
900 #else
901  ABI_UNUSED(gpu_option)
902 #endif
903 
904 !Memory freeing
905  ABI_SFREE(eigen_s_work)
906  ABI_SFREE(eigen_d_work)
907  ABI_SFREE(eigen_c_work)
908  ABI_SFREE(eigen_z_work)
909  ABI_SFREE(eigen_c_rwork)
910  ABI_SFREE(eigen_z_rwork)
911  ABI_SFREE(eigen_iwork)
912 
913  end subroutine abi_linalg_finalize

m_abi_linalg/abi_linalg_init [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

 abi_linalg_init

FUNCTION

 Initalization of linear algebra environnement

INPUTS

 max_eigen_pb_size= max. size of eigenproblem during calculation
 optdriver= type of calculation (ground-state, response function, GW, ...)
 wfoptalg= wave functions optimization algorithm (CG, LOBPCG, CHEBFI, ...)
 paral_kgb= 1 if (k,g,b) parallelism is on
 gpu_option = GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU)
 use_slk= 1 if use of Scalapack is on
 np_slk= max. number of processes to be used in Scalapack calls
 comm_scalapack= global communicator to be used in case of Scalapack

SOURCE

491  subroutine abi_linalg_init(max_eigen_pb_size,optdriver,wfoptalg,paral_kgb,&
492 &                           gpu_option,use_slk,np_slk,comm_scalapack)
493 
494 !Arguments ------------------------------------
495  integer,intent(in) :: max_eigen_pb_size
496  integer,intent(in) :: optdriver,wfoptalg,paral_kgb
497  integer,intent(in) :: comm_scalapack,np_slk
498  integer,intent(in) :: gpu_option,use_slk
499 
500 !Local variables ------------------------------
501  integer :: max_eigen_pb_size_eff=0
502  logical :: need_work_space=.true.
503 #ifdef HAVE_LINALG_SCALAPACK
504  integer :: abi_info1,rank,commsize,commcart,sizecart(2)
505  logical :: reorder,periodic(2),keepdim(2)
506 #endif
507 #ifdef HAVE_LINALG_PLASMA
508  integer :: abi_info2,core_id,rank
509  integer :: num_cores=0,num_cores_node=0
510  integer,allocatable :: affinity(:)
511 #endif
512 
513 !******************************************************************
514 
515 !Use only abi_linalg in case of GS calculations
516  abi_linalg_in_use=(optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS)
517 
518  max_eigen_pb_size_eff=0
519  lapack_single_precision=.false.
520  lapack_double_precision=.false.
521  lapack_full_storage    =.false.
522  lapack_packed_storage  =.false.
523  lapack_divide_conquer  =.false.
524  eigen_s_maxsize=0 ; eigen_d_maxsize=0
525  eigen_c_maxsize=0 ; eigen_z_maxsize=0
526  eigen_s_lwork=0   ; eigen_d_lwork=0
527  eigen_c_lwork=0   ; eigen_z_lwork=0
528  eigen_c_lrwork=0  ; eigen_z_lrwork=0
529  eigen_liwork=0
530  ABI_LINALG_SCALAPACK_ISON=.False.
531  ABI_LINALG_MAGMA_ISON=.False.
532  ABI_LINALG_PLASMA_ISON=.False.
533  slk_communicator=xmpi_comm_null
534  slk_complement_communicator=xmpi_comm_null
535  slk_minsize=1
536 
537 !Exit here if we don't use this abi_linalg module
538  if (.not.abi_linalg_in_use) return
539 
540 !Set Lapack parameters
541  max_eigen_pb_size_eff=max_eigen_pb_size
542  if (wfoptalg==4.or.wfoptalg==14.or.gpu_option/=ABI_GPU_DISABLED) max_eigen_pb_size_eff=3*max_eigen_pb_size_eff
543  lapack_full_storage=(wfoptalg==4.or.wfoptalg==14.or.gpu_option/=ABI_GPU_DISABLED)
544  lapack_packed_storage=.true.
545  lapack_single_precision=.false.
546  lapack_double_precision=.true.
547  lapack_divide_conquer=.false.
548 
549 !Set maximum sizes
550  eigen_s_maxsize = max_eigen_pb_size_eff
551  eigen_d_maxsize = max_eigen_pb_size_eff
552  eigen_c_maxsize = max_eigen_pb_size_eff
553  eigen_z_maxsize = max_eigen_pb_size_eff
554  need_work_space=.true.
555 
556 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI
557  if ((paral_kgb==1.or.use_slk==1).and.np_slk>0) then
558    rank=xmpi_comm_rank(comm_scalapack)
559    ! We create slk_communicator using a cartesian grid, and store its complement
560    commsize = MIN(np_slk, xmpi_comm_size(comm_scalapack))
561    sizecart = (/commsize, xmpi_comm_size(comm_scalapack)/commsize/)
562    periodic = (/.true.,.true./) ; reorder = .false.
563    call MPI_CART_CREATE(comm_scalapack,2,sizecart,periodic,reorder,commcart,abi_info1)
564    keepdim = (/.true., .false./)
565    call MPI_CART_SUB(commcart, keepdim, slk_communicator,abi_info1)
566    keepdim = (/.false., .true./)
567    call MPI_CART_SUB(commcart, keepdim, slk_complement_communicator,abi_info1)
568    call slk_processor%init(slk_communicator)
569    slk_minsize=maxval(slk_processor%grid%dims(1:2))
570    need_work_space=(use_slk/=1) ! In this case we never use the work arrays
571    ABI_LINALG_SCALAPACK_ISON = .true.
572  end if
573 #else
574  ABI_UNUSED(comm_scalapack)
575  ABI_UNUSED(paral_kgb)
576  ABI_UNUSED(use_slk)
577  ABI_UNUSED(np_slk)
578 #endif
579 
580 !#ifdef HAVE_LINALG_ELPA
581 ! call elpa_func_init()
582 !#endif
583 
584 #ifdef HAVE_LINALG_PLASMA
585 !Plasma Initialization
586 !Because use of hybrid use of mpi+openmp+plasma,
587 !  we need to set manually the thread bindings policy
588 !  to avoid conflicts between mpi process due to plasma
589  if (XPLASMA_ISON) then
590    num_cores=xomp_get_max_threads()
591    num_cores_node=xomp_get_num_cores_node()
592    rank=xmpi_comm_rank(xmpi_world)
593    if (num_cores_node == 0) then ! This means that OMP is not enabled.
594      num_cores_node = 1
595      ABI_WARNING("You are using PLASMA but OpenMP is not enabled in Abinit!")
596    end if
597    ABI_MALLOC(affinity,(num_cores))
598    do core_id =1,num_cores
599      affinity(core_id) = MOD(rank*num_cores + (core_id-1), num_cores_node)
600    end do
601    call PLASMA_Init_Affinity(num_cores,affinity(1),abi_info2)
602    ABI_FREE(affinity)
603    ABI_LINALG_PLASMA_ISON = .True.
604    lapack_divide_conquer=.true.
605  end if
606 #endif
607 
608 #ifdef HAVE_LINALG_MAGMA
609 #ifdef HAVE_LINALG_MAGMA_15
610  call magmaf_init()
611  ABI_LINALG_MAGMA_ISON = .true.
612  lapack_divide_conquer=.true.
613 #endif
614 #endif
615 
616 #ifdef HAVE_GPU
617 !Cublas initialization
618  if (gpu_option/=ABI_GPU_DISABLED) call gpu_linalg_init()
619  abi_linalg_gpu_mode = gpu_option !FIXME Add a check for this
620 #endif
621 
622  if (need_work_space) call abi_linalg_work_allocate()
623 
624  end subroutine abi_linalg_init

m_abi_linalg/abi_linalg_work_allocate [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

 abi_linalg_work_allocate

FUNCTION

INPUTS

SOURCE

637  subroutine abi_linalg_work_allocate()
638 
639 !Arguments ------------------------------------
640 
641 !Local variables ------------------------------
642 #ifdef HAVE_LINALG_MAGMA
643  integer :: nb
644  integer :: magmaf_get_ssytrd_nb
645  integer :: magmaf_get_dsytrd_nb
646  integer :: magmaf_get_chetrd_nb
647  integer :: magmaf_get_zhetrd_nb
648 #endif
649 
650 !******************************************************************
651 
652 !Single precision WORK
653  eigen_s_lwork = 0
654  if (eigen_s_maxsize>0) then
655    if (lapack_single_precision) then
656      if (lapack_full_storage) then
657        eigen_s_lwork = max(eigen_s_lwork,3*eigen_s_maxsize-1) ! SSYEV, SSYGV
658      end if
659      if (lapack_packed_storage) then
660        eigen_s_lwork = max(eigen_s_lwork,3*eigen_s_maxsize) ! SSPEV[D], SSPGV[D]
661      end if
662      if (lapack_divide_conquer) then
663        eigen_s_lwork = max(eigen_s_lwork,1+6*eigen_s_maxsize+2*eigen_s_maxsize**2) ! SSYEVD, SSYGVD
664      end if
665      if (ABI_LINALG_MAGMA_ISON) then
666        if (lapack_full_storage.and.lapack_divide_conquer) then
667 #if defined HAVE_LINALG_MAGMA
668          nb=magmaf_get_ssytrd_nb(eigen_s_maxsize)
669          eigen_s_lwork = max(eigen_s_lwork,eigen_s_maxsize*(nb+2)) ! MAGMAF_SSYEVD, MAGMAF_SSYGVD
670 #endif
671        end if
672      end if
673      if (ABI_LINALG_PLASMA_ISON) then
674        if (lapack_full_storage.and.lapack_divide_conquer) then
675          eigen_s_lwork = max(eigen_s_lwork,eigen_s_maxsize**2) ! PLASMA_SSYEV
676        end if
677      end if
678    end if
679  end if
680  ABI_SFREE(eigen_s_work)
681  ABI_MALLOC(eigen_s_work,(eigen_s_lwork))
682 
683 !Double precision WORK
684  eigen_d_lwork = 0
685  if (eigen_d_maxsize>0) then
686    if (lapack_double_precision) then
687      if (lapack_full_storage) then
688        eigen_d_lwork = max(eigen_d_lwork,3*eigen_d_maxsize-1) ! DSYEV, DSYGV
689      end if
690      if (lapack_packed_storage) then
691        eigen_d_lwork = max(eigen_d_lwork,3*eigen_d_maxsize) ! DSPEV[D], DSPGV[D]
692      end if
693      if (lapack_divide_conquer) then
694        eigen_d_lwork = max(eigen_d_lwork,1+6*eigen_d_maxsize+2*eigen_d_maxsize**2) ! DSYEVD, DSYGVD
695      end if
696      if (ABI_LINALG_MAGMA_ISON) then
697        if (lapack_full_storage.and.lapack_divide_conquer) then
698 #if defined HAVE_LINALG_MAGMA
699          nb=magmaf_get_dsytrd_nb(eigen_d_maxsize)
700          eigen_d_lwork = max(eigen_d_lwork,eigen_d_maxsize*(nb+2)) ! MAGMAF_DSYEVD, MAGMAF_DSYGVD
701 #endif
702        end if
703      end if
704      if (ABI_LINALG_PLASMA_ISON) then
705        if (lapack_full_storage.and.lapack_divide_conquer) then
706          eigen_d_lwork = max(eigen_d_lwork,eigen_d_maxsize**2) ! PLASMA_DSYEV
707        end if
708      end if
709    end if
710  end if
711  ABI_SFREE(eigen_d_work)
712  ABI_MALLOC(eigen_d_work,(eigen_d_lwork))
713 
714 !Single complex WORK
715  eigen_c_lwork = 0
716  if (eigen_c_maxsize>0) then
717    if (lapack_single_precision) then
718      if (lapack_full_storage) then
719        eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize-1) ! CHEEV, CHEGV
720      end if
721      if (lapack_packed_storage) then
722        eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize) ! CHPEV[D], CHPGV[D]
723      end if
724      if (lapack_divide_conquer) then
725        eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize+eigen_c_maxsize**2) ! CHEEVD, CHEGVD
726      end if
727      if (ABI_LINALG_MAGMA_ISON) then
728        if (lapack_full_storage.and.lapack_divide_conquer) then
729 #if defined HAVE_LINALG_MAGMA
730          nb=magmaf_get_chetrd_nb(eigen_c_maxsize)
731          eigen_c_lwork = max(eigen_c_lwork,eigen_c_maxsize*(nb+1)) ! MAGMAF_CHEEVD, MAGMAF_CHEGVD
732 #endif
733        end if
734      end if
735      if (ABI_LINALG_PLASMA_ISON) then
736        if (lapack_full_storage.and.lapack_divide_conquer) then
737          eigen_c_lwork = max(eigen_c_lwork,eigen_c_maxsize**2) ! PLASMA_CHEEV
738        end if
739      end if
740    end if
741  end if
742  ABI_SFREE(eigen_c_work)
743  ABI_MALLOC(eigen_c_work,(eigen_c_lwork))
744 
745 !Double complex WORK
746  eigen_z_lwork = 0
747  if (eigen_z_maxsize>0) then
748    if (lapack_double_precision) then
749      if (lapack_full_storage) then
750        eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize-1) ! ZHEEV, ZHEGV
751      end if
752      if (lapack_packed_storage) then
753        eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize) ! ZHPEV[D], ZHPGV[D]
754      end if
755      if (lapack_divide_conquer) then
756        eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize+eigen_z_maxsize**2) ! ZHEEVD, ZHEGVD
757      end if
758      if (ABI_LINALG_MAGMA_ISON) then
759        if (lapack_full_storage.and.lapack_divide_conquer) then
760 #if defined HAVE_LINALG_MAGMA
761          nb=magmaf_get_zhetrd_nb(eigen_z_maxsize)
762          eigen_z_lwork = max(eigen_z_lwork,eigen_z_maxsize*(nb+1)) ! MAGMAF_ZHEEVD, MAGMAF_ZHEGVD
763 #endif
764        end if
765      end if
766      if (ABI_LINALG_PLASMA_ISON) then
767        if (lapack_full_storage.and.lapack_divide_conquer) then
768          eigen_z_lwork = max(eigen_z_lwork,eigen_z_maxsize**2) ! PLASMA_ZHEEV
769        end if
770      end if
771    end if
772  end if
773  ABI_SFREE(eigen_z_work)
774  ABI_MALLOC(eigen_z_work,(eigen_z_lwork))
775 
776 !Single precision RWORK
777  eigen_c_lrwork = 0
778  if (eigen_c_maxsize>0) then
779    if (lapack_single_precision) then
780      if (lapack_full_storage.or.lapack_packed_storage) then
781        eigen_c_lrwork = max(eigen_c_lrwork,3*eigen_c_maxsize-2) ! CHEEV, CHEGV, CHPEV, CHPGV
782      end if
783      if (lapack_divide_conquer) then
784        eigen_c_lrwork = max(eigen_c_lrwork,1+5*eigen_c_maxsize+2*eigen_c_maxsize**2) ! CHEEVD, CHEGVD, CHPEVD, CHPGVD
785      end if
786    end if
787  end if
788  ABI_SFREE(eigen_c_rwork)
789  ABI_MALLOC(eigen_c_rwork,(eigen_c_lrwork))
790 
791 !Double precision RWORK
792  eigen_z_lrwork = 0
793  if (eigen_z_maxsize>0) then
794    if (lapack_double_precision) then
795      if (lapack_full_storage.or.lapack_packed_storage) then
796        eigen_z_lrwork = max(eigen_z_lrwork,3*eigen_z_maxsize-2) ! ZHEEV, ZHEGV, ZHPEV, ZHPGV
797      end if
798      if (lapack_divide_conquer) then
799        eigen_z_lrwork = max(eigen_z_lrwork,1+5*eigen_z_maxsize+2*eigen_z_maxsize**2) ! ZHEEVD, ZHEGVD, ZHPEVD, ZHPGVD
800      end if
801    end if
802  end if
803  ABI_SFREE(eigen_z_rwork)
804  ABI_MALLOC(eigen_z_rwork,(eigen_z_lrwork))
805 
806 !Integer IWORK
807  eigen_liwork = 0
808  if (lapack_divide_conquer) then
809    if (lapack_single_precision) then
810      if (eigen_s_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_s_maxsize)
811      if (eigen_c_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_c_maxsize)
812    end if
813    if (lapack_double_precision) then
814      if (eigen_d_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_d_maxsize)
815      if (eigen_z_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_z_maxsize)
816    end if
817  end if
818  ABI_SFREE(eigen_iwork)
819  ABI_MALLOC(eigen_iwork,(eigen_liwork))
820 
821  end subroutine abi_linalg_work_allocate

m_abi_linalg/diag_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Convert diag character to PLASMA integer

SOURCE

1151 integer function diag_plasma(diag)
1152 
1153 !Arguments ------------------------------------
1154 !scalars
1155  character(len=1),intent(in) :: diag
1156 
1157 ! *************************************************************************
1158 
1159  if (LSAME(diag,'U')) then
1160    diag_plasma = PlasmaUnit
1161  else
1162    diag_plasma = PlasmaNonUnit
1163  end if
1164 
1165 end function diag_plasma

m_abi_linalg/jobz_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Convert jobz character to PLASMA integer

SOURCE

1178 integer function jobz_plasma(jobz)
1179 
1180 !Arguments ------------------------------------
1181 !scalars
1182  character(len=1),intent(in) :: jobz
1183 
1184 ! *************************************************************************
1185 
1186  if (LSAME(jobz,'N')) then
1187    jobz_plasma = PlasmaNoVec
1188  else
1189    jobz_plasma = PlasmaVec
1190  end if
1191 
1192 end function jobz_plasma

m_abi_linalg/linalg_allow_gemm3m [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Programmatic interface to enable the use of [Z,C]GEMM3M calls

SOURCE

926 subroutine linalg_allow_gemm3m(bool, write_msg)
927 
928 !Arguments ------------------------------------
929 !scalars
930  logical,intent(in) :: bool, write_msg
931 
932 ! *************************************************************************
933 
934  XGEMM3M_ISON = bool
935  if (write_msg) then
936 #ifdef HAVE_LINALG_GEMM3M
937    if (bool) then
938      ABI_COMMENT("Activating ZGEMM3M version instead of ZGEMM")
939    else
940      ABI_COMMENT("Using ZGEMM instead of ZGEMM3M")
941    end if
942 #else
943    if (bool) then
944      ABI_WARNING("Cannot activate ZGEMM3M as HAVE_LINALG_GEMM3M is not defined!")
945    end if
946 #endif
947  endif
948 
949 end subroutine linalg_allow_gemm3m

m_abi_linalg/linalg_allow_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Programmatic interface to enable the use of PLASMA
  False to disable PLASMA version.

SOURCE

1039 subroutine linalg_allow_plasma(bool)
1040 
1041 !Arguments ------------------------------------
1042 !scalars
1043  logical,intent(in) :: bool
1044 
1045 ! *************************************************************************
1046 
1047  XPLASMA_ISON = bool
1048 #ifndef HAVE_LINALG_PLASMA
1049  ! Just to be on the safe-side.
1050  ! I have to use a weird set of branches to make abirules happy in the BLAS/LAPACK
1051  ! wrappers, and one cannot set XPLASMA_MODE to .True. if PLASMA is not available.
1052  XPLASMA_ISON = .False.
1053 #endif
1054 
1055 end subroutine linalg_allow_plasma

m_abi_linalg/side_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Convert side character to PLASMA integer

SOURCE

1124 integer function side_plasma(side)
1125 
1126 !Arguments ------------------------------------
1127 !scalars
1128  character(len=1),intent(in) :: side
1129 
1130 ! *************************************************************************
1131 
1132  if(LSAME(side,'L')) then
1133     side_plasma = PlasmaLeft
1134  else
1135     side_plasma = PlasmaRight
1136  end if
1137 
1138 end function side_plasma

m_abi_linalg/trans_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Convert trans character to PLASMA integer

SOURCE

1095 integer function trans_plasma(trans)
1096 
1097 !Arguments ------------------------------------
1098 !scalars
1099  character(len=1),intent(in) :: trans
1100 
1101 ! *************************************************************************
1102 
1103  if (LSAME(trans,'C')) then
1104    trans_plasma = PlasmaConjTrans
1105  else if (LSAME(trans,'T')) then
1106    trans_plasma = PlasmaTrans
1107  else
1108    trans_plasma = PlasmaNoTrans
1109  end if
1110 
1111 end function trans_plasma

m_abi_linalg/uplo_plasma [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

FUNCTION

  Convert uplo character to PLASMA integer

SOURCE

1068 integer function uplo_plasma(uplo)
1069 
1070 !Arguments ------------------------------------
1071 !scalars
1072  character(len=1),intent(in) :: uplo
1073 
1074 ! *************************************************************************
1075 
1076  if (LSAME(uplo,'U')) then
1077     uplo_plasma = PlasmaUpper
1078  else
1079     uplo_plasma = PlasmaLower
1080  end if
1081 
1082 end function uplo_plasma

m_abi_linalg/use_cgemm3m [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

  use_cgemm3m

FUNCTION

  Enable the use of CGEMM3M

NOTES

  See use_zgemm3m

SOURCE

1011 pure logical function use_cgemm3m(m, n, k)
1012 
1013 !Arguments ------------------------------------
1014 !scalars
1015  integer,intent(in) :: m,n,k
1016 
1017 ! *************************************************************************
1018 
1019  use_cgemm3m = .False.
1020  if (XGEMM3M_ISON) use_cgemm3m = ((m * n * k) > CGEMM3M_LIMIT)
1021 #ifndef HAVE_LINALG_GEMM3M
1022  use_cgemm3m = .False.
1023 #endif
1024 
1025 end function use_cgemm3m

m_abi_linalg/use_zgemm3m [ Functions ]

[ Top ] [ m_abi_linalg ] [ Functions ]

NAME

  use_zgemm3m

FUNCTION

  Enable the use of ZGEMM3M

NOTES

  The CGEMM3M and ZGEMM3M routines use an algorithm requiring 3 real matrix
  multiplications and 5 real matrix additions to compute the complex matrix
  product; CGEMM(3S) and ZGEMM(3S) use 4 real matrix multiplications and 2
  real matrix additions. Because the matrix multiplication time is usually
  the limiting performance factor in these routines, CGEMM3M and ZGEMM3M
  may run up to 33 percent faster than CGEMM and ZGEMM.  Because of other
  overhead associated with the 3M routines, however, these performance
  improvements may not always be realized.  For example, on one processor
  the 3M routines will generally run more slowly than the standard complex
  matrix multiplication routines when m * n * k < FACTOR, where m, n, and k
  are the input matrix dimensions and FACTOR is approximately 200000 for
  CGEMM3M and 325000 for ZGEMM3M.
  from: http://techpubs.sgi.com/library/tpl/cgi-bin/getdoc.cgi?coll=0650&db=man&raw=1&fname=/usr/share/catman/p_man/cat3/SCSL/ZGEMM3M.z

SOURCE

978 pure logical function use_zgemm3m(m, n, k)
979 
980 !Arguments ------------------------------------
981 !scalars
982  integer,intent(in) :: m,n,k
983 
984 ! *************************************************************************
985 
986  use_zgemm3m = .False.
987  if (XGEMM3M_ISON) use_zgemm3m = ((m * n * k) > ZGEMM3M_LIMIT)
988  !if (XGEMM3M_ISON) use_zgemm3m = .True.
989 
990 #ifndef HAVE_LINALG_GEMM3M
991  use_zgemm3m = .False.
992 #endif
993 
994 end function use_zgemm3m