TABLE OF CONTENTS
- ABINIT/m_abi_linalg
- m_abi_linalg/abi_linalg_finalize
- m_abi_linalg/abi_linalg_init
- m_abi_linalg/abi_linalg_work_allocate
- m_abi_linalg/diag_plasma
- m_abi_linalg/jobz_plasma
- m_abi_linalg/linalg_allow_gemm3m
- m_abi_linalg/linalg_allow_plasma
- m_abi_linalg/side_plasma
- m_abi_linalg/trans_plasma
- m_abi_linalg/uplo_plasma
- m_abi_linalg/use_cgemm3m
- m_abi_linalg/use_zgemm3m
ABINIT/m_abi_linalg [ Modules ]
NAME
m_abi_linalg
FUNCTION
management of Linear Algebra wrappers routines with support of different external library (scalapack, elpa, plasma, magma, ... )
COPYRIGHT
Copyright (C) 2012-2024 ABINIT group (LNguyen,FDahm,MT) This file is distributed under the terms of the GNU General Public License, see ~ABINIT/Infos/copyright or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
17 #if defined HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include "abi_common.h" 22 23 module m_abi_linalg 24 25 use defs_basis 26 use m_errors 27 use m_abicore 28 use m_xmpi 29 use m_xomp 30 use m_slk 31 use, intrinsic :: iso_c_binding 32 !#ifdef HAVE_LINALG_ELPA 33 ! use m_elpa 34 !#endif 35 #ifdef HAVE_LINALG_PLASMA 36 use plasma, except_dp => dp, except_sp => sp 37 #endif 38 39 #if defined HAVE_GPU 40 use m_gpu_toolbox 41 #endif 42 43 #if defined HAVE_YAKL 44 use gator_mod, only: gator_allocate, gator_deallocate 45 #endif 46 47 #if defined HAVE_MPI1 48 include 'mpif.h' 49 #endif 50 51 #if defined HAVE_MPI2 52 use mpi 53 #endif 54 55 use m_time, only : timab 56 57 implicit none 58 59 private
m_abi_linalg/abi_linalg_finalize [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
abi_linalg_finalize
FUNCTION
INPUTS
SOURCE
834 subroutine abi_linalg_finalize(gpu_option) 835 836 !Arguments ------------------------------------ 837 integer, intent(in) :: gpu_option 838 !Local variables ------------------------------ 839 #ifdef HAVE_LINALG_PLASMA 840 integer :: info 841 #endif 842 843 !****************************************************************** 844 845 if (.not.abi_linalg_in_use) return 846 847 eigen_s_maxsize = 0 848 eigen_d_maxsize = 0 849 eigen_c_maxsize = 0 850 eigen_z_maxsize = 0 851 eigen_s_lwork = 0 852 eigen_d_lwork = 0 853 eigen_c_lwork = 0 854 eigen_z_lwork = 0 855 eigen_c_lrwork = 0 856 eigen_z_lrwork = 0 857 eigen_liwork = 0 858 859 lapack_full_storage=.False. 860 lapack_packed_storage=.False. 861 lapack_single_precision=.False. 862 lapack_double_precision=.False. 863 lapack_divide_conquer=.false. 864 865 #ifdef HAVE_LINALG_SCALAPACK 866 if (ABI_LINALG_SCALAPACK_ISON) then 867 call slk_processor%free() 868 call xmpi_comm_free(slk_communicator) 869 call xmpi_comm_free(slk_complement_communicator) 870 slk_communicator=xmpi_comm_null 871 slk_complement_communicator=xmpi_comm_null 872 slk_minsize=1 873 end if 874 #endif 875 876 !#ifdef HAVE_LINALG_ELPA 877 ! call elpa_func_uninit() 878 !#endif 879 880 #ifdef HAVE_LINALG_PLASMA 881 call PLASMA_Finalize(info) 882 ABI_LINALG_PLASMA_ISON=.False. 883 end if 884 #endif 885 886 #ifdef HAVE_LINALG_MAGMA 887 #ifdef HAVE_LINALG_MAGMA_15 888 if (ABI_LINALG_MAGMA_ISON) then 889 call magmaf_finalize() 890 end if 891 #endif 892 #endif 893 894 #ifdef HAVE_GPU 895 if (gpu_option/=ABI_GPU_DISABLED) then 896 call abi_gpu_work_finalize() 897 call gpu_linalg_shutdown() 898 end if 899 abi_linalg_gpu_mode = ABI_GPU_DISABLED 900 #else 901 ABI_UNUSED(gpu_option) 902 #endif 903 904 !Memory freeing 905 ABI_SFREE(eigen_s_work) 906 ABI_SFREE(eigen_d_work) 907 ABI_SFREE(eigen_c_work) 908 ABI_SFREE(eigen_z_work) 909 ABI_SFREE(eigen_c_rwork) 910 ABI_SFREE(eigen_z_rwork) 911 ABI_SFREE(eigen_iwork) 912 913 end subroutine abi_linalg_finalize
m_abi_linalg/abi_linalg_init [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
abi_linalg_init
FUNCTION
Initalization of linear algebra environnement
INPUTS
max_eigen_pb_size= max. size of eigenproblem during calculation optdriver= type of calculation (ground-state, response function, GW, ...) wfoptalg= wave functions optimization algorithm (CG, LOBPCG, CHEBFI, ...) paral_kgb= 1 if (k,g,b) parallelism is on gpu_option = GPU implementation to use, i.e. cuda, openMP, ... (0=not using GPU) use_slk= 1 if use of Scalapack is on np_slk= max. number of processes to be used in Scalapack calls comm_scalapack= global communicator to be used in case of Scalapack
SOURCE
491 subroutine abi_linalg_init(max_eigen_pb_size,optdriver,wfoptalg,paral_kgb,& 492 & gpu_option,use_slk,np_slk,comm_scalapack) 493 494 !Arguments ------------------------------------ 495 integer,intent(in) :: max_eigen_pb_size 496 integer,intent(in) :: optdriver,wfoptalg,paral_kgb 497 integer,intent(in) :: comm_scalapack,np_slk 498 integer,intent(in) :: gpu_option,use_slk 499 500 !Local variables ------------------------------ 501 integer :: max_eigen_pb_size_eff=0 502 logical :: need_work_space=.true. 503 #ifdef HAVE_LINALG_SCALAPACK 504 integer :: abi_info1,rank,commsize,commcart,sizecart(2) 505 logical :: reorder,periodic(2),keepdim(2) 506 #endif 507 #ifdef HAVE_LINALG_PLASMA 508 integer :: abi_info2,core_id,rank 509 integer :: num_cores=0,num_cores_node=0 510 integer,allocatable :: affinity(:) 511 #endif 512 513 !****************************************************************** 514 515 !Use only abi_linalg in case of GS calculations 516 abi_linalg_in_use=(optdriver==RUNL_GSTATE.or.optdriver==RUNL_GWLS) 517 518 max_eigen_pb_size_eff=0 519 lapack_single_precision=.false. 520 lapack_double_precision=.false. 521 lapack_full_storage =.false. 522 lapack_packed_storage =.false. 523 lapack_divide_conquer =.false. 524 eigen_s_maxsize=0 ; eigen_d_maxsize=0 525 eigen_c_maxsize=0 ; eigen_z_maxsize=0 526 eigen_s_lwork=0 ; eigen_d_lwork=0 527 eigen_c_lwork=0 ; eigen_z_lwork=0 528 eigen_c_lrwork=0 ; eigen_z_lrwork=0 529 eigen_liwork=0 530 ABI_LINALG_SCALAPACK_ISON=.False. 531 ABI_LINALG_MAGMA_ISON=.False. 532 ABI_LINALG_PLASMA_ISON=.False. 533 slk_communicator=xmpi_comm_null 534 slk_complement_communicator=xmpi_comm_null 535 slk_minsize=1 536 537 !Exit here if we don't use this abi_linalg module 538 if (.not.abi_linalg_in_use) return 539 540 !Set Lapack parameters 541 max_eigen_pb_size_eff=max_eigen_pb_size 542 if (wfoptalg==4.or.wfoptalg==14.or.gpu_option/=ABI_GPU_DISABLED) max_eigen_pb_size_eff=3*max_eigen_pb_size_eff 543 lapack_full_storage=(wfoptalg==4.or.wfoptalg==14.or.gpu_option/=ABI_GPU_DISABLED) 544 lapack_packed_storage=.true. 545 lapack_single_precision=.false. 546 lapack_double_precision=.true. 547 lapack_divide_conquer=.false. 548 549 !Set maximum sizes 550 eigen_s_maxsize = max_eigen_pb_size_eff 551 eigen_d_maxsize = max_eigen_pb_size_eff 552 eigen_c_maxsize = max_eigen_pb_size_eff 553 eigen_z_maxsize = max_eigen_pb_size_eff 554 need_work_space=.true. 555 556 #if defined HAVE_LINALG_SCALAPACK && defined HAVE_MPI 557 if ((paral_kgb==1.or.use_slk==1).and.np_slk>0) then 558 rank=xmpi_comm_rank(comm_scalapack) 559 ! We create slk_communicator using a cartesian grid, and store its complement 560 commsize = MIN(np_slk, xmpi_comm_size(comm_scalapack)) 561 sizecart = (/commsize, xmpi_comm_size(comm_scalapack)/commsize/) 562 periodic = (/.true.,.true./) ; reorder = .false. 563 call MPI_CART_CREATE(comm_scalapack,2,sizecart,periodic,reorder,commcart,abi_info1) 564 keepdim = (/.true., .false./) 565 call MPI_CART_SUB(commcart, keepdim, slk_communicator,abi_info1) 566 keepdim = (/.false., .true./) 567 call MPI_CART_SUB(commcart, keepdim, slk_complement_communicator,abi_info1) 568 call slk_processor%init(slk_communicator) 569 slk_minsize=maxval(slk_processor%grid%dims(1:2)) 570 need_work_space=(use_slk/=1) ! In this case we never use the work arrays 571 ABI_LINALG_SCALAPACK_ISON = .true. 572 end if 573 #else 574 ABI_UNUSED(comm_scalapack) 575 ABI_UNUSED(paral_kgb) 576 ABI_UNUSED(use_slk) 577 ABI_UNUSED(np_slk) 578 #endif 579 580 !#ifdef HAVE_LINALG_ELPA 581 ! call elpa_func_init() 582 !#endif 583 584 #ifdef HAVE_LINALG_PLASMA 585 !Plasma Initialization 586 !Because use of hybrid use of mpi+openmp+plasma, 587 ! we need to set manually the thread bindings policy 588 ! to avoid conflicts between mpi process due to plasma 589 if (XPLASMA_ISON) then 590 num_cores=xomp_get_max_threads() 591 num_cores_node=xomp_get_num_cores_node() 592 rank=xmpi_comm_rank(xmpi_world) 593 if (num_cores_node == 0) then ! This means that OMP is not enabled. 594 num_cores_node = 1 595 ABI_WARNING("You are using PLASMA but OpenMP is not enabled in Abinit!") 596 end if 597 ABI_MALLOC(affinity,(num_cores)) 598 do core_id =1,num_cores 599 affinity(core_id) = MOD(rank*num_cores + (core_id-1), num_cores_node) 600 end do 601 call PLASMA_Init_Affinity(num_cores,affinity(1),abi_info2) 602 ABI_FREE(affinity) 603 ABI_LINALG_PLASMA_ISON = .True. 604 lapack_divide_conquer=.true. 605 end if 606 #endif 607 608 #ifdef HAVE_LINALG_MAGMA 609 #ifdef HAVE_LINALG_MAGMA_15 610 call magmaf_init() 611 ABI_LINALG_MAGMA_ISON = .true. 612 lapack_divide_conquer=.true. 613 #endif 614 #endif 615 616 #ifdef HAVE_GPU 617 !Cublas initialization 618 if (gpu_option/=ABI_GPU_DISABLED) call gpu_linalg_init() 619 abi_linalg_gpu_mode = gpu_option !FIXME Add a check for this 620 #endif 621 622 if (need_work_space) call abi_linalg_work_allocate() 623 624 end subroutine abi_linalg_init
m_abi_linalg/abi_linalg_work_allocate [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
abi_linalg_work_allocate
FUNCTION
INPUTS
SOURCE
637 subroutine abi_linalg_work_allocate() 638 639 !Arguments ------------------------------------ 640 641 !Local variables ------------------------------ 642 #ifdef HAVE_LINALG_MAGMA 643 integer :: nb 644 integer :: magmaf_get_ssytrd_nb 645 integer :: magmaf_get_dsytrd_nb 646 integer :: magmaf_get_chetrd_nb 647 integer :: magmaf_get_zhetrd_nb 648 #endif 649 650 !****************************************************************** 651 652 !Single precision WORK 653 eigen_s_lwork = 0 654 if (eigen_s_maxsize>0) then 655 if (lapack_single_precision) then 656 if (lapack_full_storage) then 657 eigen_s_lwork = max(eigen_s_lwork,3*eigen_s_maxsize-1) ! SSYEV, SSYGV 658 end if 659 if (lapack_packed_storage) then 660 eigen_s_lwork = max(eigen_s_lwork,3*eigen_s_maxsize) ! SSPEV[D], SSPGV[D] 661 end if 662 if (lapack_divide_conquer) then 663 eigen_s_lwork = max(eigen_s_lwork,1+6*eigen_s_maxsize+2*eigen_s_maxsize**2) ! SSYEVD, SSYGVD 664 end if 665 if (ABI_LINALG_MAGMA_ISON) then 666 if (lapack_full_storage.and.lapack_divide_conquer) then 667 #if defined HAVE_LINALG_MAGMA 668 nb=magmaf_get_ssytrd_nb(eigen_s_maxsize) 669 eigen_s_lwork = max(eigen_s_lwork,eigen_s_maxsize*(nb+2)) ! MAGMAF_SSYEVD, MAGMAF_SSYGVD 670 #endif 671 end if 672 end if 673 if (ABI_LINALG_PLASMA_ISON) then 674 if (lapack_full_storage.and.lapack_divide_conquer) then 675 eigen_s_lwork = max(eigen_s_lwork,eigen_s_maxsize**2) ! PLASMA_SSYEV 676 end if 677 end if 678 end if 679 end if 680 ABI_SFREE(eigen_s_work) 681 ABI_MALLOC(eigen_s_work,(eigen_s_lwork)) 682 683 !Double precision WORK 684 eigen_d_lwork = 0 685 if (eigen_d_maxsize>0) then 686 if (lapack_double_precision) then 687 if (lapack_full_storage) then 688 eigen_d_lwork = max(eigen_d_lwork,3*eigen_d_maxsize-1) ! DSYEV, DSYGV 689 end if 690 if (lapack_packed_storage) then 691 eigen_d_lwork = max(eigen_d_lwork,3*eigen_d_maxsize) ! DSPEV[D], DSPGV[D] 692 end if 693 if (lapack_divide_conquer) then 694 eigen_d_lwork = max(eigen_d_lwork,1+6*eigen_d_maxsize+2*eigen_d_maxsize**2) ! DSYEVD, DSYGVD 695 end if 696 if (ABI_LINALG_MAGMA_ISON) then 697 if (lapack_full_storage.and.lapack_divide_conquer) then 698 #if defined HAVE_LINALG_MAGMA 699 nb=magmaf_get_dsytrd_nb(eigen_d_maxsize) 700 eigen_d_lwork = max(eigen_d_lwork,eigen_d_maxsize*(nb+2)) ! MAGMAF_DSYEVD, MAGMAF_DSYGVD 701 #endif 702 end if 703 end if 704 if (ABI_LINALG_PLASMA_ISON) then 705 if (lapack_full_storage.and.lapack_divide_conquer) then 706 eigen_d_lwork = max(eigen_d_lwork,eigen_d_maxsize**2) ! PLASMA_DSYEV 707 end if 708 end if 709 end if 710 end if 711 ABI_SFREE(eigen_d_work) 712 ABI_MALLOC(eigen_d_work,(eigen_d_lwork)) 713 714 !Single complex WORK 715 eigen_c_lwork = 0 716 if (eigen_c_maxsize>0) then 717 if (lapack_single_precision) then 718 if (lapack_full_storage) then 719 eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize-1) ! CHEEV, CHEGV 720 end if 721 if (lapack_packed_storage) then 722 eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize) ! CHPEV[D], CHPGV[D] 723 end if 724 if (lapack_divide_conquer) then 725 eigen_c_lwork = max(eigen_c_lwork,2*eigen_c_maxsize+eigen_c_maxsize**2) ! CHEEVD, CHEGVD 726 end if 727 if (ABI_LINALG_MAGMA_ISON) then 728 if (lapack_full_storage.and.lapack_divide_conquer) then 729 #if defined HAVE_LINALG_MAGMA 730 nb=magmaf_get_chetrd_nb(eigen_c_maxsize) 731 eigen_c_lwork = max(eigen_c_lwork,eigen_c_maxsize*(nb+1)) ! MAGMAF_CHEEVD, MAGMAF_CHEGVD 732 #endif 733 end if 734 end if 735 if (ABI_LINALG_PLASMA_ISON) then 736 if (lapack_full_storage.and.lapack_divide_conquer) then 737 eigen_c_lwork = max(eigen_c_lwork,eigen_c_maxsize**2) ! PLASMA_CHEEV 738 end if 739 end if 740 end if 741 end if 742 ABI_SFREE(eigen_c_work) 743 ABI_MALLOC(eigen_c_work,(eigen_c_lwork)) 744 745 !Double complex WORK 746 eigen_z_lwork = 0 747 if (eigen_z_maxsize>0) then 748 if (lapack_double_precision) then 749 if (lapack_full_storage) then 750 eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize-1) ! ZHEEV, ZHEGV 751 end if 752 if (lapack_packed_storage) then 753 eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize) ! ZHPEV[D], ZHPGV[D] 754 end if 755 if (lapack_divide_conquer) then 756 eigen_z_lwork = max(eigen_z_lwork,2*eigen_z_maxsize+eigen_z_maxsize**2) ! ZHEEVD, ZHEGVD 757 end if 758 if (ABI_LINALG_MAGMA_ISON) then 759 if (lapack_full_storage.and.lapack_divide_conquer) then 760 #if defined HAVE_LINALG_MAGMA 761 nb=magmaf_get_zhetrd_nb(eigen_z_maxsize) 762 eigen_z_lwork = max(eigen_z_lwork,eigen_z_maxsize*(nb+1)) ! MAGMAF_ZHEEVD, MAGMAF_ZHEGVD 763 #endif 764 end if 765 end if 766 if (ABI_LINALG_PLASMA_ISON) then 767 if (lapack_full_storage.and.lapack_divide_conquer) then 768 eigen_z_lwork = max(eigen_z_lwork,eigen_z_maxsize**2) ! PLASMA_ZHEEV 769 end if 770 end if 771 end if 772 end if 773 ABI_SFREE(eigen_z_work) 774 ABI_MALLOC(eigen_z_work,(eigen_z_lwork)) 775 776 !Single precision RWORK 777 eigen_c_lrwork = 0 778 if (eigen_c_maxsize>0) then 779 if (lapack_single_precision) then 780 if (lapack_full_storage.or.lapack_packed_storage) then 781 eigen_c_lrwork = max(eigen_c_lrwork,3*eigen_c_maxsize-2) ! CHEEV, CHEGV, CHPEV, CHPGV 782 end if 783 if (lapack_divide_conquer) then 784 eigen_c_lrwork = max(eigen_c_lrwork,1+5*eigen_c_maxsize+2*eigen_c_maxsize**2) ! CHEEVD, CHEGVD, CHPEVD, CHPGVD 785 end if 786 end if 787 end if 788 ABI_SFREE(eigen_c_rwork) 789 ABI_MALLOC(eigen_c_rwork,(eigen_c_lrwork)) 790 791 !Double precision RWORK 792 eigen_z_lrwork = 0 793 if (eigen_z_maxsize>0) then 794 if (lapack_double_precision) then 795 if (lapack_full_storage.or.lapack_packed_storage) then 796 eigen_z_lrwork = max(eigen_z_lrwork,3*eigen_z_maxsize-2) ! ZHEEV, ZHEGV, ZHPEV, ZHPGV 797 end if 798 if (lapack_divide_conquer) then 799 eigen_z_lrwork = max(eigen_z_lrwork,1+5*eigen_z_maxsize+2*eigen_z_maxsize**2) ! ZHEEVD, ZHEGVD, ZHPEVD, ZHPGVD 800 end if 801 end if 802 end if 803 ABI_SFREE(eigen_z_rwork) 804 ABI_MALLOC(eigen_z_rwork,(eigen_z_lrwork)) 805 806 !Integer IWORK 807 eigen_liwork = 0 808 if (lapack_divide_conquer) then 809 if (lapack_single_precision) then 810 if (eigen_s_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_s_maxsize) 811 if (eigen_c_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_c_maxsize) 812 end if 813 if (lapack_double_precision) then 814 if (eigen_d_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_d_maxsize) 815 if (eigen_z_maxsize>0) eigen_liwork = max(eigen_liwork,3+5*eigen_z_maxsize) 816 end if 817 end if 818 ABI_SFREE(eigen_iwork) 819 ABI_MALLOC(eigen_iwork,(eigen_liwork)) 820 821 end subroutine abi_linalg_work_allocate
m_abi_linalg/diag_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Convert diag character to PLASMA integer
SOURCE
1151 integer function diag_plasma(diag) 1152 1153 !Arguments ------------------------------------ 1154 !scalars 1155 character(len=1),intent(in) :: diag 1156 1157 ! ************************************************************************* 1158 1159 if (LSAME(diag,'U')) then 1160 diag_plasma = PlasmaUnit 1161 else 1162 diag_plasma = PlasmaNonUnit 1163 end if 1164 1165 end function diag_plasma
m_abi_linalg/jobz_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Convert jobz character to PLASMA integer
SOURCE
1178 integer function jobz_plasma(jobz) 1179 1180 !Arguments ------------------------------------ 1181 !scalars 1182 character(len=1),intent(in) :: jobz 1183 1184 ! ************************************************************************* 1185 1186 if (LSAME(jobz,'N')) then 1187 jobz_plasma = PlasmaNoVec 1188 else 1189 jobz_plasma = PlasmaVec 1190 end if 1191 1192 end function jobz_plasma
m_abi_linalg/linalg_allow_gemm3m [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Programmatic interface to enable the use of [Z,C]GEMM3M calls
SOURCE
926 subroutine linalg_allow_gemm3m(bool, write_msg) 927 928 !Arguments ------------------------------------ 929 !scalars 930 logical,intent(in) :: bool, write_msg 931 932 ! ************************************************************************* 933 934 XGEMM3M_ISON = bool 935 if (write_msg) then 936 #ifdef HAVE_LINALG_GEMM3M 937 if (bool) then 938 ABI_COMMENT("Activating ZGEMM3M version instead of ZGEMM") 939 else 940 ABI_COMMENT("Using ZGEMM instead of ZGEMM3M") 941 end if 942 #else 943 if (bool) then 944 ABI_WARNING("Cannot activate ZGEMM3M as HAVE_LINALG_GEMM3M is not defined!") 945 end if 946 #endif 947 endif 948 949 end subroutine linalg_allow_gemm3m
m_abi_linalg/linalg_allow_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Programmatic interface to enable the use of PLASMA False to disable PLASMA version.
SOURCE
1039 subroutine linalg_allow_plasma(bool) 1040 1041 !Arguments ------------------------------------ 1042 !scalars 1043 logical,intent(in) :: bool 1044 1045 ! ************************************************************************* 1046 1047 XPLASMA_ISON = bool 1048 #ifndef HAVE_LINALG_PLASMA 1049 ! Just to be on the safe-side. 1050 ! I have to use a weird set of branches to make abirules happy in the BLAS/LAPACK 1051 ! wrappers, and one cannot set XPLASMA_MODE to .True. if PLASMA is not available. 1052 XPLASMA_ISON = .False. 1053 #endif 1054 1055 end subroutine linalg_allow_plasma
m_abi_linalg/side_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Convert side character to PLASMA integer
SOURCE
1124 integer function side_plasma(side) 1125 1126 !Arguments ------------------------------------ 1127 !scalars 1128 character(len=1),intent(in) :: side 1129 1130 ! ************************************************************************* 1131 1132 if(LSAME(side,'L')) then 1133 side_plasma = PlasmaLeft 1134 else 1135 side_plasma = PlasmaRight 1136 end if 1137 1138 end function side_plasma
m_abi_linalg/trans_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Convert trans character to PLASMA integer
SOURCE
1095 integer function trans_plasma(trans) 1096 1097 !Arguments ------------------------------------ 1098 !scalars 1099 character(len=1),intent(in) :: trans 1100 1101 ! ************************************************************************* 1102 1103 if (LSAME(trans,'C')) then 1104 trans_plasma = PlasmaConjTrans 1105 else if (LSAME(trans,'T')) then 1106 trans_plasma = PlasmaTrans 1107 else 1108 trans_plasma = PlasmaNoTrans 1109 end if 1110 1111 end function trans_plasma
m_abi_linalg/uplo_plasma [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
FUNCTION
Convert uplo character to PLASMA integer
SOURCE
1068 integer function uplo_plasma(uplo) 1069 1070 !Arguments ------------------------------------ 1071 !scalars 1072 character(len=1),intent(in) :: uplo 1073 1074 ! ************************************************************************* 1075 1076 if (LSAME(uplo,'U')) then 1077 uplo_plasma = PlasmaUpper 1078 else 1079 uplo_plasma = PlasmaLower 1080 end if 1081 1082 end function uplo_plasma
m_abi_linalg/use_cgemm3m [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
use_cgemm3m
FUNCTION
Enable the use of CGEMM3M
NOTES
See use_zgemm3m
SOURCE
1011 pure logical function use_cgemm3m(m, n, k) 1012 1013 !Arguments ------------------------------------ 1014 !scalars 1015 integer,intent(in) :: m,n,k 1016 1017 ! ************************************************************************* 1018 1019 use_cgemm3m = .False. 1020 if (XGEMM3M_ISON) use_cgemm3m = ((m * n * k) > CGEMM3M_LIMIT) 1021 #ifndef HAVE_LINALG_GEMM3M 1022 use_cgemm3m = .False. 1023 #endif 1024 1025 end function use_cgemm3m
m_abi_linalg/use_zgemm3m [ Functions ]
[ Top ] [ m_abi_linalg ] [ Functions ]
NAME
use_zgemm3m
FUNCTION
Enable the use of ZGEMM3M
NOTES
The CGEMM3M and ZGEMM3M routines use an algorithm requiring 3 real matrix multiplications and 5 real matrix additions to compute the complex matrix product; CGEMM(3S) and ZGEMM(3S) use 4 real matrix multiplications and 2 real matrix additions. Because the matrix multiplication time is usually the limiting performance factor in these routines, CGEMM3M and ZGEMM3M may run up to 33 percent faster than CGEMM and ZGEMM. Because of other overhead associated with the 3M routines, however, these performance improvements may not always be realized. For example, on one processor the 3M routines will generally run more slowly than the standard complex matrix multiplication routines when m * n * k < FACTOR, where m, n, and k are the input matrix dimensions and FACTOR is approximately 200000 for CGEMM3M and 325000 for ZGEMM3M. from: http://techpubs.sgi.com/library/tpl/cgi-bin/getdoc.cgi?coll=0650&db=man&raw=1&fname=/usr/share/catman/p_man/cat3/SCSL/ZGEMM3M.z
SOURCE
978 pure logical function use_zgemm3m(m, n, k) 979 980 !Arguments ------------------------------------ 981 !scalars 982 integer,intent(in) :: m,n,k 983 984 ! ************************************************************************* 985 986 use_zgemm3m = .False. 987 if (XGEMM3M_ISON) use_zgemm3m = ((m * n * k) > ZGEMM3M_LIMIT) 988 !if (XGEMM3M_ISON) use_zgemm3m = .True. 989 990 #ifndef HAVE_LINALG_GEMM3M 991 use_zgemm3m = .False. 992 #endif 993 994 end function use_zgemm3m