TABLE OF CONTENTS


ABINIT/m_gpu_toolbox [ Modules ]

[ Top ] [ Modules ]

NAME

  m_gpu_toolbox

FUNCTION

  Fake module to dupe the build system and allow it to include CUDA files
   in the chain of dependencies.

COPYRIGHT

  Copyright (C) 2000-2024 ABINIT group (MT)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

 17 #if defined HAVE_CONFIG_H
 18 #include "config.h"
 19 #endif
 20 
 21 #include "abi_common.h"
 22 
 23 module m_gpu_toolbox
 24 
 25   use m_initcuda
 26   use m_gpu_detect
 27 
 28 ! MG: I had to comment out this import to avoid the following error on buda2_gnu_8.5_cuda
 29 !
 30 !    type(c_ptr),intent(inout) :: blockvectorbx_gpu, blockvectorx_gpu,sqgram_gpu
 31 !            1
 32 !    Error: Type name 'c_ptr' at (1) is ambiguous
 33 !    abi_gpu_linalg.f90:374:47:
 34 !
 35 ! I believe this is due to a misconfiguration issue in the Fortran compilers used by the bot.
 36 ! NOTE(review): the import below is active (restricted by an only-list), so this remark may be outdated.
 37 #ifdef HAVE_FC_ISO_C_BINDING
 38  use, intrinsic :: iso_c_binding, only : C_INT32_T,C_SIZE_T
 39 #endif
 40 
 41   implicit none
 42 
 43   !Interfaces for C bindings --- To be completed
 44 #ifdef HAVE_FC_ISO_C_BINDING
 45 #if defined HAVE_GPU
 46 
 47   ! Mirrors the CUDA enum cudaMemoryAdvise, usually defined in
 48   ! /usr/local/cuda/targets/x86_64-linux/include/driver_types.h
 49   !
 50   ! To be used as the 3rd argument (advice) of gpu_memory_advise_f.
 51   !
 52   ! I didn't find a clean way of using an existing enum defined in C
 53   ! without redefining it in Fortran;
 54   ! I didn't find a way to do it through iso_c_binding either, strange...
 55   ! It means this enum will have to be updated by hand if the C-defined enum
 56   ! ever changes.
 57   enum, bind(c)
 58     ! Data will mostly be read and only occasionally be written to
 59     enumerator :: CUDA_MEM_ADVISE_SET_READ_MOSTLY          = 1
 60 
 61     ! Undo the effect of ::cudaMemAdviseSetReadMostly
 62     enumerator :: CUDA_MEM_ADVISE_UNSET_READ_MOSTLY        = 2
 63 
 64     ! Set the preferred location for the data as the specified device
 65     enumerator :: CUDA_MEM_ADVISE_SET_PREFERRED_LOCATION   = 3
 66 
 67     ! Clear the preferred location for the data
 68     enumerator :: CUDA_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4
 69 
 70     ! Data will be accessed by the specified device, so prevent page faults as much as possible
 71     enumerator :: CUDA_MEM_ADVISE_SET_ACCESSED_BY          = 5
 72 
 73     ! Let the Unified Memory subsystem decide on the page faulting policy for the specified device
 74     enumerator :: CUDA_MEM_ADVISE_UNSET_ACCESSED_BY        = 6
 75   end enum
 76 
 77   ! CUFFT/hipFFT transform types
 78   ! Replicates the cuFFT_type enum, matched (obviously) by hipFFT_type.
 79   ! We could use the enum from the official CUDA Fortran interface, but it is only
 80   ! accessible with the NVHPC compiler.
 81   ! Z2Z is the transform type mostly used, so an assert on its value will check
 82   ! for enum changes, if any.
 83   enum, bind(C)
 84     enumerator :: FFT_R2C = 42   !  z'2a'     ! Real to Complex (interleaved)
 85     enumerator :: FFT_C2R = 44   !  z'2c'     ! Complex (interleaved) to Real
 86     enumerator :: FFT_C2C = 41   !  z'29'     ! Complex to Complex, interleaved
 87     enumerator :: FFT_D2Z = 106  !  z'6a'     ! Double to Double-Complex
 88     enumerator :: FFT_Z2D = 108  !  z'6c'     ! Double-Complex to Double
 89     enumerator :: FFT_Z2Z = 105  !  z'69'     ! Double-Complex to Double-Complex
 90   end enum
 91 
 92   ! CUFFT/hipFFT transform direction enum
 93   ! In hipFFT, the name "BACKWARD" is used instead of cuFFT's "INVERSE".
 94   enum, bind(C)
 95     enumerator :: FFT_INVERSE =  1   ! inverse transform ("BACKWARD" in hipFFT naming)
 96     enumerator :: FFT_FORWARD = -1   ! forward transform
 97   end enum
 98 
 99   interface
100 
101     !  integer(C_INT) function cuda_func() bind(C)
102     !    use iso_c_binding, only : C_INT,C_PTR
103     !    type(C_PTR) :: ptr
104     !  end function cuda_func
105 
 106     subroutine gpu_device_synchronize() bind(c, name='gpu_device_synchronize_cpp')
 107       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_device_synchronize_cpp
 108       implicit none                     ! no arguments; presumably waits for pending device work - confirm in the C++ source
 109     end subroutine gpu_device_synchronize
110 
 111     subroutine gpu_get_device(deviceId) bind(c, name='gpu_get_device_cpp')
 112       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_get_device_cpp
 113       implicit none
 114       integer(kind=C_INT32_T), intent(inout) :: deviceId   ! passed by reference (no VALUE); presumably receives the current device id - confirm
 115     end subroutine gpu_get_device
116 
 117     subroutine gpu_get_free_mem(free_mem) bind(c, name='gpu_get_free_mem_cpp')
 118       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_get_free_mem_cpp
 119       implicit none
 120       integer(kind=C_SIZE_T), intent(inout) :: free_mem   ! size_t passed by reference (no VALUE); presumably receives the free device memory - unit to be confirmed
 121     end subroutine gpu_get_free_mem
122 
 123     subroutine gpu_data_prefetch_async_f(dev_ptr, count, deviceId) bind(c, name='gpu_data_prefetch_async_cpp')
 124       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_data_prefetch_async_cpp; all arguments are passed by value
 125       implicit none
 126       type(c_ptr),             value :: dev_ptr    ! C pointer to the memory region concerned
 127       integer(kind=C_SIZE_T),  value :: count      ! size of the region (size_t); presumably in bytes - confirm
 128       integer(kind=C_INT32_T), value :: deviceId   ! target device id; presumably CPU_DEVICE_ID (-1) selects the host - confirm
 129     end subroutine gpu_data_prefetch_async_f
130 
 131     subroutine gpu_memory_advise_f(dev_ptr, count, advice, deviceId) bind(c, name='gpu_memory_advise_cpp')
 132       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_memory_advise_cpp; all arguments are passed by value
 133       implicit none
 134       type(c_ptr),                       value :: dev_ptr    ! C pointer to the memory region concerned
 135       integer(kind=C_SIZE_T),            value :: count      ! size of the region (size_t); presumably in bytes - confirm
 136       integer(kind=C_INT),               value :: advice     ! one of the CUDA_MEM_ADVISE_* enumerators of this module
 137       integer(kind=C_INT32_T),           value :: deviceId   ! device id the advice applies to
 138     end subroutine gpu_memory_advise_f
139 
140     !!! FFT related routines
 141     subroutine gpu_fft_plan_destroy() bind(c, name='gpu_fft_plan_destroy_cpp')
 142       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_fft_plan_destroy_cpp
 143       implicit none                     ! no arguments: the plan concerned is not passed here, presumably held on the C++ side - confirm
 144     end subroutine gpu_fft_plan_destroy
145 
 146     subroutine gpu_fft_stream_synchronize() bind(c, name='gpu_fft_stream_synchronize_cpp')
 147       use, intrinsic :: iso_c_binding   ! interface to the C symbol gpu_fft_stream_synchronize_cpp
 148       implicit none                     ! no arguments: the stream concerned is not passed here, presumably held on the C++ side - confirm
 149     end subroutine gpu_fft_stream_synchronize
150 
151     subroutine gpu_fft_plan_many(rank, n,&
152         inembed, istride, idist,&
153         onembed, ostride, odist,&
154         ffttype, batch ) bind(c, name='gpu_fft_plan_many_cpp')
155       use, intrinsic :: iso_c_binding
156       implicit none
157       integer    , intent(in)  :: rank
158       type(c_ptr), intent(in)  :: n
159       type(c_ptr), intent(in)  :: inembed, onembed
160       integer    , intent(in)  :: istride, idist, ostride, odist
161       integer    , intent(in)  :: ffttype, batch
162     end subroutine gpu_fft_plan_many
163 
164     subroutine gpu_fft_exec_z2z(idata, odata, direction) bind(c, name='gpu_fft_exec_z2z_cpp')
165       use, intrinsic :: iso_c_binding
166       implicit none
167       type(c_ptr), intent(in)    :: idata, odata
168       integer    , intent(in)    :: direction
169     end subroutine gpu_fft_exec_z2z
170 
171   end interface
172 
173   public
 174   ! -1 is a special device id value: it actually represents the host (CPU);
 175   ! see https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
 176   integer(C_INT32_T), public, parameter :: CPU_DEVICE_ID = -1
177 
178 #endif
179 #endif
180 
181 contains