TABLE OF CONTENTS


ABINIT/m_initcuda [ Modules ]

[ Top ] [ Modules ]

NAME

 m_initcuda

FUNCTION

  Module containing all variables concerning GPU device
  and the functions needed to extract them

COPYRIGHT

  Copyright (C) 2009-2024 ABINIT group (MMancini, MT, FDahm)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

NOTES

  Is an experimental development

SOURCE

20 #if defined HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #if defined HAVE_GPU_CUDA
25 #include "cuda_common.h"
26 #endif
27 
28 #include "abi_common.h"
29 
30 module m_initcuda
31 
32  use defs_basis
33  use m_abicore
34  use m_xomp
35  use m_xmpi, only: xmpi_world,xmpi_comm_rank,xmpi_comm_size,xmpi_abort
36 
37 #ifdef HAVE_KOKKOS
38  use m_kokkos_utils
39 #endif
40 
41 #ifdef HAVE_YAKL
42  use gator_mod
43 #endif
44 
45  implicit none
46 
47 #if defined HAVE_GPU_CUDA
48  integer,parameter,public :: cudap=kind(CUDA_KIND)
49 #endif
50 
51 !Structures

m_initcuda/CleanGPU [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 CleanGPU

FUNCTION

 Print information about GPU device

SOURCE

257  subroutine CleanGPU(gpuinfo)
258 
259  implicit none
260 
261 !Arguments ------------------------------------
262 !scalars
263  type(devGPU_type),intent(inout) :: gpuinfo
264 ! *********************************************************************
265 #if defined HAVE_GPU
266  if (allocated(gpuinfo%maxmemdev))  then
267    ABI_FREE(gpuinfo%maxmemdev)
268  end if
269 #endif
270 
271  end subroutine CleanGPU

m_initcuda/devGPU_type [ Types ]

[ Top ] [ m_initcuda ] [ Types ]

NAME

 devGPU_type

FUNCTION

 This structured datatype used to contains GPU properties

SOURCE

63  type,public :: devGPU_type
64   integer :: ndevice  !--number of available devices
65   real(dp),allocatable :: maxmemdev(:)  !--max global memory on any device
66  end type devGPU_type

m_initcuda/get_fastest_devices [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 get_fastest_devices

FUNCTION

 In case of multiple devices, sort them by performances
 and output the resulting list of devices.

SOURCE

454  subroutine get_fastest_devices(devices,nb_devices)
455 
456  implicit none
457 
458 !Arguments ------------------------------------
459 !scalars
460  integer,intent(in) :: nb_devices
461  integer,intent(out) :: devices(:)
462 !Local variables ------------------------------
463 !scalars
464  integer :: ii,nproc
465  character(len=500) :: msg
466 #if defined HAVE_GPU
467  integer :: constmem,gflops,jj,lenname,nprocs,ncores,regist,sharemem
468  real(sp) :: clockRate,globalmem
469  character(len=20) :: name
470 #endif
471 !arrays
472 #if defined HAVE_GPU
473  integer :: vers(0:1)
474  integer,allocatable :: isort(:)
475  real(dp),allocatable :: flops(:),mem(:)
476 #endif
477 
478 ! *********************************************************************
479 
480  nproc=xmpi_comm_size(xmpi_world)
481  if (size(devices)/=nproc) stop 'wrong size for devices array!'
482 
483 !Default
484  do ii=0,nproc-1
485    devices(ii+1) = MOD(ii,nb_devices)
486  end do
487  if (nb_devices==1) return
488 
489  write(msg,'(a,i2,a)') ch10,nb_devices,' GPU device(s) have been detected on the current node:'
490  call wrtout(std_out,msg,'PERS')
491 
492 #if defined HAVE_GPU
493 !Check device(s) properties
494  ABI_MALLOC(flops,(nb_devices))
495  ABI_MALLOC(mem,  (nb_devices))
496  do ii=0,nb_devices-1
497    call set_dev(ii)
498    call get_dev_info(ii,name,lenname,vers,globalmem,clockRate,gflops,constmem,&
499 &                    sharemem,regist,nprocs,ncores)
500    flops(ii+1)=dble(gflops) ; mem(ii+1)=dble(globalmem)
501    call unset_dev()
502    write(msg,'(a,i2,3a,i1,a,i1,a,i6,a,f7.1,a,i7,a,i4,a,i4,a)') &
503 &   '  Device ',ii,': ',trim(name(1:lenname)),', v',vers(0),'.',vers(1),', Mem=',nint(globalmem),&
504 &   ' Mbytes, Clock=',clockrate,' GHz, ',gflops,' GFLOPS, ',nprocs,' processors, ',ncores,' cores'
505    call wrtout(std_out,msg,'PERS')
506  end do
507 
508 !Sort devices (first by flops, then by memory)
509  ABI_MALLOC(isort,(nb_devices))
510  isort(:)=(/(ii,ii=1,nb_devices)/)
511  call my_sort(flops,mem,isort)
512 
513 !Distribute cards among procs
514  do ii=0,nproc-1
515    jj=MOD(ii,nb_devices)
516    devices(ii+1) = isort(jj+1)-1
517  end do
518 
519  ABI_FREE(isort)
520  ABI_FREE(flops)
521  ABI_FREE(mem)
522 #endif
523 
524 contains

m_initcuda/Get_Mem_Dev [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 Get_Mem_Dev

FUNCTION

 Get the max memory availeble on device

INPUTS

 device  device number

OUTPUT

 max_mem_dev

SOURCE

231 subroutine Get_Mem_Dev(device,max_mem_dev)
232 
233  implicit none
234 
235 !Arguments ------------------------------------
236 !scalars
237  integer,intent(in) :: device
238  real(sp),intent(out) :: max_mem_dev
239 !Local variables ------------------------------
240 ! *********************************************************************
241 #if defined HAVE_GPU
242  call get_GPU_max_mem(device,max_mem_dev)
243 #endif
244 end subroutine Get_Mem_Dev

m_initcuda/InitGPU [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 InitGPU

FUNCTION

 Print information about GPU device

SOURCE

163  subroutine InitGPU(gpuinfo,device)
164 
165  implicit none
166 
167 !Arguments ------------------------------------
168 !scalars
169  integer,intent(in)              :: device
170  type(devGPU_type),intent(inout) :: gpuinfo
171 !Local variables ------------------------------
172 !scalars
173  real(sp) :: locmax
174 ! *********************************************************************
175  gpuinfo%ndevice = 0
176 #if defined HAVE_GPU
177 !--Initialization
178  if(device>-1)then
179    !--Get the number of device for this proc
180    gpuinfo%ndevice = 1
181    ABI_MALLOC(gpuinfo%maxmemdev,(0:1))
182    call get_GPU_max_mem(device,locmax)
183    gpuinfo%maxmemdev(0:1) = locmax
184    call  prt_device_info(device)
185  endif
186 #endif
187  end subroutine InitGPU

m_initcuda/my_sort [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 my_sort

FUNCTION

  Small sorting routine: change iperm array
  according to list1 values then list2 values

SOURCE

537  subroutine my_sort(list1,list2,iperm)
538 
539  implicit none
540 
541 !Arguments ------------------------------------
542 !scalars
543  integer,intent(inout) :: iperm(:)
544  real(dp),intent(in) :: list1(:),list2(:)
545 !Local variables ------------------------------
546 !scalars
547  integer :: ll,mm,nn,pp
548  real(dp) :: xx
549 !arrays
550  real(dp),allocatable :: llist(:)
551 
552 ! *********************************************************************
553 
554  nn=size(iperm)
555  ABI_MALLOC(llist,(nn))
556  llist(:)=list1(:)
557  do ll=1,nn-1
558    do mm=ll+1,nn
559      if (llist(mm)>llist(ll)) then
560        xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx
561        pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp
562      end if
563    end do
564  end do
565  do ll=1,nn-1
566    do mm=ll+1,nn
567      if (abs(llist(mm)-llist(ll))<tol8) then
568        if (list2(iperm(mm))>list2(iperm(ll))) then
569          xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx
570          pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp
571        end if
572      end if
573    end do
574  end do
575  ABI_FREE(llist)
576 
577  end subroutine my_sort

m_initcuda/prt_device_info [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 prt_device_info

FUNCTION

 Print information about GPU device

SOURCE

 97  subroutine prt_device_info(device)
 98 
 99   implicit none
100 !Arguments ------------------------------------
101 !scalars
102  integer,intent(in) :: device
103 !Local variables ------------------------------
104 !scalars
105  integer :: gflops,constmem,sharemem
106  integer :: ii,regist,lenname,ncores,nprocs
107  real(sp) :: globalmem,clockRate
108  character(20)  :: name
109  character(20)  :: formatdev
110  character(60)  :: gflops_stg
111  character(500) :: msg
112 !arrays
113  integer :: vers(0:1)
114 ! *********************************************************************
115 #if defined HAVE_GPU
116  write(msg,'(a,80a)')' ',('_',ii=1,80)
117  call wrtout(std_out,msg,'PERS')
118  write(msg,'(a25,a25,a31,a)')  '________________________',&
119 &  ' Graphic Card Properties ','_______________________________' ,ch10
120  call wrtout(std_out,msg,'PERS')
121 
122  call get_dev_info(device,name,lenname,vers,globalmem,clockRate,gflops,constmem,sharemem,regist,nprocs,ncores)
123  if (gflops<0) then
124    gflops_stg="undefined (add new def. in version_2_cores function)"
125  else
126    write(gflops_stg,'(i7,a)') gflops,' GFP'
127  end if
128 
129  write(formatdev,'(a12,i4,a)') '(a23,i4,a3,a',lenname,')'
130  write (msg,formatdev)&
131        & '  Device             ',device,' : ',name(1:lenname)
132  call wrtout(std_out,msg,'PERS')
133  write (msg,'(a,2(i1,a),a,i9,a,a,a,f7.1,a,a,a,i9,a,i9,4a,2(a,i9,2a),a,i9,a)')&
134        & ' Revision number:                   ',vers(0),'.',vers(1),ch10, &
135        & ' Total amount of global memory: ',nint(globalmem),' Mbytes',ch10, &
136        & ' Clock rate:                    ',clockRate,' GHz',ch10, &
137        & ' Number of processors/cores:    ',nprocs,'/',ncores,ch10, &
138        & ' Max GFLOPS:                    ',trim(gflops_stg),ch10, &
139        & ' Total  constant memory:        ',constmem,' bytes',ch10, &
140        & ' Shared memory per block:       ',sharemem,' bytes',ch10, &
141        & ' Number of registers per block: ',regist,ch10
142  call wrtout(std_out,msg,'PERS')
143  if(device == -1)then
144    write(msg,'(a)')' no cuda-GPU devices found'
145    call wrtout(std_out,msg,'PERS')
146  end if
147  write(msg,'(a,80a)')' ',('_',ii=1,80)
148  call wrtout(std_out,msg,'PERS')
149 #endif
150  end subroutine prt_device_info

m_initcuda/setdevice_cuda [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 setdevice_cuda

FUNCTION

 Detect and activate a GPU device from current CPU core

INPUTS

  gpu_devices(12)= list of GPU devices to choose on one node (in case of multiple devices);
                   if set to 20*-1, will choose the devices by order of performances.

SIDE EFFECTS

  gpu_option= which GPU implementation is used (None, CUDA, OpenMP, Kokkos)

SOURCE

291  subroutine setdevice_cuda(gpu_devices_node,gpu_option)
292 
293 #ifdef FC_NAG
294  use f90_unix_proc
295 #endif
296  implicit none
297 
298 !Arguments ------------------------------------
299 !scalars
300  integer,intent(inout) :: gpu_option
301 !arrays
302  integer, intent(in) :: gpu_devices_node(12)
303 !Local variables ------------------------------
304 !scalars
305  integer :: device,ii,jj,me,nb_devices,nproc
306  logical :: testopen
307  character(len=500) :: msg
308  type(devGPU_type) :: gpuinfo
309 !arrays
310  integer,allocatable :: fastest_devices(:)
311 ! *********************************************************************
312 
313  if (gpu_option==ABI_GPU_DISABLED) return
314 
315  nproc=xmpi_comm_size(xmpi_world)
316  me=xmpi_comm_rank(xmpi_world)
317 
318 #if defined HAVE_GPU
319  device=-1
320  call c_get_ndevice(nb_devices)
321  !nb_devices=min(nb_devices,20)
322  if(nb_devices>0) then
323    if(nb_devices==1) then
324      device=0
325    else if(all(gpu_devices_node(1:nb_devices)==-1)) then
326      ABI_MALLOC(fastest_devices,(0:nproc-1))
327      call get_fastest_devices(fastest_devices,nb_devices)
328      device=fastest_devices(me)
329      ABI_FREE(fastest_devices)
330    else
331      jj=nb_devices
332      do ii=jj,2,-1
333        if(gpu_devices_node(ii)==-1) nb_devices=ii-1
334      end do
335      device=gpu_devices_node(1+mod(me,nb_devices))
336    end if
337 
338    ! Initialize Kokkos and YAKL if requested
339    if(gpu_option==ABI_GPU_KOKKOS .or. gpu_option==ABI_GPU_LEGACY) then
340 #ifdef HAVE_KOKKOS
341      ! initialize kokkos
342      if (xmpi_comm_rank(xmpi_world) == 0) then
343        write(std_out,*)'initializinging kokkos in MPI process ', xmpi_comm_rank(xmpi_world)
344      end if
345      call kokkos_initialize()
346 
347      ! only master MPI process print kokkos config
348      if (xmpi_comm_rank(xmpi_world) == 0) then
349        call abinit_kokkos_print_config()
350      endif
351 #endif
352 
353 #ifdef HAVE_YAKL
354      call gator_init()
355 #endif
356    end if
357 
358    call set_dev(device)
359    call check_context(nb_devices,msg)
360    if(gpu_option==ABI_GPU_OPENMP) then
361      call xomp_set_default_device(device)
362    end if
363    if(nb_devices==1) then !allocation succeed
364      write(msg, '(4a,i1,2a)' ) ch10,&
365 &     ' setdevice_cuda : COMMENT -',ch10,&
366 &     '  GPU ',device,' has been properly initialized, continuing...',ch10
367      call wrtout(std_out,msg,'PERS')
368    else !gpu allocation failed we print error message returned and exit
369      device=-1
370      call wrtout(std_out,msg,'COLL')
371      call xmpi_abort()
372      inquire(std_out,OPENED=testopen)
373      if (testopen) close(std_out)
374 #if defined FC_NAG
375      call exit(-1)
376 #elif defined HAVE_FC_EXIT
377      call exit(1)
378 #else
379       stop 1
380 #endif
381    end if
382    call InitGPU(gpuinfo,device)
383    call CleanGPU(gpuinfo)
384  else
385    gpu_option=ABI_GPU_DISABLED
386  end if
387 #endif
388  end subroutine setdevice_cuda

m_initcuda/unsetdevice_cuda [ Functions ]

[ Top ] [ m_initcuda ] [ Functions ]

NAME

 unsetdevice_cuda

FUNCTION

 Deactivate a GPU device from current CPU core

INPUTS

  gpu_option= which GPU implementation is used (None, CUDA, OpenMP, Kokkos)

SOURCE

404  subroutine unsetdevice_cuda(gpu_option)
405 
406  implicit none
407 
408 !Arguments ------------------------------------
409 !scalars
410  integer,intent(in) :: gpu_option
411 !Local variables ------------------------------
412 !scalars
413  character(len=500) :: msg
414 ! *********************************************************************
415 
416  if (gpu_option==ABI_GPU_DISABLED) return
417 
418 #if defined HAVE_GPU
419 
420  ! Closing YAKL and Kokkos if opened
421  if (gpu_option==ABI_GPU_KOKKOS) then
422 #ifdef HAVE_YAKL
423    call gator_finalize()
424    write(std_out,*)'yakl gator finalized'
425 #endif
426 #ifdef HAVE_KOKKOS
427    ! finalize kokkos
428    call kokkos_finalize()
429    write(std_out,*)'kokkos finalized'
430 #endif
431  !kokkos_finalize already reset GPU context
432  !if (gpu_option/=ABI_GPU_KOKKOS) call unset_dev()
433  end if
434 
435  if (gpu_option==ABI_GPU_LEGACY) then
436    call unset_dev()
437  end if
438 
439 #endif
440  end subroutine unsetdevice_cuda