TABLE OF CONTENTS
- ABINIT/m_initcuda
- m_initcuda/CleanGPU
- m_initcuda/devGPU_type
- m_initcuda/get_fastest_devices
- m_initcuda/Get_Mem_Dev
- m_initcuda/InitGPU
- m_initcuda/my_sort
- m_initcuda/prt_device_info
- m_initcuda/setdevice_cuda
- m_initcuda/unsetdevice_cuda
ABINIT/m_initcuda [ Modules ]
NAME
m_initcuda
FUNCTION
Module containing all variables concerning GPU device and the functions needed to extract them
COPYRIGHT
Copyright (C) 2009-2024 ABINIT group (MMancini, MT, FDahm) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
NOTES
Is an experimental development
SOURCE
20 #if defined HAVE_CONFIG_H 21 #include "config.h" 22 #endif 23 24 #if defined HAVE_GPU_CUDA 25 #include "cuda_common.h" 26 #endif 27 28 #include "abi_common.h" 29 30 module m_initcuda 31 32 use defs_basis 33 use m_abicore 34 use m_xomp 35 use m_xmpi, only: xmpi_world,xmpi_comm_rank,xmpi_comm_size,xmpi_abort 36 37 #ifdef HAVE_KOKKOS 38 use m_kokkos_utils 39 #endif 40 41 #ifdef HAVE_YAKL 42 use gator_mod 43 #endif 44 45 implicit none 46 47 #if defined HAVE_GPU_CUDA 48 integer,parameter,public :: cudap=kind(CUDA_KIND) 49 #endif 50 51 !Structures
m_initcuda/CleanGPU [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
CleanGPU
FUNCTION
Print information about GPU device
SOURCE
257 subroutine CleanGPU(gpuinfo) 258 259 implicit none 260 261 !Arguments ------------------------------------ 262 !scalars 263 type(devGPU_type),intent(inout) :: gpuinfo 264 ! ********************************************************************* 265 #if defined HAVE_GPU 266 if (allocated(gpuinfo%maxmemdev)) then 267 ABI_FREE(gpuinfo%maxmemdev) 268 end if 269 #endif 270 271 end subroutine CleanGPU
m_initcuda/devGPU_type [ Types ]
[ Top ] [ m_initcuda ] [ Types ]
NAME
devGPU_type
FUNCTION
This structured datatype used to contains GPU properties
SOURCE
63 type,public :: devGPU_type 64 integer :: ndevice !--number of available devices 65 real(dp),allocatable :: maxmemdev(:) !--max global memory on any device 66 end type devGPU_type
m_initcuda/get_fastest_devices [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
get_fastest_devices
FUNCTION
In case of multiple devices, sort them by performances and output the resulting list of devices.
SOURCE
454 subroutine get_fastest_devices(devices,nb_devices) 455 456 implicit none 457 458 !Arguments ------------------------------------ 459 !scalars 460 integer,intent(in) :: nb_devices 461 integer,intent(out) :: devices(:) 462 !Local variables ------------------------------ 463 !scalars 464 integer :: ii,nproc 465 character(len=500) :: msg 466 #if defined HAVE_GPU 467 integer :: constmem,gflops,jj,lenname,nprocs,ncores,regist,sharemem 468 real(sp) :: clockRate,globalmem 469 character(len=20) :: name 470 #endif 471 !arrays 472 #if defined HAVE_GPU 473 integer :: vers(0:1) 474 integer,allocatable :: isort(:) 475 real(dp),allocatable :: flops(:),mem(:) 476 #endif 477 478 ! ********************************************************************* 479 480 nproc=xmpi_comm_size(xmpi_world) 481 if (size(devices)/=nproc) stop 'wrong size for devices array!' 482 483 !Default 484 do ii=0,nproc-1 485 devices(ii+1) = MOD(ii,nb_devices) 486 end do 487 if (nb_devices==1) return 488 489 write(msg,'(a,i2,a)') ch10,nb_devices,' GPU device(s) have been detected on the current node:' 490 call wrtout(std_out,msg,'PERS') 491 492 #if defined HAVE_GPU 493 !Check device(s) properties 494 ABI_MALLOC(flops,(nb_devices)) 495 ABI_MALLOC(mem, (nb_devices)) 496 do ii=0,nb_devices-1 497 call set_dev(ii) 498 call get_dev_info(ii,name,lenname,vers,globalmem,clockRate,gflops,constmem,& 499 & sharemem,regist,nprocs,ncores) 500 flops(ii+1)=dble(gflops) ; mem(ii+1)=dble(globalmem) 501 call unset_dev() 502 write(msg,'(a,i2,3a,i1,a,i1,a,i6,a,f7.1,a,i7,a,i4,a,i4,a)') & 503 & ' Device ',ii,': ',trim(name(1:lenname)),', v',vers(0),'.',vers(1),', Mem=',nint(globalmem),& 504 & ' Mbytes, Clock=',clockrate,' GHz, ',gflops,' GFLOPS, ',nprocs,' processors, ',ncores,' cores' 505 call wrtout(std_out,msg,'PERS') 506 end do 507 508 !Sort devices (first by flops, then by memory) 509 ABI_MALLOC(isort,(nb_devices)) 510 isort(:)=(/(ii,ii=1,nb_devices)/) 511 call my_sort(flops,mem,isort) 512 513 !Distribute cards among procs 514 do ii=0,nproc-1 515 jj=MOD(ii,nb_devices) 516 devices(ii+1) = isort(jj+1)-1 517 end do 518 519 ABI_FREE(isort) 520 ABI_FREE(flops) 521 ABI_FREE(mem) 522 #endif 523 524 contains
m_initcuda/Get_Mem_Dev [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
Get_Mem_Dev
FUNCTION
Get the max memory availeble on device
INPUTS
device device number
OUTPUT
max_mem_dev
SOURCE
231 subroutine Get_Mem_Dev(device,max_mem_dev) 232 233 implicit none 234 235 !Arguments ------------------------------------ 236 !scalars 237 integer,intent(in) :: device 238 real(sp),intent(out) :: max_mem_dev 239 !Local variables ------------------------------ 240 ! ********************************************************************* 241 #if defined HAVE_GPU 242 call get_GPU_max_mem(device,max_mem_dev) 243 #endif 244 end subroutine Get_Mem_Dev
m_initcuda/InitGPU [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
InitGPU
FUNCTION
Print information about GPU device
SOURCE
163 subroutine InitGPU(gpuinfo,device) 164 165 implicit none 166 167 !Arguments ------------------------------------ 168 !scalars 169 integer,intent(in) :: device 170 type(devGPU_type),intent(inout) :: gpuinfo 171 !Local variables ------------------------------ 172 !scalars 173 real(sp) :: locmax 174 ! ********************************************************************* 175 gpuinfo%ndevice = 0 176 #if defined HAVE_GPU 177 !--Initialization 178 if(device>-1)then 179 !--Get the number of device for this proc 180 gpuinfo%ndevice = 1 181 ABI_MALLOC(gpuinfo%maxmemdev,(0:1)) 182 call get_GPU_max_mem(device,locmax) 183 gpuinfo%maxmemdev(0:1) = locmax 184 call prt_device_info(device) 185 endif 186 #endif 187 end subroutine InitGPU
m_initcuda/my_sort [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
my_sort
FUNCTION
Small sorting routine: change iperm array according to list1 values then list2 values
SOURCE
537 subroutine my_sort(list1,list2,iperm) 538 539 implicit none 540 541 !Arguments ------------------------------------ 542 !scalars 543 integer,intent(inout) :: iperm(:) 544 real(dp),intent(in) :: list1(:),list2(:) 545 !Local variables ------------------------------ 546 !scalars 547 integer :: ll,mm,nn,pp 548 real(dp) :: xx 549 !arrays 550 real(dp),allocatable :: llist(:) 551 552 ! ********************************************************************* 553 554 nn=size(iperm) 555 ABI_MALLOC(llist,(nn)) 556 llist(:)=list1(:) 557 do ll=1,nn-1 558 do mm=ll+1,nn 559 if (llist(mm)>llist(ll)) then 560 xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx 561 pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp 562 end if 563 end do 564 end do 565 do ll=1,nn-1 566 do mm=ll+1,nn 567 if (abs(llist(mm)-llist(ll))<tol8) then 568 if (list2(iperm(mm))>list2(iperm(ll))) then 569 xx=llist(ll);llist(ll)=llist(mm);llist(mm)=xx 570 pp=iperm(ll);iperm(ll)=iperm(mm);iperm(mm)=pp 571 end if 572 end if 573 end do 574 end do 575 ABI_FREE(llist) 576 577 end subroutine my_sort
m_initcuda/prt_device_info [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
prt_device_info
FUNCTION
Print information about GPU device
SOURCE
97 subroutine prt_device_info(device) 98 99 implicit none 100 !Arguments ------------------------------------ 101 !scalars 102 integer,intent(in) :: device 103 !Local variables ------------------------------ 104 !scalars 105 integer :: gflops,constmem,sharemem 106 integer :: ii,regist,lenname,ncores,nprocs 107 real(sp) :: globalmem,clockRate 108 character(20) :: name 109 character(20) :: formatdev 110 character(60) :: gflops_stg 111 character(500) :: msg 112 !arrays 113 integer :: vers(0:1) 114 ! ********************************************************************* 115 #if defined HAVE_GPU 116 write(msg,'(a,80a)')' ',('_',ii=1,80) 117 call wrtout(std_out,msg,'PERS') 118 write(msg,'(a25,a25,a31,a)') '________________________',& 119 & ' Graphic Card Properties ','_______________________________' ,ch10 120 call wrtout(std_out,msg,'PERS') 121 122 call get_dev_info(device,name,lenname,vers,globalmem,clockRate,gflops,constmem,sharemem,regist,nprocs,ncores) 123 if (gflops<0) then 124 gflops_stg="undefined (add new def. in version_2_cores function)" 125 else 126 write(gflops_stg,'(i7,a)') gflops,' GFP' 127 end if 128 129 write(formatdev,'(a12,i4,a)') '(a23,i4,a3,a',lenname,')' 130 write (msg,formatdev)& 131 & ' Device ',device,' : ',name(1:lenname) 132 call wrtout(std_out,msg,'PERS') 133 write (msg,'(a,2(i1,a),a,i9,a,a,a,f7.1,a,a,a,i9,a,i9,4a,2(a,i9,2a),a,i9,a)')& 134 & ' Revision number: ',vers(0),'.',vers(1),ch10, & 135 & ' Total amount of global memory: ',nint(globalmem),' Mbytes',ch10, & 136 & ' Clock rate: ',clockRate,' GHz',ch10, & 137 & ' Number of processors/cores: ',nprocs,'/',ncores,ch10, & 138 & ' Max GFLOPS: ',trim(gflops_stg),ch10, & 139 & ' Total constant memory: ',constmem,' bytes',ch10, & 140 & ' Shared memory per block: ',sharemem,' bytes',ch10, & 141 & ' Number of registers per block: ',regist,ch10 142 call wrtout(std_out,msg,'PERS') 143 if(device == -1)then 144 write(msg,'(a)')' no cuda-GPU devices found' 145 call wrtout(std_out,msg,'PERS') 146 end if 147 write(msg,'(a,80a)')' ',('_',ii=1,80) 148 call wrtout(std_out,msg,'PERS') 149 #endif 150 end subroutine prt_device_info
m_initcuda/setdevice_cuda [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
setdevice_cuda
FUNCTION
Detect and activate a GPU device from current CPU core
INPUTS
gpu_devices(12)= list of GPU devices to choose on one node (in case of multiple devices); if set to 20*-1, will choose the devices by order of performances.
SIDE EFFECTS
gpu_option= which GPU implementation is used (None, CUDA, OpenMP, Kokkos)
SOURCE
291 subroutine setdevice_cuda(gpu_devices_node,gpu_option) 292 293 #ifdef FC_NAG 294 use f90_unix_proc 295 #endif 296 implicit none 297 298 !Arguments ------------------------------------ 299 !scalars 300 integer,intent(inout) :: gpu_option 301 !arrays 302 integer, intent(in) :: gpu_devices_node(12) 303 !Local variables ------------------------------ 304 !scalars 305 integer :: device,ii,jj,me,nb_devices,nproc 306 logical :: testopen 307 character(len=500) :: msg 308 type(devGPU_type) :: gpuinfo 309 !arrays 310 integer,allocatable :: fastest_devices(:) 311 ! ********************************************************************* 312 313 if (gpu_option==ABI_GPU_DISABLED) return 314 315 nproc=xmpi_comm_size(xmpi_world) 316 me=xmpi_comm_rank(xmpi_world) 317 318 #if defined HAVE_GPU 319 device=-1 320 call c_get_ndevice(nb_devices) 321 !nb_devices=min(nb_devices,20) 322 if(nb_devices>0) then 323 if(nb_devices==1) then 324 device=0 325 else if(all(gpu_devices_node(1:nb_devices)==-1)) then 326 ABI_MALLOC(fastest_devices,(0:nproc-1)) 327 call get_fastest_devices(fastest_devices,nb_devices) 328 device=fastest_devices(me) 329 ABI_FREE(fastest_devices) 330 else 331 jj=nb_devices 332 do ii=jj,2,-1 333 if(gpu_devices_node(ii)==-1) nb_devices=ii-1 334 end do 335 device=gpu_devices_node(1+mod(me,nb_devices)) 336 end if 337 338 ! Initialize Kokkos and YAKL if requested 339 if(gpu_option==ABI_GPU_KOKKOS .or. gpu_option==ABI_GPU_LEGACY) then 340 #ifdef HAVE_KOKKOS 341 ! initialize kokkos 342 if (xmpi_comm_rank(xmpi_world) == 0) then 343 write(std_out,*)'initializinging kokkos in MPI process ', xmpi_comm_rank(xmpi_world) 344 end if 345 call kokkos_initialize() 346 347 ! only master MPI process print kokkos config 348 if (xmpi_comm_rank(xmpi_world) == 0) then 349 call abinit_kokkos_print_config() 350 endif 351 #endif 352 353 #ifdef HAVE_YAKL 354 call gator_init() 355 #endif 356 end if 357 358 call set_dev(device) 359 call check_context(nb_devices,msg) 360 if(gpu_option==ABI_GPU_OPENMP) then 361 call xomp_set_default_device(device) 362 end if 363 if(nb_devices==1) then !allocation succeed 364 write(msg, '(4a,i1,2a)' ) ch10,& 365 & ' setdevice_cuda : COMMENT -',ch10,& 366 & ' GPU ',device,' has been properly initialized, continuing...',ch10 367 call wrtout(std_out,msg,'PERS') 368 else !gpu allocation failed we print error message returned and exit 369 device=-1 370 call wrtout(std_out,msg,'COLL') 371 call xmpi_abort() 372 inquire(std_out,OPENED=testopen) 373 if (testopen) close(std_out) 374 #if defined FC_NAG 375 call exit(-1) 376 #elif defined HAVE_FC_EXIT 377 call exit(1) 378 #else 379 stop 1 380 #endif 381 end if 382 call InitGPU(gpuinfo,device) 383 call CleanGPU(gpuinfo) 384 else 385 gpu_option=ABI_GPU_DISABLED 386 end if 387 #endif 388 end subroutine setdevice_cuda
m_initcuda/unsetdevice_cuda [ Functions ]
[ Top ] [ m_initcuda ] [ Functions ]
NAME
unsetdevice_cuda
FUNCTION
Deactivate a GPU device from current CPU core
INPUTS
gpu_option= which GPU implementation is used (None, CUDA, OpenMP, Kokkos)
SOURCE
404 subroutine unsetdevice_cuda(gpu_option) 405 406 implicit none 407 408 !Arguments ------------------------------------ 409 !scalars 410 integer,intent(in) :: gpu_option 411 !Local variables ------------------------------ 412 !scalars 413 character(len=500) :: msg 414 ! ********************************************************************* 415 416 if (gpu_option==ABI_GPU_DISABLED) return 417 418 #if defined HAVE_GPU 419 420 ! Closing YAKL and Kokkos if opened 421 if (gpu_option==ABI_GPU_KOKKOS) then 422 #ifdef HAVE_YAKL 423 call gator_finalize() 424 write(std_out,*)'yakl gator finalized' 425 #endif 426 #ifdef HAVE_KOKKOS 427 ! finalize kokkos 428 call kokkos_finalize() 429 write(std_out,*)'kokkos finalized' 430 #endif 431 !kokkos_finalize already reset GPU context 432 !if (gpu_option/=ABI_GPU_KOKKOS) call unset_dev() 433 end if 434 435 if (gpu_option==ABI_GPU_LEGACY) then 436 call unset_dev() 437 end if 438 439 #endif 440 end subroutine unsetdevice_cuda