TABLE OF CONTENTS


ABINIT/m_xgScalapack [ Modules ]

[ Top ] [ Modules ]

NAME

  m_xgScalapack

FUNCTION

COPYRIGHT

  Copyright (C) 2017-2024 ABINIT group (J. Bieder)
  This file is distributed under the terms of the
  GNU General Public License, see ~abinit/COPYING
  or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

15 #if defined HAVE_CONFIG_H
16 #include "config.h"
17 #endif
18 
19 #include "abi_common.h"
20 
21 module m_xgScalapack
22 
23   use defs_basis, only : std_err, std_out, dp, ABI_GPU_DISABLED, ABI_GPU_OPENMP
24   use m_abicore
25   use m_xmpi
26   use m_errors
27   use m_slk
28   use m_xg
29   use m_xomp
30   use m_time,     only: timab
31 
32 #ifdef HAVE_MPI2
33  use mpi
34 #endif
35 
36   implicit none
37 
38 #ifdef HAVE_MPI1
39  include 'mpif.h'
40 #endif
41 
42 
43   private
44 
45   integer, parameter :: M__SLK = 1
46   integer, parameter :: M__ROW = 1
47   integer, parameter :: M__COL = 2
48   integer, parameter :: M__UNUSED = 4
49   integer, parameter :: M__WORLD = 5
50   integer, parameter :: M__NDATA = 5
51   integer, parameter :: M__tim_init    = 1690
52   integer, parameter :: M__tim_free    = 1691
53   integer, parameter :: M__tim_heev    = 1692
54   integer, parameter :: M__tim_hegv    = 1693
55   integer, parameter :: M__tim_scatter = 1694
56 
57   integer, parameter, public :: SLK_AUTO = -1
58   integer, parameter, public :: SLK_FORCED = 1
59   integer, parameter, public :: SLK_DISABLED = 0
60   integer, save :: M__CONFIG = SLK_AUTO
61   integer, save :: M__MAXDIM = 1000
62 
63   type, public :: xgScalapack_t
64     integer :: comms(M__NDATA)
65     integer :: rank(M__NDATA)
66     integer :: size(M__NDATA)
67     integer :: coords(2)
68     integer :: ngroup
69     integer :: verbosity
70     type(grid_scalapack) :: grid
71   end type xgScalapack_t
72 
73   public :: xgScalapack_init
74   public :: xgScalapack_free
75   public :: xgScalapack_heev
76   public :: xgScalapack_hegv
77   public :: xgScalapack_config
78   contains

m_xgScalapack/xgScalapack_init [ Functions ]

[ Top ] [ m_xgScalapack ] [ Functions ]

NAME

  xgScalapack_init

FUNCTION

  Init the scalapack communicator for next operations.
  If the comm has too many cpus, then take only a subgroup of this comm

INPUTS

OUTPUT

SOURCE

 94   subroutine  xgScalapack_init(xgScalapack,comm,maxDim,verbosity,gpu_option,usable)
 95 
 96     type(xgScalapack_t), intent(inout) :: xgScalapack
 97     integer            , intent(in   ) :: comm
 98     integer            , intent(in   ) :: maxDim
 99     integer            , intent(in   ) :: verbosity
100     logical            , intent(in   ) :: gpu_option
101     logical            , intent(  out) :: usable
102     double precision :: tsec(2)
103 #ifdef HAVE_LINALG_MKL_THREADS
104     integer :: mkl_get_max_threads
105 #endif
106 #ifdef HAVE_LINALG_OPENBLAS_THREADS
107     integer :: openblas_get_num_threads
108 #endif
109     integer :: nthread
110 #ifdef HAVE_LINALG_SCALAPACK
111     integer :: maxProc
112     integer :: nproc
113     integer :: ngroup
114     integer :: subgroup
115     integer :: mycomm(2)
116     integer :: ierr
117     integer :: test_row
118     integer :: test_col
119 #else
120     ABI_UNUSED(comm)
121     ABI_UNUSED(maxDim)
122 #endif
123 
124     call timab(M__tim_init,1,tsec)
125 
126     xgScalapack%comms = xmpi_comm_null
127     xgScalapack%rank = xmpi_undefined_rank
128     xgScalapack%verbosity = verbosity
129 
130     nthread = 1
131 #ifdef HAVE_LINALG_MKL_THREADS
132     nthread =  mkl_get_max_threads()
133 #elif HAVE_LINALG_OPENBLAS_THREADS
134     nthread =  openblas_get_num_threads()
135 #else
136     nthread = xomp_get_num_threads(open_parallel=.true.)
137     if ( nthread == 0 ) nthread = 1
138 #endif
139 
140 #ifdef HAVE_LINALG_SCALAPACK
141 
142     nproc = xmpi_comm_size(comm)
143     xgScalapack%comms(M__WORLD) = comm
144     xgScalapack%rank(M__WORLD) = xmpi_comm_rank(comm)
145     xgScalapack%size(M__WORLD) = nproc
146 
147     maxProc = (maxDim / (M__MAXDIM*nthread))+1 ! ( M__MAXDIM x M__MAXDIM matrice per MPI )
148     if ( M__CONFIG > 0 .and. M__CONFIG <= nproc ) then
149       maxProc = M__CONFIG
150     else if ( maxProc > nproc ) then
151       maxProc = nproc
152     end if
153 
154     if ( maxProc == 1 .or. M__CONFIG == SLK_DISABLED) then
155       usable = .false.
156       return
157     else if ( nthread > 1 ) then ! disable scalapack with threads since it is not threadsafe
158       ! This should be check with new elpa version en MPI+OpenMP
159       if ( M__CONFIG > 0 ) then
160         ABI_WARNING("xgScalapack turned off because you have threads")
161       end if
162       usable = .false.
163       return
164     else
165       usable = .true.
166       maxProc = 2*((maxProc+1)/2) ! Round to next even number
167     end if
168 
169     if ( xgScalapack%verbosity > 0 ) then
170       write(std_out,*) " xgScalapack will use", maxProc, "/", nproc, "MPIs"
171     end if
172 
173     ngroup = nproc/maxProc
174     xgScalapack%ngroup = ngroup
175 
176     if ( maxProc < nproc ) then
177       if ( xgScalapack%rank(M__WORLD) < maxProc*ngroup ) then
178         subgroup = xgScalapack%rank(M__WORLD)/maxProc
179         mycomm(1) = M__SLK
180         mycomm(2) = M__UNUSED
181       else
182         subgroup = ngroup+1
183         mycomm(1) = M__UNUSED
184         mycomm(2) = M__SLK
185       end if
186        call MPI_Comm_split(comm, subgroup, xgScalapack%rank(M__WORLD), xgScalapack%comms(mycomm(1)),ierr)
187        if ( ierr /= 0 ) then
188          ABI_ERROR("Error splitting communicator")
189        end if
190        xgScalapack%comms(mycomm(2)) = xmpi_comm_null
191        xgScalapack%rank(mycomm(1)) = xmpi_comm_rank(xgScalapack%comms(mycomm(1)))
192        xgScalapack%rank(mycomm(2)) = xmpi_undefined_rank
193        xgScalapack%size(mycomm(1)) = xmpi_comm_size(xgScalapack%comms(mycomm(1)))
194        xgScalapack%size(mycomm(2)) = nproc - xgScalapack%size(mycomm(1))
195     else
196        call MPI_Comm_dup(comm,xgScalapack%comms(M__SLK),ierr)
197        if ( ierr /= 0 ) then
198          ABI_ERROR("Error duplicating communicator")
199        end if
200        xgScalapack%rank(M__SLK) = xmpi_comm_rank(xgScalapack%comms(M__SLK))
201        xgScalapack%size(M__SLK) = nproc
202     end if
203 
204     if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then
205       call xgScalapack%grid%init(xgScalapack%size(M__SLK), xgScalapack%comms(M__SLK), gpu_option)
206       call BLACS_GridInfo(xgScalapack%grid%ictxt, &
207         xgScalapack%grid%dims(M__ROW), xgScalapack%grid%dims(M__COL),&
208         xgScalapack%coords(M__ROW), xgScalapack%coords(M__COL))
209 
210      !These values are the same as those computed by BLACS_GRIDINFO
211      !except in the case where the myproc argument is not the local proc
212       test_row = INT((xgScalapack%rank(M__SLK)) / xgScalapack%grid%dims(2))
213       test_col = MOD((xgScalapack%rank(M__SLK)), xgScalapack%grid%dims(2))
214       if ( test_row /= xgScalapack%coords(M__ROW) ) then
215         ABI_WARNING("Row id mismatch")
216       end if
217       if ( test_col /= xgScalapack%coords(M__COL) ) then
218         ABI_WARNING("Col id mismatch")
219       end if
220     end if
221 
222 #else
223     usable = .false.
224 #endif
225 
226     call timab(M__tim_init,2,tsec)
227 
228   end subroutine xgScalapack_init
229 
230   subroutine xgScalapack_config(myconfig,maxDim)
231 
232     integer, intent(in) :: myconfig
233     integer, intent(in) :: maxDim
234     if ( myconfig == SLK_AUTO) then
235       M__CONFIG = myconfig
236       ABI_COMMENT("xgScalapack in auto mode")
237     else if ( myconfig == SLK_DISABLED) then
238       M__CONFIG = myconfig
239       ABI_COMMENT("xgScalapack disabled")
240     else if ( myconfig > 0) then
241       M__CONFIG = myconfig
242       ABI_COMMENT("xgScalapack enabled")
243     else
244       ABI_WARNING("Bad value for xgScalapack config -> autodetection")
245       M__CONFIG = SLK_AUTO
246     end if
247     if ( maxDim > 0 ) then
248       M__MAXDIM = maxDim
249     end if
250 
251   end subroutine xgScalapack_config
252 
253   function toProcessorScalapack(xgScalapack) result(processor)
254 
255     type(xgScalapack_t), intent(in) :: xgScalapack
256     type(processor_scalapack) :: processor
257 
258     processor%myproc = xgScalapack%rank(M__SLK)
259     processor%comm = xgScalapack%comms(M__SLK)
260     processor%coords = xgScalapack%coords
261     processor%grid = xgScalapack%grid
262   end function toProcessorScalapack
263 
264   !This is for testing purpose.
265   !May not be optimal since I do not control old implementation but at least gives a reference.
266   subroutine xgScalapack_heev(xgScalapack,matrixA,eigenvalues,gpu_option)
267     use, intrinsic :: iso_c_binding
268     type(xgScalapack_t), intent(inout) :: xgScalapack
269     type(xgBlock_t)    , intent(inout) :: matrixA
270     type(xgBlock_t)    , intent(inout) :: eigenvalues
271     integer, optional  , intent(in)    :: gpu_option
272 #ifdef HAVE_LINALG_SCALAPACK
273     double precision, pointer :: matrix(:,:) !(cplex*nbli_global,nbco_global)
274     double precision, pointer :: eigenvalues_tmp(:,:)
275     double precision, pointer :: vector(:)
276     double precision :: tsec(2)
277     integer :: cplex
278     integer :: istwf_k
279     integer :: nbli_global, nbco_global
280     type(c_ptr) :: cptr
281     integer :: req(2), status(MPI_STATUS_SIZE,2), ierr
282     integer :: l_gpu_option,l_use_gpu_elpa
283 #endif
284 
285 #ifdef HAVE_LINALG_SCALAPACK
286     call timab(M__tim_heev,1,tsec)
287 
288     l_gpu_option=ABI_GPU_DISABLED
289     if (present(gpu_option)) then
290       l_gpu_option = gpu_option
291     end if
292     l_use_gpu_elpa=0
293 #ifdef HAVE_LINALG_ELPA
294     if (l_gpu_option/=ABI_GPU_DISABLED) l_use_gpu_elpa=1
295 #endif
296 
297     ! Keep only working processors
298     if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then
299 
300       call xgBlock_getSize(eigenvalues,nbli_global,nbco_global)
301       if ( cols(matrixA) /= nbli_global ) then
302         ABI_ERROR("Number of eigen values differ from number of vectors")
303       end if
304 
305       if ( space(matrixA) == SPACE_C ) then
306         cplex = 2
307         istwf_k = 1
308       else
309         cplex = 1
310         istwf_k = 2
311       endif
312 
313       call xgBlock_getSize(matrixA,nbli_global,nbco_global)
314 
315       if(l_gpu_option==ABI_GPU_OPENMP) then
316         call xgBlock_copy_from_gpu(matrixA)
317         call xgBlock_copy_from_gpu(eigenvalues)
318       end if
319 
320       call xgBlock_reverseMap(matrixA,matrix,nbli_global,nbco_global)
321       call xgBlock_reverseMap(eigenvalues,eigenvalues_tmp,nbco_global,1)
322       cptr = c_loc(eigenvalues_tmp)
323       call c_f_pointer(cptr,vector,(/ nbco_global /))
324 
325       call compute_eigen1(xgScalapack%comms(M__SLK), &
326         toProcessorScalapack(xgScalapack), &
327         cplex,nbli_global,nbco_global,matrix,vector,istwf_k,&
328         use_gpu_elpa=l_use_gpu_elpa)
329 
330     end if
331 
332     call timab(M__tim_heev,2,tsec)
333 
334     req(1:2)=-1
335     call xgScalapack_scatter(xgScalapack,matrixA,req(1))
336     call xgScalapack_scatter(xgScalapack,eigenvalues,req(2))
337 #ifdef HAVE_MPI
338     if ( any(req/=-1)  ) then
339       call MPI_WaitAll(2,req,status,ierr)
340       if ( ierr /= 0 ) then
341           ABI_ERROR("Error waiting data")
342       endif
343     end if
344 #endif
345 
346     if(l_gpu_option==ABI_GPU_OPENMP) then
347       call xgBlock_copy_to_gpu(matrixA)
348       call xgBlock_copy_to_gpu(eigenvalues)
349     end if
350 
351 #else
352    ABI_ERROR("ScaLAPACK support not available")
353    ABI_UNUSED(xgScalapack%verbosity)
354    ABI_UNUSED(matrixA%normal)
355    ABI_UNUSED(eigenvalues%normal)
356 #endif
357 
358   end subroutine xgScalapack_heev
359 
360   !This is for testing purpose.
361   !May not be optimal since I do not control old implementation but at least gives a reference.
362   subroutine xgScalapack_hegv(xgScalapack,matrixA,matrixB,eigenvalues,gpu_option)
363     use, intrinsic :: iso_c_binding
364     type(xgScalapack_t), intent(inout) :: xgScalapack
365     type(xgBlock_t)    , intent(inout) :: matrixA
366     type(xgBlock_t)    , intent(inout) :: matrixB
367     type(xgBlock_t)    , intent(inout) :: eigenvalues
368     integer, optional  , intent(in)    :: gpu_option
369 #ifdef HAVE_LINALG_SCALAPACK
370     double precision, pointer :: matrix1(:,:) !(cplex*nbli_global,nbco_global)
371     double precision, pointer :: matrix2(:,:) !(cplex*nbli_global,nbco_global)
372     double precision, pointer :: eigenvalues_tmp(:,:)
373     double precision, pointer :: vector(:)
374     double precision :: tsec(2)
375     integer :: cplex
376     integer :: istwf_k
377     integer :: nbli_global, nbco_global
378     type(c_ptr) :: cptr
379     integer :: req(2), status(MPI_STATUS_SIZE,2),ierr
380     integer :: l_gpu_option,l_use_gpu_elpa
381 #endif
382 
383 #ifdef HAVE_LINALG_SCALAPACK
384     call timab(M__tim_hegv,1,tsec)
385 
386     l_gpu_option=ABI_GPU_DISABLED
387     if (present(gpu_option)) then
388       l_gpu_option = gpu_option
389     end if
390     l_use_gpu_elpa=0
391 #ifdef HAVE_LINALG_ELPA
392     if (l_gpu_option/=ABI_GPU_DISABLED) l_use_gpu_elpa=1
393 #endif
394 
395     ! Keep only working processors
396     if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then
397 
398       call xgBlock_getSize(eigenvalues,nbli_global,nbco_global)
399       if ( cols(matrixA) /= cols(matrixB) ) then
400         ABI_ERROR("Matrix A and B don't have the same number of vectors")
401       end if
402 
403       if ( cols(matrixA) /= nbli_global ) then
404         ABI_ERROR("Number of eigen values differ from number of vectors")
405       end if
406 
407       if ( space(matrixA) == SPACE_C ) then
408         cplex = 2
409         istwf_k = 1
410       else
411         cplex = 1
412         istwf_k = 2
413       endif
414 
415       call xgBlock_getSize(matrixA,nbli_global,nbco_global)
416 
417       if(l_gpu_option==ABI_GPU_OPENMP) then
418         call xgBlock_copy_from_gpu(matrixA)
419         call xgBlock_copy_from_gpu(matrixB)
420         call xgBlock_copy_from_gpu(eigenvalues)
421       end if
422 
423       call xgBlock_reverseMap(matrixA,matrix1,nbli_global,nbco_global)
424       call xgBlock_reverseMap(matrixB,matrix2,nbli_global,nbco_global)
425       call xgBlock_reverseMap(eigenvalues,eigenvalues_tmp,nbco_global,1)
426       cptr = c_loc(eigenvalues_tmp)
427       call c_f_pointer(cptr,vector,(/ nbco_global /))
428 
429       call compute_eigen2(xgScalapack%comms(M__SLK), &
430         toProcessorScalapack(xgScalapack), &
431         cplex,nbli_global,nbco_global,matrix1,matrix2,vector,istwf_k,&
432         use_gpu_elpa=l_use_gpu_elpa)
433     end if
434 
435     call timab(M__tim_hegv,2,tsec)
436 
437     req(1:2)=-1
438     call xgScalapack_scatter(xgScalapack,matrixA,req(1))
439     call xgScalapack_scatter(xgScalapack,eigenvalues,req(2))
440 #ifdef HAVE_MPI
441     if ( any(req/=-1)   ) then
442       call MPI_WaitAll(2,req,status,ierr)
443       if ( ierr /= 0 ) then
444           ABI_ERROR("Error waiting data")
445       endif
446     end if
447 #endif
448 
449     if(l_gpu_option==ABI_GPU_OPENMP) then
450       call xgBlock_copy_to_gpu(matrixA)
451       call xgBlock_copy_to_gpu(eigenvalues)
452     end if
453 
454 #else
455    ABI_ERROR("ScaLAPACK support not available")
456    ABI_UNUSED(xgScalapack%verbosity)
457    ABI_UNUSED(matrixA%normal)
458    ABI_UNUSED(matrixB%normal)
459    ABI_UNUSED(eigenvalues%normal)
460 #endif
461 
462   end subroutine xgScalapack_hegv
463 
464 
465   subroutine xgScalapack_scatter(xgScalapack,matrix,req)
466 
467     type(xgScalapack_t), intent(in   ) :: xgScalapack
468     type(xgBlock_t)    , intent(inout) :: matrix
469     integer            , intent(  out) :: req
470     double precision, pointer :: tab(:,:)
471     double precision :: tsec(2)
472     integer :: cols, rows
473     integer :: ierr
474     integer :: sendto, receivefrom
475     integer :: lap
476 
477     call timab(M__tim_scatter,1,tsec)
478 
479     call xgBlock_getSize(matrix,rows,cols)
480     call xgBlock_reverseMap(matrix,tab,rows,cols)
481 
482     ! If we did the he(e|g)v and we are the first group
483     if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null .and. xgScalapack%rank(M__WORLD)<xgScalapack%size(M__SLK) ) then
484       lap = xgScalapack%ngroup
485       sendto = xgScalapack%rank(M__WORLD) + lap*xgScalapack%size(M__SLK)
486       if ( sendto < xgScalapack%size(M__WORLD) ) then
487       !do while ( sendto < xgScalapack%size(M__WORLD) )
488         !call xmpi_send(tab,sendto,sendto,xgScalapack%comms(M__WORLD),ierr)
489         call xmpi_isend(tab,sendto,sendto,xgScalapack%comms(M__WORLD),req,ierr)
490         !write(*,*) xgScalapack%rank(M__WORLD), "sends to", sendto
491         if ( ierr /= 0 ) then
492           ABI_ERROR("Error sending data")
493         end if
494         !lap = lap+1
495         !sendto = xgScalapack%rank(M__WORLD) + lap*xgScalapack%size(M__SLK)
496       !end do
497       end if
498     else if ( xgScalapack%comms(M__UNUSED) /= xmpi_comm_null ) then
499       receivefrom = MODULO(xgScalapack%rank(M__WORLD), xgScalapack%size(M__SLK))
500       if ( receivefrom >= 0 ) then
501         !call xmpi_recv(tab,receivefrom,xgScalapack%rank(M__WORLD),xgScalapack%comms(M__WORLD),ierr)
502         call xmpi_irecv(tab,receivefrom,xgScalapack%rank(M__WORLD),xgScalapack%comms(M__WORLD),req,ierr)
503         !write(*,*) xgScalapack%rank(M__WORLD), "receive from", receivefrom
504         if ( ierr /= 0 ) then
505           ABI_ERROR("Error receiving data")
506         end if
507       end if
508     !else
509       !ABI_BUG("Error scattering data")
510     end if
511 
512     call timab(M__tim_scatter,2,tsec)
513 
514   end subroutine xgScalapack_scatter
515 
516 
517   subroutine  xgScalapack_free(xgScalapack)
518 
519     type(xgScalapack_t), intent(inout) :: xgScalapack
520     double precision :: tsec(2)
521 #ifdef HAVE_LINALG_SCALAPACK
522     integer :: ierr
523 #endif
524 
525     call timab(M__tim_free,1,tsec)
526 #ifdef HAVE_LINALG_SCALAPACK
527     if ( xgScalapack%comms(M__SLK) /= xmpi_comm_null ) then
528       call BLACS_GridExit(xgScalapack%grid%ictxt)
529       call MPI_Comm_free(xgScalapack%comms(M__SLK),ierr)
530     end if
531     if ( xgScalapack%comms(M__UNUSED) /= xmpi_comm_null ) then
532       call MPI_Comm_free(xgScalapack%comms(M__UNUSED),ierr)
533     end if
534 #else
535     ABI_UNUSED(xgScalapack%verbosity)
536 #endif
537     call timab(M__tim_free,2,tsec)
538 
539   end subroutine xgScalapack_free
540 
541 end module m_xgScalapack