TABLE OF CONTENTS
- ABINIT/m_gwls_QR_factorization
- m_hamiltonian/extract_QR
- m_hamiltonian/extract_QR_Householder
- m_hamiltonian/extract_SVD
- m_hamiltonian/extract_SVD_lapack
ABINIT/m_gwls_QR_factorization [ Modules ]
NAME
m_gwls_QR_factorization
FUNCTION
.
COPYRIGHT
Copyright (C) 2009-2024 ABINIT group (JLJ, BR, MC) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 23 24 module m_gwls_QR_factorization 25 !---------------------------------------------------------------------------------------------------- 26 ! This module implements the QR factorization using various algorithms, for the specific 27 ! data distribution corresponding to FFT parallelism. 28 ! 29 ! There are standard routines which do this (lapack, scalapack, etc), however it is complicated 30 ! to get scalapack to run properly in parallel. Implementing ourselves is the shortest path to 31 ! a working solution. 32 !---------------------------------------------------------------------------------------------------- 33 !local modules 34 use m_gwls_utility 35 use m_gwls_TimingLog 36 use m_gwls_wf 37 use m_gwls_hamiltonian 38 39 !abinit modules 40 use defs_basis 41 use defs_wvltypes 42 use m_abicore 43 use m_xmpi 44 use m_errors 45 46 use defs_abitypes, only : MPI_type 47 use m_io_tools, only : get_unit 48 use m_time, only : timab 49 50 51 implicit none 52 save 53 private
m_hamiltonian/extract_QR [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_QR
FUNCTION
.
INPUTS
OUTPUT
SOURCE
77 subroutine extract_QR(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 78 !-------------------------------------------------------------------------- 79 ! This function computes the QR factorization: 80 ! 81 ! X = Q . R 82 ! 83 ! in order to extract the matrix of orthonormal vectors Q and 84 ! the R matrix. 85 ! 86 ! On output, the matrix X is replaced by Q. 87 ! 88 ! If the code is running with only one processor, this routine 89 ! simply invokes extract_QR_serial, which wraps standard Lapack routines. 90 ! If we are running in MPI parallel, the serial Lapack routines no 91 ! longer work (and understanding scalapack is too complicated right now). 92 ! Thus, in that case, this routine implements some old school Gram-Schmidt 93 ! algorithm. 94 !-------------------------------------------------------------------------- 95 implicit none 96 97 integer, intent(in) :: Hsize, Xsize, mpi_communicator 98 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize) 99 100 complex(dpc), intent(out),optional :: Rmatrix(Xsize,Xsize) 101 102 ! local variables 103 104 real(dp) :: tsec(2) 105 integer :: GWLS_TIMAB, OPTION_TIMAB 106 107 ! ************************************************************************* 108 109 110 111 GWLS_TIMAB = 1519 112 OPTION_TIMAB = 1 113 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 114 115 116 !-------------------------------------------------------------------------------- 117 ! Implement Gram-Schmidt. 118 !-------------------------------------------------------------------------------- 119 call extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 120 121 OPTION_TIMAB = 2 122 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 123 124 end subroutine extract_QR
m_hamiltonian/extract_QR_Householder [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_QR_Householder
FUNCTION
.
INPUTS
OUTPUT
SOURCE
374 subroutine extract_QR_Householder(mpi_communicator,Hsize,Xsize,Xmatrix,Rmatrix) 375 !-------------------------------------------------------------------------- 376 ! This function computes the QR factorization: 377 ! 378 ! X = Q . R 379 ! 380 ! in order to extract the matrix of orthonormal vectors Q and 381 ! the R matrix. 382 ! 383 ! On output, the matrix X is replaced by Q. 384 ! 385 ! This routine uses Householder operations to generate Q and R. 386 ! Special attention is given to the fact that the matrix may be 387 ! distributed across processors and MPI communication is necessary. 388 ! 389 !-------------------------------------------------------------------------- 390 implicit none 391 392 integer, intent(in) :: Hsize, Xsize, mpi_communicator 393 complex(dpc),intent(inout) :: Xmatrix(Hsize,Xsize) 394 395 complex(dpc), intent(out),optional :: Rmatrix(Xsize,Xsize) 396 397 ! local variables 398 integer :: numbrer_of_plane_waves 399 400 integer :: io_unit 401 integer :: ierr 402 character(50) :: filename 403 logical :: file_exists 404 405 integer,save :: counter = 0 406 407 integer :: i, j, l_local 408 integer :: l1, l2 409 410 integer, allocatable :: nproc_array(:) 411 412 complex(dpc), allocatable :: Qinternal(:,:) 413 complex(dpc), allocatable :: Rinternal(:,:) 414 complex(dpc), allocatable :: vj(:) 415 416 complex(dpc), allocatable :: A_matrix(:,:) 417 complex(dpc), allocatable :: V_matrix(:,:) 418 419 complex(dpc), allocatable :: list_beta(:) 420 421 complex(dpc) :: cmplx_value 422 real (dp ) :: real_value 423 424 complex(dpc) :: norm_x 425 complex(dpc) :: phase 426 427 complex(dpc), allocatable :: error(:,:) 428 complex(dpc), allocatable :: coeff(:) 429 430 431 integer :: mpi_rank 432 integer :: mpi_nproc 433 logical :: head_node 434 435 ! ************************************************************************* 436 437 438 !-------------------------------------------------------------------------------- 439 ! Implement Householder algorithm, in parallel 440 !-------------------------------------------------------------------------------- 441 mpi_nproc = xmpi_comm_size(mpi_communicator) 442 443 ! extract the rank of this processor in the communicator 444 mpi_rank = xmpi_comm_rank(mpi_communicator) 445 446 ! only head node will write! 447 head_node = mpi_rank == 0 448 449 450 !-------------------------------------------------------------------------------- 451 ! Open a log file for the output of extract_QR 452 !-------------------------------------------------------------------------------- 453 if (debug .and. head_node ) then 454 455 io_unit = get_unit() 456 write(filename,'(A,I0.4,A)') "extract_QR_",mpi_rank,".log" 457 inquire(file=trim(filename),exist=file_exists) 458 459 if (file_exists) then 460 open(io_unit,file=trim(filename),position='append',status=files_status_old) 461 else 462 open(io_unit,file=trim(filename),status=files_status_new) 463 write(io_unit,10) "#=======================================================================================" 464 write(io_unit,10) "# " 465 write(io_unit,10) "# This file contains information regarding the QR factorization, from extract_QR " 466 write(io_unit,25) "# The algorithm is running in MPI parallel with ",mpi_nproc," processors" 467 write(io_unit,10) "# " 468 write(io_unit,10) "#=======================================================================================" 469 end if 470 471 counter = counter + 1 472 473 write(io_unit,10) "# " 474 write(io_unit,11) "# Call # ", counter 475 write(io_unit,10) "# " 476 write(io_unit,11) "# Hsize = ",Hsize 477 write(io_unit,11) "# Xsize = ",Xsize 478 write(io_unit,13) "# Rmatrix present? = ",present(Rmatrix) 479 480 481 end if 482 483 !-------------------------------------------------------------------------------- 484 ! Get the number of plane waves on every processor 485 !-------------------------------------------------------------------------------- 486 ABI_MALLOC(nproc_array,(mpi_nproc)) 487 488 nproc_array = 0 489 490 numbrer_of_plane_waves = Hsize ! do this to avoid "intent" problems 491 call xmpi_allgather(numbrer_of_plane_waves, nproc_array, mpi_communicator, ierr) 492 493 494 !-------------------------------------------------------------------------------- 495 ! Get the offset for each processor 496 ! 497 ! The global index is then given by 498 ! I_{global} = nproc_array(1+rank)+i_{local} 499 ! 500 ! similarly, the local index is given by 501 ! i_{local} = I_{global} - nproc_array(1+rank) 502 ! which is only meaningful if 1 <= i_{local} <= Hsize 503 !-------------------------------------------------------------------------------- 504 505 do j = mpi_nproc, 1, -1 506 nproc_array(j) = 0 507 do i = 1, j-1 508 nproc_array(j) = nproc_array(j)+nproc_array(i) 509 end do 510 end do 511 512 513 !-------------------------------------------------------------------------------- 514 ! Act on the A matrix, following the book by Golub (more or less ;) ) 515 ! 516 !-------------------------------------------------------------------------------- 517 518 ABI_MALLOC(A_matrix, (Hsize,Xsize)) 519 ABI_MALLOC(V_matrix, (Hsize,Xsize)) 520 ABI_MALLOC(list_beta, (Xsize)) 521 ABI_MALLOC(coeff , (Xsize)) 522 523 A_matrix(:,:) = Xmatrix(:,:) 524 V_matrix(:,:) = cmplx_0 525 list_beta(:) = cmplx_0 526 527 528 ABI_MALLOC(vj, (Hsize)) 529 530 do j = 1, Xsize 531 532 ! Store xj in vj, for now 533 vj(:) = A_matrix(:,j) 534 535 536 if (j > 1) then 537 !------------------------------------------ 538 ! set the array to zero all the way to j-1 539 !------------------------------------------ 540 l_local = j-1-nproc_array(1+mpi_rank) 541 542 if ( l_local > Hsize) then 543 vj(:) = cmplx_0 544 else if ( l_local <= Hsize .and. l_local >= 1) then 545 vj(1:l_local) = cmplx_0 546 end if 547 548 end if 549 550 ! compute the norm of x 551 norm_x = sum(conjg(vj(:))*vj(:)) 552 call xmpi_sum(norm_x,mpi_communicator,ierr) ! sum on all processors in communicator 553 norm_x = sqrt(norm_x) 554 555 556 if (abs(norm_x) > tol14) then 557 ! if |x| ~ 0, there is nothing to do! the column in A is full of zeros. 558 559 ! find the j^th component of x 560 l_local = j-nproc_array(1+mpi_rank) 561 562 ! update vj, on the right processor! 563 if ( l_local <= Hsize .and. l_local >= 1) then 564 565 phase = vj(l_local) 566 567 if (abs(phase) < tol14) then 568 phase = cmplx_1 569 else 570 phase = phase/abs(phase) 571 end if 572 573 vj(l_local) = vj(l_local) + phase*norm_x 574 575 end if 576 577 !compute beta 578 cmplx_value = sum(conjg(vj(:))*vj(:)) 579 call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors 580 581 list_beta(j) = 2.0_dp/cmplx_value 582 583 ! store v for later use; this is less efficient than storing it in the null part of A, 584 ! but it is less of a pain to implement. Feel free to clean this up. 585 V_matrix(:,j) = vj(:) 586 587 588 ! Update the A matrix 589 590 ! Compute v^dagger . A 591 !call ZGEMM( 'C', & ! Hermitian conjugate the first array 592 ! 'N', & ! Leave second array as is 593 ! 1, & ! the number of rows of the matrix op( A ) 594 ! Xsize-j+1, & ! the number of columns of the matrix op( B ) 595 ! Hsize, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 596 ! cmplx_1, & ! alpha constant 597 ! vj, & ! matrix A 598 ! Hsize, & ! LDA 599 ! A_matrix(:,j:Xsize), & ! matrix B 600 ! Hsize, & ! LDB 601 ! cmplx_0, & ! beta constant 602 ! coeff(:,j:Xsize), & ! matrix C 603 ! 1) ! LDC 604 do i = j, Xsize 605 coeff(i) = sum(conjg(vj)*A_matrix(:,i)) 606 end do 607 call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in the communicator 608 609 ! update A 610 do i = j, Xsize 611 A_matrix(:,i) = A_matrix(:,i) - list_beta(j)*coeff(i)*vj(:) 612 end do 613 614 end if 615 616 end do 617 618 !-------------------------------------------------------------------------------- 619 ! Extract the R matrix 620 ! 621 !-------------------------------------------------------------------------------- 622 623 ABI_MALLOC(Rinternal,(Xsize,Xsize)) 624 625 Rinternal = cmplx_0 626 627 628 do j = 1, Xsize 629 do i = 1, j 630 l_local = i-nproc_array(1+mpi_rank) 631 632 if ( l_local <= Hsize .and. l_local >= 1) then 633 Rinternal(i,j) = A_matrix(l_local,j) 634 end if 635 636 end do ! i 637 end do ! j 638 639 call xmpi_sum(Rinternal,mpi_communicator,ierr) ! sum on all processors 640 641 642 643 !-------------------------------------------------------------------------------- 644 ! Extract the Q matrix 645 ! 646 !-------------------------------------------------------------------------------- 647 648 ABI_MALLOC( Qinternal, (Hsize,Xsize)) 649 650 ! initialize Q to the identity in the top corner 651 Qinternal = cmplx_0 652 653 do j = 1, Xsize 654 l_local = j-nproc_array(1+mpi_rank) 655 656 if ( l_local <= Hsize .and. l_local >= 1 ) then 657 Qinternal(l_local,j) = cmplx_1 658 end if 659 660 end do ! j 661 662 663 ! Build Q interatively 664 do j = Xsize,1, -1 665 666 vj(:) = V_matrix(:,j) 667 668 669 ! Update the A matrix 670 671 ! Compute v^dagger . A 672 !call ZGEMM( 'C', & ! Hermitian conjugate the first array 673 ! 'N', & ! Leave second array as is 674 ! 1, & ! the number of rows of the matrix op( A ) 675 ! Xsize, & ! the number of columns of the matrix op( B ) 676 ! Hsize, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 677 ! cmplx_1, & ! alpha constant 678 ! vj, & ! matrix A 679 ! Hsize, & ! LDA 680 ! Qinternal, & ! matrix B 681 ! Hsize, & ! LDB 682 ! cmplx_0, & ! beta constant 683 ! coeff, & ! matrix C 684 ! 1) ! LDC 685 686 do i = 1, Xsize 687 coeff(i) = sum(conjg(vj)*Qinternal(:,i)) 688 end do 689 call xmpi_sum(coeff,mpi_communicator,ierr) ! sum on all processors in communicator 690 691 692 ! update Q 693 do i = 1, Xsize 694 Qinternal(:,i) = Qinternal(:,i) - list_beta(j)*coeff(i)*vj(:) 695 end do 696 697 end do ! j 698 699 700 701 ! clean up 702 ABI_FREE(V_matrix) 703 ABI_FREE(coeff) 704 ABI_FREE(vj) 705 706 !-------------------------------------------------------------------------------- 707 ! Do some debug, if requested 708 ! 709 !-------------------------------------------------------------------------------- 710 711 if (debug ) then 712 713 if ( head_node ) then 714 715 write(io_unit,20) "# nproc_array = ",nproc_array 716 flush(io_unit) 717 718 write(io_unit,40) "# list_beta = ",real(list_beta) 719 flush(io_unit) 720 end if 721 722 723 ABI_MALLOC(error,(Xsize,Xsize)) 724 725 error = cmplx_0 726 727 do l2=1,Xsize 728 error(l2,l2) = error(l2,l2) - cmplx_1 729 do l1=1,Xsize 730 731 cmplx_value = complex_vector_product(Qinternal(:,l1),Qinternal(:,l2),Hsize) 732 call xmpi_sum(cmplx_value,mpi_communicator,ierr) ! sum on all processors working on FFT! 733 734 error(l1,l2) = error(l1,l2)+cmplx_value 735 736 end do 737 end do 738 739 if ( head_node ) then 740 write(io_unit,12) "# || Q^t.Q - I || = ",sqrt(sum(abs(error(:,:))**2)) 741 flush(io_unit) 742 end if 743 744 745 ABI_FREE(error) 746 747 ABI_MALLOC(error,(Hsize,Xsize)) 748 749 error = Xmatrix 750 751 do l2=1,Xsize 752 do l1=1,Xsize 753 error(:,l2) = error(:,l2) - Qinternal(:,l1)*Rinternal(l1,l2) 754 end do 755 end do 756 757 real_value = zero 758 do l2=1,Xsize 759 do l1=1,Xsize 760 cmplx_value = complex_vector_product(error(:,l1),error(:,l2),Hsize) 761 762 real_value = real_value + abs(cmplx_value)**2 763 end do 764 end do 765 766 call xmpi_sum(real_value,mpi_communicator,ierr) ! sum on all processors 767 768 769 real_value = sqrt(real_value) 770 771 if ( head_node) then 772 write(io_unit,12) "# || Xin - Q.R || = ",real_value 773 774 if ( real_value > 1.0D-10 ) write(io_unit,10) "# ERROR! " 775 776 write(io_unit,10) '#' 777 write(io_unit,10) '# R matrix' 778 write(io_unit,10) '#' 779 do l1=1, Xsize 780 write(io_unit,30) Rinternal(l1,:) 781 end do 782 write(io_unit,10) '' 783 write(io_unit,10) '' 784 785 786 write(io_unit,10) '#' 787 write(io_unit,10) '# top of A matrix' 788 write(io_unit,10) '#' 789 do l1=1, 2*Xsize 790 write(io_unit,30) A_matrix(l1,:) 791 end do 792 write(io_unit,10) '' 793 write(io_unit,10) '' 794 795 close(io_unit) 796 797 end if 798 799 ABI_FREE(error) 800 801 end if 802 803 !-------------------------------------------------------------------------------- 804 ! Final assignment 805 ! 806 !-------------------------------------------------------------------------------- 807 808 Xmatrix = Qinternal 809 810 if (present(Rmatrix)) then 811 Rmatrix = Rinternal 812 end if 813 814 ABI_FREE(Qinternal) 815 ABI_FREE(Rinternal) 816 ABI_FREE(nproc_array) 817 ABI_FREE(A_matrix) 818 ABI_FREE(list_beta) 819 820 821 10 format(A) 822 11 format(A,I8) 823 12 format(A,E24.16) 824 13 format(A,2X,L10) 825 20 format(A,1000I10) 826 25 format(A,I5,A) 827 30 format(1000(F22.16,2X,F22.16,5X)) 828 40 format(A,1000(F22.16,5X)) 829 830 end subroutine extract_QR_Householder
m_hamiltonian/extract_SVD [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_SVD
FUNCTION
.
INPUTS
OUTPUT
SOURCE
141 subroutine extract_SVD(mpi_communicator, Hsize,lsolutions_max,svd_matrix,svd_values) 142 !-------------------------------------------------------------------------- 143 ! This function computes the singular value decomposition 144 ! 145 ! X = U . SIGMA . V^dagger 146 ! 147 ! More specifically, the matrix U of orthonormal vectors and SIGMA 148 ! the eigenvalues are returned. 149 ! 150 ! different algorithms are used, depending on parallelisation scheme. 151 !-------------------------------------------------------------------------- 152 implicit none 153 154 integer, intent(in) :: mpi_communicator 155 integer, intent(in) :: Hsize, lsolutions_max 156 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max) 157 real(dp), intent(out) :: svd_values(lsolutions_max) 158 159 160 complex(dpc), allocatable :: Rmatrix(:,:) 161 complex(dpc), allocatable :: svd_tmp(:,:) 162 163 real(dp) :: tsec(2) 164 integer :: GWLS_TIMAB, OPTION_TIMAB 165 166 ! ************************************************************************* 167 168 169 170 GWLS_TIMAB = 1520 171 OPTION_TIMAB = 1 172 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 173 174 175 !if ( mpi_enreg%nproc_fft ==1 ) then 176 if ( .false. ) then 177 178 call extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values) 179 180 else 181 182 ABI_MALLOC(Rmatrix,(lsolutions_max,lsolutions_max)) 183 184 ! perform QR first 185 call extract_QR(mpi_communicator, Hsize,lsolutions_max,svd_matrix,Rmatrix) 186 187 ! perform SVD on the much smaller Rmatrix! 188 call extract_SVD_lapack(lsolutions_max,lsolutions_max,Rmatrix,svd_values) 189 190 ABI_MALLOC(svd_tmp,(Hsize,lsolutions_max)) 191 192 ! Rmatrix is overwritten with U matrix from SVD. Update the svd_matrix 193 call ZGEMM( 'N', & ! Leave first array as is 194 'N', & ! Leave second array as is 195 Hsize, & ! the number of rows of the matrix op( A ) 196 lsolutions_max, & ! the number of columns of the matrix op( B ) 197 lsolutions_max, & ! the number of columns of the matrix op( A ) == rows of matrix op( B ) 198 cmplx_1, & ! alpha constant 199 svd_matrix, & ! matrix A 200 Hsize, & ! LDA 201 Rmatrix, & ! matrix B 202 lsolutions_max, & ! LDB 203 cmplx_0, & ! beta constant 204 svd_tmp, & ! matrix C 205 Hsize) ! LDC 206 207 svd_matrix(:,:) = svd_tmp(:,:) 208 209 ABI_FREE(svd_tmp) 210 ABI_FREE(Rmatrix) 211 end if 212 OPTION_TIMAB = 2 213 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 214 215 216 end subroutine extract_SVD
m_hamiltonian/extract_SVD_lapack [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
extract_SVD_lapack
FUNCTION
.
INPUTS
OUTPUT
SOURCE
232 subroutine extract_SVD_lapack(Hsize,lsolutions_max,svd_matrix,svd_values) 233 !-------------------------------------------------------------------------- 234 ! This function computes the singular value decomposition 235 ! using lapack routines. This is not appropriate in MPI parallel! 236 ! 237 ! 238 !-------------------------------------------------------------------------- 239 implicit none 240 241 242 integer, intent(in) :: Hsize, lsolutions_max 243 complex(dpc), intent(inout) :: svd_matrix(Hsize,lsolutions_max) 244 real(dp), intent(out) :: svd_values(lsolutions_max) 245 246 247 248 integer :: info_zgesvd 249 integer :: lwork_svd 250 complex(dpc), allocatable :: work_svd(:) 251 complex(dpc), allocatable :: svd_U(:,:), svd_V(:,:) 252 real (dp ), allocatable :: rwork_svd(:) 253 254 integer :: debug_unit 255 character(50) :: debug_filename 256 257 ! ************************************************************************* 258 259 260 261 262 ! allocate arrays for the svd 263 ABI_MALLOC(svd_U ,(1,1)) 264 ABI_MALLOC(svd_V ,(1,1)) 265 266 267 ! DIMENSION QUERRY for singluar decomposition problem 268 269 ABI_MALLOC(rwork_svd ,(5*min(Hsize,lsolutions_max))) 270 ABI_MALLOC(work_svd,(1)) 271 lwork_svd = -1 272 273 call zgesvd('O', & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; 274 'N', & ! no column vectors of V are computed 275 Hsize, & ! number of rows of the matrix 276 lsolutions_max, & ! number of columns of the matrix 277 svd_matrix, & ! matrix to be decomposed 278 Hsize, & ! LDA 279 svd_values, & ! singular values 280 svd_U, & ! dummy U; not referenced 281 1, & ! size of U 282 svd_V, & ! dummy V; not referenced 283 1, & ! size of V 284 work_svd, & ! work array 285 lwork_svd, & ! size of work array 286 rwork_svd, & ! work array 287 info_zgesvd ) 288 289 if ( info_zgesvd /= 0) then 290 debug_unit = get_unit() 291 write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log' 292 293 open(debug_unit,file=trim(debug_filename),status='unknown') 294 295 write(debug_unit,'(A)') '*********************************************************************************************' 296 write(debug_unit,'(A,I4,A)') '* ERROR: info = ',info_zgesvd,' in ZGESVD(1), gwls_QR_factorization' 297 write(debug_unit,'(A)') '*********************************************************************************************' 298 299 close(debug_unit) 300 301 end if 302 303 304 305 306 307 308 lwork_svd = nint(dble(work_svd(1))) 309 310 ABI_FREE(work_svd) 311 312 ABI_MALLOC(work_svd,(lwork_svd)) 313 314 ! computation run 315 316 call zgesvd('O', & ! The first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; 317 'N', & ! no column vectors of V are computed 318 Hsize, & ! number of rows of the matrix 319 lsolutions_max, & ! number of columns of the matrix 320 svd_matrix, & ! matrix to be decomposed 321 Hsize, & ! LDA 322 svd_values, & ! singular values 323 svd_U, & ! dummy U; not referenced 324 1, & ! size of U 325 svd_V, & ! dummy V; not referenced 326 1, & ! size of V 327 work_svd, & ! work array 328 lwork_svd, & ! size of work array 329 rwork_svd, & ! work array 330 info_zgesvd ) 331 332 if ( info_zgesvd /= 0) then 333 debug_unit = get_unit() 334 write(debug_filename,'(A,I4.4,A)') 'LAPACK_DEBUG_PROC=',mpi_enreg%me,'.log' 335 336 open(debug_unit,file=trim(debug_filename),status='unknown') 337 338 write(debug_unit,'(A)') '*********************************************************************************************' 339 write(debug_unit,'(A,I4,A)') '* ERROR: info = ',info_zgesvd,' in ZGESVD(2), gwls_QR_factorization' 340 write(debug_unit,'(A)') '*********************************************************************************************' 341 342 close(debug_unit) 343 344 end if 345 346 347 348 349 ABI_FREE(work_svd) 350 ABI_FREE(rwork_svd) 351 ABI_FREE(svd_U) 352 ABI_FREE(svd_V) 353 354 end subroutine extract_SVD_lapack