TABLE OF CONTENTS
- ABINIT/m_gwls_DielectricArray
- m_hamiltonian/cleanup_projected_Sternheimer_epsilon
- m_hamiltonian/compute_eps_m1_minus_eps_model_m1
- m_hamiltonian/compute_eps_m1_minus_one
- m_hamiltonian/compute_eps_model_m1_minus_one
- m_hamiltonian/generate_frequencies_and_weights
- m_hamiltonian/ProjectedSternheimerEpsilon
ABINIT/m_gwls_DielectricArray [ Modules ]
NAME
m_gwls_DielectricArray
FUNCTION
.
COPYRIGHT
Copyright (C) 2009-2024 ABINIT group (JLJ, BR, MC) This file is distributed under the terms of the GNU General Public License, see ~abinit/COPYING or http://www.gnu.org/copyleft/gpl.txt .
SOURCE
16 #if defined HAVE_CONFIG_H 17 #include "config.h" 18 #endif 19 20 #include "abi_common.h" 21 22 23 module m_gwls_DielectricArray 24 !---------------------------------------------------------------------------------------------------- 25 ! This module generates and stores the arrays 26 ! 27 ! { eps^{-1}(iw)-eps_model^{-1}(iw) } in Lanczos basis 28 ! { eps_model^{-1}(iw) - 1 } in model Lanczos basis 29 ! 30 ! It makes sense to build these only once, as they do not depend on the external frequency. 31 ! 32 !---------------------------------------------------------------------------------------------------- 33 34 ! local modules 35 use m_gwls_utility 36 use m_gwls_wf 37 use m_gwls_valenceWavefunctions 38 use m_gwls_hamiltonian 39 use m_gwls_lineqsolver 40 use m_gwls_polarisability 41 use m_gwls_model_polarisability 42 use m_gwls_GenerateEpsilon 43 use m_gwls_TimingLog 44 use m_gwls_QR_factorization 45 use m_gwls_LanczosBasis 46 47 ! abinit modules 48 use defs_basis 49 use m_abicore 50 use m_xmpi 51 use m_cgtools 52 53 use m_time, only : timab 54 use m_io_tools, only: get_unit 55 use m_gaussian_quadrature, only: get_frequencies_and_weights_legendre 56 57 58 implicit none 59 save 60 61 private
m_hamiltonian/cleanup_projected_Sternheimer_epsilon [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
cleanup_projected_Sternheimer_epsilon
FUNCTION
.
INPUTS
OUTPUT
SOURCE
963 subroutine cleanup_projected_Sternheimer_epsilon 964 965 implicit none 966 967 ! ************************************************************************* 968 969 if(allocated(projected_epsilon_M_matrix)) then 970 ABI_FREE(projected_epsilon_M_matrix) 971 end if 972 if(allocated(projected_epsilon_B_matrix)) then 973 ABI_FREE(projected_epsilon_B_matrix) 974 end if 975 if(allocated(projected_epsilon_G_matrix)) then 976 ABI_FREE(projected_epsilon_G_matrix) 977 end if 978 if(allocated(eps_m1_minus_eps_model_m1)) then 979 ABI_FREE(eps_m1_minus_eps_model_m1) 980 end if 981 if(allocated(list_omega)) then 982 ABI_FREE(list_omega) 983 end if 984 if(allocated(list_weights)) then 985 ABI_FREE(list_weights) 986 end if 987 if(allocated(eps_model_m1_minus_one_DISTR)) then 988 ABI_FREE(eps_model_m1_minus_one_DISTR) 989 end if 990 if(allocated(model_lanczos_vector_belongs_to_this_node)) then 991 ABI_FREE(model_lanczos_vector_belongs_to_this_node) 992 end if 993 if(allocated(model_lanczos_vector_index)) then 994 ABI_FREE(model_lanczos_vector_index) 995 end if 996 997 998 end subroutine cleanup_projected_Sternheimer_epsilon
m_hamiltonian/compute_eps_m1_minus_eps_model_m1 [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_m1_minus_eps_model_m1
FUNCTION
.
INPUTS
OUTPUT
SOURCE
178 subroutine compute_eps_m1_minus_eps_model_m1(lmax, npt_gauss) 179 !---------------------------------------------------------------------------------------------------- 180 ! 181 ! This subroutine computes the array 182 ! 183 ! eps^{-1}(iw) - eps_model^{-1}(iw), 184 ! 185 ! for all relevant frequencies in the Lanczos basis. 186 !---------------------------------------------------------------------------------------------------- 187 implicit none 188 189 integer , intent(in) :: lmax, npt_gauss 190 191 character(256) :: timing_string 192 real(dp) :: time1, time2 193 real(dp) :: time 194 195 integer :: iw, l 196 complex(dpc),allocatable :: dummy_matrix(:,:) 197 complex(dpc),allocatable :: iden(:,:) 198 199 ! ************************************************************************* 200 201 202 timing_string = "#" 203 call write_text_block_in_Timing_log(timing_string) 204 timing_string = "# Computing eps^{-1}(iw) - eps_model^{-1}(iw) " 205 call write_text_block_in_Timing_log(timing_string) 206 timing_string = "#" 207 call write_text_block_in_Timing_log(timing_string) 208 209 210 call cpu_time(time1) 211 ! Allocate the module array 212 ABI_MALLOC(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1)) 213 ABI_MALLOC(dummy_matrix, (lmax,lmax)) 214 ABI_MALLOC(iden, (lmax,lmax)) 215 216 217 iden = cmplx_0 218 219 do l = 1, lmax 220 iden(l,l) = cmplx_1 221 end do 222 223 do iw = 1, npt_gauss + 1 224 225 226 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw) 227 228 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 229 230 231 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:) 232 233 dummy_matrix(:,:) = model_dielectric_Lanczos_basis(:,:,iw) 234 235 236 237 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 238 239 eps_m1_minus_eps_model_m1(:,:,iw) = eps_m1_minus_eps_model_m1(:,:,iw) - dummy_matrix(:,:) 240 241 242 end do 243 244 245 246 247 ! Deallocate the arrays which are no longer needed 248 ABI_FREE(model_dielectric_Lanczos_basis) 249 ABI_FREE(projected_dielectric_Lanczos_basis) 250 251 252 253 ABI_FREE(dummy_matrix) 254 ABI_FREE(iden) 255 256 257 258 call cpu_time(time2) 259 time = time2-time1 260 261 timing_string = "# Total time : " 262 call write_timing_log(timing_string,time) 263 264 265 266 267 end subroutine compute_eps_m1_minus_eps_model_m1
m_hamiltonian/compute_eps_m1_minus_one [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_m1_minus_one
FUNCTION
.
INPUTS
OUTPUT
SOURCE
283 subroutine compute_eps_m1_minus_one(lmax, npt_gauss) 284 !---------------------------------------------------------------------------------------------------- 285 ! 286 ! This subroutine computes the array 287 ! 288 ! eps^{-1}(iw) - I 289 ! 290 ! for all relevant frequencies in the Lanczos basis. 291 !---------------------------------------------------------------------------------------------------- 292 implicit none 293 294 integer , intent(in) :: lmax, npt_gauss 295 296 character(256) :: timing_string 297 real(dp) :: time1, time2 298 real(dp) :: time 299 300 integer :: iw, l 301 complex(dpc),allocatable :: dummy_matrix(:,:) 302 complex(dpc),allocatable :: iden(:,:) 303 304 ! ************************************************************************* 305 306 307 308 timing_string = "#" 309 call write_text_block_in_Timing_log(timing_string) 310 timing_string = "# Computing eps^{-1}(iw) - I " 311 call write_text_block_in_Timing_log(timing_string) 312 timing_string = "#" 313 call write_text_block_in_Timing_log(timing_string) 314 315 call cpu_time(time1) 316 ! Allocate the module array 317 318 ! The array eps_m1_minus_eps_model_m1 will be used to store 319 ! eps^{-1}-1; we can think of eps_model = I in this case. 320 ABI_MALLOC(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1)) 321 322 ABI_MALLOC(dummy_matrix, (lmax,lmax)) 323 ABI_MALLOC(iden, (lmax,lmax)) 324 325 iden = cmplx_0 326 327 do l = 1, lmax 328 iden(l,l) = cmplx_1 329 end do 330 331 332 do iw = 1, npt_gauss + 1 333 334 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw) 335 336 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax) 337 338 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)-iden(:,:) 339 340 end do 341 342 343 ! Deallocate the arrays which are no longer needed 344 ABI_FREE(projected_dielectric_Lanczos_basis) 345 346 ABI_FREE(dummy_matrix) 347 ABI_FREE(iden) 348 349 call cpu_time(time2) 350 time = time2-time1 351 352 timing_string = "# Total time : " 353 call write_timing_log(timing_string,time) 354 355 end subroutine compute_eps_m1_minus_one
m_hamiltonian/compute_eps_model_m1_minus_one [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
compute_eps_model_m1_minus_one
FUNCTION
.
INPUTS
OUTPUT
SOURCE
371 subroutine compute_eps_model_m1_minus_one(lmax_model, npt_gauss, second_model_parameter, epsilon_model_eigenvalues_0) 372 !---------------------------------------------------------------------------------------------------- 373 ! 374 ! This subroutine computes the array 375 ! 376 ! eps_model^{-1}(iw) - 1 377 ! 378 ! for all relevant frequencies in the model Lanczos basis. 379 ! 380 ! This array can potentially get very large, as the complementary basis gets big to achieve 381 ! convergence. Correspondingly, it makes sense to DISTRIBUTE this array on all processors. 382 ! 383 ! The algorithm will go as follows: 384 ! 385 ! eps_m = 1 - V . P . V, V = sqrt{vc} 386 ! 387 ! I ) compute VPV, storing blocks on different processors: 388 ! 389 ! VPV = [ ------- -------- ] = VPV[lc, nB, nW] 390 ! | [| block | block | ] 391 ! lc [| 1 | 2 | ... ] 392 ! | [| | | ] 393 ! | [| | | ] 394 ! | [ ------- --------- ] 395 ! bsize 396 ! 397 ! This construction *does* involve a fair bit of communications, but it takes 398 ! a lot less RAM! 399 ! 400 ! II) once VPV is constructed, do, one frequency at a time: 401 ! - import all blocks to the HEAD processor 402 ! - compute eps_m = 1- VPV 403 ! - invert eps_m^{-1} 404 ! - subtract identity eps_m^{-1} - 1 405 ! - redistribute, block by block 406 ! 407 ! Doing this frequency by frequency will reduce the RAM weight on the head node. 408 ! 409 ! 410 !---------------------------------------------------------------------------------------------------- 411 implicit none 412 413 integer, intent(in) :: lmax_model, npt_gauss 414 real(dp), intent(in) :: second_model_parameter 415 real(dp), intent(in) :: epsilon_model_eigenvalues_0(lmax_model) 416 417 integer :: l, l1, l2 418 integer :: iw 419 integer :: v 420 integer :: ierr 421 422 real(dp), allocatable :: psikg_valence(:,:) 423 real(dp), allocatable :: psir_valence(:,:,:,:) 424 425 426 real(dp), allocatable :: psik_wrk(:,:) 427 real(dp), allocatable :: psikb_wrk(:,:) 428 real(dp), allocatable :: psikg_wrk(:,:) 429 real(dp), allocatable :: psikg_tmp(:,:) 430 431 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:) 432 433 434 complex(dpc),allocatable :: VPV(:,:,:) 435 real(dp), allocatable :: re_buffer(:,:), im_buffer(:,:) 436 437 complex(dpc),allocatable :: vpv_row(:) 438 439 440 complex(dpc),allocatable :: epsilon_head(:,:) 441 442 real(dp), allocatable :: re_BUFFER_head(:,:) 443 real(dp), allocatable :: im_BUFFER_head(:,:) 444 445 446 447 complex(dpc),allocatable :: YL(:) 448 449 character(256) :: timing_string 450 real(dp) :: time1, time2, time 451 real(dp) :: fft_time1, fft_time2, fft_time 452 real(dp) :: prod_time1, prod_time2, prod_time 453 454 complex(dpc) :: z 455 456 457 integer :: iblk_lanczos, nbdblock_lanczos 458 integer :: mb 459 integer :: lb 460 integer :: ik 461 462 integer :: mpi_communicator 463 integer :: mpi_nproc 464 integer :: mpi_rank 465 integer :: mpi_head_rank 466 467 468 integer,allocatable :: sendcounts(:), displs(:) 469 470 integer :: sendcount, recvcount 471 472 473 logical :: head 474 475 476 ! timing 477 real(dp) :: tsec(2) 478 integer :: GWLS_TIMAB, OPTION_TIMAB 479 480 ! ************************************************************************* 481 482 483 GWLS_TIMAB = 1541 484 OPTION_TIMAB = 1 485 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 486 487 488 489 timing_string = "#" 490 call write_text_block_in_Timing_log(timing_string) 491 timing_string = "# computing eps_model_m1_minus_one" 492 call write_text_block_in_Timing_log(timing_string) 493 timing_string = "#" 494 call write_text_block_in_Timing_log(timing_string) 495 496 497 ! Number of blocks of lanczos vectors (blocksize = npband) 498 nbdblock_lanczos = lmax_model/blocksize 499 if (modulo(lmax_model,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1 500 501 502 ! communicator 503 mpi_communicator = mpi_enreg%comm_bandfft 504 505 ! total number of processors in the communicator 506 mpi_nproc = xmpi_comm_size(mpi_communicator ) 507 508 ! what is the rank of this processor? 509 mpi_rank = xmpi_comm_rank(mpi_communicator ) 510 511 ! rank of the "head" processor 512 mpi_head_rank = 0 513 514 515 ! number of blocks in the model dielectric matrix, which is equal to the number of processors 516 nbdblock_epsilon = mpi_nproc 517 518 519 ! number of vectors in every block 520 blocksize_epsilon = lmax_model/mpi_nproc 521 if (modulo(lmax_model,mpi_nproc) /= 0) blocksize_epsilon = blocksize_epsilon + 1 522 523 ! attribute blocks to every nodes, and tabulate ownership in logical array 524 ! This is not *the most efficient* implementation possible, but it is convenient 525 ABI_MALLOC( model_lanczos_vector_belongs_to_this_node, (lmax_model)) 526 ABI_MALLOC( model_lanczos_vector_index, (lmax_model)) 527 528 529 model_lanczos_vector_index = 0 530 model_lanczos_vector_belongs_to_this_node = .false. 531 532 do l =1, lmax_model 533 if (mpi_rank == (l-1)/blocksize_epsilon) then 534 model_lanczos_vector_belongs_to_this_node(l) = .true. 535 model_lanczos_vector_index(l) = l-mpi_rank*blocksize_epsilon 536 end if 537 end do 538 539 !write(100+mpi_rank,*) "model_lanczos_vector_belongs_to_this_node = ",model_lanczos_vector_belongs_to_this_node(:) 540 !write(100+mpi_rank,*) "model_lanczos_vector_index = ",model_lanczos_vector_index(:) 541 !flush(100+mpi_rank) 542 543 ! Prepare the array that will contain the matrix elements of the model operator 544 ABI_MALLOC(VPV, (lmax_model,blocksize_epsilon,npt_gauss+1)) 545 546 VPV(:,:,:) = cmplx_0 547 548 ABI_MALLOC(vpv_row, (lmax_model)) 549 550 551 ! various working arrays 552 GWLS_TIMAB = 1542 553 OPTION_TIMAB = 1 554 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 555 556 ABI_MALLOC(psikg_valence,(2,npw_g)) 557 ABI_MALLOC(psir_valence ,(2,n4,n5,n6)) 558 559 560 ABI_MALLOC(psik_wrk, (2,npw_k)) 561 ABI_MALLOC(psikb_wrk, (2,npw_kb)) 562 ABI_MALLOC(psikg_wrk, (2,npw_g)) 563 ABI_MALLOC(psikg_tmp, (2,npw_g)) 564 565 ABI_MALLOC(local_Lbasis_conjugated,(npw_k,lmax_model)) 566 ABI_MALLOC(YL,(npw_k)) 567 568 OPTION_TIMAB = 2 569 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 570 571 572 573 fft_time = zero 574 prod_time = zero 575 576 577 call cpu_time(time1) 578 ! loop on all valence bands 579 580 do v = 1, nbandv 581 582 583 ! copy pre-calculated valence state in this covenient local array 584 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v) 585 586 ! compute fourier transform of valence state, and conjugate 587 call g_to_r(psir_valence,psikg_valence) 588 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 589 590 !-------------------------------------------------------------------------- 591 ! 592 ! Step 1: build the modified basis Pc . [ (V^{1/2} l) . psi_v^*]. 593 ! 594 !-------------------------------------------------------------------------- 595 GWLS_TIMAB = 1543 596 OPTION_TIMAB = 1 597 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 598 599 ! loop on all blocks of lanczos vectors 600 do iblk_lanczos = 1, nbdblock_lanczos 601 ! loop on all states within this block 602 do mb = 1, blocksize 603 604 ! Determine the index of the Lanczos vector 605 l = (iblk_lanczos-1)*blocksize + mb 606 607 if ( l <= lmax_model) then 608 psik_wrk(1,:) = dble (Lbasis_model_lanczos(:,l)) 609 psik_wrk(2,:) = dimag(Lbasis_model_lanczos(:,l)) 610 else 611 psik_wrk(:,:) = zero 612 end if 613 614 ! apply Coulomb potential 615 call sqrt_vc_k(psik_wrk) 616 617 ! Store in array of blocks of wavefunctions 618 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 619 620 end do ! mb 621 622 call cpu_time(fft_time1) 623 624 ! Transform to FFT representation 625 call wf_block_distribute(psikb_wrk, psikg_wrk,1) ! LA -> FFT 626 627 628 ! generate the vector Pc [ (sqrt_V_c.l) psi_v^*] 629 630 ! Compute the real space product, and return to k space, in FFT configuration 631 call gr_to_g(psikg_tmp,psir_valence, psikg_wrk) 632 633 634 call cpu_time(fft_time2) 635 fft_time = fft_time + fft_time2-fft_time1 636 637 ! project 638 call pc_k_valence_kernel(psikg_tmp) 639 640 ! Return to LA configuration 641 642 ! Transform to FFT representation 643 call wf_block_distribute(psikb_wrk, psikg_tmp, 2) ! FFT -> LA 644 645 do mb = 1, blocksize 646 647 ! Determine the index of the Lanczos vector 648 l = (iblk_lanczos-1)*blocksize + mb 649 650 651 if ( l <= lmax_model) then 652 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 653 local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 654 end if 655 656 657 end do ! mb 658 659 end do !iblk_lanczos 660 OPTION_TIMAB = 2 661 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 662 663 664 !-------------------------------------------------------------------------- 665 ! Step 2: Now that we have the modified basis, compute the matrix 666 ! elements of the model dielectric operator 667 ! 668 ! 669 !-------------------------------------------------------------------------- 670 GWLS_TIMAB = 1544 671 OPTION_TIMAB = 1 672 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 673 674 do iw = 2, npt_gauss+1 675 676 call setup_Pk_model(list_omega(iw),second_model_parameter) 677 678 call cpu_time(prod_time1) 679 do l1 = 1, lmax_model 680 681 ! Apply core function Y to left-vector; conjugate 682 do ik = 1, npw_k 683 YL(ik) = model_Y_LA(ik)*conjg(local_Lbasis_conjugated(ik,l1)) 684 end do 685 686 ! Only compute lower diagonal part of matrix; epsilon is hermitian conjugate! 687 vpv_row = cmplx_0 688 do l2 = 1, l1 689 do ik = 1, npw_k 690 vpv_row(l2) = vpv_row(l2) + YL(ik)*local_Lbasis_conjugated(ik,l2) 691 end do 692 end do 693 694 ! the code below is twice as long! 695 !call ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) 696 !call ZGEMV ( 'T', npw_k, lmax_model, cmplx_1, local_Lbasis_conjugated, npw_k, YL, 1, cmplx_0, vpv_row, 1) 697 698 !do l2 = 1, l1 699 ! eps_model_m1_minus_one(l1,l2,iw) = eps_model_m1_minus_one(l1,l2,iw) & 700 ! -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k) 701 !end do 702 703 ! Sum on all processors, making sure all processors have the total vpv_row 704 call xmpi_sum(vpv_row, mpi_communicator, ierr) ! sum on all processors for LA configuration 705 706 ! Each processor takes its slice! 707 do l2 =1, l1 708 if ( model_lanczos_vector_belongs_to_this_node(l2) ) then 709 lb = model_lanczos_vector_index(l2) 710 VPV(l1,lb,iw) = VPV(l1,lb,iw) + vpv_row(l2) 711 end if 712 end do 713 714 end do 715 call cpu_time(prod_time2) 716 717 prod_time = prod_time + prod_time2-prod_time1 718 719 end do ! iw 720 OPTION_TIMAB = 2 721 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 722 723 end do ! v 724 725 call cpu_time(time2) 726 727 time = time2-time1 728 timing_string = "# computing VPV : " 729 call write_timing_log(timing_string,time) 730 731 timing_string = "# --- of which is FFT transforms : " 732 call write_timing_log(timing_string,fft_time) 733 734 timing_string = "# --- of which is products : " 735 call write_timing_log(timing_string,prod_time) 736 737 738 739 740 741 GWLS_TIMAB = 1542 742 OPTION_TIMAB = 1 743 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 744 ABI_FREE(local_Lbasis_conjugated) 745 ABI_FREE(YL) 746 ABI_FREE(psikg_valence) 747 ABI_FREE(psir_valence) 748 ABI_FREE(psik_wrk) 749 ABI_FREE(psikb_wrk) 750 ABI_FREE(psikg_wrk) 751 ABI_FREE(psikg_tmp) 752 ABI_FREE(vpv_row) 753 754 OPTION_TIMAB = 2 755 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 756 757 758 759 call cpu_time(time1) 760 GWLS_TIMAB = 1547 761 OPTION_TIMAB = 1 762 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 763 764 765 !-------------------------------------------------------------------------------- 766 ! 767 ! 768 ! Gather dielectric matrix on head node, invert, and re-distribute. This 769 ! Saves a lot of RAM, without needing the full machinery of ScaLAPACK. 770 ! 771 !-------------------------------------------------------------------------------- 772 773 774 ABI_MALLOC(eps_model_m1_minus_one_DISTR, (lmax_model,blocksize_epsilon,npt_gauss+1)) 775 ABI_MALLOC(re_buffer, (lmax_model,blocksize_epsilon)) 776 ABI_MALLOC(im_buffer, (lmax_model,blocksize_epsilon)) 777 778 eps_model_m1_minus_one_DISTR(:,:,:) = cmplx_0 779 780 ! Define the head node, which will invert the dielectric matrices 781 head = mpi_rank == mpi_head_rank 782 783 ! Amount of data received and sent 784 sendcount = lmax_model*blocksize_epsilon 785 recvcount = lmax_model*blocksize_epsilon 786 787 ABI_MALLOC(sendcounts,(mpi_nproc)) 788 ABI_MALLOC(displs ,(mpi_nproc)) 789 sendcounts(:) = sendcount 790 791 do l =1, mpi_nproc 792 displs(l) = (l-1)*sendcount 793 end do 794 795 if (head) then 796 ! build and invert the dielectric array 797 ! careful! lmax_model not necessarily equal to blocksize_epsilon*nbdblock_epsilon 798 ABI_MALLOC(re_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon)) 799 ABI_MALLOC(im_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon)) 800 ABI_MALLOC(epsilon_head, (lmax_model, lmax_model)) 801 else 802 !This looks superfluous and it is on large number of systems, but sending these 803 !unallocated in xmpi_scatterv caused 'cannot allocate memory' cryptic errors on 804 !several parallel test farm computers (cronos_gcc46_paral, petrus_nag, inca_gcc44_sdebug) 805 ABI_MALLOC(re_BUFFER_head, (1,1)) 806 ABI_MALLOC(im_BUFFER_head, (1,1)) 807 ABI_MALLOC(epsilon_head, (1,1)) 808 end if 809 810 ! Do one frequency at a time, to avoid overflowing the RAM 811 do iw = 1, npt_gauss+1 812 ! Gather, except for static case 813 if ( iw /=1 ) then 814 ! Gather VPV on head node, for this frequency 815 call xmpi_gather(dble(VPV(:,:,iw)), sendcount , re_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr) 816 call xmpi_gather(dimag(VPV(:,:,iw)), sendcount , im_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr) 817 end if 818 819 if ( head ) then 820 821 ! fill the dielectric matrix 822 823 epsilon_head(:,:) = cmplx_0 824 if (iw ==1) then 825 ! STATIC CASE, diagonal matrix 826 do l= 1, lmax_model 827 epsilon_head(l,l) = cmplx_1/epsilon_model_eigenvalues_0(l)-cmplx_1 828 end do 829 830 else 831 ! DYNAMIC CASE, compute 832 do l1 =1, lmax_model 833 do l2 =1, l1 834 z = -cmplx_1*re_BUFFER_head(l1,l2)-cmplx_i*im_BUFFER_head(l1,l2) 835 epsilon_head(l1,l2) = z 836 epsilon_head(l2,l1) = conjg(z) 837 end do 838 epsilon_head(l1,l1) = epsilon_head(l1,l1) + cmplx_1 839 end do 840 841 ! invert the matrix 842 call driver_invert_positive_definite_hermitian_matrix(epsilon_head,lmax_model) 843 844 ! subtract identity 845 do l =1, lmax_model 846 epsilon_head(l,l) = epsilon_head(l,l) - cmplx_1 847 end do 848 end if 849 850 ! copy in head buffer 851 re_BUFFER_head(:,:) = zero 852 im_BUFFER_head(:,:) = zero 853 do l1 =1, lmax_model 854 do l2 =1, lmax_model 855 z = epsilon_head(l1,l2) 856 re_BUFFER_head(l1,l2) = dble(z) 857 im_BUFFER_head(l1,l2) = dimag(z) 858 end do 859 end do 860 861 end if 862 863 ! Scatter back the data on the head to all processors 864 call xmpi_scatterv(re_BUFFER_head, sendcounts, displs, re_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr) 865 call xmpi_scatterv(im_BUFFER_head, sendcounts, displs, im_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr) 866 867 eps_model_m1_minus_one_DISTR(:,:,iw) = cmplx_1*re_buffer(:,:) + cmplx_i*im_buffer(:,:) 868 869 end do 870 871 ABI_FREE(re_BUFFER_head) 872 ABI_FREE(im_BUFFER_head) 873 ABI_FREE(epsilon_head) 874 875 876 877 !================================================================================ 878 ! 879 ! For debugging purposes, store distributed dielectric matrix back in the 880 ! complete local copies, to insure the rest of the code works. 881 ! 882 !================================================================================ 883 884 if (.false.) then 885 ! Prepare the array that will contain the matrix elements of the model operator 886 ! THIS IS ONLY FOR THE REST OF THE CODE TO WORK; WE WILL REMOVE THIS 887 ! TO SAVE RAM LATER 888 ABI_MALLOC(eps_model_m1_minus_one, (lmax_model,lmax_model,npt_gauss+1)) 889 890 ! initialize the array with zeros 891 eps_model_m1_minus_one = cmplx_0 892 893 ! Amount of data received and sent 894 sendcount = lmax_model*blocksize_epsilon 895 recvcount = lmax_model*blocksize_epsilon*nbdblock_epsilon 896 897 898 ABI_MALLOC(re_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon)) 899 ABI_MALLOC(im_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon)) 900 901 do iw = 1, npt_gauss+1 902 903 re_buffer(:,:) = dble( eps_model_m1_minus_one_DISTR(:,:,iw)) 904 im_buffer(:,:) = dimag(eps_model_m1_minus_one_DISTR(:,:,iw)) 905 906 call xmpi_allgather(re_buffer,sendcount,re_BUFFER_head,mpi_communicator,ierr) 907 call xmpi_allgather(im_buffer,sendcount,im_BUFFER_head,mpi_communicator,ierr) 908 909 do l = 1, lmax_model 910 eps_model_m1_minus_one(:,l,iw) = cmplx_1*re_BUFFER_head(:,l)+ cmplx_i*im_BUFFER_head(:,l) 911 end do 912 end do 913 914 ABI_FREE(re_BUFFER_head) 915 ABI_FREE(im_BUFFER_head) 916 end if 917 918 call cpu_time(time2) 919 OPTION_TIMAB = 2 920 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 921 922 !================================================================================ 923 ! 924 !================================================================================ 925 926 927 time = time2-time1 928 timing_string = "# inverting / distributing : " 929 930 call write_timing_log(timing_string,time) 931 932 933 934 ABI_FREE(re_buffer ) 935 ABI_FREE(im_buffer ) 936 ABI_FREE(sendcounts) 937 ABI_FREE(displs ) 938 ABI_FREE(VPV ) 939 940 941 942 GWLS_TIMAB = 1541 943 OPTION_TIMAB = 2 944 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec) 945 946 947 end subroutine compute_eps_model_m1_minus_one
m_hamiltonian/generate_frequencies_and_weights [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
generate_frequencies_and_weights
FUNCTION
.
INPUTS
OUTPUT
SOURCE
116 subroutine generate_frequencies_and_weights(npt_gauss) 117 !-------------------------------------------------------------------------------- 118 ! 119 ! This subroutine computes the frequencies and weights necessary for Gauss-Legendre 120 ! quadrature, and stores the results in module arrays. 121 ! 122 !-------------------------------------------------------------------------------- 123 implicit none 124 125 integer, intent(in) :: npt_gauss 126 127 128 real(dp), allocatable :: list_omega_tmp(:) 129 real(dp), allocatable :: list_weights_tmp(:) 130 131 integer :: i 132 133 ! ************************************************************************* 134 135 ABI_MALLOC(list_omega_tmp, (npt_gauss)) 136 ABI_MALLOC(list_weights_tmp, (npt_gauss)) 137 138 call get_frequencies_and_weights_legendre(npt_gauss,list_omega_tmp,list_weights_tmp) 139 140 141 ABI_MALLOC(list_omega, (npt_gauss+1)) 142 ABI_MALLOC(list_weights, (npt_gauss+1)) 143 144 ! make sure the first frequency in zero! 145 list_omega(1) = zero 146 list_weights(1) = zero 147 148 do i = 1,npt_gauss 149 150 ! inverse the order of the frequency points, as they come out 151 ! in reverse order from the generating subroutine 152 list_omega (i+1) = list_omega_tmp (npt_gauss+1-i) 153 list_weights(i+1) = list_weights_tmp(npt_gauss+1-i) 154 155 end do 156 157 158 ABI_FREE(list_omega_tmp) 159 ABI_FREE(list_weights_tmp) 160 161 162 end subroutine generate_frequencies_and_weights
m_hamiltonian/ProjectedSternheimerEpsilon [ Functions ]
[ Top ] [ m_hamiltonian ] [ Functions ]
NAME
ProjectedSternheimerEpsilon
FUNCTION
.
INPUTS
OUTPUT
SOURCE
1015 subroutine ProjectedSternheimerEpsilon(lmax, npt_gauss, second_model_parameter, & 1016 list_projection_frequencies,nfrequencies,& 1017 epsilon_eigenvalues_0,debug,use_model) 1018 !---------------------------------------------------------------------------------------------------- 1019 ! This subroutine combines in a single subprogram the jobs of previous routines 1020 ! 1021 ! - setup_projected_Sternheimer_epsilon 1022 ! - compute_projected_Sternheimer_epsilon 1023 ! 1024 ! The purpose of this combination is to avoid independent loops on nbandv, requiring the 1025 ! arrays 1026 ! projected_epsilon_M_matrix 1027 ! projected_epsilon_B_matrix 1028 ! projected_epsilon_G_matrix 1029 ! 1030 ! from scaling like N^3, which grows very large with problem size. 1031 ! 1032 ! Thus, this routine: 1033 ! 1034 ! - Computes the frequency-dependent dielectric matrix in the Lanczos basis, using 1035 ! the projected Sternheimer equations. 1036 ! 1037 ! - Computes the frequency-dependent MODEL dielectric matrix in the complementary Lanczos basis. 1038 ! 1039 ! 1040 ! This routine will be verbose and write log files; indeed, large jobs crash in here, it will 1041 ! be important to know where/why! 1042 ! 1043 ! The subroutine also computes the matrix elements on epsilon_model(iw) in the Lanczos basis; 1044 ! this is done here to avoid preforming direct products with the valence states again later. 1045 !---------------------------------------------------------------------------------------------------- 1046 implicit none 1047 1048 real(dp), parameter :: svd_tolerance = 1.0e-16_dp 1049 1050 integer, intent(in) :: lmax, npt_gauss 1051 integer, intent(in) :: nfrequencies 1052 real(dp), intent(in) :: list_projection_frequencies(nfrequencies) 1053 logical, intent(in) :: debug 1054 real(dp), intent(in) :: epsilon_eigenvalues_0(lmax) 1055 1056 logical,optional,intent(in) :: use_model 1057 1058 real(dp), intent(in) :: second_model_parameter 1059 1060 1061 integer :: l, l1, l2 1062 integer :: i, iw, v 1063 integer :: recy_i 1064 integer :: lsolutions_max, lsolutions, ls 1065 integer :: projection 1066 1067 complex(dpc), allocatable :: sternheimer_A0(:,:) 1068 complex(dpc), allocatable :: sternheimer_A(:,:) 1069 complex(dpc), allocatable :: sternheimer_B(:,:) 1070 complex(dpc), allocatable :: sternheimer_X(:,:) 1071 complex(dpc), allocatable :: sternheimer_G(:,:) 1072 1073 1074 complex(dpc), allocatable :: dummy_tmp_1(:,:) 1075 complex(dpc), allocatable :: dummy_tmp_2(:,:) 1076 1077 integer, allocatable :: ipiv(:) 1078 1079 1080 1081 complex(dpc),allocatable :: local_Lbasis(:,:) 1082 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:) 1083 1084 complex(dpc),allocatable :: YL(:) 1085 1086 real(dp), allocatable :: psikg_in(:,:), psikg_out(:,:) 1087 1088 real(dp), allocatable :: psik_wrk(:,:), psikg_wrk(:,:), psikb_wrk(:,:) 1089 real(dp), allocatable :: psi_gamma_l1(:,:), psi_gamma_l2(:,:) 1090 1091 real(dp), allocatable :: psikg_valence(:,:) 1092 real(dp), allocatable :: psir_valence(:,:,:,:) 1093 1094 real(dp), allocatable :: psi_rhs(:,:,:) 1095 1096 real(dp), allocatable :: psikg_VL(:,:) 1097 1098 1099 complex(dpc), allocatable :: check_matrix(:,:), check_matrix2(:,:) 1100 complex(dpc), allocatable :: c_sternheimer_solutions(:,:) 1101 complex(dpc), allocatable :: QR_orthonormal_basis(:,:) 1102 1103 complex(dpc), allocatable :: svd_matrix(:,:) 1104 real (dp ), allocatable :: svd_values(:) 1105 1106 1107 integer :: iblk_lanczos, nbdblock_lanczos 1108 integer :: iblk_solutions, nbdblock_solutions 1109 integer :: mb 1110 1111 character(128) :: filename 1112 logical :: file_exists 1113 integer :: io_unit 1114 1115 character(128) :: filename_log 1116 integer :: io_unit_log 1117 1118 1119 real(dp) :: omega 1120 1121 character(256) :: timing_string 1122 real(dp) :: time1, time2 1123 real(dp) :: time_exact 1124 1125 1126 integer :: info 1127 integer :: ierr 1128 1129 real(dp) :: z(2) 1130 1131 1132 logical :: omega_is_imaginary 1133 real(dp) :: omega0 1134 1135 logical :: model 1136 logical :: write_debug 1137 1138 1139 integer :: mpi_communicator, mpi_rank, mpi_group 1140 1141 ! ************************************************************************* 1142 1143 1144 !================================================================================ 1145 ! Prepare MPI information 1146 !================================================================================ 1147 1148 ! for LA configuration ,The processors communicate over band+FFT 1149 mpi_communicator = mpi_enreg%comm_bandfft 1150 1151 ! what is the rank of this processor, within its group? 1152 mpi_rank = mpi_enreg%me_fft 1153 1154 ! Which group does this processor belong to, given the communicator? 1155 mpi_group = mpi_enreg%me_band 1156 1157 1158 1159 1160 !================================================================================ 1161 ! Setup a log file, to keep track of the algorithm 1162 !================================================================================ 1163 1164 1165 write(filename_log,'(A,I4.4,A)') 'ProjectedSternheimerEpsilon_PROC=',mpi_enreg%me,'.log' 1166 1167 io_unit_log = get_unit() 1168 open(io_unit_log,file=filename_log,status=files_status_new) 1169 write(io_unit_log,10) '' 1170 write(io_unit_log,10) '#====================================================================================================' 1171 write(io_unit_log,10) "# ProjectedSternheimerEpsilon: log file " 1172 write(io_unit_log,10) "# ------------------------------------------------------------------- " 1173 write(io_unit_log,10) "# " 1174 write(io_unit_log,10) "# This file tracks the algorithm in the routine ProjectedSternheimerEpsilon. The goal is to " 1175 write(io_unit_log,10) "# establish where the algorithm crashes if it does, and/or to track are far along the code is. " 1176 write(io_unit_log,10) '#' 1177 write(io_unit_log,10) '# MPI data for this process:' 1178 write(io_unit_log,10) '#' 1179 write(io_unit_log,22) '# mpi_rank :',mpi_rank,' (rank of this processor in its band group)' 1180 write(io_unit_log,22) '# mpi_group:',mpi_group,' (band group to which this processor belongs)' 1181 write(io_unit_log,10) '#====================================================================================================' 1182 flush(io_unit_log) 1183 1184 1185 !================================================================================ 1186 ! Setup timing; prepare arrays 1187 !================================================================================ 1188 1189 write(io_unit_log,10) " - Preparing and allocating arrays ...." 1190 flush(io_unit_log) 1191 1192 1193 1194 timing_string = "#" 1195 call write_text_block_in_Timing_log(timing_string) 1196 timing_string = "# ProjectedSternheimerEpsilon " 1197 call write_text_block_in_Timing_log(timing_string) 1198 timing_string = "#" 1199 call write_text_block_in_Timing_log(timing_string) 1200 1201 ! Allocate the module array 1202 ABI_MALLOC(projected_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1)) 1203 1204 projected_dielectric_Lanczos_basis(:,:,:) = cmplx_0 1205 1206 ! initialize zero frequency with exact solution 1207 do l = 1, lmax 1208 projected_dielectric_Lanczos_basis(l,l,1) = cmplx_1*epsilon_eigenvalues_0(l) 1209 end do 1210 1211 1212 ! initialize other frequencies with the identity 1213 do iw = 2, npt_gauss + 1 1214 do l = 1, lmax 1215 projected_dielectric_Lanczos_basis(l,l,iw) = cmplx_1 1216 end do 1217 end do 1218 1219 time_exact = zero 1220 1221 1222 !================================================================================ 1223 ! Parallelisation of the code is subtle; Hamiltonian must act over 1224 ! FFT rows, we must be careful with memory, etc... 1225 ! 1226 ! We will parallelise in block of Lanczos vectors, not over bands. 1227 !================================================================================ 1228 1229 ! Number of blocks of lanczos vectors 1230 nbdblock_lanczos = lmax/blocksize 1231 if (modulo(lmax,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1 1232 1233 1234 if (present(use_model)) then 1235 model = use_model 1236 else 1237 model = .true. 1238 end if 1239 1240 if (model) then 1241 ! Prepare the array that will contain the matrix elements of the model operator 1242 ABI_MALLOC(model_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1)) 1243 1244 ! initialize with zero. NOT with the identity, in order to avoid extra communications 1245 ! (see below) 1246 model_dielectric_Lanczos_basis(:,:,:) = cmplx_0 1247 1248 end if 1249 1250 ! various working arrays 1251 1252 ABI_MALLOC(psikg_valence ,(2,npw_g)) 1253 ABI_MALLOC(psir_valence ,(2,n4,n5,n6)) 1254 1255 1256 ABI_MALLOC(psikg_VL ,(2,npw_g)) 1257 1258 1259 ABI_MALLOC(psik_wrk ,(2,npw_k)) 1260 ABI_MALLOC(psikb_wrk ,(2,npw_kb)) 1261 ABI_MALLOC(psikg_wrk ,(2,npw_g)) 1262 1263 1264 ABI_MALLOC(psi_gamma_l1 ,(2,npw_k)) 1265 ABI_MALLOC(psi_gamma_l2 ,(2,npw_k)) 1266 1267 ABI_MALLOC(psi_rhs ,(2,npw_k,lmax)) 1268 1269 1270 ABI_MALLOC(psikg_in ,(2,npw_g)) 1271 ABI_MALLOC(psikg_out ,(2,npw_g)) 1272 1273 1274 ! maximal possible dimension of the solution space 1275 ! +1 because the solutions at $\omega=\infty$ are free. 1276 ! +1 if recycling is activated, because the solutions at $\omega=0$ are then available. 1277 i=1 1278 if(dtset%gwls_recycle == 1 .or. dtset%gwls_recycle == 2) then 1279 i=2 1280 end if 1281 lsolutions_max = lmax*(nfrequencies+i) 1282 1283 1284 ABI_MALLOC(local_Lbasis, (npw_k,lmax)) 1285 ABI_MALLOC(local_Lbasis_conjugated,(npw_k,lmax)) 1286 ABI_MALLOC(YL,(npw_k)) 1287 1288 ABI_MALLOC(c_sternheimer_solutions,(npw_k,lsolutions_max)) 1289 ABI_MALLOC(QR_orthonormal_basis ,(npw_k,lsolutions_max)) 1290 1291 1292 omega_is_imaginary = .true. 1293 1294 1295 ABI_MALLOC(svd_matrix,(npw_k,lsolutions_max)) 1296 ABI_MALLOC(svd_values,(lsolutions_max)) 1297 1298 1299 ! Prepare files for writing 1300 write_debug = debug .and. mpi_enreg%me == 0 1301 1302 if ( write_debug ) then 1303 1304 write(filename,'(A)') "ProjectedSternheimerEpsilon.log" 1305 inquire(file=filename,exist=file_exists) 1306 1307 i = 0 1308 do while (file_exists) 1309 i = i+1 1310 write (filename,'(A,I0.4,A)') "ProjectedSternheimerEpsilon_",i,".log" 1311 inquire(file=filename,exist=file_exists) 1312 end do 1313 1314 1315 io_unit = get_unit() 1316 open(io_unit,file=filename,status=files_status_new) 1317 write(io_unit,10) '' 1318 write(io_unit,10) '#====================================================================================================' 1319 write(io_unit,10) "# Building the dielectic matrix using projected Sternheimer equation " 1320 write(io_unit,10) "# ------------------------------------------------------------------- " 1321 write(io_unit,10) "# " 1322 write(io_unit,10) '# This file contains some tests to check if the various elements entering the projected ' 1323 write(io_unit,10) '# dielectric matrix have the right properties. At this point, this is mostly for debugging.' 1324 write(io_unit,10) '# The wavefunctions and other related arrays are stored in reciprocal space.' 1325 write(io_unit,10) '#' 1326 write(io_unit,10) '#====================================================================================================' 1327 write(io_unit,10) '' 1328 flush(io_unit) 1329 end if 1330 1331 if (debug) then 1332 ABI_MALLOC(check_matrix ,(lsolutions_max,lsolutions_max)) 1333 ABI_MALLOC(check_matrix2,(lsolutions_max,lsolutions_max)) 1334 end if 1335 1336 !================================================================================ 1337 ! Loop on all valence bands 1338 !================================================================================ 1339 1340 1341 1342 1343 do v = 1, nbandv 1344 write(io_unit_log,10) '#====================================================================================================' 1345 write(io_unit_log,20) '# valence band index:', v 1346 write(io_unit_log,10) '#====================================================================================================' 1347 flush(io_unit_log) 1348 1349 1350 1351 1352 if ( write_debug ) then 1353 write(io_unit,10) '#====================================================================================================' 1354 write(io_unit,20) '# valence band index:', v 1355 write(io_unit,10) '#====================================================================================================' 1356 flush(io_unit) 1357 end if 1358 1359 write(io_unit_log,10) ' - Fourier transform valence state ...' 1360 flush(io_unit_log) 1361 1362 1363 ! copy pre-calculated valence state in this covenient local array 1364 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v) 1365 1366 ! compute fourier transform of valence state, and conjugate 1367 call g_to_r(psir_valence,psikg_valence) 1368 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 1369 1370 ! loop on all blocks of lanczos vectors 1371 write(io_unit_log,10) ' - Loop on all lanczos blocks to generate modified basis and Sternheimer RHS:' 1372 flush(io_unit_log) 1373 do iblk_lanczos = 1, nbdblock_lanczos 1374 !-------------------------------------------------------------------------- 1375 ! Below, we build the modified basis, [ (V^{1/2} l)^* . psi_v], 1376 ! as well as conjugated, projected form, Pc . [ (V^{1/2} l) . psi_v^*]. 1377 ! 1378 ! It is very irritating to have to do it this way, but I don't 1379 ! see an alternative; see discussion below. 1380 ! 1381 !-------------------------------------------------------------------------- 1382 1383 write(io_unit_log,23) ' iblk_lanczos = ',iblk_lanczos," / ",nbdblock_lanczos 1384 1385 1386 write(io_unit_log,10) ' -- Prepare modified basis computation...' 1387 flush(io_unit_log) 1388 1389 1390 1391 ! loop on all states within this block 1392 do mb = 1, blocksize 1393 1394 ! Determine the index of the Lanczos vector 1395 l = (iblk_lanczos-1)*blocksize + mb 1396 1397 ! take a single lanczos vector 1398 1399 if ( l <= lmax) then 1400 psik_wrk(1,:) = dble (Lbasis_lanczos(:,l)) 1401 psik_wrk(2,:) = dimag(Lbasis_lanczos(:,l)) 1402 else 1403 psik_wrk(:,:) = zero 1404 end if 1405 1406 1407 ! Apply coulomb potential 1408 call sqrt_vc_k(psik_wrk) 1409 1410 ! Store in array of blocks of wavefunctions 1411 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1412 1413 end do ! mb 1414 1415 ! Transform to FFT representation 1416 call wf_block_distribute(psikb_wrk, psikg_VL,1) ! LA -> FFT 1417 1418 ! psikg_VL now contains | V^1/2 . l >, in FFT configuration 1419 1420 1421 !---------------------------------------------------------- 1422 ! i) Compute the modified basis 1423 !---------------------------------------------------------- 1424 write(io_unit_log,10) ' -- compute modified basis ...' 1425 flush(io_unit_log) 1426 1427 1428 1429 ! Fourier transform to real space, and conjugate (psir1 is a global work array) 1430 call g_to_r(psir1,psikg_VL) 1431 psir1(2,:,:,:) = -psir1(2,:,:,:) ! IS THIS STACK-DANGEROUS? 1432 1433 ! Compute the real space product, and return to k space, in FFT configuration 1434 call gr_to_g(psikg_wrk,psir1,psikg_valence) 1435 1436 ! psikg_wrk contains | (V^1/2 . l)^* phi_v >, in FFT configuration 1437 1438 ! return to LA representation 1439 call wf_block_distribute(psikb_wrk, psikg_wrk,2) ! FFT -> LA 1440 1441 ! store data, in LA representation 1442 do mb = 1, blocksize 1443 l = (iblk_lanczos-1)*blocksize + mb 1444 1445 if ( l <= lmax) then 1446 ! local_Lbasis 1447 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1448 local_Lbasis(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1449 end if 1450 1451 end do !mb 1452 1453 !-------------------------------------------------------------------------- 1454 ! ii) Build the Sternheimer coefficients, at various frequencies, 1455 ! to define the Sternheimer basis. 1456 !-------------------------------------------------------------------------- 1457 write(io_unit_log,20) ' -- Compute Sternheimer RHS...' 1458 flush(io_unit_log) 1459 1460 1461 ! psikg_wrk still contains | (V^1/2 . l)^* phi_v >, in FFT configuration 1462 1463 ! Create right-hand-side of Sternheimer equation, in FFT configuration 1464 call pc_k_valence_kernel(psikg_wrk) 1465 call Hpsik(psikg_in,psikg_wrk,eig(v)) 1466 call pc_k_valence_kernel(psikg_in) 1467 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS? 1468 1469 ! return RHS to LA representation, for explicit storage 1470 call wf_block_distribute(psikb_wrk, psikg_in,2) ! FFT -> LA 1471 1472 ! store data, in LA representation 1473 do mb = 1, blocksize 1474 l = (iblk_lanczos-1)*blocksize + mb 1475 1476 if ( l <= lmax) then 1477 ! psi_rhs 1478 psi_rhs(:,:,l) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1479 end if 1480 1481 end do !mb 1482 1483 !---------------------------------------------------------- 1484 ! iii) extract solutions for all projection frequencies 1485 !---------------------------------------------------------- 1486 write(io_unit_log,20) ' -- Extract solutions for all projection frequencies...' 1487 flush(io_unit_log) 1488 1489 1490 1491 do iw = 1, nfrequencies 1492 1493 omega0 = list_projection_frequencies(iw) 1494 ! Solve Sternheimer equation 1495 1496 projection = 0 1497 if(omega0 < 1d-12) projection=1 1498 1499 ! solve A x = b, over the whole lanczos block 1500 call sqmr(psikg_in, psikg_out, eig(v), projection, omega0, omega_is_imaginary) 1501 1502 ! return LA representation, for explicit storage 1503 call wf_block_distribute(psikb_wrk, psikg_out, 2) ! FFT -> LA 1504 1505 do mb = 1, blocksize 1506 l = (iblk_lanczos-1)*blocksize + mb 1507 1508 if ( l <= lmax) then 1509 ls = (l-1)*nfrequencies+iw 1510 1511 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1512 1513 c_sternheimer_solutions(:,ls)= cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1514 end if 1515 1516 end do ! mb 1517 1518 end do ! iw 1519 1520 !---------------------------------------------------------- 1521 ! iv) Compute the conjugated, projected modified basis 1522 !---------------------------------------------------------- 1523 1524 write(io_unit_log,20) ' -- compute the conjugated, projected modified basis...' 1525 flush(io_unit_log) 1526 1527 1528 1529 1530 ! Compute the real space product, | (V^1/2. l) phi_v^* > and return to k space, in FFT configuration 1531 call gr_to_g(psikg_wrk, psir_valence, psikg_VL) 1532 1533 ! project on conduction states 1534 call pc_k_valence_kernel(psikg_wrk) 1535 1536 ! return to LA representation 1537 call wf_block_distribute(psikb_wrk, psikg_wrk,2) ! FFT -> LA 1538 1539 ! store back, in LA configuration 1540 do mb = 1, blocksize 1541 l = (iblk_lanczos-1)*blocksize + mb 1542 1543 if ( l <= lmax) then 1544 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1545 local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:) 1546 end if 1547 1548 end do !mb 1549 1550 end do ! iblk_lanczos 1551 1552 1553 1554 write(io_unit_log,10) ' - Read in solutions at w=0 and/or w = infinity, if appropriate...' 1555 flush(io_unit_log) 1556 1557 ! Begin with the storage of the solutions at $\omega = 0.$, which are free. 1558 if(dtset%gwls_recycle == 1) then 1559 c_sternheimer_solutions(:,lsolutions_max-2*lmax+1:lsolutions_max-lmax) = cmplx_1*Sternheimer_solutions_zero(1,:,:,v) + & 1560 & cmplx_i*Sternheimer_solutions_zero(2,:,:,v) 1561 end if 1562 if(dtset%gwls_recycle == 2) then 1563 do i=1,lmax 1564 recy_i = (i-1)*nbandv + v 1565 !BUG : On petrus, NAG 5.3.1 + OpenMPI 1.6.2 cause read(...,rec=i) to read the data written by write(...,rec=i+1). 1566 read(recy_unit,rec=recy_i) psik_wrk 1567 c_sternheimer_solutions(:,lsolutions_max-2*lmax+i) = cmplx_1*psik_wrk(1,:) + cmplx_i*psik_wrk(2,:) 1568 end do 1569 end if 1570 1571 ! and then continue with the storage of the vectors on which the Sternheimer solutions will be projected. 1572 c_sternheimer_solutions(:,lsolutions_max-lmax+1:lsolutions_max) = cmplx_1*psi_rhs(1,:,:) + cmplx_i*psi_rhs(2,:,:) 1573 ! Previously was = local_Lbasis; but analysis in the Lanczos article reveals psi_rhs should be better. 1574 ! Furthermore, tests show that, with psi_rhs, silane@1Ha has Sigma_c 0.01mHa away from the result with 1575 ! gwls_list_proj_freq 0.0 1.0, in contrast with local_Lbasis, which has Sigma_c 0.3mHa away from the same result. 1576 1577 if ( model ) then 1578 1579 write(io_unit_log,10) ' - USE MODEL: model = .true., hence compute model model dielectric matrix...' 1580 flush(io_unit_log) 1581 1582 1583 1584 !-------------------------------------------------------------------------- 1585 ! Now that we have the modified basis, compute the matrix 1586 ! elements of the model dielectric operator 1587 ! 1588 ! CAREFUL! 1589 ! 1590 ! The model is given by 1591 ! 1592 ! P_model(iw) = sum_{v} phi_v(r) P_c.Y(iw).P_c phi_v^*(r') 1593 ! 1594 ! such that 1595 ! 1596 ! <l1 | eps_model(iw) | l2 > = delta_{l1,l2} 1597 ! - sum_{v} < (V^{1/2}.l1).phi_v^*| Pc . Y . Pc | (V^{1/2}.l2).phi_v^* > 1598 ! 1599 ! But local_Lbasis defined above corresponds to 1600 ! Pc | (V^{1/2} .l )^* phi_v >. 1601 ! 1602 ! This is why we must define local_Lbasis_conjugated, of the form 1603 ! Pc | (V^{1/2} .l ) phi_v^* >. 1604 ! 1605 !-------------------------------------------------------------------------- 1606 1607 do iw = 1, npt_gauss+1 1608 1609 call setup_Pk_model(list_omega(iw),second_model_parameter) 1610 1611 ! Only build the lower triangular part; the upper triangular part is obtained from the Hermitian conjugate 1612 do l1 = 1, lmax 1613 1614 YL(:) = model_Y_LA(:)*local_Lbasis_conjugated(:,l1) 1615 do l2 = 1, l1 1616 model_dielectric_Lanczos_basis(l1,l2,iw) = model_dielectric_Lanczos_basis(l1,l2,iw) & 1617 -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k) 1618 1619 end do 1620 end do 1621 1622 end do ! iw 1623 1624 1625 end if 1626 1627 1628 !-------------------------------------------------------------------------- 1629 ! Check explicitly that solutions satisfy the Sternheimer equations 1630 !-------------------------------------------------------------------------- 1631 1632 if ( debug ) then 1633 1634 if (write_debug) then 1635 write(io_unit,10) "#--------------------------------------------------------------------------------" 1636 write(io_unit,10) "# Check explicitly that solutions satisfy the Sternheimer equation. " 1637 write(io_unit,10) "# " 1638 write(io_unit,10) "# Define: " 1639 write(io_unit,10) "# E_l = || (omega^2+[H-Ev]^2) |phi_l> + Pc.[H-Ev].Pc |(V^1/2.q_l)^*.phi_v > || " 1640 write(io_unit,10) "#--------------------------------------------------------------------------------" 1641 write(io_unit,10) '# l Im[omega] (Ha) E_l' 1642 write(io_unit,10) "#--------------------------------------------------------------------------------" 1643 flush(io_unit) 1644 end if 1645 1646 1647 do iblk_lanczos = 1, nbdblock_lanczos 1648 1649 ! loop on all states within this block 1650 do mb = 1, blocksize 1651 1652 l = (iblk_lanczos-1)*blocksize + mb 1653 1654 1655 if ( l <= lmax) then 1656 ! psik_wrk = | (V^{1/2} .l )^* phi_v > 1657 psik_wrk(1,:) = dble (local_Lbasis(:,l)) 1658 psik_wrk(2,:) = dimag(local_Lbasis(:,l)) 1659 else 1660 psik_wrk(:,:) = zero 1661 end if 1662 1663 ! Store in array of blocks of wavefunctions 1664 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1665 end do ! mb 1666 1667 ! Transform to FFT representation 1668 call wf_block_distribute(psikb_wrk, psikg_wrk, 1) ! LA -> FFT 1669 1670 ! Create right-hand-side of Sternheimer equation 1671 call pc_k_valence_kernel(psikg_wrk) 1672 call Hpsik(psikg_in,psikg_wrk,eig(v)) 1673 call pc_k_valence_kernel(psikg_in) 1674 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS? 1675 1676 do iw = 1, nfrequencies 1677 ! loop on all states within this block 1678 do mb = 1, blocksize 1679 1680 l = (iblk_lanczos-1)*blocksize + mb 1681 1682 ls = (l-1)*nfrequencies+iw 1683 1684 if ( l <= lmax) then 1685 psik_wrk(1,:) = dble (c_sternheimer_solutions(:,ls)) 1686 psik_wrk(2,:) = dimag(c_sternheimer_solutions(:,ls)) 1687 else 1688 psik_wrk(:,:) = zero 1689 end if 1690 1691 ! Store in array of blocks of wavefunctions 1692 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1693 end do 1694 1695 ! Transform to FFT representation 1696 call wf_block_distribute(psikb_wrk, psikg_wrk, 1) ! LA -> FFT 1697 1698 1699 omega0 = list_projection_frequencies(iw) 1700 1701 psikg_out(:,:) = omega0**2*psikg_wrk(:,:) 1702 1703 call Hpsik(psikg_wrk,cte=eig(v)) 1704 call Hpsik(psikg_wrk,cte=eig(v)) 1705 1706 1707 psikg_out(:,:) = psikg_out(:,:) + psikg_wrk(:,:)-psikg_in(:,:) 1708 ! psikg_out now contains [ w0^2 + [H-epsilon_v]^2 ] | x > - |RHS>, in FFT configuration. 1709 1710 ! bring it back to LA configuration 1711 1712 ! Transform to FFT representation 1713 call wf_block_distribute(psikb_wrk, psikg_out, 2) ! FFT -> LA 1714 1715 do mb = 1, blocksize 1716 l = (iblk_lanczos-1)*blocksize + mb 1717 1718 if ( l <= lmax) then 1719 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1720 1721 z(:) = cg_zdotc(npw_k ,psik_wrk, psik_wrk) 1722 1723 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors working on FFT! 1724 if (write_debug) write(io_unit,21) l, omega0, sqrt(z(1)) 1725 end if 1726 end do ! mb 1727 1728 end do ! iw 1729 end do ! iblk_lanczos 1730 1731 end if 1732 1733 !-------------------------------------------------------------------------- 1734 ! Step 5: Perform a singular value decomposition to extract a 1735 ! linearly independent basis for the solution space. 1736 ! 1737 !-------------------------------------------------------------------------- 1738 write(io_unit_log,10) ' - Perform SVD to extract linearly independent basis to Sternheimer equation...' 1739 flush(io_unit_log) 1740 1741 1742 1743 1744 svd_matrix(:,:) = c_sternheimer_solutions(:,:) 1745 1746 call extract_SVD(mpi_communicator, npw_k,lsolutions_max,svd_matrix,svd_values) 1747 1748 if ( write_debug ) then 1749 write(io_unit,10) "#--------------------------------------------------------------------------------" 1750 write(io_unit,10) "# Check the singular value decomposition of the arrays" 1751 write(io_unit,10) '# l svd' 1752 write(io_unit,10) "#--------------------------------------------------------------------------------" 1753 flush(io_unit) 1754 end if 1755 1756 lsolutions = 0 1757 QR_orthonormal_basis(:,:) = cmplx_0 1758 do l=1, lsolutions_max 1759 if (svd_values(l) > svd_tolerance ) then 1760 lsolutions = lsolutions + 1 1761 1762 if ( write_debug ) then 1763 write(io_unit,14) l,svd_values(l) 1764 flush(io_unit) 1765 end if 1766 QR_orthonormal_basis(:,l) = svd_matrix(:,l) 1767 1768 else 1769 if ( write_debug ) then 1770 write(io_unit,15) l,svd_values(l),' SVD value too small! Vector to be discarded!' 1771 flush(io_unit) 1772 end if 1773 end if 1774 end do 1775 1776 !-------------------------------------------------------------------------- 1777 ! Step 6: project all relevant arrays onto the newly defined orthonormal 1778 ! basis. 1779 !-------------------------------------------------------------------------- 1780 write(io_unit_log,10) ' - Compute the B matrix...' 1781 flush(io_unit_log) 1782 1783 if (debug) then 1784 check_matrix(:,:) = cmplx_0 1785 do l = 1, lsolutions 1786 check_matrix(l,l) = -cmplx_1 1787 end do 1788 end if 1789 1790 ABI_MALLOC(ipiv ,(lsolutions)) 1791 ABI_MALLOC(sternheimer_A0 ,(lsolutions,lsolutions)) 1792 ABI_MALLOC(sternheimer_B ,(lsolutions,lmax)) 1793 ABI_MALLOC(sternheimer_G ,(lmax,lsolutions)) 1794 1795 ! Compute the X matrix and the check_matrix 1796 do l1 = 1, lsolutions 1797 1798 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1)) 1799 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1)) 1800 1801 do l2 = 1, lsolutions 1802 1803 if (l2 <= lmax) then 1804 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psi_rhs(:,:,l2)) 1805 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors for LA configuration 1806 1807 sternheimer_B(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 1808 end if 1809 1810 if (debug) then 1811 psi_gamma_l2(1,:) = dble (QR_orthonormal_basis(:,l2)) 1812 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2)) 1813 1814 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psi_gamma_l2) 1815 call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors 1816 1817 check_matrix(l1,l2) = check_matrix(l1,l2) + cmplx_1*z(1)+cmplx_i*z(2) 1818 end if 1819 1820 end do ! l2 1821 end do ! l1 1822 1823 1824 ! Number of blocks of solution vectors 1825 nbdblock_solutions = lsolutions/blocksize 1826 1827 if (modulo(lsolutions,blocksize) /= 0) nbdblock_solutions = nbdblock_solutions + 1 1828 1829 1830 write(io_unit_log,10) ' - Compute the A0 matrix...' 1831 flush(io_unit_log) 1832 1833 ! Compute the A matrix 1834 do iblk_solutions =1, nbdblock_solutions 1835 1836 do mb = 1, blocksize 1837 l2 = (iblk_solutions-1)*blocksize + mb 1838 1839 if ( l2 <= lsolutions) then 1840 psik_wrk(1,:) = dble (QR_orthonormal_basis(:,l2)) 1841 psik_wrk(2,:) = dimag(QR_orthonormal_basis(:,l2)) 1842 else 1843 psik_wrk(:,:) = zero 1844 end if 1845 1846 ! Store in array of blocks of wavefunctions 1847 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:) 1848 end do 1849 1850 ! Transform to FFT representation 1851 call wf_block_distribute(psikb_wrk, psikg_wrk,1) ! LA -> FFT 1852 1853 ! act twice with the Hamiltonian operator 1854 call Hpsik(psikg_out,psikg_wrk,eig(v)) 1855 call Hpsik(psikg_out,cte=eig(v)) 1856 1857 ! return to LA representation 1858 call wf_block_distribute(psikb_wrk, psikg_out,2) ! FFT -> LA 1859 1860 do mb = 1, blocksize 1861 l2 = (iblk_solutions-1)*blocksize + mb 1862 1863 if ( l2 <= lsolutions) then 1864 psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 1865 else 1866 psik_wrk(:,:) = zero 1867 end if 1868 1869 do l1 = 1, lsolutions 1870 1871 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1)) 1872 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1)) 1873 1874 z(:) = cg_zdotc(npw_k, psi_gamma_l1, psik_wrk) 1875 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT! 1876 1877 1878 if ( l2 <= lsolutions ) then 1879 sternheimer_A0(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 1880 end if 1881 1882 1883 end do ! l1 1884 end do ! mb 1885 end do ! iblk_solutions 1886 1887 1888 if (debug) then 1889 ! HERE we use a dummy variable to avoid operations which might blow the stack! 1890 ! Stack overflows lead to hard-to-find bugs; let's avoid putting them in here. 1891 ABI_MALLOC(dummy_tmp_1,(lsolutions,lsolutions)) 1892 ABI_MALLOC(dummy_tmp_2,(lsolutions,lsolutions)) 1893 1894 1895 dummy_tmp_1(:,:)= transpose(sternheimer_A0(:,:)) 1896 dummy_tmp_2(:,:)= conjg(dummy_tmp_1(:,:)) 1897 1898 check_matrix2(:,:) = sternheimer_A0(:,:)-dummy_tmp_2(:,:) 1899 1900 ABI_FREE(dummy_tmp_1) 1901 ABI_FREE(dummy_tmp_2) 1902 end if 1903 1904 write(io_unit_log,10) ' - Compute the GAMMA matrix...' 1905 flush(io_unit_log) 1906 1907 1908 ! Compute the GAMMA matrices 1909 do l1 = 1, lmax 1910 1911 ! psik_wrk = | (V^{1/2} . l )^* phi_v > 1912 psik_wrk(1,:) = dble (local_Lbasis(:,l1) ) 1913 psik_wrk(2,:) = dimag(local_Lbasis(:,l1)) 1914 1915 1916 do l2 = 1, lsolutions 1917 1918 psi_gamma_l2(1,:) = real (QR_orthonormal_basis(:,l2)) 1919 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2)) 1920 1921 1922 ! Note that G_{lJ} = < l | Vc^{1/2}.(gamma_J^*.phi_v)> 1923 ! = < gamma_J | (Vc^{1/2}.l^*).phi_v > 1924 1925 z(:) = cg_zdotc(npw_k,psi_gamma_l2, psik_wrk) 1926 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT! 1927 sternheimer_G(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2) 1928 1929 end do ! l1 1930 end do ! l2 1931 1932 1933 1934 if ( write_debug ) then 1935 write(io_unit,19) '<gamma| gamma>', sqrt(sum(abs(check_matrix(:,:))**2)) 1936 write(io_unit,19) ' M hermitian ',sqrt(sum(abs(check_matrix2(:,:))**2)) 1937 write(io_unit,10) " " 1938 write(io_unit,10) "# GAMMA Matrix:" 1939 write(io_unit,10) " " 1940 1941 do l1 = 1, lmax 1942 write(io_unit,30) sternheimer_G(l1,:) 1943 end do 1944 flush(io_unit) 1945 end if 1946 1947 !-------------------------------------------------------------------------- 1948 ! Step 7: Compute the solutions 1949 !-------------------------------------------------------------------------- 1950 1951 write(io_unit_log,10) ' - Compute the Projected Sternheimer solutions and build approximate dielectric operator...' 1952 flush(io_unit_log) 1953 1954 1955 1956 ABI_MALLOC(sternheimer_A ,(lsolutions,lsolutions)) 1957 ABI_MALLOC(sternheimer_X ,(lsolutions,lmax)) 1958 do iw = 2, npt_gauss + 1 1959 1960 write(io_unit_log,23) ' -- iw = ',iw,' / ',npt_gauss+1 1961 flush(io_unit_log) 1962 1963 1964 omega = list_omega(iw) 1965 1966 sternheimer_A(:,:) = sternheimer_A0(:,:) 1967 1968 do l = 1, lsolutions 1969 sternheimer_A(l,l) = sternheimer_A(l,l) + omega**2 1970 end do 1971 1972 sternheimer_X(:,:) = sternheimer_B(:,:) 1973 1974 !-------------------------------------------------------------------------- 1975 ! Step 2: solve A*X = B, a projected form of the Sternheimer equation 1976 !-------------------------------------------------------------------------- 1977 write(io_unit_log,10) ' -- Solve A*X = B' 1978 flush(io_unit_log) 1979 1980 1981 1982 call cpu_time(time1) 1983 call zgesv(lsolutions, & ! number of rows of A matrix 1984 lmax, & ! number of columns of B matrix 1985 sternheimer_A, & ! The A matrix on input, the LU factorization on output 1986 lsolutions, & ! leading dimension of A 1987 ipiv, & ! array of pivots 1988 sternheimer_X, & ! B matrix on input, solution X on output 1989 lsolutions, & ! leading dimension of B 1990 info ) 1991 call cpu_time(time2) 1992 1993 time_exact = time_exact + time2-time1 1994 1995 !-------------------------------------------------------------------------- 1996 ! Step 3: Add contribution to projected epsilon 1997 !-------------------------------------------------------------------------- 1998 ! perform E = E -4 sum_l Gamma_v*X^*_v 1999 2000 write(io_unit_log,10) ' -- add contribution to dielectric matrix at this frequency' 2001 flush(io_unit_log) 2002 2003 2004 2005 ABI_MALLOC(dummy_tmp_1,(lsolutions,lmax)) 2006 dummy_tmp_1(:,:) = conjg(sternheimer_X) ! DO THIS to avoid potential stack problems 2007 2008 call cpu_time(time1) 2009 call zgemm( 'N', & ! A matrix is in normal order 2010 'N', & ! B matrix is in normal order 2011 lmax, & ! number of rows of A 2012 lmax, & ! number of columns of B 2013 lsolutions, & ! number of columns of A 2014 -4.0_dp*cmplx_1, & ! premultiply A*B by this scalar 2015 sternheimer_G, & ! GAMMA matrix 2016 lmax, & ! leading dimension of A 2017 dummy_tmp_1, & ! B matrix 2018 lsolutions, & ! leading dimension of B 2019 cmplx_1, & ! beta is one 2020 projected_dielectric_Lanczos_basis(:,:,iw), & ! C matrix 2021 lmax) ! leading dimension of C 2022 2023 ABI_FREE(dummy_tmp_1) 2024 2025 call cpu_time(time2) 2026 time_exact = time_exact + time2-time1 2027 2028 end do ! iw 2029 2030 write(io_unit_log,10) ' - Deallocate tmp arrays...' 2031 flush(io_unit_log) 2032 2033 2034 2035 2036 ABI_FREE(ipiv ) 2037 ABI_FREE(sternheimer_A ) 2038 ABI_FREE(sternheimer_A0 ) 2039 ABI_FREE(sternheimer_B ) 2040 ABI_FREE(sternheimer_X ) 2041 ABI_FREE(sternheimer_G ) 2042 2043 end do ! v 2044 2045 !-------------------------------------------------------------------------- 2046 ! Finalize, post v loop 2047 !-------------------------------------------------------------------------- 2048 write(io_unit_log,10) " - Finalize, after band iterations...." 2049 flush(io_unit_log) 2050 2051 2052 2053 2054 2055 timing_string = "# Exact Sector : " 2056 call write_timing_log(timing_string,time_exact) 2057 2058 2059 ABI_MALLOC(dummy_tmp_1,(lmax,lmax)) 2060 ABI_MALLOC(dummy_tmp_2,(lmax,lmax)) 2061 2062 do iw = 2, npt_gauss+1 2063 ! finally, make sure matrix is hermitian 2064 2065 ! play this little game to avoid STACK problems 2066 dummy_tmp_1(:,:) = 0.5_dp*transpose(projected_dielectric_Lanczos_basis(:,:,iw)) 2067 dummy_tmp_2(:,:) = conjg(dummy_tmp_1(:,:)) 2068 dummy_tmp_1(:,:) = dummy_tmp_2(:,:) +0.5_dp*projected_dielectric_Lanczos_basis(:,:,iw) 2069 2070 projected_dielectric_Lanczos_basis(:,:,iw) = dummy_tmp_1(:,:) 2071 end do 2072 2073 ABI_FREE(dummy_tmp_1) 2074 ABI_FREE(dummy_tmp_2) 2075 2076 2077 2078 if ( write_debug ) then 2079 !write some results to a file 2080 2081 write(io_unit,10) '#====================================================================================================' 2082 write(io_unit,10) "# Projected dielectric matrices " 2083 write(io_unit,10) "# ------------------------------------------------------- " 2084 write(io_unit,10) "# This file contains the various projected dielectric matrices as a function of frequency. " 2085 write(io_unit,10) '#====================================================================================================' 2086 write(io_unit,10) '' 2087 flush(io_unit) 2088 2089 do iw = 1, npt_gauss+1 2090 write(io_unit,10) "#" 2091 write(io_unit,12) "# omega = ",list_omega(iw), " i Ha" 2092 write(io_unit,10) "#" 2093 2094 do l =1, lmax 2095 write(io_unit,30) projected_dielectric_Lanczos_basis(l,:,iw) 2096 end do 2097 end do 2098 2099 end if 2100 2101 2102 if ( model ) then 2103 2104 2105 ! Add all components on the processors 2106 call xmpi_sum(model_dielectric_Lanczos_basis,mpi_communicator,ierr) ! sum on all processors 2107 2108 ! add the identity 2109 do iw = 1, npt_gauss+1 2110 do l= 1, lmax 2111 model_dielectric_Lanczos_basis(l,l,iw) = model_dielectric_Lanczos_basis(l,l,iw) + cmplx_1 2112 end do 2113 end do 2114 2115 ! hermitian the operator 2116 do iw = 1, npt_gauss+1 2117 2118 do l1 = 1, lmax 2119 do l2 = 1, l1 2120 ! operator is hermitian 2121 model_dielectric_Lanczos_basis(l2,l1,iw) = conjg(model_dielectric_Lanczos_basis(l1,l2,iw)) 2122 end do 2123 end do 2124 2125 end do ! iw 2126 2127 end if 2128 2129 2130 2131 if ( write_debug ) then 2132 close(io_unit) 2133 end if 2134 2135 write(io_unit_log,10) " - Deallocate and exit...." 2136 flush(io_unit_log) 2137 2138 2139 2140 if (debug) then 2141 ABI_FREE(check_matrix) 2142 ABI_FREE(check_matrix2) 2143 end if 2144 2145 2146 ABI_FREE(psikg_valence) 2147 ABI_FREE(psir_valence) 2148 ABI_FREE(psikg_VL) 2149 2150 2151 ABI_FREE(YL) 2152 2153 ABI_FREE(local_Lbasis_conjugated) 2154 ABI_FREE(local_Lbasis) 2155 2156 2157 ABI_FREE(svd_matrix) 2158 ABI_FREE(svd_values) 2159 ABI_FREE(psi_gamma_l1) 2160 ABI_FREE(psi_gamma_l2) 2161 2162 ABI_FREE(psikg_in) 2163 ABI_FREE(psikg_out) 2164 2165 ABI_FREE(psi_rhs) 2166 2167 ABI_FREE(c_sternheimer_solutions) 2168 ABI_FREE(QR_orthonormal_basis) 2169 2170 ABI_FREE(psik_wrk) 2171 ABI_FREE(psikb_wrk) 2172 ABI_FREE(psikg_wrk) 2173 2174 2175 close(io_unit_log) 2176 2177 2178 10 format(A) 2179 12 format(A,F12.8,A) 2180 14 format(I5,10X,ES24.12) 2181 15 format(I5,10X,ES24.12,10X,A) 2182 2183 19 format(20X,A,15X,E24.16) 2184 2185 20 format(A,I5) 2186 21 format(I5,10X,F8.4,15X,ES24.12) 2187 22 format(A,I5,A) 2188 23 format(A,I5,A,I5) 2189 30 format(2X,1000(ES12.4,2X,ES12.4,5X)) 2190 2191 end subroutine ProjectedSternheimerEpsilon