TABLE OF CONTENTS


ABINIT/m_gwls_DielectricArray [ Modules ]

[ Top ] [ Modules ]

NAME

 m_gwls_DielectricArray

FUNCTION

  .

COPYRIGHT

 Copyright (C) 2009-2024 ABINIT group (JLJ, BR, MC)
 This file is distributed under the terms of the
 GNU General Public License, see ~abinit/COPYING
 or http://www.gnu.org/copyleft/gpl.txt .

SOURCE

16 #if defined HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include "abi_common.h"
21 
22 
23 module m_gwls_DielectricArray
24 !----------------------------------------------------------------------------------------------------
25 ! This module generates and stores the arrays
26 !
27 !        { eps^{-1}(iw)-eps_model^{-1}(iw) }  in Lanczos basis
28 !        { eps_model^{-1}(iw) - 1 }           in model Lanczos basis
29 !
30 ! It makes sense to build these only once, as they do not depend on the external frequency.
31 !
32 !----------------------------------------------------------------------------------------------------
33 
34 ! local modules
35 use m_gwls_utility
36 use m_gwls_wf
37 use m_gwls_valenceWavefunctions
38 use m_gwls_hamiltonian
39 use m_gwls_lineqsolver
40 use m_gwls_polarisability
41 use m_gwls_model_polarisability
42 use m_gwls_GenerateEpsilon
43 use m_gwls_TimingLog
44 use m_gwls_QR_factorization
45 use m_gwls_LanczosBasis
46 
47 ! abinit modules
48 use defs_basis
49 use m_abicore
50 use m_xmpi
51 use m_cgtools
52 
53 use m_time,                only : timab
54 use m_io_tools,            only: get_unit
55 use m_gaussian_quadrature, only: get_frequencies_and_weights_legendre
56 
57 
58 implicit none
59 save
60 
61 private

m_hamiltonian/cleanup_projected_Sternheimer_epsilon [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  cleanup_projected_Sternheimer_epsilon

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

963 subroutine cleanup_projected_Sternheimer_epsilon
964 
965 implicit none
966 
967 ! *************************************************************************
968 
969 if(allocated(projected_epsilon_M_matrix)) then
970   ABI_FREE(projected_epsilon_M_matrix)
971 end if
972 if(allocated(projected_epsilon_B_matrix)) then
973   ABI_FREE(projected_epsilon_B_matrix)
974 end if
975 if(allocated(projected_epsilon_G_matrix)) then
976   ABI_FREE(projected_epsilon_G_matrix)
977 end if
978 if(allocated(eps_m1_minus_eps_model_m1)) then
979   ABI_FREE(eps_m1_minus_eps_model_m1)
980 end if
981 if(allocated(list_omega)) then
982   ABI_FREE(list_omega)
983 end if
984 if(allocated(list_weights)) then
985   ABI_FREE(list_weights)
986 end if
987 if(allocated(eps_model_m1_minus_one_DISTR)) then
988   ABI_FREE(eps_model_m1_minus_one_DISTR)
989 end if
990 if(allocated(model_lanczos_vector_belongs_to_this_node)) then
991   ABI_FREE(model_lanczos_vector_belongs_to_this_node)
992 end if
993 if(allocated(model_lanczos_vector_index)) then
994   ABI_FREE(model_lanczos_vector_index)
995 end if
996 
997 
998 end subroutine cleanup_projected_Sternheimer_epsilon

m_hamiltonian/compute_eps_m1_minus_eps_model_m1 [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_m1_minus_eps_model_m1

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

178 subroutine compute_eps_m1_minus_eps_model_m1(lmax, npt_gauss)
179 !----------------------------------------------------------------------------------------------------
180 !
181 ! This subroutine computes the array 
182 !
183 !                eps^{-1}(iw) - eps_model^{-1}(iw), 
184 !
185 ! for all relevant frequencies in the Lanczos basis.
186 !----------------------------------------------------------------------------------------------------
187 implicit none
188 
189 integer ,     intent(in)  :: lmax, npt_gauss
190 
191 character(256) :: timing_string
192 real(dp)       :: time1, time2
193 real(dp)       :: time
194 
195 integer        :: iw, l
196 complex(dpc),allocatable  :: dummy_matrix(:,:)
197 complex(dpc),allocatable  :: iden(:,:)
198 
199 ! *************************************************************************
200 
201 
202 timing_string = "#"
203 call write_text_block_in_Timing_log(timing_string)
204 timing_string = "#        Computing eps^{-1}(iw) - eps_model^{-1}(iw) "
205 call write_text_block_in_Timing_log(timing_string)
206 timing_string = "#"
207 call write_text_block_in_Timing_log(timing_string)
208 
209 
210 call cpu_time(time1)
211 ! Allocate the module array
212 ABI_MALLOC(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1))
213 ABI_MALLOC(dummy_matrix, (lmax,lmax))
214 ABI_MALLOC(iden, (lmax,lmax))
215 
216 
217 iden = cmplx_0
218 
219 do l = 1, lmax
220 iden(l,l) = cmplx_1
221 end do
222 
223 do iw = 1, npt_gauss + 1
224 
225 
226 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw)
227 
228 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
229 
230 
231 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)
232 
233 dummy_matrix(:,:) = model_dielectric_Lanczos_basis(:,:,iw)
234 
235 
236 
237 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
238 
239 eps_m1_minus_eps_model_m1(:,:,iw) = eps_m1_minus_eps_model_m1(:,:,iw) - dummy_matrix(:,:)
240 
241 
242 end do
243 
244 
245 
246 
247 ! Deallocate the arrays which are no longer needed
248 ABI_FREE(model_dielectric_Lanczos_basis)
249 ABI_FREE(projected_dielectric_Lanczos_basis)
250 
251 
252 
253 ABI_FREE(dummy_matrix)
254 ABI_FREE(iden)
255 
256 
257 
258 call cpu_time(time2)
259 time = time2-time1
260 
261 timing_string = "#        Total time   :   "
262 call write_timing_log(timing_string,time)
263 
264 
265 
266 
267 end subroutine compute_eps_m1_minus_eps_model_m1

m_hamiltonian/compute_eps_m1_minus_one [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_m1_minus_one

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

283 subroutine compute_eps_m1_minus_one(lmax, npt_gauss)
284 !----------------------------------------------------------------------------------------------------
285 !
286 ! This subroutine computes the array 
287 !
288 !                eps^{-1}(iw) - I
289 !
290 ! for all relevant frequencies in the Lanczos basis.
291 !----------------------------------------------------------------------------------------------------
292 implicit none
293 
294 integer ,     intent(in)  :: lmax, npt_gauss
295 
296 character(256) :: timing_string
297 real(dp)       :: time1, time2
298 real(dp)       :: time
299 
300 integer        :: iw, l
301 complex(dpc),allocatable  :: dummy_matrix(:,:)
302 complex(dpc),allocatable  :: iden(:,:)
303 
304 ! *************************************************************************
305 
306 
307 
308 timing_string = "#"
309 call write_text_block_in_Timing_log(timing_string)
310 timing_string = "#        Computing eps^{-1}(iw) - I "
311 call write_text_block_in_Timing_log(timing_string)
312 timing_string = "#"
313 call write_text_block_in_Timing_log(timing_string)
314 
315 call cpu_time(time1)
316 ! Allocate the module array
317 
318 ! The array eps_m1_minus_eps_model_m1 will be used to store
319 ! eps^{-1}-1; we can think of eps_model = I in this case.
320 ABI_MALLOC(eps_m1_minus_eps_model_m1, (lmax,lmax,npt_gauss+1))
321 
322 ABI_MALLOC(dummy_matrix, (lmax,lmax))
323 ABI_MALLOC(iden, (lmax,lmax))
324 
325 iden = cmplx_0
326 
327 do l = 1, lmax
328 iden(l,l) = cmplx_1
329 end do
330 
331 
332 do iw = 1, npt_gauss + 1
333 
334 dummy_matrix(:,:) = projected_dielectric_Lanczos_basis(:,:,iw)
335 
336 call driver_invert_positive_definite_hermitian_matrix(dummy_matrix,lmax)
337 
338 eps_m1_minus_eps_model_m1(:,:,iw) = dummy_matrix(:,:)-iden(:,:)
339 
340 end do
341 
342 
343 ! Deallocate the arrays which are no longer needed
344 ABI_FREE(projected_dielectric_Lanczos_basis)
345 
346 ABI_FREE(dummy_matrix)
347 ABI_FREE(iden)
348 
349 call cpu_time(time2)
350 time = time2-time1
351 
352 timing_string = "#        Total time   :   "
353 call write_timing_log(timing_string,time)
354 
355 end subroutine compute_eps_m1_minus_one

m_hamiltonian/compute_eps_model_m1_minus_one [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  compute_eps_model_m1_minus_one

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

371 subroutine compute_eps_model_m1_minus_one(lmax_model, npt_gauss, second_model_parameter, epsilon_model_eigenvalues_0)
372 !----------------------------------------------------------------------------------------------------
373 !
374 ! This subroutine computes the array 
375 !
376 !                eps_model^{-1}(iw) - 1
377 !
378 ! for all relevant frequencies in the model Lanczos basis.
379 !
380 ! This array can potentially get very large, as the complementary basis gets big to achieve 
381 ! convergence. Correspondingly, it makes sense to DISTRIBUTE this array on all processors.
382 !
383 ! The algorithm will go as follows:
384 !
385 !               eps_m = 1 - V . P . V, V = sqrt{vc}
386 !
387 !       I ) compute VPV, storing blocks on different processors:
388 !
389 !               VPV = [ ------- --------      ]  = VPV[lc, nB, nW]
390 !                  |  [| block |  block |     ]
391 !                 lc  [|   1   |    2   | ... ]
392 !                  |  [|       |        |     ]
393 !                  |  [|       |        |     ]
394 !                  |  [ ------- ---------     ]
395 !                        bsize
396 !
397 !               This construction *does* involve a fair bit of communications, but it takes
398 !               a lot less RAM!
399 !
400 !       II) once VPV is constructed, do, one frequency at a time:
401 !               - import all blocks to the HEAD processor
402 !               - compute eps_m = 1- VPV
403 !               - invert eps_m^{-1}
404 !               - subtract identity eps_m^{-1} - 1
405 !               - redistribute, block by block
406 !        
407 !               Doing this frequency by frequency will reduce the RAM weight on the head node.
408 !
409 !
410 !----------------------------------------------------------------------------------------------------
411 implicit none
412 
413 integer,  intent(in) :: lmax_model, npt_gauss
414 real(dp), intent(in) :: second_model_parameter
415 real(dp), intent(in) :: epsilon_model_eigenvalues_0(lmax_model)
416 
417 integer  :: l, l1, l2
418 integer  :: iw 
419 integer  :: v
420 integer  :: ierr
421 
422 real(dp),    allocatable :: psikg_valence(:,:)
423 real(dp),    allocatable :: psir_valence(:,:,:,:)
424 
425 
426 real(dp),    allocatable :: psik_wrk(:,:)
427 real(dp),    allocatable :: psikb_wrk(:,:)
428 real(dp),    allocatable :: psikg_wrk(:,:)
429 real(dp),    allocatable :: psikg_tmp(:,:)
430 
431 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:)
432 
433 
434 complex(dpc),allocatable :: VPV(:,:,:)
435 real(dp),    allocatable :: re_buffer(:,:), im_buffer(:,:) 
436 
437 complex(dpc),allocatable :: vpv_row(:)
438 
439 
440 complex(dpc),allocatable :: epsilon_head(:,:)
441 
442 real(dp), allocatable :: re_BUFFER_head(:,:)
443 real(dp), allocatable :: im_BUFFER_head(:,:)
444 
445 
446 
447 complex(dpc),allocatable :: YL(:)
448 
449 character(256) :: timing_string
450 real(dp)       :: time1, time2, time
451 real(dp)       :: fft_time1, fft_time2, fft_time
452 real(dp)       :: prod_time1, prod_time2, prod_time
453 
454 complex(dpc)   :: z
455 
456 
457 integer   :: iblk_lanczos, nbdblock_lanczos
458 integer   :: mb
459 integer   :: lb
460 integer   :: ik
461 
462 integer   :: mpi_communicator
463 integer   :: mpi_nproc
464 integer   :: mpi_rank
465 integer   :: mpi_head_rank
466 
467 
468 integer,allocatable   :: sendcounts(:), displs(:)
469 
470 integer   :: sendcount, recvcount
471 
472 
473 logical   :: head
474 
475 
476 ! timing
477 real(dp) :: tsec(2)
478 integer :: GWLS_TIMAB, OPTION_TIMAB
479 
480 ! *************************************************************************
481 
482 
483 GWLS_TIMAB   = 1541
484 OPTION_TIMAB = 1
485 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
486 
487 
488 
489 timing_string = "#"
490 call write_text_block_in_Timing_log(timing_string)
491 timing_string = "#        computing eps_model_m1_minus_one"
492 call write_text_block_in_Timing_log(timing_string)
493 timing_string = "#"
494 call write_text_block_in_Timing_log(timing_string)
495 
496 
497 ! Number of blocks of lanczos vectors (blocksize = npband)
498 nbdblock_lanczos = lmax_model/blocksize
499 if (modulo(lmax_model,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1
500 
501 
502 ! communicator 
503 mpi_communicator = mpi_enreg%comm_bandfft
504 
505 ! total number of processors in the communicator
506 mpi_nproc        = xmpi_comm_size(mpi_communicator )
507 
508 ! what is the rank of this processor?
509 mpi_rank         = xmpi_comm_rank(mpi_communicator )
510 
511 ! rank of the "head" processor
512 mpi_head_rank    = 0
513 
514 
515 ! number of blocks in the model dielectric matrix, which is equal to the number of processors
516 nbdblock_epsilon = mpi_nproc
517 
518 
519 ! number of vectors in every block
520 blocksize_epsilon =  lmax_model/mpi_nproc
521 if (modulo(lmax_model,mpi_nproc) /= 0) blocksize_epsilon = blocksize_epsilon + 1
522 
523 ! attribute blocks to every nodes, and tabulate ownership in logical array
524 ! This is not *the most efficient* implementation possible, but it is convenient
525 ABI_MALLOC( model_lanczos_vector_belongs_to_this_node, (lmax_model))
526 ABI_MALLOC( model_lanczos_vector_index, (lmax_model))
527 
528 
529 model_lanczos_vector_index = 0
530 model_lanczos_vector_belongs_to_this_node = .false.
531 
532 do l =1, lmax_model
533 if (mpi_rank == (l-1)/blocksize_epsilon) then
534   model_lanczos_vector_belongs_to_this_node(l) = .true.
535   model_lanczos_vector_index(l) = l-mpi_rank*blocksize_epsilon
536 end if
537 end do
538 
539 !write(100+mpi_rank,*) "model_lanczos_vector_belongs_to_this_node = ",model_lanczos_vector_belongs_to_this_node(:)
540 !write(100+mpi_rank,*) "model_lanczos_vector_index                = ",model_lanczos_vector_index(:)                
541 !flush(100+mpi_rank)
542 
543 ! Prepare the array that will contain the matrix elements of the model operator
544 ABI_MALLOC(VPV, (lmax_model,blocksize_epsilon,npt_gauss+1))
545 
546 VPV(:,:,:) = cmplx_0
547 
548 ABI_MALLOC(vpv_row, (lmax_model))
549 
550 
551 ! various working arrays
552 GWLS_TIMAB   = 1542
553 OPTION_TIMAB = 1
554 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
555 
556 ABI_MALLOC(psikg_valence,(2,npw_g))
557 ABI_MALLOC(psir_valence ,(2,n4,n5,n6))
558 
559 
560 ABI_MALLOC(psik_wrk,  (2,npw_k))
561 ABI_MALLOC(psikb_wrk, (2,npw_kb))
562 ABI_MALLOC(psikg_wrk, (2,npw_g))
563 ABI_MALLOC(psikg_tmp, (2,npw_g))
564 
565 ABI_MALLOC(local_Lbasis_conjugated,(npw_k,lmax_model))
566 ABI_MALLOC(YL,(npw_k))
567 
568 OPTION_TIMAB = 2
569 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
570 
571 
572 
573 fft_time  = zero
574 prod_time = zero
575 
576 
577 call cpu_time(time1)
578 ! loop on all valence bands
579 
580 do v = 1, nbandv
581 
582 
583 ! copy pre-calculated valence state in this covenient local array
584 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v)
585 
586 ! compute fourier transform of valence state, and conjugate
587 call g_to_r(psir_valence,psikg_valence) 
588 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 
589 
590 !--------------------------------------------------------------------------
591 !
592 ! Step 1: build the modified basis Pc . [ (V^{1/2} l) . psi_v^*]. 
593 !
594 !--------------------------------------------------------------------------
595 GWLS_TIMAB   = 1543
596 OPTION_TIMAB = 1
597 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
598 
599 ! loop on all blocks of lanczos vectors
600 do iblk_lanczos = 1, nbdblock_lanczos
601 ! loop on all states within this block
602 do mb = 1, blocksize
603 
604 ! Determine the index of the Lanczos vector
605 l = (iblk_lanczos-1)*blocksize + mb
606 
607 if ( l <= lmax_model) then
608   psik_wrk(1,:) = dble (Lbasis_model_lanczos(:,l))
609   psik_wrk(2,:) = dimag(Lbasis_model_lanczos(:,l))
610 else
611   psik_wrk(:,:) = zero
612 end if
613 
614 ! apply Coulomb potential
615 call sqrt_vc_k(psik_wrk)
616 
617 ! Store in array of blocks of wavefunctions
618 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
619 
620 end do ! mb
621 
622 call cpu_time(fft_time1)
623 
624 ! Transform to FFT representation
625 call wf_block_distribute(psikb_wrk,  psikg_wrk,1) ! LA -> FFT 
626 
627 
628 !  generate the vector  Pc [ (sqrt_V_c.l) psi_v^*]
629 
630 ! Compute the real space product, and return to k space, in FFT configuration
631 call gr_to_g(psikg_tmp,psir_valence, psikg_wrk)
632 
633 
634 call cpu_time(fft_time2)
635 fft_time = fft_time + fft_time2-fft_time1
636 
637 ! project
638 call pc_k_valence_kernel(psikg_tmp)
639 
640 ! Return to LA configuration
641 
642 ! Transform to FFT representation
643 call wf_block_distribute(psikb_wrk,  psikg_tmp, 2) ! FFT -> LA
644 
645 do mb = 1, blocksize
646 
647 ! Determine the index of the Lanczos vector
648 l = (iblk_lanczos-1)*blocksize + mb
649 
650 
651 if ( l <= lmax_model) then
652   psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) 
653   local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
654 end if
655 
656 
657 end do ! mb
658 
659 end do !iblk_lanczos 
660 OPTION_TIMAB = 2
661 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
662 
663 
664 !--------------------------------------------------------------------------
665 ! Step 2: Now that we have the modified basis, compute the matrix
666 !             elements of the model dielectric operator
667 !
668 !
669 !--------------------------------------------------------------------------
670 GWLS_TIMAB   = 1544
671 OPTION_TIMAB = 1
672 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
673 
674 do iw = 2, npt_gauss+1
675 
676 call setup_Pk_model(list_omega(iw),second_model_parameter)
677 
678 call cpu_time(prod_time1)
679 do l1 = 1, lmax_model
680 
681 ! Apply core function Y to left-vector; conjugate
682 do ik = 1, npw_k         
683 YL(ik) = model_Y_LA(ik)*conjg(local_Lbasis_conjugated(ik,l1))
684 end do
685 
686 ! Only compute lower diagonal part of matrix; epsilon is hermitian conjugate!
687 vpv_row = cmplx_0
688 do l2 = 1, l1
689 do ik = 1, npw_k         
690 vpv_row(l2) = vpv_row(l2) + YL(ik)*local_Lbasis_conjugated(ik,l2)
691 end do
692 end do
693 
694 ! the code below is twice as long!
695 !call ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
696 !call ZGEMV ( 'T', npw_k, lmax_model, cmplx_1, local_Lbasis_conjugated, npw_k, YL, 1, cmplx_0, vpv_row, 1)
697 
698 !do l2 = 1, l1
699 !        eps_model_m1_minus_one(l1,l2,iw) = eps_model_m1_minus_one(l1,l2,iw)     &
700 !                       -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k)
701 !end do
702 
703 ! Sum on all processors, making sure all processors have the total vpv_row
704 call xmpi_sum(vpv_row, mpi_communicator, ierr) ! sum on all processors for LA configuration
705 
706 ! Each processor takes its slice!
707 do l2 =1, l1
708 if ( model_lanczos_vector_belongs_to_this_node(l2) ) then
709   lb = model_lanczos_vector_index(l2)
710   VPV(l1,lb,iw) = VPV(l1,lb,iw) + vpv_row(l2)
711 end if
712 end do
713 
714 end do
715 call cpu_time(prod_time2)
716 
717 prod_time = prod_time + prod_time2-prod_time1
718 
719 end do ! iw
720 OPTION_TIMAB = 2
721 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
722 
723 end do ! v
724 
725 call cpu_time(time2)
726 
727 time = time2-time1
728 timing_string = "#        computing VPV                          : "
729 call write_timing_log(timing_string,time)
730 
731 timing_string = "#                --- of which is FFT transforms : "
732 call write_timing_log(timing_string,fft_time)
733 
734 timing_string = "#                --- of which is products       : "
735 call write_timing_log(timing_string,prod_time)
736 
737 
738 
739 
740 
741 GWLS_TIMAB   = 1542
742 OPTION_TIMAB = 1
743 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
744 ABI_FREE(local_Lbasis_conjugated)
745 ABI_FREE(YL)
746 ABI_FREE(psikg_valence)
747 ABI_FREE(psir_valence)
748 ABI_FREE(psik_wrk)
749 ABI_FREE(psikb_wrk)
750 ABI_FREE(psikg_wrk)
751 ABI_FREE(psikg_tmp)
752 ABI_FREE(vpv_row)
753 
754 OPTION_TIMAB = 2
755 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
756 
757 
758 
759 call cpu_time(time1)
760 GWLS_TIMAB   = 1547
761 OPTION_TIMAB = 1
762 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
763 
764 
765 !--------------------------------------------------------------------------------
766 !
767 !
768 ! Gather dielectric matrix on head node, invert, and re-distribute. This
769 ! Saves a lot of RAM, without needing the full machinery of ScaLAPACK.
770 !
771 !--------------------------------------------------------------------------------
772 
773 
774 ABI_MALLOC(eps_model_m1_minus_one_DISTR, (lmax_model,blocksize_epsilon,npt_gauss+1))
775 ABI_MALLOC(re_buffer, (lmax_model,blocksize_epsilon))
776 ABI_MALLOC(im_buffer, (lmax_model,blocksize_epsilon))
777 
778 eps_model_m1_minus_one_DISTR(:,:,:) = cmplx_0
779 
780 ! Define the head node, which will invert the dielectric matrices
781 head = mpi_rank == mpi_head_rank
782 
783 ! Amount of data received and sent
784 sendcount = lmax_model*blocksize_epsilon
785 recvcount = lmax_model*blocksize_epsilon
786 
787 ABI_MALLOC(sendcounts,(mpi_nproc))
788 ABI_MALLOC(displs    ,(mpi_nproc))
789 sendcounts(:) = sendcount 
790 
791 do l =1, mpi_nproc
792 displs(l) = (l-1)*sendcount
793 end do
794 
795 if (head) then
796   ! build and invert the dielectric array
797   ! careful!  lmax_model not necessarily equal to blocksize_epsilon*nbdblock_epsilon
798   ABI_MALLOC(re_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon))
799   ABI_MALLOC(im_BUFFER_head, (lmax_model, blocksize_epsilon*nbdblock_epsilon))
800   ABI_MALLOC(epsilon_head, (lmax_model, lmax_model))
801 else
802   !This looks superfluous and it is on large number of systems, but sending these 
803   !unallocated in xmpi_scatterv caused 'cannot allocate memory' cryptic errors on 
804   !several parallel test farm computers (cronos_gcc46_paral, petrus_nag, inca_gcc44_sdebug)
805   ABI_MALLOC(re_BUFFER_head, (1,1))
806   ABI_MALLOC(im_BUFFER_head, (1,1))
807   ABI_MALLOC(epsilon_head, (1,1))
808 end if
809 
810 ! Do  one frequency at a time, to avoid overflowing the RAM
811 do iw = 1, npt_gauss+1
812 ! Gather, except for static case
813 if ( iw /=1 ) then
814   ! Gather VPV on head node, for this frequency
815   call xmpi_gather(dble(VPV(:,:,iw)),  sendcount , re_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr)
816   call xmpi_gather(dimag(VPV(:,:,iw)), sendcount , im_BUFFER_head, recvcount, mpi_head_rank, mpi_communicator,ierr)
817 end if
818 
819 if ( head ) then
820 
821   ! fill the dielectric matrix
822 
823   epsilon_head(:,:) = cmplx_0
824   if (iw ==1) then
825     ! STATIC CASE, diagonal matrix
826     do l= 1, lmax_model
827     epsilon_head(l,l) = cmplx_1/epsilon_model_eigenvalues_0(l)-cmplx_1
828     end do
829 
830   else
831     ! DYNAMIC CASE, compute
832     do l1 =1, lmax_model
833     do l2 =1, l1
834     z = -cmplx_1*re_BUFFER_head(l1,l2)-cmplx_i*im_BUFFER_head(l1,l2)
835     epsilon_head(l1,l2) = z
836     epsilon_head(l2,l1) = conjg(z)
837     end do
838     epsilon_head(l1,l1) = epsilon_head(l1,l1) + cmplx_1
839     end do
840 
841     ! invert the matrix
842     call driver_invert_positive_definite_hermitian_matrix(epsilon_head,lmax_model)
843 
844     ! subtract identity
845     do l =1, lmax_model
846     epsilon_head(l,l) = epsilon_head(l,l) - cmplx_1
847     end do 
848   end if
849 
850   ! copy in head buffer
851   re_BUFFER_head(:,:) = zero
852   im_BUFFER_head(:,:) = zero
853   do l1 =1, lmax_model
854   do l2 =1, lmax_model
855   z = epsilon_head(l1,l2)
856   re_BUFFER_head(l1,l2) = dble(z) 
857   im_BUFFER_head(l1,l2) = dimag(z) 
858   end do 
859   end do 
860 
861 end if 
862 
863 ! Scatter back the data on the head to all processors
864 call xmpi_scatterv(re_BUFFER_head, sendcounts, displs, re_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr)
865 call xmpi_scatterv(im_BUFFER_head, sendcounts, displs, im_buffer, recvcount, mpi_head_rank, mpi_communicator, ierr)
866 
867 eps_model_m1_minus_one_DISTR(:,:,iw) = cmplx_1*re_buffer(:,:) + cmplx_i*im_buffer(:,:)
868 
869 end do
870 
871 ABI_FREE(re_BUFFER_head)
872 ABI_FREE(im_BUFFER_head)
873 ABI_FREE(epsilon_head)
874 
875 
876 
877 !================================================================================
878 !
879 ! For debugging purposes, store distributed dielectric matrix back in the 
880 ! complete local copies, to insure the rest of the code works.
881 !
882 !================================================================================
883 
884 if (.false.) then
885   ! Prepare the array that will contain the matrix elements of the model operator
886   ! THIS IS ONLY FOR THE REST OF THE CODE TO WORK; WE WILL REMOVE THIS 
887   ! TO SAVE RAM LATER
888   ABI_MALLOC(eps_model_m1_minus_one, (lmax_model,lmax_model,npt_gauss+1))
889 
890   ! initialize the array with zeros
891   eps_model_m1_minus_one = cmplx_0
892 
893   ! Amount of data received and sent
894   sendcount = lmax_model*blocksize_epsilon
895   recvcount = lmax_model*blocksize_epsilon*nbdblock_epsilon
896 
897 
898   ABI_MALLOC(re_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon))
899   ABI_MALLOC(im_BUFFER_head, (lmax_model,blocksize_epsilon*nbdblock_epsilon))
900 
901   do iw = 1, npt_gauss+1
902 
903   re_buffer(:,:) = dble( eps_model_m1_minus_one_DISTR(:,:,iw))
904   im_buffer(:,:) = dimag(eps_model_m1_minus_one_DISTR(:,:,iw))
905 
906   call xmpi_allgather(re_buffer,sendcount,re_BUFFER_head,mpi_communicator,ierr)
907   call xmpi_allgather(im_buffer,sendcount,im_BUFFER_head,mpi_communicator,ierr)
908 
909   do l = 1, lmax_model
910   eps_model_m1_minus_one(:,l,iw) =  cmplx_1*re_BUFFER_head(:,l)+ cmplx_i*im_BUFFER_head(:,l)
911   end do
912   end do
913 
914   ABI_FREE(re_BUFFER_head)
915   ABI_FREE(im_BUFFER_head)
916 end if
917 
918 call cpu_time(time2)
919 OPTION_TIMAB = 2
920 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
921 
922 !================================================================================
923 !
924 !================================================================================
925 
926 
927 time = time2-time1
928 timing_string = "#        inverting / distributing               :  "
929 
930 call write_timing_log(timing_string,time)
931 
932 
933 
934 ABI_FREE(re_buffer )
935 ABI_FREE(im_buffer )
936 ABI_FREE(sendcounts)
937 ABI_FREE(displs    )
938 ABI_FREE(VPV       )
939 
940 
941 
942 GWLS_TIMAB   = 1541
943 OPTION_TIMAB = 2
944 call timab(GWLS_TIMAB,OPTION_TIMAB,tsec)
945 
946 
947 end subroutine compute_eps_model_m1_minus_one

m_hamiltonian/generate_frequencies_and_weights [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  generate_frequencies_and_weights

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

116 subroutine generate_frequencies_and_weights(npt_gauss)
117 !--------------------------------------------------------------------------------
118 !
119 ! This subroutine computes the frequencies and weights necessary for Gauss-Legendre
120 ! quadrature, and stores the results in module arrays.
121 !
122 !--------------------------------------------------------------------------------
123 implicit none
124 
125 integer, intent(in)  :: npt_gauss
126 
127 
128 real(dp), allocatable ::   list_omega_tmp(:)
129 real(dp), allocatable :: list_weights_tmp(:)
130 
131 integer     :: i
132 
133 ! *************************************************************************
134 
135 ABI_MALLOC(list_omega_tmp,   (npt_gauss))
136 ABI_MALLOC(list_weights_tmp, (npt_gauss))
137 
138 call get_frequencies_and_weights_legendre(npt_gauss,list_omega_tmp,list_weights_tmp)
139 
140 
141 ABI_MALLOC(list_omega,   (npt_gauss+1))
142 ABI_MALLOC(list_weights, (npt_gauss+1))
143 
144 ! make sure the first frequency in zero!
145 list_omega(1)   = zero
146 list_weights(1) = zero
147 
148 do i = 1,npt_gauss
149 
150 ! inverse the order of the frequency points, as they come out
151 ! in reverse order from the generating subroutine
152 list_omega  (i+1) = list_omega_tmp  (npt_gauss+1-i)
153 list_weights(i+1) = list_weights_tmp(npt_gauss+1-i)
154 
155 end do
156 
157 
158 ABI_FREE(list_omega_tmp)
159 ABI_FREE(list_weights_tmp)
160 
161 
162 end subroutine generate_frequencies_and_weights

m_hamiltonian/ProjectedSternheimerEpsilon [ Functions ]

[ Top ] [ m_hamiltonian ] [ Functions ]

NAME

  ProjectedSternheimerEpsilon

FUNCTION

  .

INPUTS

OUTPUT

SOURCE

1015 subroutine ProjectedSternheimerEpsilon(lmax, npt_gauss, second_model_parameter, &
1016 list_projection_frequencies,nfrequencies,&
1017 epsilon_eigenvalues_0,debug,use_model)
1018 !----------------------------------------------------------------------------------------------------
1019 ! This subroutine combines in a single subprogram the jobs of previous routines
1020 !
1021 !               - setup_projected_Sternheimer_epsilon
1022 !               - compute_projected_Sternheimer_epsilon
1023 !
1024 ! The purpose of this combination is to avoid independent loops on nbandv, requiring the 
1025 ! arrays
1026 !               projected_epsilon_M_matrix
1027 !               projected_epsilon_B_matrix
1028 !               projected_epsilon_G_matrix
1029 !
1030 ! from scaling like N^3, which grows very large with problem size.
1031 ! 
1032 ! Thus, this routine:
1033 !
1034 !       -  Computes the frequency-dependent dielectric matrix in the Lanczos basis, using
1035 !          the projected Sternheimer equations.
1036 !
1037 !       -  Computes the frequency-dependent MODEL dielectric matrix in the complementary Lanczos basis.
1038 !
1039 !
1040 ! This routine will be verbose and write log files; indeed, large jobs crash in here, it will
1041 ! be important to know where/why!
1042 !
1043 ! The subroutine also computes the matrix elements on epsilon_model(iw) in the Lanczos basis;
1044 ! this is done here to avoid preforming direct products with the valence states again later.
1045 !----------------------------------------------------------------------------------------------------
1046 implicit none
1047 
1048 real(dp), parameter     :: svd_tolerance = 1.0e-16_dp
1049 
1050 integer,     intent(in) :: lmax, npt_gauss
1051 integer,     intent(in) :: nfrequencies
1052 real(dp),    intent(in) :: list_projection_frequencies(nfrequencies)
1053 logical,     intent(in) :: debug
1054 real(dp),    intent(in) :: epsilon_eigenvalues_0(lmax)
1055 
1056 logical,optional,intent(in) :: use_model
1057 
1058 real(dp),    intent(in) :: second_model_parameter
1059 
1060 
1061 integer :: l, l1, l2
1062 integer :: i, iw, v
1063 integer :: recy_i
1064 integer :: lsolutions_max, lsolutions, ls
1065 integer :: projection
1066 
1067 complex(dpc), allocatable :: sternheimer_A0(:,:)
1068 complex(dpc), allocatable :: sternheimer_A(:,:)
1069 complex(dpc), allocatable :: sternheimer_B(:,:)
1070 complex(dpc), allocatable :: sternheimer_X(:,:)
1071 complex(dpc), allocatable :: sternheimer_G(:,:)
1072 
1073 
1074 complex(dpc), allocatable :: dummy_tmp_1(:,:)
1075 complex(dpc), allocatable :: dummy_tmp_2(:,:)
1076 
1077 integer, allocatable      :: ipiv(:)
1078 
1079 
1080 
1081 complex(dpc),allocatable :: local_Lbasis(:,:)
1082 complex(dpc),allocatable :: local_Lbasis_conjugated(:,:)
1083 
1084 complex(dpc),allocatable :: YL(:)
1085 
1086 real(dp), allocatable :: psikg_in(:,:), psikg_out(:,:)
1087 
1088 real(dp), allocatable :: psik_wrk(:,:), psikg_wrk(:,:), psikb_wrk(:,:)
1089 real(dp), allocatable :: psi_gamma_l1(:,:), psi_gamma_l2(:,:)
1090 
1091 real(dp), allocatable :: psikg_valence(:,:)
1092 real(dp), allocatable :: psir_valence(:,:,:,:)
1093 
1094 real(dp), allocatable :: psi_rhs(:,:,:)
1095 
1096 real(dp), allocatable :: psikg_VL(:,:)
1097 
1098 
1099 complex(dpc), allocatable :: check_matrix(:,:), check_matrix2(:,:)
1100 complex(dpc), allocatable :: c_sternheimer_solutions(:,:)
1101 complex(dpc), allocatable :: QR_orthonormal_basis(:,:)
1102 
1103 complex(dpc), allocatable :: svd_matrix(:,:)
1104 real   (dp ), allocatable :: svd_values(:)
1105 
1106 
1107 integer                   :: iblk_lanczos, nbdblock_lanczos
1108 integer                   :: iblk_solutions, nbdblock_solutions
1109 integer                   :: mb
1110 
1111 character(128) :: filename
1112 logical        :: file_exists
1113 integer        :: io_unit
1114 
1115 character(128) :: filename_log
1116 integer        :: io_unit_log
1117 
1118 
1119 real(dp)      :: omega
1120 
1121 character(256) :: timing_string
1122 real(dp)       :: time1, time2
1123 real(dp)       :: time_exact
1124 
1125 
1126 integer  :: info
1127 integer  :: ierr
1128 
1129 real(dp)  ::  z(2)
1130 
1131 
1132 logical        :: omega_is_imaginary
1133 real(dp)       :: omega0
1134 
1135 logical        :: model
1136 logical        :: write_debug
1137 
1138 
1139 integer        :: mpi_communicator, mpi_rank, mpi_group
1140 
1141 ! *************************************************************************
1142 
1143 
1144 !================================================================================
1145 ! Prepare MPI information
1146 !================================================================================
1147 
1148 ! for LA configuration ,The processors communicate over band+FFT
1149 mpi_communicator = mpi_enreg%comm_bandfft
1150 
1151 ! what is the rank of this processor, within its group?
1152 mpi_rank  = mpi_enreg%me_fft
1153 
1154 ! Which group does this processor belong to, given the communicator?
1155 mpi_group = mpi_enreg%me_band
1156 
1157 
1158 
1159 
1160 !================================================================================
1161 ! Setup a log file, to keep track of the algorithm
1162 !================================================================================
1163 
1164 
1165 write(filename_log,'(A,I4.4,A)') 'ProjectedSternheimerEpsilon_PROC=',mpi_enreg%me,'.log'
1166 
1167 io_unit_log = get_unit()
1168 open(io_unit_log,file=filename_log,status=files_status_new)
1169 write(io_unit_log,10) ''
1170 write(io_unit_log,10) '#===================================================================================================='
1171 write(io_unit_log,10) "#                     ProjectedSternheimerEpsilon: log file                                          "
1172 write(io_unit_log,10) "#                     -------------------------------------------------------------------            "
1173 write(io_unit_log,10) "#                                                                                                    "
1174 write(io_unit_log,10) "# This file tracks the algorithm in the routine ProjectedSternheimerEpsilon. The goal is to          " 
1175 write(io_unit_log,10) "# establish where the algorithm crashes if it does, and/or  to track are far along the code is.      "
1176 write(io_unit_log,10) '#'
1177 write(io_unit_log,10) '#  MPI data for this process:'
1178 write(io_unit_log,10) '#'
1179 write(io_unit_log,22) '#    mpi_rank :',mpi_rank,'  (rank of this processor in its band group)'
1180 write(io_unit_log,22) '#    mpi_group:',mpi_group,' (band group to which this processor belongs)'
1181 write(io_unit_log,10) '#===================================================================================================='
1182 flush(io_unit_log)
1183 
1184 
1185 !================================================================================
1186 ! Setup timing; prepare arrays
1187 !================================================================================
1188 
1189 write(io_unit_log,10) " - Preparing and allocating arrays ...."
1190 flush(io_unit_log)
1191 
1192 
1193 
1194 timing_string = "#"
1195 call write_text_block_in_Timing_log(timing_string)
1196 timing_string = "#        ProjectedSternheimerEpsilon "
1197 call write_text_block_in_Timing_log(timing_string)
1198 timing_string = "#"
1199 call write_text_block_in_Timing_log(timing_string)
1200 
1201 ! Allocate the module array
1202 ABI_MALLOC(projected_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1))
1203 
1204 projected_dielectric_Lanczos_basis(:,:,:) = cmplx_0
1205 
1206 ! initialize zero frequency with exact solution
1207 do l = 1, lmax
1208 projected_dielectric_Lanczos_basis(l,l,1) = cmplx_1*epsilon_eigenvalues_0(l)
1209 end do
1210 
1211 
1212 ! initialize other frequencies with the identity
1213 do iw = 2, npt_gauss + 1
1214 do l = 1, lmax
1215 projected_dielectric_Lanczos_basis(l,l,iw) = cmplx_1
1216 end do
1217 end do
1218 
1219 time_exact = zero
1220 
1221 
1222 !================================================================================
1223 ! Parallelisation of the code is subtle; Hamiltonian must act over 
1224 ! FFT rows, we must be careful with memory, etc...
1225 !
1226 ! We will parallelise in block of Lanczos vectors, not over bands.
1227 !================================================================================
1228 
1229 ! Number of blocks of lanczos vectors
1230 nbdblock_lanczos = lmax/blocksize
1231 if (modulo(lmax,blocksize) /= 0) nbdblock_lanczos = nbdblock_lanczos + 1
1232 
1233 
1234 if (present(use_model)) then
1235   model = use_model
1236 else
1237   model = .true.
1238 end if
1239 
1240 if (model) then
1241   ! Prepare the array that will contain the matrix elements of the model operator
1242   ABI_MALLOC(model_dielectric_Lanczos_basis, (lmax,lmax,npt_gauss+1))
1243 
1244   ! initialize with zero. NOT with the identity, in order to avoid extra communications
1245   ! (see below)
1246   model_dielectric_Lanczos_basis(:,:,:) = cmplx_0
1247 
1248 end if
1249 
1250 ! various working arrays
1251 
1252 ABI_MALLOC(psikg_valence    ,(2,npw_g))
1253 ABI_MALLOC(psir_valence     ,(2,n4,n5,n6))
1254 
1255 
1256 ABI_MALLOC(psikg_VL ,(2,npw_g))
1257 
1258 
1259 ABI_MALLOC(psik_wrk         ,(2,npw_k))
1260 ABI_MALLOC(psikb_wrk        ,(2,npw_kb))
1261 ABI_MALLOC(psikg_wrk        ,(2,npw_g))
1262 
1263 
1264 ABI_MALLOC(psi_gamma_l1     ,(2,npw_k))
1265 ABI_MALLOC(psi_gamma_l2     ,(2,npw_k))
1266 
1267 ABI_MALLOC(psi_rhs          ,(2,npw_k,lmax))
1268 
1269 
1270 ABI_MALLOC(psikg_in   ,(2,npw_g))
1271 ABI_MALLOC(psikg_out  ,(2,npw_g))
1272 
1273 
1274 ! maximal possible dimension of the solution space
1275 ! +1 because the solutions at $\omega=\infty$ are free.
1276 ! +1 if recycling is activated, because the solutions at $\omega=0$ are then available.
1277 i=1
1278 if(dtset%gwls_recycle == 1 .or. dtset%gwls_recycle == 2) then
1279   i=2
1280 end if
1281 lsolutions_max = lmax*(nfrequencies+i)
1282 
1283 
1284 ABI_MALLOC(local_Lbasis,           (npw_k,lmax))
1285 ABI_MALLOC(local_Lbasis_conjugated,(npw_k,lmax))
1286 ABI_MALLOC(YL,(npw_k))
1287 
1288 ABI_MALLOC(c_sternheimer_solutions,(npw_k,lsolutions_max))
1289 ABI_MALLOC(QR_orthonormal_basis   ,(npw_k,lsolutions_max))
1290 
1291 
1292 omega_is_imaginary = .true.
1293 
1294 
1295 ABI_MALLOC(svd_matrix,(npw_k,lsolutions_max))
1296 ABI_MALLOC(svd_values,(lsolutions_max))
1297 
1298 
1299 ! Prepare files for writing
1300 write_debug = debug .and. mpi_enreg%me == 0
1301 
1302 if ( write_debug ) then
1303 
1304   write(filename,'(A)') "ProjectedSternheimerEpsilon.log"
1305   inquire(file=filename,exist=file_exists)
1306 
1307   i = 0
1308   do while (file_exists)
1309   i = i+1
1310   write (filename,'(A,I0.4,A)') "ProjectedSternheimerEpsilon_",i,".log"
1311   inquire(file=filename,exist=file_exists)
1312   end do
1313 
1314 
1315   io_unit = get_unit()
1316   open(io_unit,file=filename,status=files_status_new)
1317   write(io_unit,10) ''
1318   write(io_unit,10) '#===================================================================================================='
1319   write(io_unit,10) "#                     Building the dielectic matrix using projected Sternheimer equation             "
1320   write(io_unit,10) "#                     -------------------------------------------------------------------            "
1321   write(io_unit,10) "#                                                                                                    "
1322   write(io_unit,10) '# This file contains some tests to check if the various elements entering the projected  '
1323   write(io_unit,10) '# dielectric matrix have the right properties. At this point, this is mostly for debugging.'
1324   write(io_unit,10) '# The wavefunctions and other related arrays are stored in reciprocal space.'
1325   write(io_unit,10) '#'
1326   write(io_unit,10) '#===================================================================================================='
1327   write(io_unit,10) ''
1328   flush(io_unit)
1329 end if
1330 
1331 if (debug) then
1332   ABI_MALLOC(check_matrix ,(lsolutions_max,lsolutions_max))
1333   ABI_MALLOC(check_matrix2,(lsolutions_max,lsolutions_max))
1334 end if
1335 
1336 !================================================================================
1337 !  Loop on all valence bands
1338 !================================================================================
1339 
1340 
1341 
1342 
1343 do v = 1, nbandv
1344 write(io_unit_log,10) '#===================================================================================================='
1345 write(io_unit_log,20) '#  valence band index:', v
1346 write(io_unit_log,10) '#===================================================================================================='
1347 flush(io_unit_log)
1348 
1349 
1350 
1351 
1352 if ( write_debug ) then
1353   write(io_unit,10) '#===================================================================================================='
1354   write(io_unit,20) '#  valence band index:', v
1355   write(io_unit,10) '#===================================================================================================='
1356   flush(io_unit)
1357 end if
1358 
1359 write(io_unit_log,10) '   - Fourier transform valence state ...'
1360 flush(io_unit_log)
1361 
1362 
1363 ! copy pre-calculated valence state in this covenient local array
1364 psikg_valence(:,:) = kernel_wavefunctions_FFT(:,:,v)
1365 
1366 ! compute fourier transform of valence state, and conjugate
1367 call g_to_r(psir_valence,psikg_valence) 
1368 psir_valence(2,:,:,:) = -psir_valence(2,:,:,:) 
1369 
1370 ! loop on all blocks of lanczos vectors
1371 write(io_unit_log,10) '   - Loop on all lanczos blocks to generate modified basis and Sternheimer RHS:'
1372 flush(io_unit_log)
1373 do iblk_lanczos = 1, nbdblock_lanczos
1374 !--------------------------------------------------------------------------
1375 ! Below, we build the modified basis, [ (V^{1/2} l)^* . psi_v], 
1376 ! as well as conjugated, projected form, Pc . [ (V^{1/2} l) . psi_v^*]. 
1377 !
1378 ! It is very irritating to have to do it this way, but I don't
1379 ! see an alternative; see discussion below.
1380 !
1381 !--------------------------------------------------------------------------
1382 
1383 write(io_unit_log,23) '       iblk_lanczos  = ',iblk_lanczos," / ",nbdblock_lanczos
1384 
1385 
1386 write(io_unit_log,10) '          -- Prepare modified basis computation...'
1387 flush(io_unit_log)
1388 
1389 
1390 
1391 ! loop on all states within this block
1392 do mb = 1, blocksize
1393 
1394 ! Determine the index of the Lanczos vector
1395 l = (iblk_lanczos-1)*blocksize + mb
1396 
1397 ! take a single lanczos vector
1398 
1399 if ( l <= lmax) then
1400   psik_wrk(1,:) = dble (Lbasis_lanczos(:,l))
1401   psik_wrk(2,:) = dimag(Lbasis_lanczos(:,l))
1402 else
1403   psik_wrk(:,:) =  zero
1404 end if
1405 
1406 
1407 ! Apply coulomb potential
1408 call sqrt_vc_k(psik_wrk)
1409 
1410 ! Store in array of blocks of wavefunctions
1411 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1412 
1413 end do ! mb
1414 
1415 ! Transform to FFT representation
1416 call wf_block_distribute(psikb_wrk,  psikg_VL,1) ! LA -> FFT 
1417 
1418 ! psikg_VL now contains | V^1/2 . l >, in FFT configuration
1419 
1420 
1421 !----------------------------------------------------------
1422 ! i) Compute the modified basis
1423 !----------------------------------------------------------
1424 write(io_unit_log,10) '          -- compute modified basis ...'
1425 flush(io_unit_log)
1426 
1427 
1428 
1429 ! Fourier transform to real space, and conjugate (psir1 is a global work array)
1430 call g_to_r(psir1,psikg_VL) 
1431 psir1(2,:,:,:) = -psir1(2,:,:,:) ! IS THIS STACK-DANGEROUS?
1432 
1433 ! Compute the real space product, and return to k space, in FFT configuration
1434 call gr_to_g(psikg_wrk,psir1,psikg_valence)
1435 
1436 ! psikg_wrk contains | (V^1/2 . l)^* phi_v >, in FFT configuration
1437 
1438 ! return to LA representation
1439 call wf_block_distribute(psikb_wrk,  psikg_wrk,2) ! FFT -> LA 
1440 
1441 ! store data, in LA representation
1442 do mb = 1, blocksize
1443 l = (iblk_lanczos-1)*blocksize + mb
1444 
1445 if ( l <= lmax) then
1446   ! local_Lbasis
1447   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1448   local_Lbasis(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1449 end if
1450 
1451 end do !mb 
1452 
1453 !--------------------------------------------------------------------------
1454 ! ii) Build the Sternheimer coefficients, at various frequencies,
1455 !     to define the Sternheimer basis.
1456 !--------------------------------------------------------------------------
1457 write(io_unit_log,20) '          -- Compute Sternheimer RHS...'
1458 flush(io_unit_log)
1459 
1460 
1461 ! psikg_wrk still contains | (V^1/2 . l)^* phi_v >, in FFT configuration
1462 
1463 !  Create right-hand-side of Sternheimer equation, in FFT configuration
1464 call pc_k_valence_kernel(psikg_wrk)
1465 call Hpsik(psikg_in,psikg_wrk,eig(v))
1466 call pc_k_valence_kernel(psikg_in)
1467 psikg_in(:,:) = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS?
1468 
1469 ! return RHS  to LA representation, for explicit storage 
1470 call wf_block_distribute(psikb_wrk,  psikg_in,2) ! FFT -> LA 
1471 
1472 ! store data, in LA representation
1473 do mb = 1, blocksize
1474 l = (iblk_lanczos-1)*blocksize + mb
1475 
1476 if ( l <= lmax) then
1477   ! psi_rhs
1478   psi_rhs(:,:,l)    = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1479 end if
1480 
1481 end do !mb 
1482 
1483 !----------------------------------------------------------
1484 ! iii) extract solutions for all projection frequencies
1485 !----------------------------------------------------------
1486 write(io_unit_log,20) '          -- Extract solutions for all projection frequencies...'
1487 flush(io_unit_log)
1488 
1489 
1490 
1491 do iw = 1, nfrequencies
1492 
1493 omega0 = list_projection_frequencies(iw)
1494 ! Solve Sternheimer equation
1495 
1496 projection = 0
1497 if(omega0 < 1d-12) projection=1
1498 
1499 ! solve A x = b, over the whole lanczos block
1500 call sqmr(psikg_in, psikg_out, eig(v), projection, omega0, omega_is_imaginary)
1501 
1502 ! return LA representation, for explicit storage 
1503 call wf_block_distribute(psikb_wrk,  psikg_out, 2) ! FFT -> LA 
1504 
1505 do mb = 1, blocksize
1506 l = (iblk_lanczos-1)*blocksize + mb
1507 
1508 if ( l <= lmax) then
1509   ls = (l-1)*nfrequencies+iw
1510 
1511   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1512 
1513   c_sternheimer_solutions(:,ls)= cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1514 end if
1515 
1516 end do ! mb
1517 
1518 end do ! iw
1519 
1520 !----------------------------------------------------------
1521 ! iv) Compute the conjugated, projected modified basis
1522 !----------------------------------------------------------
1523 
1524 write(io_unit_log,20) '          -- compute the conjugated, projected modified basis...'
1525 flush(io_unit_log)
1526 
1527 
1528 
1529 
1530 ! Compute the real space product, | (V^1/2. l) phi_v^* > and return to k space, in FFT configuration
1531 call gr_to_g(psikg_wrk, psir_valence, psikg_VL)
1532 
1533 ! project on conduction states
1534 call pc_k_valence_kernel(psikg_wrk)
1535 
1536 ! return to LA representation
1537 call wf_block_distribute(psikb_wrk,  psikg_wrk,2) ! FFT -> LA 
1538 
1539 ! store back, in LA configuration
1540 do mb = 1, blocksize
1541 l = (iblk_lanczos-1)*blocksize + mb
1542 
1543 if ( l <= lmax) then
1544   psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1545   local_Lbasis_conjugated(:,l) = cmplx_1*psik_wrk(1,:)+cmplx_i*psik_wrk(2,:)
1546 end if
1547 
1548 end do !mb 
1549 
1550 end do ! iblk_lanczos
1551 
1552 
1553 
1554 write(io_unit_log,10) '   - Read in solutions at w=0 and/or w = infinity, if appropriate...'
1555 flush(io_unit_log)
1556 
1557 ! Begin with the storage of the solutions at $\omega = 0.$, which are free.
1558 if(dtset%gwls_recycle == 1) then 
1559   c_sternheimer_solutions(:,lsolutions_max-2*lmax+1:lsolutions_max-lmax) = cmplx_1*Sternheimer_solutions_zero(1,:,:,v) + &
1560   &                                                                             cmplx_i*Sternheimer_solutions_zero(2,:,:,v)
1561 end if
1562 if(dtset%gwls_recycle == 2) then
1563   do i=1,lmax
1564   recy_i = (i-1)*nbandv + v       
1565   !BUG : On petrus, NAG 5.3.1 + OpenMPI 1.6.2 cause read(...,rec=i) to read the data written by write(...,rec=i+1).
1566   read(recy_unit,rec=recy_i) psik_wrk
1567   c_sternheimer_solutions(:,lsolutions_max-2*lmax+i) = cmplx_1*psik_wrk(1,:) + cmplx_i*psik_wrk(2,:)
1568   end do
1569 end if
1570 
1571 ! and then continue with the storage of the vectors on which the Sternheimer solutions will be projected.
1572 c_sternheimer_solutions(:,lsolutions_max-lmax+1:lsolutions_max) = cmplx_1*psi_rhs(1,:,:) + cmplx_i*psi_rhs(2,:,:)
1573 ! Previously was = local_Lbasis; but analysis in the Lanczos article reveals psi_rhs should be better.
1574 ! Furthermore, tests show that, with psi_rhs, silane@1Ha has Sigma_c 0.01mHa away from the result with 
1575 ! gwls_list_proj_freq 0.0 1.0, in contrast with local_Lbasis, which has Sigma_c 0.3mHa away from the same result.
1576 
1577 if ( model ) then
1578 
1579   write(io_unit_log,10) '   - USE MODEL: model = .true., hence compute model model dielectric matrix...'
1580   flush(io_unit_log)
1581 
1582 
1583 
1584   !--------------------------------------------------------------------------
1585   ! Now that we have the modified basis, compute the matrix
1586   ! elements of the model dielectric operator
1587   !
1588   ! CAREFUL!
1589   !
1590   !        The model is given by 
1591   !
1592   !                P_model(iw) = sum_{v} phi_v(r) P_c.Y(iw).P_c phi_v^*(r')
1593   !
1594   !    such that        
1595   !
1596   !    <l1 | eps_model(iw) | l2 >  = delta_{l1,l2} 
1597   !    - sum_{v} < (V^{1/2}.l1).phi_v^*| Pc . Y . Pc | (V^{1/2}.l2).phi_v^* >
1598   !
1599   ! But local_Lbasis defined above corresponds to
1600   !                                Pc | (V^{1/2} .l )^* phi_v >.
1601   !
1602   ! This is why we must define local_Lbasis_conjugated, of the form
1603   !                                Pc | (V^{1/2} .l ) phi_v^* >.
1604   !
1605   !--------------------------------------------------------------------------
1606 
1607   do iw = 1, npt_gauss+1
1608 
1609   call setup_Pk_model(list_omega(iw),second_model_parameter)
1610 
1611   ! Only build the lower triangular part; the upper triangular part is obtained from the Hermitian conjugate
1612   do l1 = 1, lmax
1613 
1614   YL(:) = model_Y_LA(:)*local_Lbasis_conjugated(:,l1)
1615   do l2 = 1, l1
1616   model_dielectric_Lanczos_basis(l1,l2,iw) = model_dielectric_Lanczos_basis(l1,l2,iw)  &
1617   -complex_vector_product(YL, local_Lbasis_conjugated(:,l2),npw_k)
1618 
1619   end do
1620   end do
1621 
1622   end do ! iw
1623 
1624 
1625 end if
1626 
1627 
1628 !--------------------------------------------------------------------------
1629 ! Check explicitly that solutions satisfy the Sternheimer equations
1630 !--------------------------------------------------------------------------
1631 
1632 if ( debug ) then
1633 
1634   if (write_debug) then
1635     write(io_unit,10) "#--------------------------------------------------------------------------------"
1636     write(io_unit,10) "# Check explicitly that solutions satisfy the Sternheimer equation.              "
1637     write(io_unit,10) "#                                                                                "
1638     write(io_unit,10) "# Define:                                                                        "
1639     write(io_unit,10) "# E_l = || (omega^2+[H-Ev]^2) |phi_l> + Pc.[H-Ev].Pc |(V^1/2.q_l)^*.phi_v >  ||  "
1640     write(io_unit,10) "#--------------------------------------------------------------------------------"
1641     write(io_unit,10) '#  l                Im[omega] (Ha)                  E_l'
1642     write(io_unit,10) "#--------------------------------------------------------------------------------"
1643     flush(io_unit)
1644   end if
1645 
1646 
1647   do iblk_lanczos = 1, nbdblock_lanczos
1648 
1649   ! loop on all states within this block
1650   do mb = 1, blocksize
1651 
1652   l = (iblk_lanczos-1)*blocksize + mb
1653 
1654 
1655   if ( l <= lmax) then
1656     ! psik_wrk = | (V^{1/2} .l )^* phi_v >
1657     psik_wrk(1,:) = dble (local_Lbasis(:,l))
1658     psik_wrk(2,:) = dimag(local_Lbasis(:,l))
1659   else
1660     psik_wrk(:,:) = zero
1661   end if
1662 
1663   ! Store in array of blocks of wavefunctions
1664   psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1665   end do ! mb
1666 
1667   ! Transform to FFT representation
1668   call wf_block_distribute(psikb_wrk,  psikg_wrk, 1) ! LA -> FFT 
1669 
1670   !  Create right-hand-side of Sternheimer equation
1671   call pc_k_valence_kernel(psikg_wrk)
1672   call Hpsik(psikg_in,psikg_wrk,eig(v))
1673   call pc_k_valence_kernel(psikg_in)
1674   psikg_in(:,:)  = -psikg_in(:,:) ! IS THIS STACK-DANGEROUS?
1675 
1676   do iw = 1, nfrequencies
1677   ! loop on all states within this block
1678   do mb = 1, blocksize
1679 
1680   l = (iblk_lanczos-1)*blocksize + mb
1681 
1682   ls = (l-1)*nfrequencies+iw
1683 
1684   if ( l <= lmax) then
1685     psik_wrk(1,:) = dble (c_sternheimer_solutions(:,ls))
1686     psik_wrk(2,:) = dimag(c_sternheimer_solutions(:,ls))
1687   else
1688     psik_wrk(:,:) = zero
1689   end if
1690 
1691   ! Store in array of blocks of wavefunctions
1692   psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1693   end do
1694 
1695   ! Transform to FFT representation
1696   call wf_block_distribute(psikb_wrk,  psikg_wrk, 1) ! LA -> FFT 
1697 
1698 
1699   omega0 = list_projection_frequencies(iw)
1700 
1701   psikg_out(:,:) = omega0**2*psikg_wrk(:,:)
1702 
1703   call Hpsik(psikg_wrk,cte=eig(v))
1704   call Hpsik(psikg_wrk,cte=eig(v))
1705 
1706 
1707   psikg_out(:,:) = psikg_out(:,:) + psikg_wrk(:,:)-psikg_in(:,:)
1708   ! psikg_out now contains [ w0^2 + [H-epsilon_v]^2 ] | x > - |RHS>, in FFT configuration.
1709 
1710   ! bring it back to LA configuration
1711 
1712   ! Transform to FFT representation
1713   call wf_block_distribute(psikb_wrk,  psikg_out, 2) ! FFT -> LA 
1714 
1715   do mb = 1, blocksize
1716   l = (iblk_lanczos-1)*blocksize + mb
1717 
1718   if ( l <= lmax) then
1719     psik_wrk(:,:) = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1720 
1721     z(:) = cg_zdotc(npw_k ,psik_wrk, psik_wrk)
1722 
1723     call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors working on FFT!
1724     if (write_debug) write(io_unit,21)  l, omega0, sqrt(z(1))
1725   end if
1726   end do ! mb
1727 
1728   end do ! iw
1729   end do ! iblk_lanczos 
1730 
1731 end if
1732 
1733 !--------------------------------------------------------------------------
1734 ! Step 5: Perform a singular value decomposition to extract a
1735 !         linearly independent basis for the solution space.
1736 !
1737 !--------------------------------------------------------------------------
1738 write(io_unit_log,10) '   - Perform SVD to extract linearly independent basis to Sternheimer equation...'
1739 flush(io_unit_log)
1740 
1741 
1742 
1743 
1744 svd_matrix(:,:) =  c_sternheimer_solutions(:,:)
1745 
1746 call extract_SVD(mpi_communicator, npw_k,lsolutions_max,svd_matrix,svd_values)
1747 
1748 if ( write_debug ) then
1749   write(io_unit,10) "#--------------------------------------------------------------------------------"
1750   write(io_unit,10) "# Check the singular value decomposition of the arrays"
1751   write(io_unit,10) '#  l                          svd'
1752   write(io_unit,10) "#--------------------------------------------------------------------------------"
1753   flush(io_unit)
1754 end if
1755 
1756 lsolutions = 0
1757 QR_orthonormal_basis(:,:) = cmplx_0
1758 do l=1, lsolutions_max
1759 if (svd_values(l) > svd_tolerance ) then
1760   lsolutions = lsolutions + 1
1761 
1762   if ( write_debug ) then
1763     write(io_unit,14)   l,svd_values(l)
1764     flush(io_unit)
1765   end if
1766   QR_orthonormal_basis(:,l) = svd_matrix(:,l)
1767 
1768 else
1769   if ( write_debug ) then
1770     write(io_unit,15)   l,svd_values(l),' SVD value too small! Vector to be discarded!'
1771     flush(io_unit)
1772   end if
1773 end if
1774 end do
1775 
1776 !--------------------------------------------------------------------------
1777 ! Step 6: project all relevant arrays onto the newly defined orthonormal
1778 !         basis.
1779 !--------------------------------------------------------------------------
1780 write(io_unit_log,10) '   - Compute the B matrix...'
1781 flush(io_unit_log)
1782 
1783 if (debug) then
1784   check_matrix(:,:) = cmplx_0
1785   do l = 1, lsolutions
1786   check_matrix(l,l) = -cmplx_1
1787   end do
1788 end if
1789 
1790 ABI_MALLOC(ipiv                ,(lsolutions))
1791 ABI_MALLOC(sternheimer_A0      ,(lsolutions,lsolutions))
1792 ABI_MALLOC(sternheimer_B       ,(lsolutions,lmax))
1793 ABI_MALLOC(sternheimer_G       ,(lmax,lsolutions))
1794 
1795 ! Compute the X matrix and the check_matrix
1796 do l1 = 1, lsolutions
1797 
1798 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1))
1799 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1))
1800 
1801 do l2 = 1, lsolutions
1802 
1803 if (l2 <= lmax) then
1804   z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psi_rhs(:,:,l2))
1805   call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors for LA configuration
1806 
1807   sternheimer_B(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
1808 end if
1809 
1810 if (debug)  then
1811   psi_gamma_l2(1,:) = dble (QR_orthonormal_basis(:,l2))
1812   psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2))
1813 
1814   z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psi_gamma_l2)  
1815   call xmpi_sum(z, mpi_communicator,ierr) ! sum on all processors 
1816 
1817   check_matrix(l1,l2) = check_matrix(l1,l2) + cmplx_1*z(1)+cmplx_i*z(2)
1818 end if
1819 
1820 end do ! l2
1821 end do ! l1
1822 
1823 
1824 ! Number of blocks of solution vectors
1825 nbdblock_solutions = lsolutions/blocksize
1826 
1827 if (modulo(lsolutions,blocksize) /= 0) nbdblock_solutions = nbdblock_solutions + 1
1828 
1829 
1830 write(io_unit_log,10) '   - Compute the A0 matrix...'
1831 flush(io_unit_log)
1832 
1833 ! Compute the A matrix
1834 do iblk_solutions =1, nbdblock_solutions 
1835 
1836 do mb = 1, blocksize
1837 l2 = (iblk_solutions-1)*blocksize + mb
1838 
1839 if ( l2 <= lsolutions) then
1840   psik_wrk(1,:) = dble (QR_orthonormal_basis(:,l2))
1841   psik_wrk(2,:) = dimag(QR_orthonormal_basis(:,l2))
1842 else
1843   psik_wrk(:,:) =  zero
1844 end if
1845 
1846 ! Store in array of blocks of wavefunctions
1847 psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k) = psik_wrk(:,:)
1848 end do
1849 
1850 ! Transform to FFT representation
1851 call wf_block_distribute(psikb_wrk,  psikg_wrk,1) ! LA -> FFT 
1852 
1853 ! act twice with the Hamiltonian operator
1854 call Hpsik(psikg_out,psikg_wrk,eig(v))
1855 call Hpsik(psikg_out,cte=eig(v))
1856 
1857 ! return to LA representation
1858 call wf_block_distribute(psikb_wrk,  psikg_out,2) ! FFT -> LA
1859 
1860 do mb = 1, blocksize
1861 l2 = (iblk_solutions-1)*blocksize + mb
1862 
1863 if ( l2 <= lsolutions) then
1864   psik_wrk(:,:)     = psikb_wrk(:,(mb-1)*npw_k+1:mb*npw_k)
1865 else
1866   psik_wrk(:,:)     = zero
1867 end if
1868 
1869 do l1 = 1, lsolutions
1870 
1871 psi_gamma_l1(1,:) = real (QR_orthonormal_basis(:,l1))
1872 psi_gamma_l1(2,:) = dimag(QR_orthonormal_basis(:,l1))
1873 
1874 z(:) = cg_zdotc(npw_k, psi_gamma_l1,  psik_wrk)
1875 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT!
1876 
1877 
1878 if ( l2 <= lsolutions ) then
1879   sternheimer_A0(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
1880 end if
1881 
1882 
1883 end do ! l1
1884 end do ! mb
1885 end do ! iblk_solutions 
1886 
1887 
1888 if (debug) then
1889   ! HERE we use a dummy variable to avoid operations which might blow the stack!
1890   ! Stack overflows lead to hard-to-find bugs; let's avoid putting them in here.
1891   ABI_MALLOC(dummy_tmp_1,(lsolutions,lsolutions))
1892   ABI_MALLOC(dummy_tmp_2,(lsolutions,lsolutions))
1893 
1894 
1895   dummy_tmp_1(:,:)= transpose(sternheimer_A0(:,:))
1896   dummy_tmp_2(:,:)= conjg(dummy_tmp_1(:,:))
1897 
1898   check_matrix2(:,:) = sternheimer_A0(:,:)-dummy_tmp_2(:,:)
1899 
1900   ABI_FREE(dummy_tmp_1)
1901   ABI_FREE(dummy_tmp_2)
1902 end if
1903 
1904 write(io_unit_log,10) '   - Compute the GAMMA matrix...'
1905 flush(io_unit_log)
1906 
1907 
1908 ! Compute the GAMMA matrices
1909 do l1 = 1, lmax
1910 
1911 ! psik_wrk = | (V^{1/2} . l )^* phi_v >
1912 psik_wrk(1,:) = dble (local_Lbasis(:,l1) )
1913 psik_wrk(2,:) = dimag(local_Lbasis(:,l1))
1914 
1915 
1916 do l2 = 1, lsolutions
1917 
1918 psi_gamma_l2(1,:) = real (QR_orthonormal_basis(:,l2))
1919 psi_gamma_l2(2,:) = dimag(QR_orthonormal_basis(:,l2))
1920 
1921 
1922 ! Note that G_{lJ} = < l | Vc^{1/2}.(gamma_J^*.phi_v)>
1923 !                  = < gamma_J | (Vc^{1/2}.l^*).phi_v >
1924 
1925 z(:) = cg_zdotc(npw_k,psi_gamma_l2, psik_wrk)
1926 call xmpi_sum(z,mpi_communicator,ierr) ! sum on all processors working on FFT!
1927 sternheimer_G(l1,l2) = cmplx_1*z(1)+cmplx_i*z(2)
1928 
1929 end do ! l1
1930 end do ! l2
1931 
1932 
1933 
1934 if ( write_debug ) then
1935   write(io_unit,19)   '<gamma| gamma>', sqrt(sum(abs(check_matrix(:,:))**2))
1936   write(io_unit,19)   '  M hermitian ',sqrt(sum(abs(check_matrix2(:,:))**2))
1937   write(io_unit,10)   " "
1938   write(io_unit,10)   "# GAMMA Matrix:"
1939   write(io_unit,10)   " "
1940 
1941   do l1 = 1, lmax
1942   write(io_unit,30) sternheimer_G(l1,:)
1943   end do
1944   flush(io_unit)
1945 end if
1946 
1947 !--------------------------------------------------------------------------
1948 ! Step 7: Compute the solutions
1949 !--------------------------------------------------------------------------
1950 
1951 write(io_unit_log,10) '   - Compute the Projected Sternheimer solutions and build approximate dielectric operator...'
1952 flush(io_unit_log)
1953 
1954 
1955 
1956 ABI_MALLOC(sternheimer_A ,(lsolutions,lsolutions))
1957 ABI_MALLOC(sternheimer_X       ,(lsolutions,lmax))
1958 do iw = 2, npt_gauss + 1
1959 
1960 write(io_unit_log,23) '        -- iw = ',iw,' / ',npt_gauss+1
1961 flush(io_unit_log)
1962 
1963 
1964 omega = list_omega(iw)
1965 
1966 sternheimer_A(:,:) = sternheimer_A0(:,:) 
1967 
1968 do l = 1, lsolutions
1969 sternheimer_A(l,l) = sternheimer_A(l,l) + omega**2
1970 end do
1971 
1972 sternheimer_X(:,:) = sternheimer_B(:,:) 
1973 
1974 !--------------------------------------------------------------------------
1975 ! Step 2:  solve A*X = B, a projected form of the Sternheimer equation
1976 !--------------------------------------------------------------------------
1977 write(io_unit_log,10) '        -- Solve A*X = B'
1978 flush(io_unit_log)
1979 
1980 
1981 
1982 call cpu_time(time1)
1983 call zgesv(lsolutions,      & ! number of rows of A matrix
1984 lmax,            & ! number of columns of B matrix
1985 sternheimer_A,   & ! The A matrix on input, the LU factorization on output
1986 lsolutions,      & ! leading dimension of A
1987 ipiv,            & ! array of pivots
1988 sternheimer_X,   & ! B matrix on input, solution X on output
1989 lsolutions,      & ! leading dimension of B
1990 info )
1991 call cpu_time(time2)
1992 
1993 time_exact = time_exact + time2-time1
1994 
1995 !--------------------------------------------------------------------------
1996 ! Step 3: Add contribution to projected epsilon
1997 !--------------------------------------------------------------------------
1998 ! perform E = E  -4 sum_l Gamma_v*X^*_v
1999 
2000 write(io_unit_log,10) '        -- add contribution to dielectric matrix at this frequency'
2001 flush(io_unit_log)
2002 
2003 
2004 
2005 ABI_MALLOC(dummy_tmp_1,(lsolutions,lmax))
2006 dummy_tmp_1(:,:) = conjg(sternheimer_X) ! DO THIS to avoid potential stack problems
2007 
2008 call cpu_time(time1)
2009 call zgemm(     'N',    & ! A matrix is in normal order
2010 'N',    & ! B matrix is in normal order
2011 lmax,    & ! number of rows of A
2012 lmax,    & ! number of columns of B
2013 lsolutions,    & ! number of columns of A
2014 -4.0_dp*cmplx_1,    & ! premultiply A*B by this scalar
2015 sternheimer_G,    & ! GAMMA matrix
2016 lmax,    & ! leading dimension of A
2017 dummy_tmp_1,    & ! B matrix
2018 lsolutions,    & ! leading dimension of B
2019 cmplx_1,    & ! beta  is one
2020 projected_dielectric_Lanczos_basis(:,:,iw),    & ! C matrix
2021 lmax)      ! leading dimension of C
2022 
2023 ABI_FREE(dummy_tmp_1)
2024 
2025 call cpu_time(time2)
2026 time_exact = time_exact + time2-time1
2027 
2028 end do ! iw
2029 
2030 write(io_unit_log,10) '   - Deallocate tmp arrays...'
2031 flush(io_unit_log)
2032 
2033 
2034 
2035 
2036 ABI_FREE(ipiv           )
2037 ABI_FREE(sternheimer_A  )
2038 ABI_FREE(sternheimer_A0 )
2039 ABI_FREE(sternheimer_B  )
2040 ABI_FREE(sternheimer_X  )
2041 ABI_FREE(sternheimer_G  )
2042 
2043 end do ! v
2044 
2045 !--------------------------------------------------------------------------
2046 ! Finalize, post v loop
2047 !--------------------------------------------------------------------------
2048 write(io_unit_log,10) " - Finalize, after band iterations...."
2049 flush(io_unit_log)
2050 
2051 
2052 
2053 
2054 
2055 timing_string = "#        Exact Sector :   "
2056 call write_timing_log(timing_string,time_exact)
2057 
2058 
2059 ABI_MALLOC(dummy_tmp_1,(lmax,lmax))
2060 ABI_MALLOC(dummy_tmp_2,(lmax,lmax))
2061 
2062 do iw = 2, npt_gauss+1
2063 ! finally, make sure matrix is hermitian
2064 
2065 ! play this little game to avoid STACK problems
2066 dummy_tmp_1(:,:) = 0.5_dp*transpose(projected_dielectric_Lanczos_basis(:,:,iw))
2067 dummy_tmp_2(:,:) = conjg(dummy_tmp_1(:,:))
2068 dummy_tmp_1(:,:) = dummy_tmp_2(:,:) +0.5_dp*projected_dielectric_Lanczos_basis(:,:,iw)
2069 
2070 projected_dielectric_Lanczos_basis(:,:,iw) =    dummy_tmp_1(:,:)
2071 end do
2072 
2073 ABI_FREE(dummy_tmp_1)
2074 ABI_FREE(dummy_tmp_2)
2075 
2076 
2077 
2078 if ( write_debug ) then
2079   !write some results to a file
2080 
2081   write(io_unit,10) '#===================================================================================================='
2082   write(io_unit,10) "#                     Projected dielectric matrices                                                  "
2083   write(io_unit,10) "#                     -------------------------------------------------------                        "
2084   write(io_unit,10) "# This file contains the various projected dielectric matrices as a function of frequency.           "
2085   write(io_unit,10) '#===================================================================================================='
2086   write(io_unit,10) ''
2087   flush(io_unit)
2088 
2089   do iw = 1, npt_gauss+1
2090   write(io_unit,10) "#"
2091   write(io_unit,12) "# omega = ",list_omega(iw), " i Ha"
2092   write(io_unit,10) "#"
2093 
2094   do l =1, lmax
2095   write(io_unit,30)  projected_dielectric_Lanczos_basis(l,:,iw)
2096   end do 
2097   end do 
2098 
2099 end if
2100 
2101 
2102 if ( model ) then
2103 
2104 
2105   ! Add all components on the processors
2106   call xmpi_sum(model_dielectric_Lanczos_basis,mpi_communicator,ierr) ! sum on all processors
2107 
2108   ! add the identity
2109   do iw = 1, npt_gauss+1
2110   do l= 1, lmax
2111   model_dielectric_Lanczos_basis(l,l,iw) = model_dielectric_Lanczos_basis(l,l,iw) + cmplx_1
2112   end do
2113   end do 
2114 
2115   ! hermitian the operator
2116   do iw = 1, npt_gauss+1
2117 
2118   do l1 = 1, lmax
2119   do l2 = 1, l1
2120   ! operator is hermitian
2121   model_dielectric_Lanczos_basis(l2,l1,iw) = conjg(model_dielectric_Lanczos_basis(l1,l2,iw))
2122   end do
2123   end do
2124 
2125   end do ! iw
2126 
2127 end if
2128 
2129 
2130 
2131 if ( write_debug ) then
2132   close(io_unit)
2133 end if
2134 
2135 write(io_unit_log,10) " - Deallocate and exit...."
2136 flush(io_unit_log)
2137 
2138 
2139 
2140 if (debug) then
2141   ABI_FREE(check_matrix)
2142   ABI_FREE(check_matrix2)
2143 end if
2144 
2145 
2146 ABI_FREE(psikg_valence)
2147 ABI_FREE(psir_valence)
2148 ABI_FREE(psikg_VL)
2149 
2150 
2151 ABI_FREE(YL)
2152 
2153 ABI_FREE(local_Lbasis_conjugated)
2154 ABI_FREE(local_Lbasis)
2155 
2156 
2157 ABI_FREE(svd_matrix)
2158 ABI_FREE(svd_values)
2159 ABI_FREE(psi_gamma_l1)
2160 ABI_FREE(psi_gamma_l2)
2161 
2162 ABI_FREE(psikg_in)
2163 ABI_FREE(psikg_out)
2164 
2165 ABI_FREE(psi_rhs)
2166 
2167 ABI_FREE(c_sternheimer_solutions)
2168 ABI_FREE(QR_orthonormal_basis)
2169 
2170 ABI_FREE(psik_wrk)
2171 ABI_FREE(psikb_wrk)
2172 ABI_FREE(psikg_wrk)
2173 
2174 
2175 close(io_unit_log)
2176 
2177 
2178 10 format(A)
2179 12 format(A,F12.8,A)
2180 14 format(I5,10X,ES24.12)
2181 15 format(I5,10X,ES24.12,10X,A)
2182 
2183 19 format(20X,A,15X,E24.16)
2184 
2185 20 format(A,I5)
2186 21 format(I5,10X,F8.4,15X,ES24.12)
2187 22 format(A,I5,A)
2188 23 format(A,I5,A,I5)
2189 30 format(2X,1000(ES12.4,2X,ES12.4,5X))
2190 
2191 end subroutine ProjectedSternheimerEpsilon