!///////////////////////////////////////////////////////////////////////
!
!      Author:          M. Shiga
!      Last updated:    Jan 23, 2025 by M. Shiga
!      Description:     extensive MPI parallelization
!
!///////////////////////////////////////////////////////////////////////
!***********************************************************************
      subroutine analysis_XMPI ( ioption )
!***********************************************************************
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   iounit_avg, iounit, myrank

      use analysis_variables, only : &
     &   iprint_eavg, iprint_xyz, iprint_cavg, iprint_trj, iprint_dcd

     use XMPI_variables, only : positions_sync, velocities_sync, & 
     &                          forces_sync, charges_sync

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: itest, ioption

!-----------------------------------------------------------------------
!     /*   ioption = 1:  initialize/restart                           */
!-----------------------------------------------------------------------

      if ( ioption .eq. 1 ) then

!-----------------------------------------------------------------------
!        /*   step intervals of analysis                              */
!-----------------------------------------------------------------------

         call read_int1_MPI ( iprint_eavg, '<iprint_eavg>', 13, iounit )
         call read_int1_MPI ( iprint_cavg, '<iprint_cavg>', 13, iounit )
         call read_int1_MPI ( iprint_xyz, '<iprint_xyz>', 12, iounit )
         call read_int1_MPI ( iprint_trj,  '<iprint_trj>',  12, iounit )
         call read_int1_MPI ( iprint_dcd,  '<iprint_dcd>',  12, iounit )

!-----------------------------------------------------------------------
!        /*   check if file called `averages.ini' exists              */
!-----------------------------------------------------------------------

         if ( myrank .eq. 0 ) then
            call testfile ( 'averages.ini', 12, itest )
         end if

!        /*   MPI communication   */
         call my_mpi_bcast_int_0 ( itest )

!-----------------------------------------------------------------------
!        /*   if the file does not exist, initial start.              */
!-----------------------------------------------------------------------

         if ( itest .eq. 1 ) then

             call analysis_eavg_XMPI ( 0 )
             call analysis_xyz_XMPI ( 0 )
             call analysis_cavg_XMPI( 0 )
             call analysis_trj_XMPI  ( 0 )
             call analysis_dcd_XMPI ( 0 )

!-----------------------------------------------------------------------
!        /*   if the file exists, restart.                            */
!-----------------------------------------------------------------------

         else

             open ( iounit_avg, file = 'averages.ini' )

             call analysis_eavg_XMPI ( 1 )
             call analysis_xyz_XMPI ( 1 )
             call analysis_cavg_XMPI( 1 )
             call analysis_trj_XMPI  ( 1 )
             call analysis_dcd_XMPI ( 1 )

             close( iounit_avg )

         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 2:  start analysis                               */
!-----------------------------------------------------------------------

      if ( ioption .eq. 2 ) then
         positions_sync  = .false.
         velocities_sync = .false.
         forces_sync     = .false.
         charges_sync    = .false.

         call analysis_eavg_XMPI ( 2 )
         call analysis_xyz_XMPI ( 2 )
         call analysis_cavg_XMPI( 2 )
         call analysis_trj_XMPI  ( 2 )
         call analysis_dcd_XMPI ( 2 )

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 3:  finalize                                     */
!-----------------------------------------------------------------------

      if ( ioption .eq. 3 ) then

         open ( iounit_avg, file = 'averages.ini' )

         call analysis_eavg_XMPI ( 3 )
         call analysis_xyz_XMPI ( 3 )
         call analysis_cavg_XMPI( 3 )
         call analysis_trj_XMPI  ( 3 )
         call analysis_dcd_XMPI ( 3 )

         close( iounit_avg )

      end if

      return
      end





!***********************************************************************
      subroutine analysis_eavg_XMPI( ioption )
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   boltz, temperature, physmass, x, y, z, ux, uy, uz, fx, fy, fz, &
     &   pot, omega_p2, beta, iounit_eavg, iounit_avg, nbead, natom, &
     &   istep, myrank, myrank_main, mpi_comm_main, nprocs_main

      use analysis_variables, only : &
     &   epot, ekinpri, ekinvir, etot, epot_avg, ekinvir_avg, &
     &   ekinpri_avg, etot_avg, eprivir_avg, specific_heat, &
     &   iprint_eavg

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead, jstart_atom, jend_atom

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none
 
      include 'mpif.h'

      integer :: i, ioption, j, ierr, ndims, dims(1)
      logical :: periods(1), reorder
      integer :: recvtag, sendtag, status(mpi_status_size)
      integer, save :: cart_comm, source_rank, dest_rank, count

      real(8) :: ekinvir1, ekinvir2, ekinpri1, ekinpri2, eprivir

      real(8), save :: dstep = 0.d0

!-----------------------------------------------------------------------
!     /*   return by option                                           */
!-----------------------------------------------------------------------

      if ( iprint_eavg .le. 0 ) return

!-----------------------------------------------------------------------
!     /*   ioption = 0:  initialize                                   */
!-----------------------------------------------------------------------

      if ( ioption .eq. 0 ) then

         epot_avg      = 0.d0
         ekinvir_avg   = 0.d0
         ekinpri_avg   = 0.d0
         etot_avg      = 0.d0

         eprivir_avg = 0.d0

         if ( myrank .eq. 0 ) then

            open ( iounit_eavg, file = 'eavg.out' )

            write(iounit_eavg,'(a)') &
     &      '========' // &
     &      '================================' // &
     &      '================================' // &
     &      '================================' // &
     &      '================================' // &
     &      '================'
            write(iounit_eavg,'(a)') &
     &      '    step' // &
     &      '            epot         ekinvir' // &
     &      '         ekinpri            etot' // &
     &      '        epot_avg     ekinvir_avg' // &
     &      '     ekinpri_avg        etot_avg' // &
     &      '   specific_heat'
            write(iounit_eavg,'(a)') &
     &      '--------' // &
     &      '--------------------------------' // &
     &      '--------------------------------' // &
     &      '--------------------------------' // &
     &      '--------------------------------' // &
     &      '----------------'

            close( iounit_eavg )

         end if

!        /*   Setup MPI communication   */
         ndims = 1
         dims(1) = nprocs_main
         periods(1) = .true.
         reorder = .false.

!        /*   communicator for passing bead coordinates   */
         call MPI_Cart_create( mpi_comm_main, ndims, dims, periods, &
     &                         reorder, cart_comm, ierr)

!        /*   communicator for passing bead coordinates   */
         call MPI_Cart_shift( cart_comm, 0, -1, source_rank, &
     &                        dest_rank, ierr)

         count = jend_atom - jstart_atom + 1

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 1:  restart                                      */
!-----------------------------------------------------------------------

      if ( ioption .eq. 1 ) then

         if ( myrank .eq. 0 ) then
            read ( iounit_avg, * ) dstep
            read ( iounit_avg, * ) epot_avg
            read ( iounit_avg, * ) ekinvir_avg
            read ( iounit_avg, * ) ekinpri_avg
            read ( iounit_avg, * ) etot_avg
            read ( iounit_avg, * ) eprivir_avg
         end if

         call my_mpi_bcast_real_0 ( dstep )
         call my_mpi_bcast_real_0 ( epot_avg )
         call my_mpi_bcast_real_0 ( ekinvir_avg )
         call my_mpi_bcast_real_0 ( ekinpri_avg )
         call my_mpi_bcast_real_0 ( etot_avg )
         call my_mpi_bcast_real_0 ( eprivir_avg )

!        /*   Setup MPI communication   */
         ndims = 1
         dims(1) = nprocs_main
         periods(1) = .true.
         reorder = .false.

!        /*   communicator for passing bead coordinates   */
         call MPI_Cart_create( &
     &      mpi_comm_main, ndims, dims, periods, reorder, cart_comm, &
     &      ierr )

!        /*   communicator for passing bead coordinates   */
         call MPI_Cart_shift( &
     &      cart_comm, 0, -1, source_rank, dest_rank, ierr )

         count = jend_atom - jstart_atom + 1

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 2:  calculate and print out data                 */
!-----------------------------------------------------------------------

      if ( ioption .eq. 2 ) then

         dstep = dstep + 1.d0

!-----------------------------------------------------------------------
!        /*   potential energy                                        */
!-----------------------------------------------------------------------

         epot = 0.d0

         do j = jstart_bead, jend_bead
            if ( jstart_atom .eq. 1 ) epot = epot + pot(j)
         end do

         call my_mpi_allreduce_real_0 ( epot )
!         call my_mpi_allreduce_real_0_sub ( epot )
!         call my_mpi_allreduce_real_0_main ( epot )

         epot = epot/dble(nbead)

!-----------------------------------------------------------------------
!        /*   kinetic energy by virial estimator                      */
!-----------------------------------------------------------------------

!        /*   first term   */

         ekinvir1 = 1.5d0*dble(natom)*boltz*temperature

!        /*   second term   */

         ekinvir2 = 0.d0

!        /*   The centroids ux(i,1), uy(i,1), uz(i,1) are             */
!        /*   communicated elsewhere, so we do not do anything here   */
!        /*   for communication                                       */

         do j = jstart_bead, jend_bead
         do i = jstart_atom, jend_atom
            ekinvir2 = ekinvir2 + (x(i,j)-ux(i,1))*fx(i,j) &
     &                          + (y(i,j)-uy(i,1))*fy(i,j) &
     &                          + (z(i,j)-uz(i,1))*fz(i,j)
         end do
         end do

         call my_mpi_allreduce_real_0 ( ekinvir2 )

         ekinvir2 = - 0.5d0*ekinvir2

!        /*   the sum of kinetic energy   */

         ekinvir = ekinvir1 + ekinvir2

!-----------------------------------------------------------------------
!        /*   kinetic energy by primitive estimator                   */
!-----------------------------------------------------------------------

!        /*   first term   */

         ekinpri1 = 1.5d0*dble(natom)*dble(nbead)*boltz*temperature

!        /*   second term   */

!        /*   Communicate coords only between main_rank and           */
!        /*   main_rank-1. The tags are just to ensure that the       */
!        /*   messages are sent in the right order.                   */

         sendtag = myrank_main
         recvtag = myrank_main + 1

!        /*   Handle the last rank special as it needs to receive     */
!        /*   that first bead                                         */

         if ( myrank_main .eq. nprocs_main-1 ) then

            recvtag = 0

            call MPI_Sendrecv( &
     &         x(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         x(jstart_atom, 1), count, MPI_DOUBLE_PRECISION, &
     &         source_rank, recvtag, cart_comm, status, ierr )

            call MPI_Sendrecv( &
     &         y(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         y(jstart_atom, 1), count, MPI_DOUBLE_PRECISION, &
     &         source_rank, recvtag, cart_comm, status, ierr )

            call MPI_Sendrecv( &
     &         z(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         z(jstart_atom, 1), count, MPI_DOUBLE_PRECISION, &
     &         source_rank, recvtag, cart_comm, status, ierr )

         else

            call MPI_Sendrecv( &
     &         x(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         x(jstart_atom, jend_bead+1), count, &
     &         MPI_DOUBLE_PRECISION, source_rank, recvtag, &
     &         cart_comm, status, ierr )

            call MPI_Sendrecv( &
     &         y(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         y(jstart_atom, jend_bead+1), count, &
     &         MPI_DOUBLE_PRECISION, source_rank, recvtag, &
     &         cart_comm, status, ierr )

            call MPI_Sendrecv( &
     &         z(jstart_atom, jstart_bead), count, &
     &         MPI_DOUBLE_PRECISION, dest_rank, sendtag, &
     &         z(jstart_atom, jend_bead+1), count, &
     &         MPI_DOUBLE_PRECISION, source_rank, recvtag, &
               cart_comm, status, ierr )

         end if

         ekinpri2 = 0.d0

         do j = jstart_bead, jend_bead
         do i = jstart_atom, jend_atom

            if ( j .eq. nbead ) then

               ekinpri2 = ekinpri2 &
     &         + physmass(i)*(x(i,nbead)-x(i,1))*(x(i,nbead)-x(i,1)) &
     &         + physmass(i)*(y(i,nbead)-y(i,1))*(y(i,nbead)-y(i,1)) &
     &         + physmass(i)*(z(i,nbead)-z(i,1))*(z(i,nbead)-z(i,1))

            else

               ekinpri2 = ekinpri2 &
     &         + physmass(i)*(x(i,j)-x(i,j+1))*(x(i,j)-x(i,j+1)) &
     &         + physmass(i)*(y(i,j)-y(i,j+1))*(y(i,j)-y(i,j+1)) &
     &         + physmass(i)*(z(i,j)-z(i,j+1))*(z(i,j)-z(i,j+1))

            end if

         end do
         end do

         call my_mpi_allreduce_real_0 ( ekinpri2 )
!         call my_mpi_allreduce_real_0_sub ( ekinpri2 )
!         call my_mpi_allreduce_real_0_main ( ekinpri2 )

         ekinpri2 = - 0.5d0*omega_p2*ekinpri2

!        /*   the sum of kinetic energy   */

         ekinpri = ekinpri1 + ekinpri2

!-----------------------------------------------------------------------
!        /*   total energy                                            */
!-----------------------------------------------------------------------

         etot = ekinvir + epot

!-----------------------------------------------------------------------
!        /*   accumulative averages                                   */
!-----------------------------------------------------------------------

         epot_avg    = epot   /dstep + epot_avg   *(dstep-1.d0)/dstep
         ekinvir_avg = ekinvir/dstep + ekinvir_avg*(dstep-1.d0)/dstep
         ekinpri_avg = ekinpri/dstep + ekinpri_avg*(dstep-1.d0)/dstep
         etot_avg    = etot   /dstep + etot_avg   *(dstep-1.d0)/dstep

!-----------------------------------------------------------------------
!        /*   specific heat                                           */
!-----------------------------------------------------------------------

         eprivir = ( ekinpri + epot ) * ( ekinvir + epot )

         eprivir_avg = eprivir / dstep &
     &                  + eprivir_avg * (dstep-1.d0)/dstep

         eprivir = ( ekinpri_avg + epot_avg ) &
     &           * ( ekinvir_avg + epot_avg )

         specific_heat &
     &       = beta * beta * ( eprivir_avg -  eprivir ) &
     &       + 1.5d0 * dble(natom)

!-----------------------------------------------------------------------
!        /*   output                                                  */
!-----------------------------------------------------------------------

         if ( mod(istep,iprint_eavg) .eq. 0 ) then

            if ( myrank .eq. 0 ) then

               open ( iounit_eavg, file='eavg.out', access='append' )

               write( iounit_eavg, '(i8,8f16.8,f16.4)' ) &
     &            istep, epot, ekinvir, ekinpri, etot, &
     &            epot_avg, ekinvir_avg, ekinpri_avg, etot_avg, &
     &            specific_heat

               close( iounit_eavg )

            end if

         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 3:  finalize                                     */
!-----------------------------------------------------------------------

      if ( ioption .eq. 3 ) then

         if ( myrank .eq. 0 ) then

            write( iounit_avg, '(e24.16)' ) dstep
            write( iounit_avg, '(e24.16)' ) epot_avg
            write( iounit_avg, '(e24.16)' ) ekinvir_avg
            write( iounit_avg, '(e24.16)' ) ekinpri_avg
            write( iounit_avg, '(e24.16)' ) etot_avg
            write( iounit_avg, '(e24.16)' ) eprivir_avg

         end if

      end if

      return
      end





!***********************************************************************
      subroutine analysis_xyz_XMPI ( ioption )
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, ux, uy, uz, pot, au_length, iounit_xyz, natom, &
     &   iounit, istep, species, nbead, ikind, nkind, mbox, myrank, &
     &   myrank_main, mpi_comm_sub, nprocs, nprocs_main, nprocs_sub

      use XMPI_variables, only : jstart_atom, jstart_bead, jend_atom, &
      &   jend_bead, istart_atom, istart_bead, iend_atom, iend_bead

      use analysis_variables, only : &
     &   iprint_xyz, iformat_xyz, ikind_xyz

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

!     /*   initialize   */
      implicit none
      
      include 'mpif.h'

!     /*   integer   */
      integer :: i, j, l, ioption, m1, m2, m3, ierr, i_loc, j_loc

!     /*   real   */
      real(8) :: xa, ya, za, xb, yb, zb

!     /*   real   */
      real(8), parameter :: bohr2ang = au_length/1.d-10

!     /*   integer   */
      integer, save :: natom_xyz
      integer, dimension(:), allocatable, save :: transfer_size, disps

!     /*   logical   */
      logical :: file_opened

!     /*   Local storage for coordinates  */
      real(8), dimension(:,:,:), allocatable, save :: transfer_xyz, &
     &                                                gather_xyz

!-----------------------------------------------------------------------
!     /*   return by option                                           */
!-----------------------------------------------------------------------

      if ( iprint_xyz .le. 0 ) return

!-----------------------------------------------------------------------
!     /*   ioption = 0:  initialize                                   */
!-----------------------------------------------------------------------

      if ( ioption .eq. 0 ) then

         ierr = 0

!-----------------------------------------------------------------------
!        /*   This array is needed to gather the local coordinates.   */
!        /*   iformat_xyz = 3, means that only the centroid is        */
!        /*   needed.                                                 */
!-----------------------------------------------------------------------

         if ( iformat_xyz .eq. 3 ) then

!           /*   memory allocation   */
            if ( .not. allocated(transfer_size) ) &
     &         allocate( transfer_size(nprocs_sub) )

            if ( .not. allocated(disps) ) &
     &         allocate( disps(nprocs_sub) )

            do j = 1, nprocs_sub
               transfer_size(j) &
     &            = 3 * (iend_atom(j) - istart_atom(j) + 1)
            end do

            disps(1) = 0
            do i = 2, nprocs_sub
               disps(i) = disps(i-1) + transfer_size(i-1)
            end do

         else

!           /*   memory allocation   */
            if ( .not. allocated(transfer_size) ) &
     &         allocate( transfer_size(nprocs) )

            if ( .not. allocated(disps) ) &
     &         allocate( disps(nprocs) )

            l = 0
            do i = 1, nprocs_main
            do j = 1, nprocs_sub
               l = l + 1
               transfer_size(l) &
     &            = 3 * (iend_atom(j) - istart_atom(j) + 1) &
     &                * (iend_bead(i) - istart_bead(i) + 1)
            end do
            end do

            disps(1) = 0
            do i = 2, nprocs
               disps(i) = disps(i-1) + transfer_size(i-1)
            end do

         end if

!-----------------------------------------------------------------------
!        /*   This array is needed to gather the local coordinates.   */
!        /*   iformat_xyz = 3, means that only the centroid is        */
!        /*   needed.                                                 */
!-----------------------------------------------------------------------

         if ( iformat_xyz .eq. 3 ) then
            if ( .not. allocated(transfer_xyz) ) allocate( &
     &         transfer_xyz(3, jend_atom-jstart_atom+1, 1 ) )
         else
            if ( .not. allocated(transfer_xyz) ) allocate( &
     &         transfer_xyz(3, jend_atom-jstart_atom+1, &
     &                         jend_bead-jstart_bead+1 ) )
         end if

         if ( myrank .eq. 0 ) then

!-----------------------------------------------------------------------
!           /*   Allocated only on rank 0 array to store output       */
!           /*   iformat_xyz = 3, means that only the centroid is     */
!           /*   needed.                                              */
!-----------------------------------------------------------------------

            if ( iformat_xyz .eq. 3 ) then
               if ( .not. allocated(gather_xyz) ) &
     &            allocate( gather_xyz(3, natom, 1) )
            else
               if ( .not. allocated(gather_xyz) ) &
     &            allocate( gather_xyz(3, natom, nbead) )
            endif

            inquire( unit = iounit_xyz, opened = file_opened )

            if ( file_opened ) then
               close( iounit_xyz )
               open ( iounit_xyz, file = 'trj.xyz', access = 'append' )
            else
               open ( iounit_xyz, file = 'trj.xyz' )
            end if

         end if

         call my_mpi_bcast_int_0( ierr )

         call error_handling_MPI &
     &      ( ierr, 'subroutine analysis_xyz_MPI', 27 )

         call read_int1_MPI( iformat_xyz, '<iformat_xyz>', 13, iounit )

         call read_intn_MPI( ikind_xyz, 2, '<ikind_xyz>', 11, iounit )

         if ( ikind_xyz(2) .le. 0 ) ikind_xyz(2) = nkind

         l = 0
         do i = 1, natom
            if ( ikind(i) .lt. ikind_xyz(1) ) cycle
            if ( ikind(i) .gt. ikind_xyz(2) ) cycle
            l = l + 1
         end do

         natom_xyz = l

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 1:  restart                                      */
!-----------------------------------------------------------------------

      if ( ioption .eq. 1 ) then

         ierr = 0

!-----------------------------------------------------------------------
!        /*   This array is needed to gather the local coordinates.   */
!        /*   iformat_xyz = 3, means that only the centroid is        */
!        /*   needed.                                                 */
!-----------------------------------------------------------------------

         if ( iformat_xyz .eq. 3 ) then

!           /*   memory allocation   */
            if ( .not. allocated(transfer_size) ) &
     &         allocate( transfer_size(nprocs_sub) )

            if ( .not. allocated(disps) ) &
     &         allocate( disps(nprocs_sub) )

            do j = 1, nprocs_sub
               transfer_size(j) &
     &            = 3 * (iend_atom(j) - istart_atom(j) + 1)
            end do

            disps(1) = 0
            do i = 2, nprocs_sub
               disps(i) = disps(i-1) + transfer_size(i-1)
            end do

         else

!           /*   memory allocation   */
            if ( .not. allocated(transfer_size) ) &
     &         allocate( transfer_size(nprocs) )

            if ( .not. allocated(disps) ) &
     &         allocate( disps(nprocs) )

            l = 0
            do i = 1, nprocs_main
            do j = 1, nprocs_sub
               l = l + 1
               transfer_size(l) &
     &            = 3 * (iend_atom(j) - istart_atom(j) + 1) &
     &                * (iend_bead(i) - istart_bead(i) + 1)
            end do
            end do

            disps(1) = 0
            do i = 2, nprocs
               disps(i) = disps(i-1) + transfer_size(i-1)
            end do

         end if

!-----------------------------------------------------------------------
!        /*   This array is needed to gather the local coordinates.   */
!        /*   iformat_xyz = 3, means that only the centroid is        */
!        /*   needed.                                                 */
!-----------------------------------------------------------------------

         if ( iformat_xyz .eq. 3 ) then
            if ( .not. allocated(transfer_xyz) ) allocate( &
     &         transfer_xyz(3, jend_atom-jstart_atom+1, 1 ) )
         else
            if ( .not. allocated(transfer_xyz) ) allocate( &
     &         transfer_xyz(3, jend_atom-jstart_atom+1, &
     &                         jend_bead-jstart_bead+1 ) )
         end if

         if ( myrank .eq. 0 ) then

!-----------------------------------------------------------------------
!           /*   Allocated only on rank 0 array to store output       */
!           /*   iformat_xyz = 3, means that only the centroid is     */
!           /*   needed.                                              */
!-----------------------------------------------------------------------

            if ( iformat_xyz .eq. 3 ) then
               if ( .not. allocated(gather_xyz) ) &
     &            allocate( gather_xyz(3, natom, 1) )
            else
               if ( .not. allocated(gather_xyz) ) &
     &            allocate( gather_xyz(3, natom, nbead) )
            end if

            inquire( unit = iounit_xyz, opened = file_opened )

            if ( file_opened ) then
               close( iounit_xyz )
               open ( iounit_xyz, file = 'trj.xyz', access = 'append' )
            else
               open ( iounit_xyz, file = 'trj.xyz', access = 'append' )
            end if
 
         end if

         call my_mpi_bcast_int_0( ierr )

         call error_handling_MPI &
     &      ( ierr, 'subroutine analysis_xyz_MPI', 27 )

         call read_int1_MPI( iformat_xyz, '<iformat_xyz>', 13, iounit )

         call read_intn_MPI( ikind_xyz, 2, '<ikind_xyz>', 11, iounit )

         if ( ikind_xyz(2) .le. 0 ) ikind_xyz(2) = nkind

         l = 0
         do i = 1, natom
            if ( ikind(i) .lt. ikind_xyz(1) ) cycle
            if ( ikind(i) .gt. ikind_xyz(2) ) cycle
            l = l + 1
         end do

         natom_xyz = l

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 2:  calculate and print out data                 */
!-----------------------------------------------------------------------

      if ( ioption .eq. 2 ) then

!-----------------------------------------------------------------------
!        /*   print trajectory                                        */
!-----------------------------------------------------------------------

         if ( mod(istep,iprint_xyz) .eq. 0 ) then

!-----------------------------------------------------------------------
!           /*   for centroid: myrank_main=0 passes to myrank=0.      */
!-----------------------------------------------------------------------

            if ( iformat_xyz .eq. 3 ) then

               if( myrank_main .eq. 0) then

                  do i = jstart_atom, jend_atom

                     i_loc = i - jstart_atom + 1

                     transfer_xyz(1, i_loc, 1) = ux(i,1)
                     transfer_xyz(2, i_loc, 1) = uy(i,1)
                     transfer_xyz(3, i_loc, 1) = uz(i,1)

                 end do

                 call mpi_gatherv( &
     &              transfer_xyz, transfer_size(myrank+1), &
     &              mpi_double_precision, gather_xyz, &
     &              transfer_size, disps, mpi_double_precision, &
     &              0, mpi_comm_sub, ierr)

               end if

!-----------------------------------------------------------------------
!           /*   otherwise: all ranks passes to myrank=0.             */
!-----------------------------------------------------------------------

            else

               do j = jstart_bead, jend_bead

                  j_loc = j - jstart_bead + 1

                  do i = jstart_atom, jend_atom

                     i_loc = i - jstart_atom + 1

                     transfer_xyz(1, i_loc, j_loc) = x(i,j)
                     transfer_xyz(2, i_loc, j_loc) = y(i,j)
                     transfer_xyz(3, i_loc, j_loc) = z(i,j)

                  end do

               end do

               call mpi_gatherv( &
     &            transfer_xyz, transfer_size(myrank+1), &
     &            mpi_double_precision, gather_xyz, &
     &            transfer_size, disps, &
     &            mpi_double_precision, 0, mpi_comm_world, ierr )

            end if

!-----------------------------------------------------------------------
!           /*   now output all at myrank=0.                          */
!-----------------------------------------------------------------------

            if ( myrank .eq. 0 ) then

               if ( iformat_xyz .eq. 1 ) then

!                 ===  each bead independently  ===

                  do j = 1, nbead

                     write( iounit_xyz, '(i8)' ) natom_xyz
                     write( iounit_xyz, '(2i8,f16.8)' ) istep, j, pot(j)

                     do i = 1, natom

                        if ( ikind(i) .lt. ikind_xyz(1) ) cycle
                        if ( ikind(i) .gt. ikind_xyz(2) ) cycle

                        xa = gather_xyz(1,i,j)
                        ya = gather_xyz(2,i,j)
                        za = gather_xyz(3,i,j)

                        xb = xa
                        yb = ya
                        zb = za

                        m1 = mbox(1,i,1)   ! mbox(1,i,j)
                        m2 = mbox(2,i,1)   ! mbox(2,i,j)
                        m3 = mbox(3,i,1)   ! mbox(3,i,j)

                        call pbc_unfold_MPI( xb, yb, zb, m1, m2, m3 )

                        xa = xa * bohr2ang
                        ya = ya * bohr2ang
                        za = za * bohr2ang

                        xb = xb * bohr2ang
                        yb = yb * bohr2ang
                        zb = zb * bohr2ang

                        write(iounit_xyz, '(a4,6f16.8)' ) &
     &                     species(i)(1:4), xa, ya, za, xb, yb, zb

                     end do

                  end do

               else if ( iformat_xyz .eq. 2 ) then

!                 ===  all beads into one  ===

                  write( iounit_xyz, '(i8)' ) natom_xyz*nbead
                  write( iounit_xyz, '(i8)' ) istep

                  do j = 1, nbead
                  do i = 1, natom

                     if ( ikind(i) .lt. ikind_xyz(1) ) cycle
                     if ( ikind(i) .gt. ikind_xyz(2) ) cycle

                     xa = gather_xyz(1,i,j)
                     ya = gather_xyz(2,i,j)
                     za = gather_xyz(3,i,j)

                     xb = xa
                     yb = ya
                     zb = za

                     m1 = mbox(1,i,1)   ! mbox(1,i,j)
                     m2 = mbox(2,i,1)   ! mbox(2,i,j)
                     m3 = mbox(3,i,1)   ! mbox(3,i,j)

                     call pbc_unfold_MPI( xb, yb, zb, m1, m2, m3 )

                     xa = xa * bohr2ang
                     ya = ya * bohr2ang
                     za = za * bohr2ang

                     xb = xb * bohr2ang
                     yb = yb * bohr2ang
                     zb = zb * bohr2ang

                     write(iounit_xyz, '(a4,6f16.8)' ) &
     &                  species(i)(1:4), xa, ya, za, xb, yb, zb

                  end do
                  end do

               else if ( iformat_xyz .eq. 3 ) then

!                 ===  the centroid  ===

                  write( iounit_xyz, '(i8)' ) natom_xyz
                  write( iounit_xyz, '(i8)' ) istep

                  do i = 1, natom

                     if ( ikind(i) .lt. ikind_xyz(1) ) cycle
                     if ( ikind(i) .gt. ikind_xyz(2) ) cycle

                     xa = gather_xyz(1,i,1)
                     ya = gather_xyz(2,i,1)
                     za = gather_xyz(3,i,1)

                     xb = xa / bohr2ang
                     yb = ya / bohr2ang
                     zb = za / bohr2ang

                     xb = xa
                     yb = ya
                     zb = za

                     m1 = mbox(1,i,1)
                     m2 = mbox(2,i,1)
                     m3 = mbox(3,i,1)

                     call pbc_unfold_MPI( xb, yb, zb, m1, m2, m3 )

                     xa = xa * bohr2ang
                     ya = ya * bohr2ang
                     za = za * bohr2ang

                     xb = xb * bohr2ang
                     yb = yb * bohr2ang
                     zb = zb * bohr2ang

                     write(iounit_xyz, '(a4,6f16.8)' ) &
     &                  species(i)(1:4), xa, ya, za, xb, yb, zb

                     end do

                  end if

               flush( iounit_xyz )

            end if

         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 3:  finalize                                     */
!-----------------------------------------------------------------------

      if ( ioption .eq. 3 ) then

         continue

      end if

      return
      end





!***********************************************************************
      subroutine analysis_cavg_XMPI( ioption )
!***********************************************************************
!-----------------------------------------------------------------------
!     //   shared variables
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   boltz, beta, fdiff, x, y, z, fx, fy, fz, ux, uy, uz, &
     &   potential, natom, nbead, iounit, iounit_avg, istep, &
     &   myrank, myrank_main, myrank_sub

      use XMPI_variables, only : &
     &   jstart_bead, jstart_atom, jend_bead, jend_atom, nbead_paral, &
     &   natom_paral

      use analysis_variables, only : &
     &   e_avg, c_avg, c1_avg, c2_avg, iprint_cavg

!-----------------------------------------------------------------------
!     //   local variables
!-----------------------------------------------------------------------

      implicit none

      real(8) :: e1, e2, evir, c1, c2, cvir
      real(8) :: dx, dy, dz, d2, dfx, dfy, dfz, prefactor
      integer :: i, j, k, l, ioption
      real(8) :: fxp(natom_paral(myrank_sub+1), nbead_paral(myrank_main+1))
      real(8) :: fyp(natom_paral(myrank_sub+1), nbead_paral(myrank_main+1))
      real(8) :: fzp(natom_paral(myrank_sub+1), nbead_paral(myrank_main+1))
      real(8) :: fxm(natom_paral(myrank_sub+1),nbead_paral(myrank_main+1))
      real(8) :: fym(natom_paral(myrank_sub+1),nbead_paral(myrank_main+1))
      real(8) :: fzm(natom_paral(myrank_sub+1),nbead_paral(myrank_main+1))
      real(8) :: s(nbead)
      real(8), save :: d1step = 0.d0
      real(8), save :: d2step = 0.d0

!-----------------------------------------------------------------------
!     //   return if iprint_cavg is 0 or negative
!-----------------------------------------------------------------------

      if ( iprint_cavg .le. 0 ) return

!-----------------------------------------------------------------------
!     /*   ioption = 0:  initialize                                   */
!-----------------------------------------------------------------------

      if ( ioption .eq. 0 ) then

!        //   initialize heat capacity
         c_avg = 0.d0

!        //   initialize heat capacity: first and second terms
         c1_avg = 0.d0
         c2_avg = 0.d0

!        //   initialize energy
         e_avg = 0.d0

!        //   finite difference parameter
         call read_real1_MPI ( fdiff, '<fdiff>', 7, iounit )

         if ( myrank .eq. 0 ) then
!           //   open file
            open ( iounit, file = 'cavg.out' )

!           //   print header
            write( iounit, '(a)' ) &
     &      '============================================' // &
     &      '===================================='
            write( iounit, '(a)' ) &
     &      '    step       c_avg      c1_avg      c2_avg' // &
     &      '           c          c1          c2'
            write( iounit, '(a)' ) &
     &      '--------------------------------------------' // &
     &      '------------------------------------'

!           //   close file
            close( iounit )
         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 1:  restart                                      */
!-----------------------------------------------------------------------

      if ( ioption .eq. 1 ) then

!        //   read step number, heat capacity, energy
         if ( myrank .eq. 0 ) then
            read ( iounit_avg, * ) d1step
            read ( iounit_avg, * ) d2step
            read ( iounit_avg, * ) e_avg
            read ( iounit_avg, * ) c_avg
            read ( iounit_avg, * ) c1_avg
            read ( iounit_avg, * ) c2_avg
         end if

!        //   communicate
         call my_mpi_bcast_real_0 ( d1step )
         call my_mpi_bcast_real_0 ( d2step )
         call my_mpi_bcast_real_0 ( e_avg )
         call my_mpi_bcast_real_0 ( c_avg )
         call my_mpi_bcast_real_0 ( c1_avg )
         call my_mpi_bcast_real_0 ( c2_avg )

!        //   finite difference parameter
         call read_real1_MPI ( fdiff, '<fdiff>', 7, iounit )

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 2:  calculate and print out data                 */
!-----------------------------------------------------------------------

      if ( ioption .eq. 2 ) then
!        //   update step
         d1step = d1step + 1.d0

!-----------------------------------------------------------------------
!        //   energy by virial estimator
!-----------------------------------------------------------------------

!        //   first term
         e1 = dble(3*natom) / (2.d0*beta)

!        //   second term
         e2 = 0.d0

!        //   loop of beads
         do j = jstart_bead, jend_bead

!!           //   parallel calculation
!            if ( mod( j-1, nprocs ) .ne. myrank ) cycle

!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   virial contribution to kinetic energy
               e2 = e2 - 0.5d0 * dx * fx(i,j)
               e2 = e2 - 0.5d0 * dy * fy(i,j)
               e2 = e2 - 0.5d0 * dz * fz(i,j)

!           //   loop of atoms
            end do

         end do


!        //   communicate
         call my_mpi_allreduce_real_0 ( e2 )

!        //   total energy
         evir = e1 + e2 + potential

!        //   energy average
         e_avg = evir/d1step + e_avg*(d1step-1.d0)/d1step


!-----------------------------------------------------------------------
!        //   heat capacity by double centroid virial estimator
!-----------------------------------------------------------------------

!        //   constant factor
         prefactor = boltz*beta*beta

!        //   first term
         c1 = prefactor * (evir-e_avg) * (evir-e_avg) &
     &      + prefactor * dble(3*natom) / (2.d0*beta*beta) &
     &      + prefactor * ( - 1.5d0 / beta ) * e2

!        //   average of the first term
         c1_avg = c1/d1step + c1_avg*(d1step-1.d0)/d1step
        

!-----------------------------------------------------------------------
!        //   prepare scaling factor to compute second term
!-----------------------------------------------------------------------

!        //   only at iprint_cavg interval
         if ( mod(istep,iprint_cavg) .ne. 0 ) return

!        //   update step
         d2step = d2step + 1.d0

         s(:) = 0.d0

!        //   loop of beads
         do j = jstart_bead, jend_bead

!!           //   parallel calculation
!            if ( mod( j-1, nprocs ) .ne. myrank ) cycle

!           //   mean square deviation of beads from centroid
            d2 = 0.d0

!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   mean square deviation of beads from centroid
               d2 = max( d2, dx, dy, dz )

!           //   loop of atoms
            end do

            call my_mpi_allreduce_max_0_sub( d2 )

!           //   deviation is reset if it is smaller than fdiff
            d2 = max( d2, fdiff )

!           //   scaling factor is bead dependent
            s(j) = fdiff / d2

!        //   loop of beads
         end do

!-----------------------------------------------------------------------
!        //   apply finite difference: negative position
!-----------------------------------------------------------------------

!        //   loop of beads
         do j = jstart_bead, jend_bead

!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   apply shift
               x(i,j) = x(i,j) - s(j)*dx
               y(i,j) = y(i,j) - s(j)*dy
               z(i,j) = z(i,j) - s(j)*dz

!           //   loop of atoms
            end do

!        //   loop of beads
         end do

!        //   calculate force
         call getforce_XMPI

         k = 1
!        //   loop of beads
         do j = jstart_bead, jend_bead

            l = 1
!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   force at negative position
               fxm(l,k) = fx(i,j)
               fym(l,k) = fy(i,j)
               fzm(l,k) = fz(i,j)

               l = l + 1
!           //   loop of atoms
            end do

            k = k + 1
!        //   loop of beads
         end do

!-----------------------------------------------------------------------
!        //   apply finite difference: positive position
!-----------------------------------------------------------------------

!        //   loop of beads
         do j = jstart_bead, jend_bead

!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   apply shift
               x(i,j) = x(i,j) + 2.d0*s(j)*dx
               y(i,j) = y(i,j) + 2.d0*s(j)*dy
               z(i,j) = z(i,j) + 2.d0*s(j)*dz

!           //   loop of atoms
            end do

!        //   loop of beads
         end do

!        //   calculate force
         call getforce_XMPI

         k = 1

!        //   loop of beads
         do j = jstart_bead, jend_bead

            l = 1
!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   force at negative position
               fxp(l,k) = fx(i,j)
               fyp(l,k) = fy(i,j)
               fzp(l,k) = fz(i,j)

               l = l + 1
!           //   loop of atoms
            end do

            k = k + 1
!        //   loop of beads
         end do

!-----------------------------------------------------------------------
!        //   back to original position
!-----------------------------------------------------------------------

!        //   loop of beads
         do j = jstart_bead, jend_bead

!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   apply shift
               x(i,j) = x(i,j) - s(j)*dx
               y(i,j) = y(i,j) - s(j)*dy
               z(i,j) = z(i,j) - s(j)*dz

!           //   loop of atoms
            end do

!        //   loop of beads
         end do

!        //   calculate force: this is to recover the original force
         call getforce_XMPI

!-----------------------------------------------------------------------
!        //    calculate fourth term
!-----------------------------------------------------------------------

!        //   summation
         c2 = 0.d0

         k = 1
!        //   loop of beads
         do j = jstart_bead, jend_bead

!           //   weight of fourth term, should be scaled back
            d2 = prefactor * (-1.d0/(4.d0*beta)) / s(j)

            l = 1
!           //   loop of atoms
            do i = jstart_atom, jend_atom

!              //   deviation of beads from centroid
               dx = x(i,j) - ux(i,1)
               dy = y(i,j) - uy(i,1)
               dz = z(i,j) - uz(i,1)

!              //   difference of forces
               dfx = fxp(l,k) - fxm(l,k)
               dfy = fyp(l,k) - fym(l,k)
               dfz = fzp(l,k) - fzm(l,k)

!              //   force times deviation
               c2 = c2 - 0.5d0 * dfx * dx * d2
               c2 = c2 - 0.5d0 * dfy * dy * d2
               c2 = c2 - 0.5d0 * dfz * dz * d2

               l = l + 1
!           //   loop of atoms
            end do

            k = k + 1
!        //   loop of beads
         end do

!        // Communicate c2
         call my_mpi_allreduce_real_0( c2 )

!-----------------------------------------------------------------------
!        //   final result
!-----------------------------------------------------------------------

!        //   average of the second term
         c2_avg = c2/d2step + c2_avg*(d2step-1.d0)/d2step

!        //   heat capacity
         cvir = c1 + c2

!        //   heat capacity average
         c_avg = c1_avg + c2_avg

!-----------------------------------------------------------------------
!        //   print
!-----------------------------------------------------------------------

!        //   master rank
         if ( myrank .eq. 0 ) then

!           //   open file
            open ( iounit, file = 'cavg.out', access='append' )

!           //   total heat capacity, first term, second term
            write( iounit, '(i8,6f12.4)' ) &
     &         istep, c_avg/boltz, c1_avg/boltz, c2_avg/boltz, &
     &         cvir/boltz, c1/boltz, c2/boltz

!           //   close file
            close( iounit )

!        //   master rank
         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 3:  finalize                                     */
!-----------------------------------------------------------------------

      if ( ioption .eq. 3 ) then

!        //   master rank
         if ( myrank .eq. 0 ) then

!           //   print step, energy, heat capacity for restart
            write( iounit_avg, '(e24.16)' ) d1step
            write( iounit_avg, '(e24.16)' ) d2step
            write( iounit_avg, '(e24.16)' ) e_avg
            write( iounit_avg, '(e24.16)' ) c_avg
            write( iounit_avg, '(e24.16)' ) c1_avg
            write( iounit_avg, '(e24.16)' ) c2_avg

!        //   master rank
         end if

      end if

!-----------------------------------------------------------------------
!     //   end of subroutine
!-----------------------------------------------------------------------

      return
      end

!***********************************************************************
      subroutine analysis_trj_XMPI ( ioption )
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, vx, vy, vz, fx, fy, fz, pot, natom, nbead, &
     &   iounit, iounit_trj, istep, myrank

      use analysis_variables, only : &
     &   iformat_trj, iprint_trj

      use XMPI_variables, only : positions_sync, velocities_sync, & 
     &                           forces_sync

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, ioption, k

      real(8) :: fxn, fyn, fzn

!-----------------------------------------------------------------------
!     /*   return by option                                           */
!-----------------------------------------------------------------------

      if ( iprint_trj .le. 0 ) return

!-----------------------------------------------------------------------
!     /*   ioption = 0:  initialize                                   */
!-----------------------------------------------------------------------

      if ( ioption .eq. 0 ) then

         call read_int1_MPI( iformat_trj, '<iformat_trj>', 13, iounit )

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 1:  restart                                      */
!-----------------------------------------------------------------------

      if ( ioption .eq. 1 ) then

         call read_int1_MPI( iformat_trj, '<iformat_trj>', 13, iounit )

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 2:  calculate and print out data                 */
!-----------------------------------------------------------------------

      if ( ioption .eq. 2 ) then
         if ( mod(istep,iprint_trj) .eq. 0 ) then

!-----------------------------------------------------------------------
!     /*   MPI Communication of trajectory data                       */
!-----------------------------------------------------------------------

!     // Communicate coordinates (main communicator)
            if( .not. positions_sync ) then
               call my_mpi_bcast_xyz_XMPI  ( x, y, z, 4 )
               positions_sync  = .true.
            endif
!     // Communicate forces (sub communicator)

            if( .not. velocities_sync ) then
               call my_mpi_bcast_xyz_XMPI  ( vx, vy, vz, 1 )
               call my_mpi_bcast_xyz_XMPI  ( vx, vy, vz, 4 )
               velocities_sync = .true.
            endif

!     // Communicate velocities (all communication)

            if( .not. forces_sync ) then 
               call my_mpi_bcast_xyz_XMPI  ( fx, fy, fz, 1 )
               call my_mpi_bcast_xyz_XMPI  ( fx, fy, fz, 4 )
               forces_sync     = .true.
            end if
!-----------------------------------------------------------------------
!     /*   print trajectory                                           */
!-----------------------------------------------------------------------

            if ( myrank .eq. 0 ) then

               open ( iounit_trj,  file = 'trj.out', access = 'append' )

               do k = 1, nbead
               do i = 1, natom

                  fxn = fx(i,k)*nbead
                  fyn = fy(i,k)*nbead
                  fzn = fz(i,k)*nbead

                  if ( iformat_trj .eq. 1 ) then

                     write(iounit_trj,'(i8,10e24.16)') &
     &                  istep,  x(i,k),  y(i,k),  z(i,k), &
     &                         vx(i,k), vy(i,k), vz(i,k), &
     &                         fxn, fyn, fzn, pot(k)

                  else if ( iformat_trj .eq. 2 ) then

                     write(iounit_trj,'(i8,10e16.8)') &
     &                  istep,  x(i,k),  y(i,k),  z(i,k), &
     &                         vx(i,k), vy(i,k), vz(i,k), &
     &                         fxn, fyn, fzn, pot(k)

                  else

                     write(iounit_trj,'(i8,10e24.16)') &
     &                  istep,  x(i,k),  y(i,k),  z(i,k), &
     &                         vx(i,k), vy(i,k), vz(i,k), &
     &                         fxn, fyn, fzn, pot(k)

                  end if

               end do
               end do

               close( iounit_trj )

            end if

         end if

      end if

!-----------------------------------------------------------------------
!     /*   ioption = 3:  finalize                                     */
!-----------------------------------------------------------------------

      if ( ioption .eq. 3 ) then

         continue

      end if

      return
      end
