!///////////////////////////////////////////////////////////////////////
!
!      Author:          M. Shiga
!      Last updated:    Jan 23, 2025 by M. Shiga
!      Description:     extensive MPI parallelization
!
!///////////////////////////////////////////////////////////////////////
!***********************************************************************
      subroutine subtract_velocity_cent_XMPI
!***********************************************************************
!=======================================================================
!
!     remove net momentum of the centroid mode
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &    vux, vuy, vuz, fictmass

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead, jstart_atom, jend_atom

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j

      real(8) :: sumvx, sumvy, sumvz, sump

!-----------------------------------------------------------------------
!     /*   start                                                      */
!-----------------------------------------------------------------------

      sumvx = 0.d0
      sumvy = 0.d0
      sumvz = 0.d0
      sump  = 0.d0

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom
            sumvx = sumvx + fictmass(i,j)*vux(i,j)
            sumvy = sumvy + fictmass(i,j)*vuy(i,j)
            sumvz = sumvz + fictmass(i,j)*vuz(i,j)
            sump  = sump  + fictmass(i,j)
         end do

      end do

      call my_mpi_allreduce_real_0( sumvx )
      call my_mpi_allreduce_real_0( sumvy )
      call my_mpi_allreduce_real_0( sumvz )
      call my_mpi_allreduce_real_0( sump )
!      call my_mpi_allreduce_real_0_sub( sumvx )
!      call my_mpi_allreduce_real_0_sub( sumvy )
!      call my_mpi_allreduce_real_0_sub( sumvz )
!      call my_mpi_allreduce_real_0_sub( sump )
!      call my_mpi_allreduce_real_0_main( sumvx )
!      call my_mpi_allreduce_real_0_main( sumvy )
!      call my_mpi_allreduce_real_0_main( sumvz )
!      call my_mpi_allreduce_real_0_main( sump )

      sumvx = sumvx/sump
      sumvy = sumvy/sump
      sumvz = sumvz/sump

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom
            vux(i,j) = vux(i,j) - sumvx
            vuy(i,j) = vuy(i,j) - sumvy
            vuz(i,j) = vuz(i,j) - sumvz
         end do

      end do

!-----------------------------------------------------------------------
!     /*   normal modes to cartesian                                  */
!-----------------------------------------------------------------------

      call nm_trans_velocity_XMPI ( 0 )

      return
      end





!***********************************************************************
      subroutine subtract_rotation_cent_XMPI
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   fictmass, xg, yg, zg, ux, uy, uz, vux, vuy, vuz, natom

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead, jstart_atom, jend_atom

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j

      real(8) :: sumx, sumy, sumz, sump

      real(8)                 :: fm
      real(8), dimension(3,3) :: a, c
      real(8), dimension(3)   :: b, d, e, f

      real(8) :: tiny = 1.d-14

!-----------------------------------------------------------------------
!     /*   center of mass                                             */
!-----------------------------------------------------------------------

      sumx = 0.d0
      sumy = 0.d0
      sumz = 0.d0
      sump = 0.d0

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom

            sumx = sumx + fictmass(i,j)*ux(i,j)
            sumy = sumy + fictmass(i,j)*uy(i,j)
            sumz = sumz + fictmass(i,j)*uz(i,j)
            sump = sump + fictmass(i,j)

         end do

      end do

      call my_mpi_allreduce_real_0( sumx )
      call my_mpi_allreduce_real_0( sumy )
      call my_mpi_allreduce_real_0( sumz )
      call my_mpi_allreduce_real_0( sump )
!      call my_mpi_allreduce_real_0_sub( sumvx )
!      call my_mpi_allreduce_real_0_sub( sumvy )
!      call my_mpi_allreduce_real_0_sub( sumvz )
!      call my_mpi_allreduce_real_0_sub( sump )
!      call my_mpi_allreduce_real_0_main( sumvx )
!      call my_mpi_allreduce_real_0_main( sumvy )
!      call my_mpi_allreduce_real_0_main( sumvz )
!      call my_mpi_allreduce_real_0_main( sump )

      xg(1) = sumx/sump
      yg(1) = sumy/sump
      zg(1) = sumz/sump

!-----------------------------------------------------------------------
!     /*   centroids only                                             */
!-----------------------------------------------------------------------

!     /*   angular momentum   */

      b(:) = 0.d0

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom

               fm   = fictmass(i,1)
            b(1) = b(1) + fm*(uy(i,j)-yg(j))*vuz(i,j) &
     &                  - fm*(uz(i,j)-zg(j))*vuy(i,j)
            b(2) = b(2) + fm*(uz(i,j)-zg(j))*vux(i,j) &
     &                  - fm*(ux(i,j)-xg(j))*vuz(i,j)
            b(3) = b(3) + fm*(ux(i,j)-xg(j))*vuy(i,j) &
     &                  - fm*(uy(i,j)-yg(j))*vux(i,j)
         end do

      end do

      call my_mpi_allreduce_real_1( b, 3 )
!      call my_mpi_allreduce_real_1_sub( b, 3 )
!      call my_mpi_allreduce_real_1_main( b, 3 )

!     /*   moment of inertia   */

      a(:,:) = 0.d0

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom

            fm   = fictmass(i,1)
            a(1,1) = a(1,1) + fm*(uy(i,1)-yg(1))*(uy(i,1)-yg(1)) &
     &                      + fm*(uz(i,1)-zg(1))*(uz(i,1)-zg(1))
            a(1,2) = a(1,2) - fm*(ux(i,1)-xg(1))*(uy(i,1)-yg(1))
            a(1,3) = a(1,3) - fm*(ux(i,1)-xg(1))*(uz(i,1)-zg(1))
            a(2,1) = a(2,1) - fm*(uy(i,1)-yg(1))*(ux(i,1)-xg(1))
            a(2,2) = a(2,2) + fm*(uz(i,1)-zg(1))*(uz(i,1)-zg(1)) &
     &                      + fm*(ux(i,1)-xg(1))*(ux(i,1)-xg(1))
            a(2,3) = a(2,3) - fm*(uy(i,1)-yg(1))*(uz(i,1)-zg(1))
            a(3,1) = a(3,1) - fm*(uz(i,1)-zg(1))*(ux(i,1)-xg(1))
            a(3,2) = a(3,2) - fm*(uz(i,1)-zg(1))*(uy(i,1)-yg(1))
            a(3,3) = a(3,3) + fm*(ux(i,1)-xg(1))*(ux(i,1)-xg(1)) &
     &                      + fm*(uy(i,1)-yg(1))*(uy(i,1)-yg(1))
         end do

      end do

      call my_mpi_allreduce_real_2( a, 3, 3 )
!      call my_mpi_allreduce_real_2_sub( a, 3, 3 )
!      call my_mpi_allreduce_real_2_main( a, 3, 3 )

!     /*   principal axis:  diagonalize moment of inertia   */

      call ddiag_MPI ( a, e, c, 3 )

!     /*   in principal axis:  angular momentum   */

      d(1) = c(1,1)*b(1) + c(2,1)*b(2) + c(3,1)*b(3)
      d(2) = c(1,2)*b(1) + c(2,2)*b(2) + c(3,2)*b(3)
      d(3) = c(1,3)*b(1) + c(2,3)*b(2) + c(3,3)*b(3)

!     /*   d = angular momentum divided by moment of inertia   */

      if ( natom .eq. 1 ) then
         d(1) = 0.d0
         d(2) = 0.d0
         d(3) = 0.d0
      else if ( natom .eq. 2 ) then
         d(1) = 0.d0
         if ( e(2) .gt. tiny ) d(2) = d(2)/e(2)
         if ( e(3) .gt. tiny ) d(3) = d(3)/e(3)
      else
         if ( e(1) .gt. tiny ) d(1) = d(1)/e(1)
         if ( e(2) .gt. tiny ) d(2) = d(2)/e(2)
         if ( e(3) .gt. tiny ) d(3) = d(3)/e(3)
      end if

!     /*   d in laboratory frame   */

      f(1) = c(1,1)*d(1) + c(1,2)*d(2) + c(1,3)*d(3)
      f(2) = c(2,1)*d(1) + c(2,2)*d(2) + c(2,3)*d(3)
      f(3) = c(3,1)*d(1) + c(3,2)*d(2) + c(3,3)*d(3)

      do j = jstart_bead, jend_bead

         if ( j .ne. 1 ) cycle

         do i = jstart_atom, jend_atom

            vux(i,j) = vux(i,j) - f(2)*(uz(i,j)-zg(j)) &
     &                          + f(3)*(uy(i,j)-yg(j))
            vuy(i,j) = vuy(i,j) - f(3)*(ux(i,j)-xg(j)) &
     &                          + f(1)*(uz(i,j)-zg(j))
            vuz(i,j) = vuz(i,j) - f(1)*(uy(i,j)-yg(j)) &
     &                          + f(2)*(ux(i,j)-xg(j))

         end do

      end do

!-----------------------------------------------------------------------
!     /*   normal modes to cartesian                                  */
!-----------------------------------------------------------------------

      call nm_trans_velocity_XMPI ( 0 )

      return
      end
