!///////////////////////////////////////////////////////////////////////
!
!      Author:          M. Shiga
!      Last updated:    Jan 23, 2025 by M. Shiga
!      Description:     energy and force from molecular mechanics
!
!///////////////////////////////////////////////////////////////////////
!***********************************************************************
      subroutine force_mm_XMPI
!***********************************************************************
!
!     Top-level driver of the molecular mechanics energy and force
!     calculation.  It takes no arguments and simply calls, in this
!     order:
!
!        force_mm_setup_MPI     setup
!        force_mm_main_XMPI     force field terms
!        mm_dipole_XMPI         dipole moment
!        my_XMPI_allreduce_md   all-reduce communication
!
!     NOTE(review): which arrays the final all-reduce combines is
!     not visible from this routine - see my_XMPI_allreduce_md.
!
!-----------------------------------------------------------------------

      implicit none

!-----------------------------------------------------------------------
!     /*   setup                                                      */
!-----------------------------------------------------------------------

      call force_mm_setup_MPI

!-----------------------------------------------------------------------
!     /*   main routine                                               */
!-----------------------------------------------------------------------

      call force_mm_main_XMPI

!-----------------------------------------------------------------------
!     /*   dipole moment                                              */
!-----------------------------------------------------------------------

      call mm_dipole_XMPI

!-----------------------------------------------------------------------
!     /*   all-reduce communication                                   */
!-----------------------------------------------------------------------

      call my_XMPI_allreduce_md

      return
      end subroutine force_mm_XMPI





!***********************************************************************
      subroutine force_mm_main_XMPI
!***********************************************************************
!
!     Calls every force field term in a fixed order: the bonded
!     terms, the short range pair terms, and last the electrostatics.
!     The electrostatic routine is selected by iboundary and
!     ewald_flag (see the last section).
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : iboundary

      use mm_variables, only : ewald_flag

      implicit none

!-----------------------------------------------------------------------
!     /*   bonded terms                                               */
!-----------------------------------------------------------------------

!     /*   linear bonds   */
      call force_mm_lin_XMPI

!     /*   generalized linear bonds   */
      call force_mm_genlin_XMPI

!     /*   angular bonds   */
      call force_mm_angl_XMPI

!     /*   dihedral bonds   */
      call force_mm_dih_XMPI

!     /*   improper bonds   */
      call force_mm_improper_XMPI

!     /*   cmap of two dihedral bonds   */
      call force_mm_cmap_XMPI

!-----------------------------------------------------------------------
!     /*   pair terms                                                 */
!-----------------------------------------------------------------------

!     /*   lennard-jones   */
      call force_mm_lj_XMPI

!     /*   lennard-jones pair   */
      call force_mm_ljpair_XMPI

!     /*   buckingham   */
      call force_mm_buck_XMPI

!     /*   morse potential   */
      call force_mm_morse_XMPI

!-----------------------------------------------------------------------
!     /*   electrostatics                                             */
!     /*                                                              */
!     /*   iboundary = 0       :  direct sum                          */
!     /*   iboundary = 1 or 2  :  particle mesh Ewald if ewald_flag   */
!     /*                          is 1, plain Ewald sum otherwise     */
!     /*   any other iboundary :  no electrostatic call               */
!-----------------------------------------------------------------------

      if ( iboundary .eq. 0 ) then

         call force_mm_coulomb_XMPI

      else if ( ( iboundary .eq. 1 ) .or. ( iboundary .eq. 2 ) ) then

         if ( ewald_flag .eq. 1 ) then
            call force_mm_pmeewald_XMPI
         else
            call force_mm_ewald_XMPI
         end if

      end if

      return
      end subroutine force_mm_main_XMPI





!***********************************************************************
      subroutine force_mm_lin_XMPI
!***********************************************************************
!
!     Harmonic bond term:  u = 0.5 * fc_lin * ( r - eq_lin )**2
!
!     For each bead in jstart_bead..jend_bead and each bond handled
!     by this rank, the energy is added to pot, the pair force to
!     fx, fy, fz, and the force-times-separation products to
!     vir_bead.  Returns at once when nlin is zero.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   fc_lin, eq_lin, nlin, i_lin, j_lin

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   bead index, bond index, the two bonded atoms   */
      integer :: mb, kb, ia, ja

!     /*   cartesian component counters   */
      integer :: ic, jc

!     /*   separation vector ia - ja, force on atom ia   */
      real(8) :: d(3), f(3)

!     /*   bond length, deviation from eq_lin, force / separation   */
      real(8) :: dist, stretch, coef

!-----------------------------------------------------------------------
!     /*   nothing to do without linear bonds                         */
!-----------------------------------------------------------------------

      if ( nlin .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   loop over beads and bonds                                  */
!-----------------------------------------------------------------------

      do mb = jstart_bead, jend_bead
      do kb = 1, nlin

!        /*   bond kb is handled by rank mod(kb-1,nprocs_sub)   */
         if ( myrank_sub .ne. mod( kb-1, nprocs_sub ) ) cycle

         ia = i_lin(kb)
         ja = j_lin(kb)

         d(1) = x(ia,mb) - x(ja,mb)
         d(2) = y(ia,mb) - y(ja,mb)
         d(3) = z(ia,mb) - z(ja,mb)

!        /*   presumably applies the periodic boundary image   */
         call pbc_atom_MPI ( d(1), d(2), d(3) )

         dist = sqrt( d(1)*d(1) + d(2)*d(2) + d(3)*d(3) )

         stretch = dist - eq_lin(kb)

         pot(mb) = pot(mb) + 0.5d0*fc_lin(kb)*stretch*stretch

!        /*   force on ia  =  - du/dr * d / r   */
         coef = - fc_lin(kb)*stretch/dist

         do ic = 1, 3
            f(ic) = coef*d(ic)
         end do

         fx(ia,mb) = fx(ia,mb) + f(1)
         fy(ia,mb) = fy(ia,mb) + f(2)
         fz(ia,mb) = fz(ia,mb) + f(3)

!        /*   equal and opposite force on ja   */
         fx(ja,mb) = fx(ja,mb) - f(1)
         fy(ja,mb) = fy(ja,mb) - f(2)
         fz(ja,mb) = fz(ja,mb) - f(3)

!        /*   vir_bead(a,b) accumulates f(a)*d(b)   */
         do ic = 1, 3
         do jc = 1, 3
            vir_bead(ic,jc,mb) = vir_bead(ic,jc,mb) + f(ic)*d(jc)
         end do
         end do

      end do
      end do

      return
      end subroutine force_mm_lin_XMPI





!***********************************************************************
      subroutine force_mm_genlin_XMPI
!***********************************************************************
!
!     Generalized linear bond term:
!
!        u = fc_genlin * ( r - eq_genlin )**n_genlin
!
!     For each bead in jstart_bead..jend_bead and each bond handled
!     by this rank, the energy is added to pot, the pair force to
!     fx, fy, fz, and the force-times-separation products to
!     vir_bead.  Returns at once when ngenlin is zero.
!
!     drn and drm are plain locals assigned for every bond; they
!     are deliberately not initialized in their declaration, since
!     that would give them the SAVE attribute.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   fc_genlin, eq_genlin, ngenlin, n_genlin, i_genlin, j_genlin

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: m, k, i, j

      real(8) :: xij, yij, zij, rij, dr, const, fxi, fyi, fzi

!     /*   drn = dr**n  and  drm = dr**(n-1)   */
      real(8) :: drn, drm

!-----------------------------------------------------------------------
!     /*   nothing to do without generalized linear bonds             */
!-----------------------------------------------------------------------

      if ( ngenlin .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         do k = 1, ngenlin

!           /*   bond k is handled by rank mod(k-1,nprocs_sub)   */
            if ( mod( k-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_genlin(k)
            j = j_genlin(k)

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

!           /*   presumably applies the periodic boundary image   */
            call pbc_atom_MPI ( xij, yij, zij )

            rij = sqrt( xij*xij + yij*yij + zij*zij )

            dr  = ( rij - eq_genlin(k) )

!           /*   small exponents use explicit products, all other   */
!           /*   exponents the power operator                       */

            if      ( n_genlin(k) .eq. 0 ) then
               drn    = 1.d0
               drm    = 0.d0
            else if ( n_genlin(k) .eq. 1 ) then
               drn    = dr
               drm    = 1.d0
            else if ( n_genlin(k) .eq. 2 ) then
               drn    = dr*dr
               drm    = dr
            else if ( n_genlin(k) .eq. 3 ) then
               drn    = dr*dr*dr
               drm    = dr*dr
            else if ( n_genlin(k) .eq. 4 ) then
               drn    = dr*dr*dr*dr
               drm    = dr*dr*dr
            else
               drn    = dr**(n_genlin(k))
               drm    = dr**(n_genlin(k)-1)
            end if

            pot(m) = pot(m) + fc_genlin(k)*drn

!           /*   force on i  =  - du/dr * rij vector / rij   */
            const = - n_genlin(k)*fc_genlin(k)*drm/rij

            fxi = const*xij
            fyi = const*yij
            fzi = const*zij

            fx(i,m) = fx(i,m) + fxi
            fy(i,m) = fy(i,m) + fyi
            fz(i,m) = fz(i,m) + fzi

!           /*   equal and opposite force on j   */
            fx(j,m) = fx(j,m) - fxi
            fy(j,m) = fy(j,m) - fyi
            fz(j,m) = fz(j,m) - fzi

            vir_bead(1,1,m) = vir_bead(1,1,m) + fxi*xij
            vir_bead(1,2,m) = vir_bead(1,2,m) + fxi*yij
            vir_bead(1,3,m) = vir_bead(1,3,m) + fxi*zij
            vir_bead(2,1,m) = vir_bead(2,1,m) + fyi*xij
            vir_bead(2,2,m) = vir_bead(2,2,m) + fyi*yij
            vir_bead(2,3,m) = vir_bead(2,3,m) + fyi*zij
            vir_bead(3,1,m) = vir_bead(3,1,m) + fzi*xij
            vir_bead(3,2,m) = vir_bead(3,2,m) + fzi*yij
            vir_bead(3,3,m) = vir_bead(3,3,m) + fzi*zij

         end do

      end do

      return
      end subroutine force_mm_genlin_XMPI





!***********************************************************************
      subroutine force_mm_angl_XMPI
!***********************************************************************
!
!     Harmonic angle term for the angle i-j-k with vertex j:
!
!        u = 0.5 * fc_angl * ( theta - eq_angl )**2
!
!     theta is converted to degrees before eq_angl is subtracted.
!
!     For each bead in jstart_bead..jend_bead and each angle
!     handled by this rank, the energy is added to pot, the forces
!     to fx, fy, fz, and the force-times-separation products to
!     vir_bead.  Returns at once when nangl is zero.
!
!     When theta is within tiny_value (radians) of 0 or pi the
!     energy is still added but the force, which contains
!     1/sin(theta), is skipped.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, pi, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   fc_angl, eq_angl, i_angl, j_angl, k_angl, nangl

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, k, m, l

      real(8) :: xij, yij, zij, xkj, ykj, zkj, rij2, rkj2, rijk, &
     &           pijk, qijk, bijk, aijk, da, const, &
     &           fxi, fxj, fxk, fyi, fyj, fyk, fzi, fzj, fzk

!     /*   radian to degree conversion factor   */
      real(8) :: rad2deg

!     /*   angular tolerance (radians) around theta = 0 and pi   */
      real(8), parameter :: tiny_value = 1.d-4

!-----------------------------------------------------------------------
!     /*   nothing to do without angular bonds                        */
!-----------------------------------------------------------------------

      if ( nangl .eq. 0 ) return

      rad2deg = (180.d0/pi)

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         do l = 1, nangl

!           /*   angle l is handled by rank mod(l-1,nprocs_sub)   */
            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_angl(l)
            j = j_angl(l)
            k = k_angl(l)

!           /*   the two arms of the angle, measured from atom j   */

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            xkj = x(k,m) - x(j,m)
            ykj = y(k,m) - y(j,m)
            zkj = z(k,m) - z(j,m)

            call pbc_atom_MPI ( xkj, ykj, zkj )

            rij2 = xij*xij + yij*yij + zij*zij
            rkj2 = xkj*xkj + ykj*ykj + zkj*zkj

!           /*   product of the two arm lengths   */
            rijk = sqrt( rij2*rkj2 )

!           /*   dot product of the two arms   */
            pijk = xij*xkj + yij*ykj + zij*zkj

!           /*   cos(theta), clamped against round-off   */
            qijk  = pijk/rijk

            qijk = max( qijk, -1.d0 )
            qijk = min( qijk,  1.d0 )

!           /*   theta in radians (bijk) and in degrees (aijk)   */
            bijk = acos( qijk )

            aijk = bijk*rad2deg

            da   = aijk - eq_angl(l)

            pot(m) = pot(m) + 0.5d0*fc_angl(l)*da*da

!           /*   acos returns values in [0,pi], so these are the   */
!           /*   only two angles where sin(theta) vanishes         */

            if ( abs(bijk)    .lt. tiny_value ) cycle
            if ( abs(bijk-pi) .lt. tiny_value ) cycle

!           /*   - du/dtheta * dtheta/dcos  divided by rijk   */
            const = fc_angl(l)*da /sin(bijk) /rijk *rad2deg

            fxi = const*( xkj - pijk/rij2*xij )
            fxk = const*( xij - pijk/rkj2*xkj )
            fxj = - fxi - fxk

            fyi = const*( ykj - pijk/rij2*yij )
            fyk = const*( yij - pijk/rkj2*ykj )
            fyj = - fyi - fyk

            fzi = const*( zkj - pijk/rij2*zij )
            fzk = const*( zij - pijk/rkj2*zkj )
            fzj = - fzi - fzk

            fx(i,m) = fx(i,m) + fxi
            fx(j,m) = fx(j,m) + fxj
            fx(k,m) = fx(k,m) + fxk

            fy(i,m) = fy(i,m) + fyi
            fy(j,m) = fy(j,m) + fyj
            fy(k,m) = fy(k,m) + fyk

            fz(i,m) = fz(i,m) + fzi
            fz(j,m) = fz(j,m) + fzj
            fz(k,m) = fz(k,m) + fzk

!           /*   separations are relative to j, so the force on j   */
!           /*   does not appear                                    */

            vir_bead(1,1,m) = vir_bead(1,1,m) + fxi*xij + fxk*xkj
            vir_bead(1,2,m) = vir_bead(1,2,m) + fxi*yij + fxk*ykj
            vir_bead(1,3,m) = vir_bead(1,3,m) + fxi*zij + fxk*zkj
            vir_bead(2,1,m) = vir_bead(2,1,m) + fyi*xij + fyk*xkj
            vir_bead(2,2,m) = vir_bead(2,2,m) + fyi*yij + fyk*ykj
            vir_bead(2,3,m) = vir_bead(2,3,m) + fyi*zij + fyk*zkj
            vir_bead(3,1,m) = vir_bead(3,1,m) + fzi*xij + fzk*xkj
            vir_bead(3,2,m) = vir_bead(3,2,m) + fzi*yij + fzk*ykj
            vir_bead(3,3,m) = vir_bead(3,3,m) + fzi*zij + fzk*zkj

         end do

      end do

      return
      end subroutine force_mm_angl_XMPI





!***********************************************************************
      subroutine force_mm_dih_XMPI
!***********************************************************************
!
!     Dihedral (torsion) term for the atoms i-j-k-l:
!
!        u = 0.5 * v_dih * ( 1 + mu_dih * cos( nu_dih * phi ) )
!
!     phi is the angle between the vectors  rij x rkj  and
!     rlj x rkj,  all separations being measured from atom j.
!
!     For each bead in jstart_bead..jend_bead and each dihedral
!     handled by this rank, the energy is added to pot, the forces
!     to fx, fy, fz, and the force-times-separation products to
!     vir_bead.  Returns at once when ndih is zero.  A dihedral
!     whose cross products are shorter than the tolerance (three
!     atoms nearly collinear) is skipped, energy included.
!
!     The ratio sin(nu*phi)/sin(phi) needed by the force is built
!     from cos(phi) for any integer nu.  No work array is used, so
!     there is no upper limit on nu, and a negative nu is handled
!     through sin(-n*phi) = - sin(n*phi).
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   v_dih, i_dih, j_dih, k_dih, l_dih, ndih, mu_dih, nu_dih

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, k, l, m, n, mu, nu, ii, nabs

      real(8) :: xij, yij, zij, xkj, ykj, zkj, xlj, ylj, zlj, &
     &           xijk, yijk, zijk, xjkl, yjkl, zjkl, rijk2, rjkl2, v, &
     &           rijkl2, rijk2inv, rjkl2inv, rijkl2inv, cos_phi, phi, &
     &           factor, fxi, fyi, fzi, fxj, fyj, fzj, fxk, fyk, fzk, &
     &           fxl, fyl, fzl

!     /*   sin(nu*phi)/sin(phi) and its recurrence work values   */
      real(8) :: sinfac, s_lo, s_hi, s_new

!     /*   tolerance below which the geometry is degenerate   */
      real(8), parameter :: tiny_value = 1.d-4

!-----------------------------------------------------------------------
!     /*   nothing to do without dihedral bonds                       */
!-----------------------------------------------------------------------

      if ( ndih .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         do n = 1, ndih

!           /*   dihedral n is handled by rank mod(n-1,nprocs_sub)  */
            if ( mod( n-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_dih(n)
            j = j_dih(n)
            k = k_dih(n)
            l = l_dih(n)

!-----------------------------------------------------------------------
!           /*   separations measured from atom j                     */
!-----------------------------------------------------------------------

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            xkj = x(k,m) - x(j,m)
            ykj = y(k,m) - y(j,m)
            zkj = z(k,m) - z(j,m)

            call pbc_atom_MPI ( xkj, ykj, zkj )

            xlj = x(l,m) - x(j,m)
            ylj = y(l,m) - y(j,m)
            zlj = z(l,m) - z(j,m)

            call pbc_atom_MPI ( xlj, ylj, zlj )

!-----------------------------------------------------------------------
!           /*   cross products  rij x rkj  and  rlj x rkj            */
!-----------------------------------------------------------------------

            xijk = yij*zkj - zij*ykj
            yijk = zij*xkj - xij*zkj
            zijk = xij*ykj - yij*xkj

            xjkl = ylj*zkj - zlj*ykj
            yjkl = zlj*xkj - xlj*zkj
            zjkl = xlj*ykj - ylj*xkj

            rijk2  = xijk*xijk + yijk*yijk + zijk*zijk
            rjkl2  = xjkl*xjkl + yjkl*yjkl + zjkl*zjkl

!           /*   product of the two cross product lengths   */
            rijkl2 = sqrt(rijk2*rjkl2)

!           /*   skip degenerate geometries before dividing   */
            if ( abs(rijk2)  .lt. tiny_value ) cycle
            if ( abs(rjkl2)  .lt. tiny_value ) cycle
            if ( abs(rijkl2) .lt. tiny_value ) cycle

            rijk2inv  = 1.d0 / rijk2
            rjkl2inv  = 1.d0 / rjkl2
            rijkl2inv = 1.d0 / rijkl2

!-----------------------------------------------------------------------
!           /*   cos_phi = cos( phi ), clamped against round-off      */
!-----------------------------------------------------------------------

            cos_phi = ( xijk*xjkl + yijk*yjkl + zijk*zjkl ) * rijkl2inv

            cos_phi = max( cos_phi, -1.d0 )
            cos_phi = min( cos_phi,  1.d0 )

            phi = acos(cos_phi)

!-----------------------------------------------------------------------
!           /*   mu     =  cos(delta)  =  +1 or -1                    */
!           /*   delta  =  0 or pi                                    */
!-----------------------------------------------------------------------

            mu = mu_dih(n)
            nu = nu_dih(n)
            v  =  v_dih(n)

!-----------------------------------------------------------------------
!           /*   pot = 0.5 * v * ( cos ( nu*phi - delta ) + 1 )       */
!-----------------------------------------------------------------------

            pot(m)  = pot(m) + 0.5d0 * v * ( 1.d0 + cos(nu*phi)*mu )

!-----------------------------------------------------------------------
!           /*   sinfac = sin(nu*phi) / sin(phi)                      */
!           /*                                                        */
!           /*   closed forms in cos(phi) for |nu| = 0-4 and 6;       */
!           /*   otherwise s(n) = s(n-2) + 2*cos((n-1)*phi),          */
!           /*   started from s(1) = 1 and s(2) = 2*cos(phi)          */
!-----------------------------------------------------------------------

            nabs = abs( nu )

            select case ( nabs )

            case ( 0 )
               sinfac = 0.d0
            case ( 1 )
               sinfac = 1.d0
            case ( 2 )
               sinfac = 2.d0*cos_phi
            case ( 3 )
               sinfac = 4.d0*cos_phi*cos_phi - 1.d0
            case ( 4 )
               sinfac = 4.d0*cos_phi*(2.d0*cos_phi*cos_phi - 1.d0)
            case ( 6 )
               sinfac = 2.d0 * ( 4.d0*cos_phi*cos_phi - 1.d0 ) &
     &                * cos_phi * ( 4.d0*cos_phi*cos_phi - 3.d0 )
            case default
!              /*   s_lo and s_hi hold s(ii-2) and s(ii-1)   */
               s_lo = 1.d0
               s_hi = 2.d0*cos_phi
               do ii = 3, nabs
                  s_new = s_lo &
     &                  - sin((ii-2)*phi)*sin(phi) &
     &                  + cos((ii-2)*phi)*cos(phi) &
     &                  + cos((ii-1)*phi)
                  s_lo = s_hi
                  s_hi = s_new
               end do
               sinfac = s_hi

            end select

!           /*   sin is odd in nu   */
            if ( nu .lt. 0 ) sinfac = - sinfac

!-----------------------------------------------------------------------
!           /*   factor = - du/dcos(phi);  the brackets below hold    */
!           /*   the derivatives of cos(phi) by the atom positions    */
!-----------------------------------------------------------------------

            factor = - sinfac * 0.5d0 * v * mu * nu

            fxi = factor * ( + ( ykj*zjkl - zkj*yjkl ) * rijkl2inv &
     &                    - ( ykj*zijk - zkj*yijk ) * cos_phi*rijk2inv )
            fyi = factor * ( + ( zkj*xjkl - xkj*zjkl ) * rijkl2inv &
     &                    - ( zkj*xijk - xkj*zijk ) * cos_phi*rijk2inv )
            fzi = factor * ( + ( xkj*yjkl - ykj*xjkl ) * rijkl2inv &
     &                    - ( xkj*yijk - ykj*xijk ) * cos_phi*rijk2inv )

            fxl = factor * ( + ( ykj*zijk - zkj*yijk ) * rijkl2inv &
     &                    - ( ykj*zjkl - zkj*yjkl ) * cos_phi*rjkl2inv )
            fyl = factor * ( + ( zkj*xijk - xkj*zijk ) * rijkl2inv &
     &                    - ( zkj*xjkl - xkj*zjkl ) * cos_phi*rjkl2inv )
            fzl = factor * ( + ( xkj*yijk - ykj*xijk ) * rijkl2inv &
     &                    - ( xkj*yjkl - ykj*xjkl ) * cos_phi*rjkl2inv )

            fxk = factor * ( - ( yij*zjkl - zij*yjkl ) * rijkl2inv &
     &                    - ( ylj*zijk - zlj*yijk ) * rijkl2inv &
     &                    + ( yij*zijk - zij*yijk ) * cos_phi*rijk2inv &
     &                    + ( ylj*zjkl - zlj*yjkl ) * cos_phi*rjkl2inv )
            fyk = factor * ( - ( zij*xjkl - xij*zjkl ) * rijkl2inv &
     &                    - ( zlj*xijk - xlj*zijk ) * rijkl2inv &
     &                    + ( zij*xijk - xij*zijk ) * cos_phi*rijk2inv &
     &                    + ( zlj*xjkl - xlj*zjkl ) * cos_phi*rjkl2inv )
            fzk = factor * ( - ( xij*yjkl - yij*xjkl ) * rijkl2inv &
     &                    - ( xlj*yijk - ylj*xijk ) * rijkl2inv &
     &                    + ( xij*yijk - yij*xijk ) * cos_phi*rijk2inv &
     &                    + ( xlj*yjkl - ylj*xjkl ) * cos_phi*rjkl2inv )

!           /*   the four forces sum to zero   */
            fxj = - ( fxi + fxk + fxl )
            fyj = - ( fyi + fyk + fyl )
            fzj = - ( fzi + fzk + fzl )

            fx(i,m) = fx(i,m) + fxi
            fy(i,m) = fy(i,m) + fyi
            fz(i,m) = fz(i,m) + fzi

            fx(j,m) = fx(j,m) + fxj
            fy(j,m) = fy(j,m) + fyj
            fz(j,m) = fz(j,m) + fzj

            fx(k,m) = fx(k,m) + fxk
            fy(k,m) = fy(k,m) + fyk
            fz(k,m) = fz(k,m) + fzk

            fx(l,m) = fx(l,m) + fxl
            fy(l,m) = fy(l,m) + fyl
            fz(l,m) = fz(l,m) + fzl

!           /*   separations are relative to j, so the force on j   */
!           /*   does not appear                                    */

            vir_bead(1,1,m) = vir_bead(1,1,m) +fxi*xij +fxk*xkj +fxl*xlj
            vir_bead(1,2,m) = vir_bead(1,2,m) +fxi*yij +fxk*ykj +fxl*ylj
            vir_bead(1,3,m) = vir_bead(1,3,m) +fxi*zij +fxk*zkj +fxl*zlj
            vir_bead(2,1,m) = vir_bead(2,1,m) +fyi*xij +fyk*xkj +fyl*xlj
            vir_bead(2,2,m) = vir_bead(2,2,m) +fyi*yij +fyk*ykj +fyl*ylj
            vir_bead(2,3,m) = vir_bead(2,3,m) +fyi*zij +fyk*zkj +fyl*zlj
            vir_bead(3,1,m) = vir_bead(3,1,m) +fzi*xij +fzk*xkj +fzl*xlj
            vir_bead(3,2,m) = vir_bead(3,2,m) +fzi*yij +fzk*ykj +fzl*ylj
            vir_bead(3,3,m) = vir_bead(3,3,m) +fzi*zij +fzk*zkj +fzl*zlj

         end do

      end do

      return
      end subroutine force_mm_dih_XMPI





!***********************************************************************
      subroutine force_mm_coulomb_XMPI
!***********************************************************************
!
!     Coulomb interaction by direct summation.
!
!     Pass 1 adds  q(i)*q(j)/r  for every pair of the ncharge
!     charged atoms listed in i_q.  Pass 2 adds
!     ( factor_bcp - 1 ) * q(i)*q(j)/r  for the nbcp bonded charge
!     pairs, so that such a pair ends up weighted by factor_bcp.
!
!     Energy goes to pot, forces to fx, fy, fz and the
!     force-times-separation products to vir_bead, for the beads
!     jstart_bead..jend_bead.  Pairs are dealt round-robin over
!     the nprocs_sub ranks.  Returns at once when ncharge is zero.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   q, factor_bcp, ncharge, nbcp, i_q, i_bcp, j_bcp

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   bead index, list counters, running pair number   */
      integer :: mb, ka, kb, ipair

!     /*   the two atoms of the current pair   */
      integer :: ia, ja

!     /*   weight applied to q(ia)*q(ja)/r   */
      real(8) :: weight

!-----------------------------------------------------------------------
!     /*   nothing to do without charges                              */
!-----------------------------------------------------------------------

      if ( ncharge .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   pass 1:  direct sum between all charges                    */
!-----------------------------------------------------------------------

      weight = 1.d0

      do mb = jstart_bead, jend_bead

         ipair = 0

         do ka = 1, ncharge-1
         do kb = ka+1, ncharge

!           /*   pairs are numbered in loop order and pair number   */
!           /*   ipair belongs to rank mod(ipair-1,nprocs_sub)      */

            ipair = ipair + 1

            if ( myrank_sub .ne. mod( ipair-1, nprocs_sub ) ) cycle

            ia = i_q(ka)
            ja = i_q(kb)

            call add_pair ( ia, ja, mb, weight )

         end do
         end do

      end do

!-----------------------------------------------------------------------
!     /*   pass 2:  subtract bonded charge pairs                      */
!-----------------------------------------------------------------------

      do mb = jstart_bead, jend_bead

         do ka = 1, nbcp

            if ( myrank_sub .ne. mod( ka-1, nprocs_sub ) ) cycle

            ia = i_bcp(ka)
            ja = j_bcp(ka)

            weight = factor_bcp(ka)
            weight = (weight - 1.d0)

            call add_pair ( ia, ja, mb, weight )

         end do

      end do

      return

      contains

!-----------------------------------------------------------------------
!     /*   add  w * q(ia)*q(ja)/r  of bead mbead to pot, and the      */
!     /*   matching forces and f*r products to fx, fy, fz, vir_bead   */
!-----------------------------------------------------------------------

      subroutine add_pair ( iatom, jatom, mbead, w )

      integer, intent(in) :: iatom, jatom, mbead
      real(8), intent(in) :: w

      real(8) :: qa, qb, dx, dy, dz, r, rinv, u, du, fxa, fya, fza

      qa = q(iatom)
      qb = q(jatom)

      dx = x(iatom,mbead) - x(jatom,mbead)
      dy = y(iatom,mbead) - y(jatom,mbead)
      dz = z(iatom,mbead) - z(jatom,mbead)

!     /*   presumably applies the periodic boundary image   */
      call pbc_atom_MPI ( dx, dy, dz )

      r = sqrt( dx*dx + dy*dy + dz*dz )

      rinv = 1.d0/r

      u = w * qa*qb*rinv

      pot(mbead) = pot(mbead) + u

!     /*   du = du/dr;  force on iatom = - du/dr * separation / r   */
      du = - u*rinv

      fxa = - du*dx*rinv
      fya = - du*dy*rinv
      fza = - du*dz*rinv

      fx(iatom,mbead) = fx(iatom,mbead) + fxa
      fy(iatom,mbead) = fy(iatom,mbead) + fya
      fz(iatom,mbead) = fz(iatom,mbead) + fza

      fx(jatom,mbead) = fx(jatom,mbead) - fxa
      fy(jatom,mbead) = fy(jatom,mbead) - fya
      fz(jatom,mbead) = fz(jatom,mbead) - fza

      vir_bead(1,1,mbead) = vir_bead(1,1,mbead) + fxa*dx
      vir_bead(1,2,mbead) = vir_bead(1,2,mbead) + fxa*dy
      vir_bead(1,3,mbead) = vir_bead(1,3,mbead) + fxa*dz
      vir_bead(2,1,mbead) = vir_bead(2,1,mbead) + fya*dx
      vir_bead(2,2,mbead) = vir_bead(2,2,mbead) + fya*dy
      vir_bead(2,3,mbead) = vir_bead(2,3,mbead) + fya*dz
      vir_bead(3,1,mbead) = vir_bead(3,1,mbead) + fza*dx
      vir_bead(3,2,mbead) = vir_bead(3,2,mbead) + fza*dy
      vir_bead(3,3,mbead) = vir_bead(3,3,mbead) + fza*dz

      return
      end subroutine add_pair

      end subroutine force_mm_coulomb_XMPI





!***********************************************************************
      subroutine force_mm_ewald_XMPI
!***********************************************************************
!
!     Electrostatics by the Ewald sum.  When ncharge is not zero,
!     calls in this order the real space, Fourier space, self and
!     charged-system routines, followed by the dipole routine if
!     ioption_ewald is 1.  Does nothing when ncharge is zero.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use mm_variables, only : ncharge, nbox_ewald, ioption_ewald

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   true when the product of the nbox_ewald entries is 1   */
      logical :: one_box

!-----------------------------------------------------------------------
!     /*   all contributions are skipped without charges              */
!-----------------------------------------------------------------------

      if ( ncharge .ne. 0 ) then

!        /*   real space:  pair version for a single box   */

         one_box = &
     &      ( nbox_ewald(1)*nbox_ewald(2)*nbox_ewald(3) .eq. 1 )

         if ( .not. one_box ) then
            call force_ewald_rs_XMPI
         else
            call force_ewald_rs_pair_XMPI
         end if

!        /*   Fourier space   */
         call force_ewald_fs_XMPI

!        /*   self term   */
         call force_ewald_self_XMPI

!        /*   charged system term   */
         call force_ewald_charge_XMPI

!        /*   dipole term, only for ioption_ewald = 1   */
         if ( ioption_ewald .eq. 1 ) call force_ewald_dipole_XMPI

      end if

      return
      end subroutine force_mm_ewald_XMPI





!***********************************************************************
      subroutine force_mm_pmeewald_XMPI
!***********************************************************************
!=======================================================================
!
!     driver of the Ewald sum in which the Fourier space part is
!     delegated to force_pmeewald_fs_XMPI (the name suggests the
!     particle mesh variant).  the real space, self, charged system
!     and dipole parts are the same routines as called elsewhere in
!     this file.
!
!     returns immediately when there is no charged atom.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use mm_variables, only : ncharge, nbox_ewald, ioption_ewald

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   true if the replicated cell consists of a single box   */
      logical :: single_box

!-----------------------------------------------------------------------
!     /*   nothing to be done without charges                         */
!-----------------------------------------------------------------------

      if ( ncharge == 0 ) return

!-----------------------------------------------------------------------
!     /*   real space:  pair routine for one box, image routine       */
!     /*   otherwise                                                  */
!-----------------------------------------------------------------------

      single_box = ( nbox_ewald(1)*nbox_ewald(2)*nbox_ewald(3) == 1 )

      if ( single_box ) then
         call force_ewald_rs_pair_XMPI
      else
         call force_ewald_rs_XMPI
      end if

!-----------------------------------------------------------------------
!     /*   Fourier space, self and charged system contributions       */
!-----------------------------------------------------------------------

      call force_pmeewald_fs_XMPI

      call force_ewald_self_XMPI

      call force_ewald_charge_XMPI

!-----------------------------------------------------------------------
!     /*   dipole contribution, only for ioption_ewald = 1            */
!-----------------------------------------------------------------------

      if ( ioption_ewald == 1 ) then
         call force_ewald_dipole_XMPI
      end if

      return
      end subroutine force_mm_pmeewald_XMPI





!***********************************************************************
      subroutine force_ewald_rs_XMPI
!***********************************************************************
!=======================================================================
!
!     real space part of the Ewald sum, summed over the images in
!     the replicated cell (nbox_ewald copies of box along each cell
!     vector), followed by the correction for bonded charge pairs.
!
!     modifies:  pot, fx, fy, fz, vir_bead for the beads from
!                jstart_bead to jend_bead, and the shared matrices
!                bigbox and bigboxinv.
!
!     the ordered charge pairs and the bonded pairs are dealt out
!     round robin to the ranks (myrank_sub out of nprocs_sub), so
!     that each rank adds its own share only.  no reduction is
!     performed in this routine.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, pot, fx, fy, fz, vir_bead, box, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   factor_bcp, q, rcut_ewald, alpha_ewald, bigbox, bigboxinv, &
     &   nbox_ewald, i_q, i_bcp, j_bcp, nbcp, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   atoms, charges, bead, pair counter, image indices   */
      integer :: ia, ja, kq, lq, ib, npair, ix, iy, iz, ishift2

!     /*   external functions:  erf_0 is used as the error        */
!     /*   function, erf_1 enters the force together with alpha   */
!     /*   (presumably the derivative of erf_0)                   */
      real(8), external :: erf_0, erf_1

      real(8) :: qq, qa, qb, dx, dy, dz, sa, sb, sc, d2, d, dinv, &
     &           dinv2, dinv3, ad, erfc_ad, esum, fscale, fbcp, &
     &           ucorr, ducorr, fxa, fya, fza, rcut2, dist

!-----------------------------------------------------------------------
!     /*   replicated cell, its inverse, and the squared cutoff       */
!-----------------------------------------------------------------------

      do ix = 1, 3
         bigbox(:,ix) = dble(nbox_ewald(ix))*box(:,ix)
      end do

      call inv3 ( bigbox, bigboxinv )

      rcut2 = rcut_ewald*rcut_ewald

!-----------------------------------------------------------------------
!     /*   screened Coulomb interaction of all ordered pairs          */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         esum = 0.d0

         npair = 0

         do kq = 1, ncharge
         do lq = 1, ncharge

!           /*   round robin distribution of the pairs   */

            npair = npair + 1

            if ( mod( npair-1, nprocs_sub ) /= myrank_sub ) cycle

            ia = i_q(kq)
            ja = i_q(lq)

            qq = q(ia)*q(ja)

            if ( qq == 0.d0 ) cycle

            do ix = 0, nbox_ewald(1)-1
            do iy = 0, nbox_ewald(2)-1
            do iz = 0, nbox_ewald(3)-1

!              /*   an atom does not interact with itself   */

               ishift2 = ix*ix + iy*iy + iz*iz

               if ( ( ishift2 == 0 ) .and. ( ia == ja ) ) cycle

!              /*   separation, shifted by the box image   */

               dx = x(ia,ib) - x(ja,ib)
               dy = y(ia,ib) - y(ja,ib)
               dz = z(ia,ib) - z(ja,ib)

               dx = dx - box(1,1)*ix - box(1,2)*iy - box(1,3)*iz
               dy = dy - box(2,1)*ix - box(2,2)*iy - box(2,3)*iz
               dz = dz - box(3,1)*ix - box(3,2)*iy - box(3,3)*iz

!              /*   nearest image with respect to bigbox   */

               sa = bigboxinv(1,1)*dx + bigboxinv(1,2)*dy &
     &            + bigboxinv(1,3)*dz
               sb = bigboxinv(2,1)*dx + bigboxinv(2,2)*dy &
     &            + bigboxinv(2,3)*dz
               sc = bigboxinv(3,1)*dx + bigboxinv(3,2)*dy &
     &            + bigboxinv(3,3)*dz

               sa = sa - nint(sa)
               sb = sb - nint(sb)
               sc = sc - nint(sc)

               dx = bigbox(1,1)*sa + bigbox(1,2)*sb + bigbox(1,3)*sc
               dy = bigbox(2,1)*sa + bigbox(2,2)*sb + bigbox(2,3)*sc
               dz = bigbox(3,1)*sa + bigbox(3,2)*sb + bigbox(3,3)*sc

               d2 = dx*dx + dy*dy + dz*dz

               if ( d2 > rcut2 ) cycle

               d = sqrt(d2)

               dinv  = 1.d0/d
               dinv2 = dinv*dinv
               dinv3 = dinv*dinv2

               ad = alpha_ewald*d

               erfc_ad = 1.d0 - erf_0(ad)

!              /*   energy:  halved after the pair loops   */

               esum = esum + qq*erfc_ad*dinv

!              /*   force on atom ia only:  atom ja receives its   */
!              /*   force when the pair is visited in reverse      */

               fscale = erfc_ad*dinv3 + alpha_ewald*erf_1(ad)*dinv2
               fscale = qq*fscale

               fxa = fscale*dx
               fya = fscale*dy
               fza = fscale*dz

               fx(ia,ib) = fx(ia,ib) + fxa
               fy(ia,ib) = fy(ia,ib) + fya
               fz(ia,ib) = fz(ia,ib) + fza

!              /*   virial:  half per ordered pair   */

               vir_bead(1,1,ib) = vir_bead(1,1,ib) + 0.5d0*fxa*dx
               vir_bead(1,2,ib) = vir_bead(1,2,ib) + 0.5d0*fxa*dy
               vir_bead(1,3,ib) = vir_bead(1,3,ib) + 0.5d0*fxa*dz
               vir_bead(2,1,ib) = vir_bead(2,1,ib) + 0.5d0*fya*dx
               vir_bead(2,2,ib) = vir_bead(2,2,ib) + 0.5d0*fya*dy
               vir_bead(2,3,ib) = vir_bead(2,3,ib) + 0.5d0*fya*dz
               vir_bead(3,1,ib) = vir_bead(3,1,ib) + 0.5d0*fza*dx
               vir_bead(3,2,ib) = vir_bead(3,2,ib) + 0.5d0*fza*dy
               vir_bead(3,3,ib) = vir_bead(3,3,ib) + 0.5d0*fza*dz

            end do
            end do
            end do

         end do
         end do

         pot(ib) = pot(ib) + 0.5d0*esum

      end do

!-----------------------------------------------------------------------
!     /*   bonded charge pairs:  add (factor_bcp - 1) times the       */
!     /*   bare Coulomb interaction                                   */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         do kq = 1, nbcp

            if ( mod( kq-1, nprocs_sub ) /= myrank_sub ) cycle

            ia = i_bcp(kq)
            ja = j_bcp(kq)

            fbcp = factor_bcp(kq)

            qa = q(ia)
            qb = q(ja)

            dx = x(ia,ib) - x(ja,ib)
            dy = y(ia,ib) - y(ja,ib)
            dz = z(ia,ib) - z(ja,ib)

            call pbc_atom_MPI ( dx, dy, dz )

            dist = sqrt( dx*dx + dy*dy + dz*dz )

            dinv = 1.d0/dist

            ucorr = (fbcp - 1.d0) * qa*qb*dinv

            pot(ib) = pot(ib) + ucorr

!           /*   derivative of ucorr with respect to the distance   */

            ducorr = - ucorr*dinv

            fxa = - ducorr*dx*dinv
            fya = - ducorr*dy*dinv
            fza = - ducorr*dz*dinv

            fx(ia,ib) = fx(ia,ib) + fxa
            fy(ia,ib) = fy(ia,ib) + fya
            fz(ia,ib) = fz(ia,ib) + fza

            fx(ja,ib) = fx(ja,ib) - fxa
            fy(ja,ib) = fy(ja,ib) - fya
            fz(ja,ib) = fz(ja,ib) - fza

            vir_bead(1,1,ib) = vir_bead(1,1,ib) + fxa*dx
            vir_bead(1,2,ib) = vir_bead(1,2,ib) + fxa*dy
            vir_bead(1,3,ib) = vir_bead(1,3,ib) + fxa*dz
            vir_bead(2,1,ib) = vir_bead(2,1,ib) + fya*dx
            vir_bead(2,2,ib) = vir_bead(2,2,ib) + fya*dy
            vir_bead(2,3,ib) = vir_bead(2,3,ib) + fya*dz
            vir_bead(3,1,ib) = vir_bead(3,1,ib) + fza*dx
            vir_bead(3,2,ib) = vir_bead(3,2,ib) + fza*dy
            vir_bead(3,3,ib) = vir_bead(3,3,ib) + fza*dz

         end do

      end do

      return
      end subroutine force_ewald_rs_XMPI





!***********************************************************************
      subroutine force_ewald_rs_pair_XMPI
!***********************************************************************
!=======================================================================
!
!     real space part of the Ewald sum over the unique charge pairs
!     (k < l), each taken at the separation returned by
!     pbc_atom_MPI, followed by the correction for bonded charge
!     pairs.
!
!     modifies:  pot, fx, fy, fz, vir_bead for the beads from
!                jstart_bead to jend_bead.
!
!     the pairs are dealt out round robin to the ranks (myrank_sub
!     out of nprocs_sub).  no reduction is performed in this routine.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, pot, fx, fy, fz, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   factor_bcp, q, rcut_ewald, alpha_ewald, i_q, i_bcp, j_bcp, &
     &   nbcp, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   atoms, charges, bead, pair counter   */
      integer :: ia, ja, kq, lq, ib, npair

!     /*   external functions:  erf_0 is used as the error        */
!     /*   function, erf_1 enters the force together with alpha   */
!     /*   (presumably the derivative of erf_0)                   */
      real(8), external :: erf_0, erf_1

      real(8) :: rcut2, qq, qa, qb, dx, dy, dz, d2, d, dinv, dinv2, &
     &           dinv3, ad, erfc_ad, fscale, fbcp, ucorr, ducorr, &
     &           fxa, fya, fza, dist

!-----------------------------------------------------------------------
!     /*   squared cutoff distance                                    */
!-----------------------------------------------------------------------

      rcut2 = rcut_ewald*rcut_ewald

!-----------------------------------------------------------------------
!     /*   screened Coulomb interaction of the unique pairs           */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         npair = 0

         do kq = 1, ncharge-1
         do lq = kq+1, ncharge

!           /*   round robin distribution of the pairs   */

            npair = npair + 1

            if ( mod( npair-1, nprocs_sub ) /= myrank_sub ) cycle

            ia = i_q(kq)
            ja = i_q(lq)

            qq = q(ia)*q(ja)

            if ( qq == 0.d0 ) cycle

            dx = x(ia,ib) - x(ja,ib)
            dy = y(ia,ib) - y(ja,ib)
            dz = z(ia,ib) - z(ja,ib)

            call pbc_atom_MPI ( dx, dy, dz )

            d2 = dx*dx + dy*dy + dz*dz

            if ( d2 > rcut2 ) cycle

            d = sqrt(d2)

            dinv  = 1.d0/d
            dinv2 = dinv*dinv
            dinv3 = dinv*dinv2

            ad = alpha_ewald*d

            erfc_ad = 1.d0 - erf_0(ad)

            pot(ib) = pot(ib) + qq*erfc_ad*dinv

            fscale = erfc_ad*dinv3 + alpha_ewald*erf_1(ad)*dinv2

            fscale = qq*fscale

            fxa = fscale*dx
            fya = fscale*dy
            fza = fscale*dz

!           /*   equal and opposite forces on the two atoms   */

            fx(ia,ib) = fx(ia,ib) + fxa
            fy(ia,ib) = fy(ia,ib) + fya
            fz(ia,ib) = fz(ia,ib) + fza

            fx(ja,ib) = fx(ja,ib) - fxa
            fy(ja,ib) = fy(ja,ib) - fya
            fz(ja,ib) = fz(ja,ib) - fza

            vir_bead(1,1,ib) = vir_bead(1,1,ib) + fxa*dx
            vir_bead(1,2,ib) = vir_bead(1,2,ib) + fxa*dy
            vir_bead(1,3,ib) = vir_bead(1,3,ib) + fxa*dz
            vir_bead(2,1,ib) = vir_bead(2,1,ib) + fya*dx
            vir_bead(2,2,ib) = vir_bead(2,2,ib) + fya*dy
            vir_bead(2,3,ib) = vir_bead(2,3,ib) + fya*dz
            vir_bead(3,1,ib) = vir_bead(3,1,ib) + fza*dx
            vir_bead(3,2,ib) = vir_bead(3,2,ib) + fza*dy
            vir_bead(3,3,ib) = vir_bead(3,3,ib) + fza*dz

         end do
         end do

      end do

!-----------------------------------------------------------------------
!     /*   bonded charge pairs:  add (factor_bcp - 1) times the       */
!     /*   bare Coulomb interaction                                   */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         do kq = 1, nbcp

            if ( mod( kq-1, nprocs_sub ) /= myrank_sub ) cycle

            ia = i_bcp(kq)
            ja = j_bcp(kq)

            fbcp = factor_bcp(kq)

            qa = q(ia)
            qb = q(ja)

            dx = x(ia,ib) - x(ja,ib)
            dy = y(ia,ib) - y(ja,ib)
            dz = z(ia,ib) - z(ja,ib)

            call pbc_atom_MPI ( dx, dy, dz )

            dist = sqrt( dx*dx + dy*dy + dz*dz )

            dinv = 1.d0/dist

            ucorr = (fbcp - 1.d0) * qa*qb*dinv

            pot(ib) = pot(ib) + ucorr

!           /*   derivative of ucorr with respect to the distance   */

            ducorr = - ucorr*dinv

            fxa = - ducorr*dx*dinv
            fya = - ducorr*dy*dinv
            fza = - ducorr*dz*dinv

            fx(ia,ib) = fx(ia,ib) + fxa
            fy(ia,ib) = fy(ia,ib) + fya
            fz(ia,ib) = fz(ia,ib) + fza

            fx(ja,ib) = fx(ja,ib) - fxa
            fy(ja,ib) = fy(ja,ib) - fya
            fz(ja,ib) = fz(ja,ib) - fza

            vir_bead(1,1,ib) = vir_bead(1,1,ib) + fxa*dx
            vir_bead(1,2,ib) = vir_bead(1,2,ib) + fxa*dy
            vir_bead(1,3,ib) = vir_bead(1,3,ib) + fxa*dz
            vir_bead(2,1,ib) = vir_bead(2,1,ib) + fya*dx
            vir_bead(2,2,ib) = vir_bead(2,2,ib) + fya*dy
            vir_bead(2,3,ib) = vir_bead(2,3,ib) + fya*dz
            vir_bead(3,1,ib) = vir_bead(3,1,ib) + fza*dx
            vir_bead(3,2,ib) = vir_bead(3,2,ib) + fza*dy
            vir_bead(3,3,ib) = vir_bead(3,3,ib) + fza*dz

         end do

      end do

      return
      end subroutine force_ewald_rs_pair_XMPI





!***********************************************************************
      subroutine force_ewald_self_XMPI
!***********************************************************************
!=======================================================================
!
!     self term of the Ewald sum:  for every bead from jstart_bead
!     to jend_bead, the sum of q**2 over the charges handled by this
!     rank, times alpha_ewald/sqrt(pi), is subtracted from pot.
!
!     the charges are dealt out round robin to the ranks
!     (myrank_sub out of nprocs_sub).  no reduction is performed in
!     this routine.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   pi, pot, myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   alpha_ewald, q, i_q, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   atom, bead, charge   */
      integer :: ia, ib, kq

!     /*   prefactor and partial sum of squared charges   */
      real(8) :: selfcoef, qsq

!-----------------------------------------------------------------------
!     /*   prefactor of the self term                                 */
!-----------------------------------------------------------------------

      selfcoef = alpha_ewald/sqrt(pi)

!-----------------------------------------------------------------------
!     /*   loop of beads                                              */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         qsq = 0.d0

         do kq = 1, ncharge

!           /*   only the charges assigned to this rank   */

            if ( mod( kq-1, nprocs_sub ) == myrank_sub ) then

               ia = i_q(kq)

               qsq = qsq + q(ia)*q(ia)

            end if

         end do

         pot(ib) = pot(ib) - qsq*selfcoef

      end do

      return
      end subroutine force_ewald_self_XMPI





!***********************************************************************
      subroutine force_ewald_charge_XMPI
!***********************************************************************
!=======================================================================
!
!     charged system term of the Ewald sum:  with qsum the sum of
!     all charges, the amount
!
!        qsum**2 * pi / ( 2 * volume * alpha_ewald**2 )
!
!     is subtracted from pot and from the three diagonal elements
!     of vir_bead, for every bead from jstart_bead to jend_bead.
!
!     only the rank with myrank_sub = 0 contributes;  all other
!     ranks leave pot and vir_bead untouched.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   pi, volume, pot, vir_bead, myrank_sub

      use mm_variables, only : &
     &   alpha_ewald, q, i_q, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   bead, charge   */
      integer :: ib, kq

!     /*   prefactor, total charge, resulting shift   */
      real(8) :: prefac, qtot, shift

!-----------------------------------------------------------------------
!     /*   prefactor of the charged system term                       */
!-----------------------------------------------------------------------

      prefac = pi/(2.d0*volume*alpha_ewald*alpha_ewald)

!-----------------------------------------------------------------------
!     /*   the term is added once, by rank zero                       */
!-----------------------------------------------------------------------

      if ( myrank_sub /= 0 ) return

!-----------------------------------------------------------------------
!     /*   loop of beads                                              */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         qtot = 0.d0

         do kq = 1, ncharge
            qtot = qtot + q(i_q(kq))
         end do

         shift = qtot*qtot*prefac

         pot(ib) = pot(ib) - shift

         vir_bead(1,1,ib) = vir_bead(1,1,ib) - shift
         vir_bead(2,2,ib) = vir_bead(2,2,ib) - shift
         vir_bead(3,3,ib) = vir_bead(3,3,ib) - shift

      end do

      return
      end subroutine force_ewald_charge_XMPI





!***********************************************************************
      subroutine force_ewald_fs_XMPI
!***********************************************************************
!=======================================================================
!
!     Fourier space part of the Ewald sum.
!
!     for every bead from jstart_bead to jend_bead, the phase
!     factors of all charged atoms are tabulated in the work arrays
!     eigax ... eigcz, and the reciprocal vectors
!
!        g = la*a + lb*b + lc*c,   la >= 0,   0 < g**2 <= g2max
!
!     are summed.  terms with la > 0 carry a weight of two
!     (factor_2), presumably standing in for the la < 0 half of
!     the lattice which is not visited.
!
!     modifies:  pot, fx, fy, fz, vir_bead;  allocates, and
!                reallocates when lmax_ewald has changed since the
!                previous call, the shared arrays eigax ... eigcz.
!
!     the reciprocal vectors are dealt out round robin to the ranks
!     (counter n against myrank_sub, nprocs_sub).  no reduction is
!     performed in this routine.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, pot, vir_bead, fx, fy, fz, pi, boxinv, volume, natom, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   eigax, eigay, eigaz, eigbx, eigby, eigbz, eigcx, eigcy, eigcz, &
     &   alpha_ewald, q, i_q, lmax_ewald, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: m, k, i, l, n, l2, la, lb, lc

      real(8) :: ax, ay, az, bx, by, bz, cx, cy, cz, a2, b2, c2, &
     &           al2, bl2, cl2, factor_1, factor_2, factor_3, factor_4, &
     &           factor_5, factor_6, factor_7, factor_8, &
     &           gx, gy, gz, g2, g2max, qcos, qsin, fxi, fyi, fzi, &
     &           cos_gxyz, sin_gxyz, qexp2

!     /*   phase factor of one atom for one reciprocal vector   */
      complex(8) :: eig_gxyz

!     /*   first call flag, and lmax_ewald of the previous call   */
      integer, save :: iset = 0
      integer, save :: lmax_ewald_save(3)

!-----------------------------------------------------------------------
!     /*   memory allocation                                          */
!-----------------------------------------------------------------------

      if ( iset .eq. 0 ) then

!        /*   first call:  allocate whatever is not yet allocated   */

         if ( .not. allocated( eigax ) ) &
     &      allocate( eigax(natom,-lmax_ewald(1):lmax_ewald(1)) )
         if ( .not. allocated( eigay ) ) &
     &      allocate( eigay(natom,-lmax_ewald(1):lmax_ewald(1)) )
         if ( .not. allocated( eigaz ) ) &
     &      allocate( eigaz(natom,-lmax_ewald(1):lmax_ewald(1)) )
         if ( .not. allocated( eigbx ) ) &
     &      allocate( eigbx(natom,-lmax_ewald(2):lmax_ewald(2)) )
         if ( .not. allocated( eigby ) ) &
     &      allocate( eigby(natom,-lmax_ewald(2):lmax_ewald(2)) )
         if ( .not. allocated( eigbz ) ) &
     &      allocate( eigbz(natom,-lmax_ewald(2):lmax_ewald(2)) )
         if ( .not. allocated( eigcx ) ) &
     &      allocate( eigcx(natom,-lmax_ewald(3):lmax_ewald(3)) )
         if ( .not. allocated( eigcy ) ) &
     &      allocate( eigcy(natom,-lmax_ewald(3):lmax_ewald(3)) )
         if ( .not. allocated( eigcz ) ) &
     &      allocate( eigcz(natom,-lmax_ewald(3):lmax_ewald(3)) )

         iset = 1

      else

!        /*   later calls:  resize the arrays of any direction   */
!        /*   whose lmax_ewald differs from the previous call    */

         if ( lmax_ewald(1) .ne. lmax_ewald_save(1) ) then

            if ( allocated( eigax ) ) deallocate( eigax )
            if ( allocated( eigay ) ) deallocate( eigay )
            if ( allocated( eigaz ) ) deallocate( eigaz )

            allocate( eigax(natom,-lmax_ewald(1):lmax_ewald(1)) )
            allocate( eigay(natom,-lmax_ewald(1):lmax_ewald(1)) )
            allocate( eigaz(natom,-lmax_ewald(1):lmax_ewald(1)) )

         end if

         if ( lmax_ewald(2) .ne. lmax_ewald_save(2) ) then

            if ( allocated( eigbx ) ) deallocate( eigbx )
            if ( allocated( eigby ) ) deallocate( eigby )
            if ( allocated( eigbz ) ) deallocate( eigbz )

            allocate( eigbx(natom,-lmax_ewald(2):lmax_ewald(2)) )
            allocate( eigby(natom,-lmax_ewald(2):lmax_ewald(2)) )
            allocate( eigbz(natom,-lmax_ewald(2):lmax_ewald(2)) )

         end if

         if ( lmax_ewald(3) .ne. lmax_ewald_save(3) ) then

            if ( allocated( eigcx ) ) deallocate( eigcx )
            if ( allocated( eigcy ) ) deallocate( eigcy )
            if ( allocated( eigcz ) ) deallocate( eigcz )

            allocate( eigcx(natom,-lmax_ewald(3):lmax_ewald(3)) )
            allocate( eigcy(natom,-lmax_ewald(3):lmax_ewald(3)) )
            allocate( eigcz(natom,-lmax_ewald(3):lmax_ewald(3)) )

         end if

      end if

      lmax_ewald_save(1) = lmax_ewald(1)
      lmax_ewald_save(2) = lmax_ewald(2)
      lmax_ewald_save(3) = lmax_ewald(3)

!-----------------------------------------------------------------------
!     /*   parameters:  these depend neither on the bead nor on       */
!     /*   the reciprocal vector, so they are set once                */
!-----------------------------------------------------------------------

      ax = 2.d0*pi*boxinv(1,1)
      ay = 2.d0*pi*boxinv(1,2)
      az = 2.d0*pi*boxinv(1,3)
      bx = 2.d0*pi*boxinv(2,1)
      by = 2.d0*pi*boxinv(2,2)
      bz = 2.d0*pi*boxinv(2,3)
      cx = 2.d0*pi*boxinv(3,1)
      cy = 2.d0*pi*boxinv(3,2)
      cz = 2.d0*pi*boxinv(3,3)

      a2 = ax*ax + ay*ay + az*az
      b2 = bx*bx + by*by + bz*bz
      c2 = cx*cx + cy*cy + cz*cz

      al2 = a2*lmax_ewald(1)**2
      bl2 = b2*lmax_ewald(2)**2
      cl2 = c2*lmax_ewald(3)**2

!     /*   cutoff of g**2:  the smallest of the three directions   */

      g2max = min( al2, bl2, cl2 )

      factor_1 = (4.d0*pi)/(2.d0*volume)

      factor_7 = 1.d0/(4.d0*alpha_ewald*alpha_ewald)

!-----------------------------------------------------------------------
!     /*   loop of beads: start                                       */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

!-----------------------------------------------------------------------
!        /*   table of phase factors of the charged atoms             */
!-----------------------------------------------------------------------

         do k = 1, ncharge

            i  = i_q(k)

!           /*   direction a:  orders -lmax_ewald(1) to lmax_ewald(1) */

            eigax(i, 0)  = (1.d0,0.d0)
            eigay(i, 0)  = (1.d0,0.d0)
            eigaz(i, 0)  = (1.d0,0.d0)

!           /*   orders +1 and -1 exist only if lmax_ewald >= 1;   */
!           /*   without this test the table would be written      */
!           /*   out of bounds for lmax_ewald = 0                  */

            if ( lmax_ewald(1) .ge. 1 ) then
               eigax(i, 1)  = cmplx ( cos(ax*x(i,m)), sin(ax*x(i,m)), &
     &                                kind=8 )
               eigay(i, 1)  = cmplx ( cos(ay*y(i,m)), sin(ay*y(i,m)), &
     &                                kind=8 )
               eigaz(i, 1)  = cmplx ( cos(az*z(i,m)), sin(az*z(i,m)), &
     &                                kind=8 )
               eigax(i,-1)  = conjg ( eigax(i,1) )
               eigay(i,-1)  = conjg ( eigay(i,1) )
               eigaz(i,-1)  = conjg ( eigaz(i,1) )
            end if

!           /*   higher orders by repeated multiplication   */

            do l = 2, lmax_ewald(1)
               eigax(i, l)  = eigax(i,l-1)*eigax(i,1)
               eigay(i, l)  = eigay(i,l-1)*eigay(i,1)
               eigaz(i, l)  = eigaz(i,l-1)*eigaz(i,1)
               eigax(i,-l)  = conjg ( eigax(i,l) )
               eigay(i,-l)  = conjg ( eigay(i,l) )
               eigaz(i,-l)  = conjg ( eigaz(i,l) )
            end do

!           /*   direction b:  orders -lmax_ewald(2) to lmax_ewald(2) */

            eigbx(i, 0)  = (1.d0,0.d0)
            eigby(i, 0)  = (1.d0,0.d0)
            eigbz(i, 0)  = (1.d0,0.d0)

            if ( lmax_ewald(2) .ge. 1 ) then
               eigbx(i, 1)  = cmplx ( cos(bx*x(i,m)), sin(bx*x(i,m)), &
     &                                kind=8 )
               eigby(i, 1)  = cmplx ( cos(by*y(i,m)), sin(by*y(i,m)), &
     &                                kind=8 )
               eigbz(i, 1)  = cmplx ( cos(bz*z(i,m)), sin(bz*z(i,m)), &
     &                                kind=8 )
               eigbx(i,-1)  = conjg ( eigbx(i,1) )
               eigby(i,-1)  = conjg ( eigby(i,1) )
               eigbz(i,-1)  = conjg ( eigbz(i,1) )
            end if

            do l = 2, lmax_ewald(2)
               eigbx(i, l)  = eigbx(i,l-1)*eigbx(i,1)
               eigby(i, l)  = eigby(i,l-1)*eigby(i,1)
               eigbz(i, l)  = eigbz(i,l-1)*eigbz(i,1)
               eigbx(i,-l)  = conjg ( eigbx(i,l) )
               eigby(i,-l)  = conjg ( eigby(i,l) )
               eigbz(i,-l)  = conjg ( eigbz(i,l) )
            end do

!           /*   direction c:  orders -lmax_ewald(3) to lmax_ewald(3) */

            eigcx(i, 0)  = (1.d0,0.d0)
            eigcy(i, 0)  = (1.d0,0.d0)
            eigcz(i, 0)  = (1.d0,0.d0)

            if ( lmax_ewald(3) .ge. 1 ) then
               eigcx(i, 1)  = cmplx ( cos(cx*x(i,m)), sin(cx*x(i,m)), &
     &                                kind=8 )
               eigcy(i, 1)  = cmplx ( cos(cy*y(i,m)), sin(cy*y(i,m)), &
     &                                kind=8 )
               eigcz(i, 1)  = cmplx ( cos(cz*z(i,m)), sin(cz*z(i,m)), &
     &                                kind=8 )
               eigcx(i,-1)  = conjg ( eigcx(i,1) )
               eigcy(i,-1)  = conjg ( eigcy(i,1) )
               eigcz(i,-1)  = conjg ( eigcz(i,1) )
            end if

            do l = 2, lmax_ewald(3)
               eigcx(i, l)  = eigcx(i,l-1)*eigcx(i,1)
               eigcy(i, l)  = eigcy(i,l-1)*eigcy(i,1)
               eigcz(i, l)  = eigcz(i,l-1)*eigcz(i,1)
               eigcx(i,-l)  = conjg ( eigcx(i,l) )
               eigcy(i,-l)  = conjg ( eigcy(i,l) )
               eigcz(i,-l)  = conjg ( eigcz(i,l) )
            end do

         end do

!-----------------------------------------------------------------------
!        /*   sum over the reciprocal vectors                         */
!-----------------------------------------------------------------------

         n = 0

         do la =              0, lmax_ewald(1)
         do lb = -lmax_ewald(2), lmax_ewald(2)
         do lc = -lmax_ewald(3), lmax_ewald(3)

!           /*   round robin distribution:  the counter runs over   */
!           /*   all vectors, including those skipped below         */

            n = n + 1

            if ( mod( n-1, nprocs_sub ) .ne. myrank_sub ) cycle

            l2 = la*la + lb*lb + lc*lc

            if ( l2 .eq. 0 ) cycle

            if ( la .eq. 0 ) then
               factor_2 = 1.d0
            else
               factor_2 = 2.d0
            end if

            gx = ax*la + bx*lb + cx*lc
            gy = ay*la + by*lb + cy*lc
            gz = az*la + bz*lb + cz*lc

            g2 = gx*gx + gy*gy + gz*gz

            if ( g2 .gt. g2max ) cycle

            factor_3 = exp(-g2/(4.d0*alpha_ewald*alpha_ewald))/g2

!           /*   charge weighted sums of cos(g.r) and sin(g.r)   */

            qcos = 0.d0
            qsin = 0.d0

            do k = 1, ncharge

               i  = i_q(k)

               eig_gxyz = eigax(i,la)*eigbx(i,lb)*eigcx(i,lc) &
     &                   *eigay(i,la)*eigby(i,lb)*eigcy(i,lc) &
     &                   *eigaz(i,la)*eigbz(i,lb)*eigcz(i,lc)

               cos_gxyz = real ( eig_gxyz, kind=8 )
               sin_gxyz = aimag( eig_gxyz )

               qcos = qcos + q(i)*cos_gxyz
               qsin = qsin + q(i)*sin_gxyz

            end do

            qexp2 = qcos*qcos + qsin*qsin

!           /*   energy of this reciprocal vector   */

            factor_6 = factor_1*factor_2*factor_3*qexp2

            pot(m) = pot(m) + factor_6

!           /*   forces   */

            do k = 1, ncharge

               i  = i_q(k)

               eig_gxyz = eigax(i,la)*eigbx(i,lb)*eigcx(i,lc) &
     &                   *eigay(i,la)*eigby(i,lb)*eigcy(i,lc) &
     &                   *eigaz(i,la)*eigbz(i,lb)*eigcz(i,lc)

               cos_gxyz = real ( eig_gxyz, kind=8 )
               sin_gxyz = aimag( eig_gxyz )

               factor_4 = sin_gxyz*qcos - cos_gxyz*qsin

               factor_5 = 2.d0*q(i)*factor_1*factor_2*factor_3*factor_4

               fxi = factor_5*gx
               fyi = factor_5*gy
               fzi = factor_5*gz

               fx(i,m) = fx(i,m) + fxi
               fy(i,m) = fy(i,m) + fyi
               fz(i,m) = fz(i,m) + fzi

            end do

!           /*   virial   */

            factor_8 = 2.d0 * ( 1.d0 + factor_7*g2 ) / g2

            vir_bead(1,1,m) = vir_bead(1,1,m) &
     &         + factor_6 * ( 1.d0 - factor_8*gx*gx )
            vir_bead(1,2,m) = vir_bead(1,2,m) &
     &         - factor_6 * factor_8*gx*gy
            vir_bead(1,3,m) = vir_bead(1,3,m) &
     &         - factor_6 * factor_8*gx*gz
            vir_bead(2,1,m) = vir_bead(2,1,m) &
     &         - factor_6 * factor_8*gy*gx
            vir_bead(2,2,m) = vir_bead(2,2,m) &
     &         + factor_6 * ( 1.d0 - factor_8*gy*gy )
            vir_bead(2,3,m) = vir_bead(2,3,m) &
     &         - factor_6 * factor_8*gy*gz
            vir_bead(3,1,m) = vir_bead(3,1,m) &
     &         - factor_6 * factor_8*gz*gx
            vir_bead(3,2,m) = vir_bead(3,2,m) &
     &         - factor_6 * factor_8*gz*gy
            vir_bead(3,3,m) = vir_bead(3,3,m) &
     &         + factor_6 * ( 1.d0 - factor_8*gz*gz )

         end do
         end do
         end do

!-----------------------------------------------------------------------
!     /*   loop of beads: end                                         */
!-----------------------------------------------------------------------

      end do

      return
      end subroutine force_ewald_fs_XMPI





!***********************************************************************
      subroutine mm_dipole_XMPI
!***********************************************************************
!=======================================================================
!
!     dipole moment of the point charges.
!
!     for every bead from jstart_bead to jend_bead, dipx, dipy, dipz
!     are reset to zero and then accumulate  q * r  over the charges
!     handled by this rank, where r is the atomic position after
!     the call to pbc_unfold_MPI.
!
!     the charges are dealt out round robin to the ranks
!     (myrank_sub out of nprocs_sub).  no reduction is performed in
!     this routine.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, dipx, dipy, dipz, mbox, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   q, i_q, ncharge

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   bead, atom, charge, box indices   */
      integer :: ib, ia, kq, n1, n2, n3

!     /*   position passed to pbc_unfold_MPI   */
      real(8) :: xu, yu, zu

!-----------------------------------------------------------------------
!     /*   loop of beads                                              */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

         dipx(ib) = 0.d0
         dipy(ib) = 0.d0
         dipz(ib) = 0.d0

         do kq = 1, ncharge

!           /*   only the charges assigned to this rank   */

            if ( mod( kq-1, nprocs_sub ) == myrank_sub ) then

               ia = i_q(kq)

               xu = x(ia,ib)
               yu = y(ia,ib)
               zu = z(ia,ib)

!              /*   box indices are read from the first bead slot   */
!              /*   for every bead, not from mbox(.,ia,ib).         */
!              /*   NOTE(review): confirm that this is intended.    */

               n1 = mbox(1,ia,1)
               n2 = mbox(2,ia,1)
               n3 = mbox(3,ia,1)

               call pbc_unfold_MPI( xu, yu, zu, n1, n2, n3 )

               dipx(ib) = dipx(ib) + q(ia)*xu
               dipy(ib) = dipy(ib) + q(ia)*yu
               dipz(ib) = dipz(ib) + q(ia)*zu

            end if

         end do

      end do

      return
      end subroutine mm_dipole_XMPI





!***********************************************************************
      subroutine mm_dipole_mol_XMPI
!***********************************************************************
!=======================================================================
!
!     dipole moment of the mm charges, assembled molecule by molecule
!
!     first call only:
!        rank 0 reads the <dipoles> section of mm.dat and the data
!        are broadcast.  as read here, the section consists of
!           record 1     : number of polar molecules (nmol_dip)
!           next records : number of atoms followed by the atom
!                          indices, one record per molecule
!        the status of the read is kept in the saved variable ierr.
!
!     every call:
!        ierr = 0  : for each bead in jstart_bead:jend_bead and each
!                    listed atom, the position relative to
!                    ux/uy/uz(m,1) of the first atom m of the
!                    molecule is passed through pbc_atom_MPI,
!                    shifted back, and q*r is added to
!                    dipx/dipy/dipz.  atoms are dealt out round
!                    robin over the nprocs_sub sub-ranks.
!        ierr /= 0 : falls back to mm_dipole_XMPI (atomic dipole).
!
!     NOTE(review): the molecular branch only adds to
!     dipx/dipy/dipz; nothing in this routine resets them, whereas
!     mm_dipole_XMPI zeroes them itself.  presumably the caller
!     clears them beforehand -- confirm.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, ux, uy, uz, dipx, dipy, dipz, iounit, myrank, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   q, natom_per_dip, natom_dip_max, list_atom_dip, nmol_dip

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     //   local variables
!-----------------------------------------------------------------------

      implicit none

!     /*   flag for initial setups   */
      integer, save :: iset = 0

!     /*   status of reading <dipoles>; must survive between calls   */
!     /*   because it selects the branch taken on every later call   */
      integer, save :: ierr = 0

!     /*   integer   */
      integer :: i, j, k, l, m, n

!     /*   real numbers   */
      real(8) :: xk, yk, zk, qk

!-----------------------------------------------------------------------
!     //   this is carried out only once
!-----------------------------------------------------------------------

      if ( iset .eq. 0 ) then

!-----------------------------------------------------------------------
!     //   pseudo do loop: executed once, left by exit on any error
!-----------------------------------------------------------------------

      do

!-----------------------------------------------------------------------
!     //   read number of molecules
!-----------------------------------------------------------------------

!     /*   parent process   */
      if ( myrank .eq. 0 ) then

!        /*   file open   */
         open ( iounit, file = 'mm.dat' )

!        /*   search for tag    */
         call search_tag ( '<dipoles>', 9, iounit, ierr )

!        /*   number of polar molecules   */
         read ( iounit, *, iostat = ierr ) nmol_dip

!        /*   file close   */
         close( iounit )

!     /*   parent process   */
      end if

!     /*   communicate   */
      call my_MPI_bcast_int_0 ( ierr )

!-----------------------------------------------------------------------
!     //   warning: on error, atomic dipole is applied for pbc
!-----------------------------------------------------------------------

      if ( ierr .ne. 0 ) exit

!-----------------------------------------------------------------------
!     //   communicate
!-----------------------------------------------------------------------

      call my_MPI_bcast_int_0( nmol_dip )

!-----------------------------------------------------------------------
!     //   memory allocation
!-----------------------------------------------------------------------

!     /*   number of atoms in a molecule   */
      if ( .not. allocated( natom_per_dip ) ) &
     &   allocate( natom_per_dip(nmol_dip) )

!-----------------------------------------------------------------------
!     //   number of atoms per molecule
!-----------------------------------------------------------------------

!     /*   parent process   */
      if ( myrank .eq. 0 ) then

!        /*   file open   */
         open ( iounit, file = 'mm.dat' )

!        /*   search for tag    */
         call search_tag ( '<dipoles>', 9, iounit, ierr )

!        /*   skip the record holding the number of molecules   */
         read ( iounit, *, iostat = ierr )

!        /*   maximum number of atoms per polar molecule   */
         natom_dip_max = 0

!        /*   loop of molecules   */
         do i = 1, nmol_dip

!           /*   stop at the first failure so that it is not   */
!           /*   overwritten by the status of a later read     */
            if ( ierr .ne. 0 ) exit

!           /*   number of atoms per polar molecule   */
            read ( iounit, *, iostat = ierr ) natom_per_dip(i)

!           /*   natom_per_dip(i) is not usable after a failure   */
            if ( ierr .ne. 0 ) exit

!           /*   maximum number of atoms per polar molecule   */

            if ( natom_per_dip(i) .gt. natom_dip_max ) then
               natom_dip_max = natom_per_dip(i)
            end if

!        /*   loop of molecules   */
         end do

!        /*   file close   */
         close( iounit )

!     /*   parent process   */
      end if

!     /*   communicate   */
      call my_MPI_bcast_int_0 ( ierr )

!-----------------------------------------------------------------------
!     //   on error leave before natom_dip_max is used as a dimension
!-----------------------------------------------------------------------

      if ( ierr .ne. 0 ) exit

!-----------------------------------------------------------------------
!     //   communicate
!-----------------------------------------------------------------------

      call my_MPI_bcast_int_1( natom_per_dip, nmol_dip )
      call my_MPI_bcast_int_0( natom_dip_max )

!-----------------------------------------------------------------------
!     //   memory allocation
!-----------------------------------------------------------------------

!     /*   list of atoms in a molecule   */
      if ( .not. allocated( list_atom_dip ) ) &
     &   allocate( list_atom_dip(natom_dip_max,nmol_dip) )

!-----------------------------------------------------------------------
!     //   list of atoms of each molecule
!-----------------------------------------------------------------------

!     /*   parent process   */
      if ( myrank .eq. 0 ) then

!        /*   file open   */
         open ( iounit, file = 'mm.dat' )

!        /*   search for tag    */
         call search_tag ( '<dipoles>', 9, iounit, ierr )

!        /*   skip the record holding the number of molecules   */
         read ( iounit, *, iostat = ierr )

!        /*   loop of molecules   */
         do i = 1, nmol_dip

!           /*   keep the status of the first failing record   */
            if ( ierr .ne. 0 ) exit

!           /*   read list of atoms; the leading atom count is   */
!           /*   read into j and not used                        */
            read ( iounit, *, iostat = ierr ) &
     &         j, ( list_atom_dip(k,i), k = 1, natom_per_dip(i) )

!        /*   loop of molecules   */
         end do

!        /*   file close   */
         close( iounit )

!     /*   parent process   */
      end if

!     /*   communicate   */
      call my_MPI_bcast_int_0 ( ierr )

!-----------------------------------------------------------------------
!     //   on error the list is incomplete and is never used
!-----------------------------------------------------------------------

      if ( ierr .ne. 0 ) exit

!-----------------------------------------------------------------------
!     //   communicate
!-----------------------------------------------------------------------

      call my_MPI_bcast_int_2( list_atom_dip, natom_dip_max, nmol_dip )

!-----------------------------------------------------------------------
!     //   exit loop
!-----------------------------------------------------------------------

      exit

!-----------------------------------------------------------------------
!     //   pseudo do loop
!-----------------------------------------------------------------------

      end do

!-----------------------------------------------------------------------
!     //   set done
!-----------------------------------------------------------------------

      iset = 1

!-----------------------------------------------------------------------
!     //   this is carried out only once
!-----------------------------------------------------------------------

      end if

!-----------------------------------------------------------------------
!     //   calculate mm dipole moment
!-----------------------------------------------------------------------

!     /*   read correctly   */
      if ( ierr .eq. 0 ) then

!     /*   loop of beads   */
      do l = jstart_bead, jend_bead

!        /*   running counter over all listed atoms   */
         n = 0

!        /*   loop of polar molecules   */
         do j = 1, nmol_dip

!        /*   loop of atoms per polar molecules    */
         do i = 1, natom_per_dip(j)

!           /*   running counter over all listed atoms   */
            n = n + 1

!           /*   only the atoms assigned to this sub-rank   */
            if ( mod( n-1, nprocs_sub ) .ne. myrank_sub ) cycle

!           /*   i-th atom   */
            k = list_atom_dip(i,j)

!           /*   first atom   */
            m = list_atom_dip(1,j)

!           /*   position of i-th atom relative to ux/uy/uz(m,1)   */
!           /*   of the first atom                                 */

            xk = x(k,l) - ux(m,1)
            yk = y(k,l) - uy(m,1)
            zk = z(k,l) - uz(m,1)

!           /*   apply the boundary condition   */
            call pbc_atom_MPI ( xk, yk, zk )

!           /*   shift back by the reference position   */

            xk = xk + ux(m,1)
            yk = yk + uy(m,1)
            zk = zk + uz(m,1)

!           /*   atomic charge   */
            qk = q(k)

!           /*   dipole moment   */

            dipx(l) = dipx(l) + qk*xk
            dipy(l) = dipy(l) + qk*yk
            dipz(l) = dipz(l) + qk*zk

!        /*   loop of atoms per polar molecules    */
         end do

!        /*   loop of polar molecules   */
         end do

!     /*   loop of beads   */
      end do

!-----------------------------------------------------------------------
!     //   if read incorrect, calculate mm atomic dipole moment
!-----------------------------------------------------------------------

!     /*   read incorrect   */
      else

!        /*   mm atomic dipole moment   */
         call mm_dipole_XMPI

!     /*   read correctly / read incorrect   */
      end if

!-----------------------------------------------------------------------
!     //   end of routine
!-----------------------------------------------------------------------

      return
      end





!***********************************************************************
      subroutine force_mm_improper_XMPI
!***********************************************************************
!=======================================================================
!
!     energy, forces and virial of the improper torsion terms
!
!     for each bead in jstart_bead:jend_bead and each improper n
!     handled by this sub-rank (round robin over nprocs_sub):
!
!        m = rij x rkj ,  n = rlj x rkj        (plane normals)
!        cos(phi) = m.n / (|m||n|) ,  sign(phi) = sign( (m x n).rkj )
!        pot = pot + 0.5 * factor_1 * (phi - factor_2)**2
!
!     with factor_1 = fc_improper*(180/pi)**2 and
!     factor_2 = eq_improper*(pi/180).  forces are added to
!     fx/fy/fz and the virial to vir_bead.
!
!     two force formulas are used:
!        - general case: derivative of cos(phi), divided by sin(phi)
!        - phi within tiny_value of 0, +pi or -pi: derivative of
!          sin(phi), divided by cos(phi), because sin(phi) is then
!          too small to divide by
!
!     impropers whose |m|**2, |n|**2 or |m||n| is below tiny_value
!     are skipped entirely (no energy, no force).
!
!     NOTE(review): phi - factor_2 is not wrapped into a 2*pi
!     interval here.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   pi, x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   eq_improper, fc_improper, i_improper, j_improper, &
     &   k_improper, l_improper, nimproper

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, k, l, m, n

!     /*   threshold used both for degenerate geometries and for   */
!     /*   the distance of phi from 0 and +-pi; a true constant    */
      real(8), parameter :: tiny_value = 1.d-4

      real(8) :: xij, yij, zij, xkj, ykj, zkj, xlj, ylj, zlj, &
     &           xijk, yijk, zijk, xjkl, yjkl, zjkl, rijk2, rjkl2, &
     &           rijkl2, rijk2inv, rjkl2inv, rijkl2inv, cos_phi, phi, &
     &           fxi, fyi, fzi, fxj, fyj, fzj, fxk, fyk, fzk, &
     &           fxl, fyl, fzl

      real(8) :: dphi, factor_1, factor_2, factor_3, factor_4, &
     &           px1, py1, pz1, px2, py2, pz2, px3, py3, pz3, &
     &           px4, py4, pz4, px5, py5, pz5, px6, py6, pz6

      real(8) :: ax, ay, az, a1, a2, xkl, ykl, zkl, xki, yki, zki

      real(8) :: daxdxi, daxdyi, daxdzi, daydxi, daydyi, daydzi, &
     &           dazdxi, dazdyi, dazdzi, dadxi, dadyi, dadzi, &
     &           daxdxj, daxdyj, daxdzj, daydxj, daydyj, daydzj, &
     &           dazdxj, dazdyj, dazdzj, dadxj, dadyj, dadzj, &
     &           daxdxl, daxdyl, daxdzl, daydxl, daydyl, daydzl, &
     &           dazdxl, dazdyl, dazdzl, dadxl, dadyl, dadzl

      real(8) :: f1, sin_phi, sign_phi

!     /*   unit vector along a = m x n, and length of rkj   */
      real(8) :: uax, uay, uaz, rkj

!      real(8)::  daxdxk, daxdyk, daxdzk, daydxk, daydyk, daydzk,
!     &           dazdxk, dazdyk, dazdzk, dadxk, dadyk, dadzk

!-----------------------------------------------------------------------
!     /*   initialize                                                 */
!-----------------------------------------------------------------------

      if ( nimproper .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         do n = 1, nimproper

!           /*   only the impropers assigned to this sub-rank   */
            if ( mod( n-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_improper(n)
            j = j_improper(n)
            k = k_improper(n)
            l = l_improper(n)

!-----------------------------------------------------------------------
!           /*   bond vectors from atom j, passed to pbc_atom_MPI     */
!-----------------------------------------------------------------------

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            xkj = x(k,m) - x(j,m)
            ykj = y(k,m) - y(j,m)
            zkj = z(k,m) - z(j,m)

            call pbc_atom_MPI ( xkj, ykj, zkj )

            xlj = x(l,m) - x(j,m)
            ylj = y(l,m) - y(j,m)
            zlj = z(l,m) - z(j,m)

            call pbc_atom_MPI ( xlj, ylj, zlj )

!-----------------------------------------------------------------------
!           /*   plane normals  rij x rkj  and  rlj x rkj             */
!-----------------------------------------------------------------------

            xijk = yij*zkj - zij*ykj
            yijk = zij*xkj - xij*zkj
            zijk = xij*ykj - yij*xkj

            xjkl = ylj*zkj - zlj*ykj
            yjkl = zlj*xkj - xlj*zkj
            zjkl = xlj*ykj - ylj*xkj

            rijk2  = xijk*xijk + yijk*yijk + zijk*zijk
            rjkl2  = xjkl*xjkl + yjkl*yjkl + zjkl*zjkl

!           /*   product of the two lengths (not a squared value)   */
            rijkl2 = sqrt(rijk2*rjkl2)

!           /*   degenerate geometry: the term is skipped   */
            if ( abs(rijk2)  .lt. tiny_value ) cycle
            if ( abs(rjkl2)  .lt. tiny_value ) cycle
            if ( abs(rijkl2) .lt. tiny_value ) cycle

            rijk2inv  = 1.d0 / rijk2
            rjkl2inv  = 1.d0 / rjkl2
            rijkl2inv = 1.d0 / rijkl2

!-----------------------------------------------------------------------
!           /*   signed angle and its deviation from equilibrium      */
!-----------------------------------------------------------------------

            factor_1 = fc_improper(n) * (180.d0/pi) * (180.d0/pi)

            cos_phi = ( xijk*xjkl + yijk*yjkl + zijk*zjkl ) * rijkl2inv

!           /*   clamp against round-off before acos   */
            cos_phi = max( cos_phi, -1.d0 )
            cos_phi = min( cos_phi,  1.d0 )

            phi = acos(cos_phi)

!           /*   ( m x n ) . rkj decides the sign of phi   */
            sign_phi = ( yijk*zjkl - zijk*yjkl ) * xkj &
     &               + ( zijk*xjkl - xijk*zjkl ) * ykj &
     &               + ( xijk*yjkl - yijk*xjkl ) * zkj

            sign_phi = sign( 1.d0, sign_phi )

            phi = phi * sign_phi

            factor_2  = eq_improper(n) * (pi/180.d0)

            dphi = phi - factor_2

!-----------------------------------------------------------------------
!           /*   potential                                            */
!-----------------------------------------------------------------------

            pot(m)  = pot(m) + 0.5d0 * factor_1 * dphi * dphi

!-----------------------------------------------------------------------
!           /*   general case: phi away from 0 and +-pi               */
!-----------------------------------------------------------------------

            if ( ( abs(phi)    .gt. tiny_value ) .and. &
     &           ( abs(phi-pi) .gt. tiny_value ) .and. &
     &           ( abs(phi+pi) .gt. tiny_value ) ) then

!-----------------------------------------------------------------------

               factor_3 = sin(phi)

               factor_4 = factor_1 * dphi / factor_3

               px1 = yijk*zij - zijk*yij
               py1 = zijk*xij - xijk*zij
               pz1 = xijk*yij - yijk*xij

               px2 = yjkl*zij - zjkl*yij
               py2 = zjkl*xij - xjkl*zij
               pz2 = xjkl*yij - yjkl*xij

               px3 = yijk*zkj - zijk*ykj
               py3 = zijk*xkj - xijk*zkj
               pz3 = xijk*ykj - yijk*xkj

               px4 = yjkl*zkj - zjkl*ykj
               py4 = zjkl*xkj - xjkl*zkj
               pz4 = xjkl*ykj - yjkl*xkj

               px5 = yijk*zlj - zijk*ylj
               py5 = zijk*xlj - xijk*zlj
               pz5 = xijk*ylj - yijk*xlj

               px6 = yjkl*zlj - zjkl*ylj
               py6 = zjkl*xlj - xjkl*zlj
               pz6 = xjkl*ylj - yjkl*xlj

               fxi = factor_4 * ( - px4*rijkl2inv &
     &                            + px3*rijk2inv*cos_phi )
               fyi = factor_4 * ( - py4*rijkl2inv &
     &                            + py3*rijk2inv*cos_phi )
               fzi = factor_4 * ( - pz4*rijkl2inv &
     &                            + pz3*rijk2inv*cos_phi )

               fxk = factor_4 * ( + px2*rijkl2inv &
     &                            + px5*rijkl2inv &
     &                            - px1*rijk2inv*cos_phi &
     &                            - px6*rjkl2inv*cos_phi )
               fyk = factor_4 * ( + py2*rijkl2inv &
     &                            + py5*rijkl2inv &
     &                            - py1*rijk2inv*cos_phi &
     &                            - py6*rjkl2inv*cos_phi )
               fzk = factor_4 * ( + pz2*rijkl2inv &
     &                            + pz5*rijkl2inv &
     &                            - pz1*rijk2inv*cos_phi &
     &                            - pz6*rjkl2inv*cos_phi )

               fxl = factor_4 * ( - px3*rijkl2inv &
     &                            + px4*rjkl2inv*cos_phi )
               fyl = factor_4 * ( - py3*rijkl2inv &
     &                            + py4*rjkl2inv*cos_phi )
               fzl = factor_4 * ( - pz3*rijkl2inv &
     &                            + pz4*rjkl2inv*cos_phi )

!              /*   atom j: the four forces sum to zero   */
               fxj = - ( fxi + fxk + fxl )
               fyj = - ( fyi + fyk + fyl )
               fzj = - ( fzi + fzk + fzl )

!-----------------------------------------------------------------------
!           /*   phi close to 0 or +-pi: use sin(phi) = |a|/(|m||n|)  */
!-----------------------------------------------------------------------

            else

!-----------------------------------------------------------------------

               xki = - xij + xkj
               yki = - yij + ykj
               zki = - zij + zkj

               xkl = - xlj + xkj
               ykl = - ylj + ykj
               zkl = - zlj + zkj

!              /*   a = m x n   */
               ax = yijk*zjkl - zijk*yjkl
               ay = zijk*xjkl - xijk*zjkl
               az = xijk*yjkl - yijk*xjkl

               a2 = ax*ax + ay*ay + az*az

               a1 = sqrt( a2 )

               sin_phi = a1 / rijkl2

               sin_phi = max( sin_phi, -1.d0 )
               sin_phi = min( sin_phi,  1.d0 )

!              /*   phi is recomputed here but is not read again   */
!              /*   in this iteration; dphi above is what is used  */
               phi = sign_phi * asin( sin_phi )

               if ( cos_phi .lt. 0.d0 ) phi = pi - phi

!              /*   unit vector a/|a|.  a is parallel to rkj and     */
!              /*   sign_phi is the sign of a.rkj, so a/|a| equals   */
!              /*   sign_phi*rkj/|rkj|; this form is used when the   */
!              /*   normals are exactly parallel (a = 0), where      */
!              /*   a/|a| would be 0/0.  rkj is nonzero because      */
!              /*   rijk2 passed the tiny_value test above.          */
               if ( a1 .gt. 0.d0 ) then
                  uax = ax / a1
                  uay = ay / a1
                  uaz = az / a1
               else
                  rkj = sqrt( xkj*xkj + ykj*ykj + zkj*zkj )
                  uax = sign_phi * xkj / rkj
                  uay = sign_phi * ykj / rkj
                  uaz = sign_phi * zkj / rkj
               end if

!              /*   derivatives of a with respect to atom i   */
               daxdxi = - zjkl * zkj - ykj * yjkl
               daxdyi = + yjkl * xkj
               daxdzi = + zjkl * xkj

               daydxi = + xjkl * ykj
               daydyi = - xjkl * xkj - zkj * zjkl
               daydzi = + zjkl * ykj

               dazdxi = + xjkl * zkj
               dazdyi = + yjkl * zkj
               dazdzi = - yjkl * ykj - xkj * xjkl

!              /*   derivatives of a with respect to atom j   */
               daxdxj = - yijk * ykl + zjkl * zki &
     &                  + yjkl * yki - zijk * zkl
               daxdyj = + yijk * xkl - yjkl * xki
               daxdzj = + zijk * xkl - zjkl * xki

               daydxj = + xijk * ykl - xjkl * yki
               daydyj = - zijk * zkl + xjkl * xki &
     &                  + zjkl * zki - xijk * xkl
               daydzj = + zijk * ykl - zjkl * yki

               dazdxj = + xijk * zkl - xjkl * zki
               dazdyj = + yijk * zkl - yjkl * zki
               dazdzj = - xijk * xkl + yjkl * yki &
     &                  + xjkl * xki - yijk * ykl

!               daxdxk = - yjkl * yij + zijk * zlj
!                        + yijk * ylj - zjkl * zij
!               daxdyk = + yjkl * xij - yijk * xlj
!               daxdzk = + zjkl * xij - zijk * xlj
!
!               daydxk = + xjkl * yij - xijk * ylj
!               daydyk = - zjkl * zij + xijk * xlj
!                        + zijk * zlj - xjkl * xij
!               daydzk = + zjkl * yij - zijk * ylj
!
!               dazdxk = + xjkl * zij - xijk * zlj
!               dazdyk = + yjkl * zij - yijk * zlj
!               dazdzk = - xjkl * xij + yijk * ylj
!                        + xijk * xlj - yjkl * yij

!              /*   derivatives of a with respect to atom l   */
               daxdxl = + zijk * zkj + ykj * yijk
               daxdyl = - yijk * xkj
               daxdzl = - zijk * xkj

               daydxl = - xijk * ykj
               daydyl = + xijk * xkj + zkj * zijk
               daydzl = - zijk * ykj

               dazdxl = - xijk * zkj
               dazdyl = - yijk * zkj
               dazdzl = + yijk * ykj + xkj * xijk

!              /*   derivatives of |a|:  (a/|a|) . da/dr   */
               dadxi = uax*daxdxi + uay*daydxi + uaz*dazdxi
               dadyi = uax*daxdyi + uay*daydyi + uaz*dazdyi
               dadzi = uax*daxdzi + uay*daydzi + uaz*dazdzi

               dadxj = uax*daxdxj + uay*daydxj + uaz*dazdxj
               dadyj = uax*daxdyj + uay*daydyj + uaz*dazdyj
               dadzj = uax*daxdzj + uay*daydzj + uaz*dazdzj

!               dadxk = ax/a1*daxdxk + ay/a1*daydxk + az/a1*dazdxk
!               dadyk = ax/a1*daxdyk + ay/a1*daydyk + az/a1*dazdyk
!               dadzk = ax/a1*daxdzk + ay/a1*daydzk + az/a1*dazdzk

               dadxl = uax*daxdxl + uay*daydxl + uaz*dazdxl
               dadyl = uax*daxdyl + uay*daydyl + uaz*dazdyl
               dadzl = uax*daxdzl + uay*daydzl + uaz*dazdzl

!              /*   cos_phi is close to +-1 in this branch   */
               f1 = - sign_phi/cos_phi * (180.d0/pi) * (180.d0/pi) &
     &              * fc_improper(n) * dphi

               fxi = + f1 * ( dadxi / rijkl2 &
     &                + sin_phi * ( + yijk*zkj - zijk*ykj ) * rijk2inv )

               fyi = + f1 * ( dadyi / rijkl2 &
     &                + sin_phi * ( + zijk*xkj - xijk*zkj ) * rijk2inv )

               fzi = + f1 * ( dadzi / rijkl2 &
     &                + sin_phi * ( + xijk*ykj - yijk*xkj ) * rijk2inv )

               fxj = + f1 * ( dadxj / rijkl2 &
     &                + sin_phi * ( - yijk*zki + zijk*yki ) * rijk2inv &
     &                - sin_phi * ( + yjkl*zkl - zjkl*ykl ) * rjkl2inv )

               fyj = + f1 * ( dadyj / rijkl2 &
     &                + sin_phi * ( - zijk*xki + xijk*zki ) * rijk2inv &
     &                - sin_phi * ( + zjkl*xkl - xjkl*zkl ) * rjkl2inv )

               fzj = + f1 * ( dadzj / rijkl2 &
     &                + sin_phi * ( - xijk*yki + yijk*xki ) * rijk2inv &
     &                - sin_phi * ( + xjkl*ykl - yjkl*xkl ) * rjkl2inv )

!              fxk = - f1 * ( dadxk / rijkl2
!     &                - sin_phi * ( - yjkl*zlj + zjkl*ylj ) * rjkl2inv
!     &                + sin_phi * ( + yijk*zij - zijk*yij ) * rijk2inv )
!
!              fyk = - f1 * ( dadyk / rijkl2
!     &                - sin_phi * ( - zjkl*xlj + xjkl*zlj ) * rjkl2inv
!     &                + sin_phi * ( + zijk*xij - xijk*zij ) * rijk2inv )
!
!              fzk = - f1 * ( dadzk / rijkl2
!     &                - sin_phi * ( - xjkl*ylj + yjkl*xlj ) * rjkl2inv
!     &                + sin_phi * ( + xijk*yij - yijk*xij ) * rijk2inv )

               fxl = + f1 * ( dadxl / rijkl2 &
     &                + sin_phi * ( + yjkl*zkj - zjkl*ykj ) * rjkl2inv )

               fyl = + f1 * ( dadyl / rijkl2 &
     &                + sin_phi * ( + zjkl*xkj - xjkl*zkj ) * rjkl2inv )

               fzl = + f1 * ( dadzl / rijkl2 &
     &                + sin_phi * ( + xjkl*ykj - yjkl*xkj ) * rjkl2inv )

!              /*   atom k: the four forces sum to zero   */
               fxk = - fxi - fxj - fxl
               fyk = - fyi - fyj - fyl
               fzk = - fzi - fzj - fzl

!-----------------------------------------------------------------------

            end if

!-----------------------------------------------------------------------
!           /*   accumulate forces                                    */
!-----------------------------------------------------------------------

            fx(i,m) = fx(i,m) + fxi
            fx(j,m) = fx(j,m) + fxj
            fx(k,m) = fx(k,m) + fxk
            fx(l,m) = fx(l,m) + fxl

            fy(i,m) = fy(i,m) + fyi
            fy(j,m) = fy(j,m) + fyj
            fy(k,m) = fy(k,m) + fyk
            fy(l,m) = fy(l,m) + fyl

            fz(i,m) = fz(i,m) + fzi
            fz(j,m) = fz(j,m) + fzj
            fz(k,m) = fz(k,m) + fzk
            fz(l,m) = fz(l,m) + fzl

!-----------------------------------------------------------------------
!           /*   virial, with positions taken relative to atom j      */
!-----------------------------------------------------------------------

            vir_bead(1,1,m) = vir_bead(1,1,m) +fxi*xij +fxk*xkj +fxl*xlj
            vir_bead(1,2,m) = vir_bead(1,2,m) +fxi*yij +fxk*ykj +fxl*ylj
            vir_bead(1,3,m) = vir_bead(1,3,m) +fxi*zij +fxk*zkj +fxl*zlj
            vir_bead(2,1,m) = vir_bead(2,1,m) +fyi*xij +fyk*xkj +fyl*xlj
            vir_bead(2,2,m) = vir_bead(2,2,m) +fyi*yij +fyk*ykj +fyl*ylj
            vir_bead(2,3,m) = vir_bead(2,3,m) +fyi*zij +fyk*zkj +fyl*zlj
            vir_bead(3,1,m) = vir_bead(3,1,m) +fzi*xij +fzk*xkj +fzl*xlj
            vir_bead(3,2,m) = vir_bead(3,2,m) +fzi*yij +fzk*ykj +fzl*ylj
            vir_bead(3,3,m) = vir_bead(3,3,m) +fzi*zij +fzk*zkj +fzl*zlj

         end do

      end do

      return
      end





!***********************************************************************
      subroutine force_mm_lj_XMPI
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, boxinv, box, iboundary, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   rout_lj, rin_lj, eps_lj, sig_lj, bigbox, bigboxinv, &
     &   i_lj, j_lj, nlj, nbox_lj

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, l, m, jx, jy, jz, j2

      real(8) :: rout_lj2, xij, yij, zij, rij2, rij, rinv, eps, sig, &
     &           sr, sr2, sr6, sr12, u6, u12, uij, duij, fxi, fyi, fzi, &
     &           swf, dswf, aij, bij, cij, absa, absb, absc

!-----------------------------------------------------------------------
!     /*   initialize                                                 */
!-----------------------------------------------------------------------

      if ( nlj .eq. 0 ) return

      rout_lj2 = rout_lj*rout_lj

!-----------------------------------------------------------------------
!     /*   extension of simulation box in real space sum              */
!-----------------------------------------------------------------------

      if ( ( iboundary .eq. 1 ) .or. ( iboundary .eq. 2 ) ) then

         absa = sqrt ( boxinv(1,1)*boxinv(1,1) &
     &               + boxinv(1,2)*boxinv(1,2) &
     &               + boxinv(1,3)*boxinv(1,3) )
         absb = sqrt ( boxinv(2,1)*boxinv(2,1) &
     &               + boxinv(2,2)*boxinv(2,2) &
     &               + boxinv(2,3)*boxinv(2,3) )
         absc = sqrt ( boxinv(3,1)*boxinv(3,1) &
     &               + boxinv(3,2)*boxinv(3,2) &
     &               + boxinv(3,3)*boxinv(3,3) )

         nbox_lj(1) = int(2.d0*rout_lj*absa) + 1
         nbox_lj(2) = int(2.d0*rout_lj*absb) + 1
         nbox_lj(3) = int(2.d0*rout_lj*absc) + 1

      end if

!-----------------------------------------------------------------------
!     /*   main loop : free boundary                                  */
!-----------------------------------------------------------------------

      if ( iboundary .eq. 0 ) then

         do m = jstart_bead, jend_bead

         do l = 1, nlj

            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_lj(l)
            j = j_lj(l)

            if ( i .eq. j ) cycle

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            rij2 = xij*xij + yij*yij + zij*zij

            if ( rij2 .gt. rout_lj2 ) cycle

            rij     = sqrt(rij2)

            rinv    = 1.d0/rij

            eps     = eps_lj(l)
            sig     = sig_lj(l)

            sr      = sig*rinv
            sr2     = sr*sr
            sr6     = sr2*sr2*sr2
            sr12    = sr6*sr6

            u6      = - 4.d0*eps*sr6
            u12     = + 4.d0*eps*sr12

!           /*   switching function   */
            call getswf( rij, rin_lj, rout_lj, swf, dswf )

!           /*   bare potential   */
            uij     = + u6 + u12

!           /*   bare potential gradient   */
            duij    = ( - 6.d0*u6*rinv - 12.d0*u12*rinv )*swf

!           /*   corrected potential   */
            pot(m)  = pot(m) + uij*swf

!           /*   corrected forces   */

            fxi = - uij*dswf*xij*rinv - duij*xij*rinv
            fyi = - uij*dswf*yij*rinv - duij*yij*rinv
            fzi = - uij*dswf*zij*rinv - duij*zij*rinv

            fx(i,m) = fx(i,m) + fxi
            fy(i,m) = fy(i,m) + fyi
            fz(i,m) = fz(i,m) + fzi

            fx(j,m) = fx(j,m) - fxi
            fy(j,m) = fy(j,m) - fyi
            fz(j,m) = fz(j,m) - fzi

            vir_bead(1,1,m) = vir_bead(1,1,m) + fxi*xij
            vir_bead(1,2,m) = vir_bead(1,2,m) + fxi*yij
            vir_bead(1,3,m) = vir_bead(1,3,m) + fxi*zij
            vir_bead(2,1,m) = vir_bead(2,1,m) + fyi*xij
            vir_bead(2,2,m) = vir_bead(2,2,m) + fyi*yij
            vir_bead(2,3,m) = vir_bead(2,3,m) + fyi*zij
            vir_bead(3,1,m) = vir_bead(3,1,m) + fzi*xij
            vir_bead(3,2,m) = vir_bead(3,2,m) + fzi*yij
            vir_bead(3,3,m) = vir_bead(3,3,m) + fzi*zij

         end do

         end do

!-----------------------------------------------------------------------
!     /*   main loop : periodic boundary with minimum image           */
!-----------------------------------------------------------------------

      else if ( nbox_lj(1)*nbox_lj(2)*nbox_lj(3) .eq. 1 ) then

         do m = jstart_bead, jend_bead

         do l = 1, nlj

            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_lj(l)
            j = j_lj(l)

            if ( i .eq. j ) cycle

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            rij2 = xij*xij + yij*yij + zij*zij

            if ( rij2 .gt. rout_lj2 ) cycle

            rij     = sqrt(rij2)

            rinv    = 1.d0/rij

            eps     = eps_lj(l)
            sig     = sig_lj(l)

            sr      = sig*rinv
            sr2     = sr*sr
            sr6     = sr2*sr2*sr2
            sr12    = sr6*sr6

            u6      = - 4.d0*eps*sr6
            u12     = + 4.d0*eps*sr12

!           /*   switching function   */
            call getswf( rij, rin_lj, rout_lj, swf, dswf )

!           /*   bare potential   */
            uij     = + u6 + u12

!           /*   bare potential gradient   */
            duij    = ( - 6.d0*u6*rinv - 12.d0*u12*rinv )*swf

!           /*   corrected potential   */
            pot(m)  = pot(m) + uij*swf

!           /*   corrected forces   */

            fxi = - uij*dswf*xij*rinv - duij*xij*rinv
            fyi = - uij*dswf*yij*rinv - duij*yij*rinv
            fzi = - uij*dswf*zij*rinv - duij*zij*rinv

            fx(i,m) = fx(i,m) + fxi
            fy(i,m) = fy(i,m) + fyi
            fz(i,m) = fz(i,m) + fzi

            fx(j,m) = fx(j,m) - fxi
            fy(j,m) = fy(j,m) - fyi
            fz(j,m) = fz(j,m) - fzi

            vir_bead(1,1,m) = vir_bead(1,1,m) + fxi*xij
            vir_bead(1,2,m) = vir_bead(1,2,m) + fxi*yij
            vir_bead(1,3,m) = vir_bead(1,3,m) + fxi*zij
            vir_bead(2,1,m) = vir_bead(2,1,m) + fyi*xij
            vir_bead(2,2,m) = vir_bead(2,2,m) + fyi*yij
            vir_bead(2,3,m) = vir_bead(2,3,m) + fyi*zij
            vir_bead(3,1,m) = vir_bead(3,1,m) + fzi*xij
            vir_bead(3,2,m) = vir_bead(3,2,m) + fzi*yij
            vir_bead(3,3,m) = vir_bead(3,3,m) + fzi*zij

         end do

         end do

!-----------------------------------------------------------------------
!     /*   main loop : periodic boundary                              */
!-----------------------------------------------------------------------

      else

         bigbox(:,1) = dble(nbox_lj(1))*box(:,1)
         bigbox(:,2) = dble(nbox_lj(2))*box(:,2)
         bigbox(:,3) = dble(nbox_lj(3))*box(:,3)

         call inv3 ( bigbox, bigboxinv )

         do m = jstart_bead, jend_bead

         do l = 1, nlj

            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_lj(l)
            j = j_lj(l)

            do jx = 0, nbox_lj(1)-1
            do jy = 0, nbox_lj(2)-1
            do jz = 0, nbox_lj(3)-1

               j2 = jx*jx + jy*jy + jz*jz

               if ( ( j2 .eq. 0 ) .and. ( i .eq. j ) ) cycle

               xij = x(i,m) - x(j,m)
               yij = y(i,m) - y(j,m)
               zij = z(i,m) - z(j,m)

               xij = xij - box(1,1)*jx - box(1,2)*jy - box(1,3)*jz
               yij = yij - box(2,1)*jx - box(2,2)*jy - box(2,3)*jz
               zij = zij - box(3,1)*jx - box(3,2)*jy - box(3,3)*jz

               aij = bigboxinv(1,1)*xij + bigboxinv(1,2)*yij &
     &             + bigboxinv(1,3)*zij
               bij = bigboxinv(2,1)*xij + bigboxinv(2,2)*yij &
     &             + bigboxinv(2,3)*zij
               cij = bigboxinv(3,1)*xij + bigboxinv(3,2)*yij &
     &             + bigboxinv(3,3)*zij

               aij = aij - nint(aij)
               bij = bij - nint(bij)
               cij = cij - nint(cij)

               xij = bigbox(1,1)*aij + bigbox(1,2)*bij + bigbox(1,3)*cij
               yij = bigbox(2,1)*aij + bigbox(2,2)*bij + bigbox(2,3)*cij
               zij = bigbox(3,1)*aij + bigbox(3,2)*bij + bigbox(3,3)*cij

               rij2 = xij*xij + yij*yij + zij*zij

               if ( rij2 .gt. rout_lj2 ) cycle

               rij     = sqrt(rij2)

               rinv    = 1.d0/rij

               eps     = eps_lj(l)
               sig     = sig_lj(l)

               sr      = sig*rinv
               sr2     = sr*sr
               sr6     = sr2*sr2*sr2
               sr12    = sr6*sr6

               u6      = - 4.d0*eps*sr6
               u12     = + 4.d0*eps*sr12

!              /*   switching function   */
               call getswf( rij, rin_lj, rout_lj, swf, dswf )

!              /*   bare potential   */
               uij     = + u6 + u12

!              /*   bare potential gradient   */
               duij    = ( - 6.d0*u6*rinv - 12.d0*u12*rinv )*swf

!              /*   corrected potential   */
               pot(m)  = pot(m) + uij*swf*0.5d0

!              /*   corrected forces   */

               fxi = - uij*dswf*xij*rinv - duij*xij*rinv
               fyi = - uij*dswf*yij*rinv - duij*yij*rinv
               fzi = - uij*dswf*zij*rinv - duij*zij*rinv

               fx(i,m) = fx(i,m) + 0.5d0*fxi
               fy(i,m) = fy(i,m) + 0.5d0*fyi
               fz(i,m) = fz(i,m) + 0.5d0*fzi

               fx(j,m) = fx(j,m) - 0.5d0*fxi
               fy(j,m) = fy(j,m) - 0.5d0*fyi
               fz(j,m) = fz(j,m) - 0.5d0*fzi

               vir_bead(1,1,m) = vir_bead(1,1,m) + 0.5d0*fxi*xij
               vir_bead(1,2,m) = vir_bead(1,2,m) + 0.5d0*fxi*yij
               vir_bead(1,3,m) = vir_bead(1,3,m) + 0.5d0*fxi*zij
               vir_bead(2,1,m) = vir_bead(2,1,m) + 0.5d0*fyi*xij
               vir_bead(2,2,m) = vir_bead(2,2,m) + 0.5d0*fyi*yij
               vir_bead(2,3,m) = vir_bead(2,3,m) + 0.5d0*fyi*zij
               vir_bead(3,1,m) = vir_bead(3,1,m) + 0.5d0*fzi*xij
               vir_bead(3,2,m) = vir_bead(3,2,m) + 0.5d0*fzi*yij
               vir_bead(3,3,m) = vir_bead(3,3,m) + 0.5d0*fzi*zij

            end do
            end do
            end do

            if ( i .eq. j ) cycle

            i = j_lj(l)
            j = i_lj(l)

            do jx = 0, nbox_lj(1)-1
            do jy = 0, nbox_lj(2)-1
            do jz = 0, nbox_lj(3)-1

               j2 = jx*jx + jy*jy + jz*jz

               if ( ( j2 .eq. 0 ) .and. ( i .eq. j ) ) cycle

               xij = x(i,m) - x(j,m)
               yij = y(i,m) - y(j,m)
               zij = z(i,m) - z(j,m)

               xij = xij - box(1,1)*jx - box(1,2)*jy - box(1,3)*jz
               yij = yij - box(2,1)*jx - box(2,2)*jy - box(2,3)*jz
               zij = zij - box(3,1)*jx - box(3,2)*jy - box(3,3)*jz

               aij = bigboxinv(1,1)*xij + bigboxinv(1,2)*yij &
     &             + bigboxinv(1,3)*zij
               bij = bigboxinv(2,1)*xij + bigboxinv(2,2)*yij &
     &             + bigboxinv(2,3)*zij
               cij = bigboxinv(3,1)*xij + bigboxinv(3,2)*yij &
     &             + bigboxinv(3,3)*zij

               aij = aij - nint(aij)
               bij = bij - nint(bij)
               cij = cij - nint(cij)

               xij = bigbox(1,1)*aij + bigbox(1,2)*bij + bigbox(1,3)*cij
               yij = bigbox(2,1)*aij + bigbox(2,2)*bij + bigbox(2,3)*cij
               zij = bigbox(3,1)*aij + bigbox(3,2)*bij + bigbox(3,3)*cij

               rij2 = xij*xij + yij*yij + zij*zij

               if ( rij2 .gt. rout_lj2 ) cycle

               rij     = sqrt(rij2)

               rinv    = 1.d0/rij

               eps     = eps_lj(l)
               sig     = sig_lj(l)

               sr      = sig*rinv
               sr2     = sr*sr
               sr6     = sr2*sr2*sr2
               sr12    = sr6*sr6

               u6      = - 4.d0*eps*sr6
               u12     = + 4.d0*eps*sr12

!              /*   switching function   */
               call getswf( rij, rin_lj, rout_lj, swf, dswf )

!              /*   bare potential   */
               uij     = + u6 + u12

!              /*   bare potential gradient   */
               duij    = ( - 6.d0*u6*rinv - 12.d0*u12*rinv )*swf

!              /*   corrected potential   */
               pot(m)  = pot(m) + uij*swf*0.5d0

!              /*   corrected forces   */

               fxi = - uij*dswf*xij*rinv - duij*xij*rinv
               fyi = - uij*dswf*yij*rinv - duij*yij*rinv
               fzi = - uij*dswf*zij*rinv - duij*zij*rinv

               fx(i,m) = fx(i,m) + 0.5d0*fxi
               fy(i,m) = fy(i,m) + 0.5d0*fyi
               fz(i,m) = fz(i,m) + 0.5d0*fzi

               fx(j,m) = fx(j,m) - 0.5d0*fxi
               fy(j,m) = fy(j,m) - 0.5d0*fyi
               fz(j,m) = fz(j,m) - 0.5d0*fzi

               vir_bead(1,1,m) = vir_bead(1,1,m) + 0.5d0*fxi*xij
               vir_bead(1,2,m) = vir_bead(1,2,m) + 0.5d0*fxi*yij
               vir_bead(1,3,m) = vir_bead(1,3,m) + 0.5d0*fxi*zij
               vir_bead(2,1,m) = vir_bead(2,1,m) + 0.5d0*fyi*xij
               vir_bead(2,2,m) = vir_bead(2,2,m) + 0.5d0*fyi*yij
               vir_bead(2,3,m) = vir_bead(2,3,m) + 0.5d0*fyi*zij
               vir_bead(3,1,m) = vir_bead(3,1,m) + 0.5d0*fzi*xij
               vir_bead(3,2,m) = vir_bead(3,2,m) + 0.5d0*fzi*yij
               vir_bead(3,3,m) = vir_bead(3,3,m) + 0.5d0*fzi*zij

            end do
            end do
            end do

         end do

         end do

      end if

      return
      end





!***********************************************************************
      subroutine force_mm_buck_XMPI
!***********************************************************************
!
!     Buckingham pair potential
!
!        u(r) = a*exp(-b*r) - c/r**6
!
!     multiplied by the switching function returned by getswf.
!     Energy, forces and virial are accumulated into pot, fx, fy,
!     fz and vir_bead for beads jstart_bead..jend_bead.  Pair l is
!     handled by the rank with myrank_sub = mod(l-1,nprocs_sub).
!
!     Side effects on module variables:
!       - for iboundary = 1 or 2, nbox_buck is reset from rout_buck
!         and boxinv;
!       - when iboundary is not 0 and the product of nbox_buck is
!         not 1, bigbox and bigboxinv are overwritten.
!
!     The pair kernel and the image sum live in the internal
!     procedures buck_pair and buck_images below.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, boxinv, box, iboundary, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   rout_buck, rin_buck, a_buck, b_buck, c_buck, bigbox, bigboxinv, &
     &   i_buck, j_buck, nbuck, nbox_buck

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, k, l, m

      real(8) :: rout_buck2, xij, yij, zij, absk

      logical :: single_image

!-----------------------------------------------------------------------
!     /*   initialize                                                 */
!-----------------------------------------------------------------------

      if ( nbuck .eq. 0 ) return

      rout_buck2 = rout_buck*rout_buck

!-----------------------------------------------------------------------
!     /*   extension of simulation box in real space sum              */
!-----------------------------------------------------------------------

      if ( ( iboundary .eq. 1 ) .or. ( iboundary .eq. 2 ) ) then

         do k = 1, 3

!           /*   length of the k-th row of boxinv   */
            absk = sqrt ( boxinv(k,1)*boxinv(k,1) &
     &                  + boxinv(k,2)*boxinv(k,2) &
     &                  + boxinv(k,3)*boxinv(k,3) )

            nbox_buck(k) = int(2.d0*rout_buck*absk) + 1

         end do

      end if

!-----------------------------------------------------------------------
!     /*   choose the summation scheme                                */
!-----------------------------------------------------------------------

!     /*   nbox_buck is consulted only when iboundary is not 0   */

      if ( iboundary .eq. 0 ) then
         single_image = .true.
      else
         single_image = &
     &      ( nbox_buck(1)*nbox_buck(2)*nbox_buck(3) .eq. 1 )
      end if

!-----------------------------------------------------------------------
!     /*   main loop : free boundary, or periodic boundary with       */
!     /*   minimum image (each listed pair is evaluated once)         */
!-----------------------------------------------------------------------

      if ( single_image ) then

         do m = jstart_bead, jend_bead

         do l = 1, nbuck

            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_buck(l)
            j = j_buck(l)

            if ( i .eq. j ) cycle

            xij = x(i,m) - x(j,m)
            yij = y(i,m) - y(j,m)
            zij = z(i,m) - z(j,m)

            call pbc_atom_MPI ( xij, yij, zij )

            call buck_pair ( i, j, l, m, xij, yij, zij, 1.d0 )

         end do

         end do

!-----------------------------------------------------------------------
!     /*   main loop : periodic boundary                              */
!-----------------------------------------------------------------------

      else

         bigbox(:,1) = dble(nbox_buck(1))*box(:,1)
         bigbox(:,2) = dble(nbox_buck(2))*box(:,2)
         bigbox(:,3) = dble(nbox_buck(3))*box(:,3)

         call inv3 ( bigbox, bigboxinv )

         do m = jstart_bead, jend_bead

         do l = 1, nbuck

            if ( mod( l-1, nprocs_sub ) .ne. myrank_sub ) cycle

            i = i_buck(l)
            j = j_buck(l)

!           /*   images seen from i, each with weight one half   */
            call buck_images ( i, j, l, m )

!           /*   a self pair has no distinct swapped partner   */
            if ( i .eq. j ) cycle

!           /*   images seen from j, each with weight one half   */
            call buck_images ( j, i, l, m )

         end do

         end do

      end if

      return

      contains

!-----------------------------------------------------------------------
!     /*   sum of pair (ia,ja) over the nbox_buck(1)*nbox_buck(2)     */
!     /*   *nbox_buck(3) box shifts; ip = pair index, ib = bead       */
!-----------------------------------------------------------------------

      subroutine buck_images ( ia, ja, ip, ib )

      integer, intent(in) :: ia, ja, ip, ib

      integer :: jx, jy, jz

      real(8) :: dx, dy, dz, sa, sb, sc

      do jx = 0, nbox_buck(1)-1
      do jy = 0, nbox_buck(2)-1
      do jz = 0, nbox_buck(3)-1

!        /*   skip the unshifted self term (shifts are >= 0)   */
         if ( ( ia .eq. ja ) .and. ( jx+jy+jz .eq. 0 ) ) cycle

         dx = x(ia,ib) - x(ja,ib)
         dy = y(ia,ib) - y(ja,ib)
         dz = z(ia,ib) - z(ja,ib)

!        /*   shift by jx, jy, jz box vectors   */
         dx = dx - box(1,1)*jx - box(1,2)*jy - box(1,3)*jz
         dy = dy - box(2,1)*jx - box(2,2)*jy - box(2,3)*jz
         dz = dz - box(3,1)*jx - box(3,2)*jy - box(3,3)*jz

!        /*   fractional coordinates in bigbox   */
         sa = bigboxinv(1,1)*dx + bigboxinv(1,2)*dy &
     &      + bigboxinv(1,3)*dz
         sb = bigboxinv(2,1)*dx + bigboxinv(2,2)*dy &
     &      + bigboxinv(2,3)*dz
         sc = bigboxinv(3,1)*dx + bigboxinv(3,2)*dy &
     &      + bigboxinv(3,3)*dz

!        /*   wrap to the nearest image of bigbox   */
         sa = sa - nint(sa)
         sb = sb - nint(sb)
         sc = sc - nint(sc)

         dx = bigbox(1,1)*sa + bigbox(1,2)*sb + bigbox(1,3)*sc
         dy = bigbox(2,1)*sa + bigbox(2,2)*sb + bigbox(2,3)*sc
         dz = bigbox(3,1)*sa + bigbox(3,2)*sb + bigbox(3,3)*sc

         call buck_pair ( ia, ja, ip, ib, dx, dy, dz, 0.5d0 )

      end do
      end do
      end do

      return
      end subroutine buck_images

!-----------------------------------------------------------------------
!     /*   one pair term: separation (dx,dy,dz) from ja to ia,        */
!     /*   parameters of pair ip, bead ib, scaled by weight.          */
!     /*   nothing is added beyond the cutoff rout_buck.              */
!-----------------------------------------------------------------------

      subroutine buck_pair ( ia, ja, ip, ib, dx, dy, dz, weight )

      integer, intent(in) :: ia, ja, ip, ib

      real(8), intent(in) :: dx, dy, dz, weight

      real(8) :: r2, r, rinv, a, b, c, u6, ue, uij, duij, &
     &           swf, dswf, fxi, fyi, fzi, gx, gy, gz

      r2 = dx*dx + dy*dy + dz*dz

      if ( r2 .gt. rout_buck2 ) return

      r    = sqrt(r2)

      rinv = 1.d0/r

      a    = a_buck(ip)
      b    = b_buck(ip)
      c    = c_buck(ip)

      u6   = - c*rinv**6
      ue   = + a*exp(-b*r)

!     /*   switching function   */
      call getswf( r, rin_buck, rout_buck, swf, dswf )

!     /*   bare potential   */
      uij  = + u6 + ue

!     /*   bare potential gradient, times switching function   */
      duij = ( - 6.d0*u6*rinv - b*ue ) * swf

!     /*   corrected potential   */
      pot(ib) = pot(ib) + uij * swf * weight

!     /*   corrected forces: - d( uij*swf )/dr along the bond   */
      fxi = - uij*dswf*dx*rinv - duij*dx*rinv
      fyi = - uij*dswf*dy*rinv - duij*dy*rinv
      fzi = - uij*dswf*dz*rinv - duij*dz*rinv

      gx = weight*fxi
      gy = weight*fyi
      gz = weight*fzi

      fx(ia,ib) = fx(ia,ib) + gx
      fy(ia,ib) = fy(ia,ib) + gy
      fz(ia,ib) = fz(ia,ib) + gz

      fx(ja,ib) = fx(ja,ib) - gx
      fy(ja,ib) = fy(ja,ib) - gy
      fz(ja,ib) = fz(ja,ib) - gz

      vir_bead(1,1,ib) = vir_bead(1,1,ib) + gx*dx
      vir_bead(1,2,ib) = vir_bead(1,2,ib) + gx*dy
      vir_bead(1,3,ib) = vir_bead(1,3,ib) + gx*dz
      vir_bead(2,1,ib) = vir_bead(2,1,ib) + gy*dx
      vir_bead(2,2,ib) = vir_bead(2,2,ib) + gy*dy
      vir_bead(2,3,ib) = vir_bead(2,3,ib) + gy*dz
      vir_bead(3,1,ib) = vir_bead(3,1,ib) + gz*dx
      vir_bead(3,2,ib) = vir_bead(3,2,ib) + gz*dy
      vir_bead(3,3,ib) = vir_bead(3,3,ib) + gz*dz

      return
      end subroutine buck_pair

      end subroutine force_mm_buck_XMPI





!***********************************************************************
      subroutine force_mm_morse_XMPI
!***********************************************************************
!
!     Morse pair terms.  For each listed pair k with distance r,
!
!        v(r) = depth * ( ( 1 - exp(-alpha*(r-eq)) )**2 - 1 )
!
!     is added to pot, and the corresponding forces and virial to
!     fx, fy, fz and vir_bead, for beads jstart_bead..jend_bead.
!     Pair k is handled by the rank with myrank_sub equal to
!     mod(k-1,nprocs_sub).  No distance cutoff is applied.
!
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, pot, fx, fy, fz, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   depth_morse, alpha_morse, eq_morse, i_morse, j_morse, nmorse

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   atoms, pair index, bead index, cartesian indices   */
      integer :: ia, ja, ip, ib, mu, nu

!     /*   d = separation vector, f = force on atom ia   */
      real(8) :: de, al, r, ex, om, dvdr, pref, d(3), f(3)

!-----------------------------------------------------------------------
!     /*   nothing to do without morse pairs                          */
!-----------------------------------------------------------------------

      if ( nmorse .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do ib = jstart_bead, jend_bead

      do ip = 1, nmorse

!        /*   pairs are dealt out to the ranks in turn   */
         if ( mod( ip-1, nprocs_sub ) .ne. myrank_sub ) cycle

         ia = i_morse(ip)
         ja = j_morse(ip)

         if ( ia .eq. ja ) cycle

         de = depth_morse(ip)
         al = alpha_morse(ip)

!        /*   separation vector from ja to ia   */
         d(1) = x(ia,ib) - x(ja,ib)
         d(2) = y(ia,ib) - y(ja,ib)
         d(3) = z(ia,ib) - z(ja,ib)

         call pbc_atom_MPI ( d(1), d(2), d(3) )

         r  = sqrt( d(1)*d(1) + d(2)*d(2) + d(3)*d(3) )

!        /*   ex = exp(-alpha*(r-eq)),  om = 1 - ex   */
         ex = exp( - al*( r - eq_morse(ip) ) )
         om = 1.d0 - ex

!        /*   potential   */
         pot(ib) = pot(ib) + de*( om*om - 1.d0 )

!        /*   dv/dr, and the factor turning d into the force   */
         dvdr = 2.d0*de*om*al*ex
         pref = - dvdr/r

         f(1) = pref*d(1)
         f(2) = pref*d(2)
         f(3) = pref*d(3)

!        /*   equal and opposite forces   */
         fx(ia,ib) = fx(ia,ib) + f(1)
         fy(ia,ib) = fy(ia,ib) + f(2)
         fz(ia,ib) = fz(ia,ib) + f(3)

         fx(ja,ib) = fx(ja,ib) - f(1)
         fy(ja,ib) = fy(ja,ib) - f(2)
         fz(ja,ib) = fz(ja,ib) - f(3)

!        /*   virial: outer product of force and separation   */
         do nu = 1, 3
         do mu = 1, 3
            vir_bead(mu,nu,ib) = vir_bead(mu,nu,ib) + f(mu)*d(nu)
         end do
         end do

      end do

      end do

      return
      end





!***********************************************************************
      subroutine force_ewald_dipole_XMPI
!***********************************************************************
!
!     Dipole term built from the unfolded charge positions:
!
!        pot(j) <- pot(j) + factor * |D|**2
!        factor = 2*pi / ( 3*volume )
!        D      = sum over charges k of q(i_q(k)) * r(i_q(k))
!
!     with matching contributions to fx, fy, fz and vir_bead for
!     beads jstart_bead..jend_bead.  Only the rank with
!     myrank_sub = 0 adds this term; all other ranks return
!     immediately.
!
!-----------------------------------------------------------------------
!     /*   shared variables from PIMD                                 */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, volume, pi, mbox, &
     &   myrank_sub

      use mm_variables, only : i_q, ncharge, q

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

!     /*   reset   */
      implicit none

!     /*   integers   */
      integer :: i, j, k, m1, m2, m3

!     /*   real numbers   */
      real(8) :: dx, dy, dz, d2, factor, factor2, xi, yi, zi

!-----------------------------------------------------------------------
!     /*   main calculation                                           */
!-----------------------------------------------------------------------

!     /*   parallel calculation: only rank 0 adds this term   */
      if ( myrank_sub .ne. 0 ) return

!     /*   constants   */
      factor  = 2.d0 * pi / ( 3.d0 * volume )
      factor2 = 2.d0 * factor

!     /*   loop of beads   */
      do j = jstart_bead, jend_bead

!        /*   initialize   */
         dx = 0.d0
         dy = 0.d0
         dz = 0.d0

!        /*   loop of charges   */
         do k = 1, ncharge

!           /*   atom   */
            i  = i_q(k)

!           /*   coordinates   */
            xi = x(i,j)
            yi = y(i,j)
            zi = z(i,j)

!           /*   box number: taken from bead 1 for every bead   */
            m1 = mbox(1,i,1)   ! mbox(1,i,j)
            m2 = mbox(2,i,1)   ! mbox(2,i,j)
            m3 = mbox(3,i,1)   ! mbox(3,i,j)

!           /*   apply periodic boundary condition   */
            call pbc_unfold_MPI ( xi, yi, zi, m1, m2, m3 )

!           /*   dipole moment   */
            dx = dx + q(i) * xi
            dy = dy + q(i) * yi
            dz = dz + q(i) * zi

!        /*   loop of charges   */
         end do

!        /*   square of dipole moment   */
         d2 = dx*dx + dy*dy + dz*dz

!        /*   potential   */
         pot(j) = pot(j) + factor * d2

!        /*   forces: they depend only on the total dipole, so    */
!        /*   the per-charge unfolding is not repeated here.      */
!        /*   NOTE(review): assumes pbc_unfold_MPI only changes   */
!        /*   its coordinate arguments - confirm.                 */
         do k = 1, ncharge

!           /*   atom   */
            i  = i_q(k)

            fx(i,j) = fx(i,j) - factor2 * q(i) * dx
            fy(i,j) = fy(i,j) - factor2 * q(i) * dy
            fz(i,j) = fz(i,j) - factor2 * q(i) * dz

         end do

!        /*   virial   */
         vir_bead(1,1,j) = vir_bead(1,1,j) &
     &      - factor2 * dx * dx + factor * d2
         vir_bead(1,2,j) = vir_bead(1,2,j) &
     &      - factor2 * dx * dy
         vir_bead(1,3,j) = vir_bead(1,3,j) &
     &      - factor2 * dx * dz
         vir_bead(2,1,j) = vir_bead(2,1,j) &
     &      - factor2 * dy * dx
         vir_bead(2,2,j) = vir_bead(2,2,j) &
     &      - factor2 * dy * dy + factor * d2
         vir_bead(2,3,j) = vir_bead(2,3,j) &
     &      - factor2 * dy * dz
         vir_bead(3,1,j) = vir_bead(3,1,j) &
     &      - factor2 * dz * dx
         vir_bead(3,2,j) = vir_bead(3,2,j) &
     &      - factor2 * dz * dy
         vir_bead(3,3,j) = vir_bead(3,3,j) &
     &      - factor2 * dz * dz + factor * d2

!     /*   loop of beads   */
      end do

      return
      end





!***********************************************************************
      subroutine force_mm_cmap_XMPI
!***********************************************************************

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   pi, x, y, z, fx, fy, fz, pot, vir_bead, &
     &   myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   xgrid_cmap, ygrid_cmap, vgrid_cmap, v2grid_cmap, &
     &   i_cmap, j_cmap, k_cmap, l_cmap, ncmap, mgrid_cmap

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      integer :: i, j, k, l, m, n, i1or2

      real(8) :: xij, yij, zij, xkj, ykj, zkj, xlj, ylj, zlj, xijk, &
     &           yijk, zijk, xjkl, yjkl, zjkl, rijk2, rjkl2, rijkl2, &
     &           rijk2inv, rjkl2inv, rijkl2inv, cos_phi, sign_phi, &
     &           v, phi(2), dvdphi(2), fxi, fyi, fzi, fxj, fyj, fzj, &
     &           fxk, fyk, fzk, fxl, fyl, fzl, factor, sin_phi, f1, &
     &           xki, yki, zki, xkl, ykl, zkl, ax, ay, az, a1, a2, &
     &           daxdxi, daxdyi, daxdzi, daydxi, daydyi, daydzi, &
     &           dazdxi, dazdyi, dazdzi, dadxi, dadyi, dadzi, &
     &           daxdxj, daxdyj, daxdzj, daydxj, daydyj, daydzj, &
     &           dazdxj, dazdyj, dazdzj, dadxj, dadyj, dadzj, &
     &           daxdxl, daxdyl, daxdzl, daydxl, daydyl, daydzl, &
     &           dazdxl, dazdyl, dazdzl, dadxl, dadyl, dadzl, &
     &           phi1, phi2, p1c, p2c, dvdphi1, dvdphi2

      real(8) :: tiny_value = 1.d-4

!-----------------------------------------------------------------------
!     /*   initialize                                                 */
!-----------------------------------------------------------------------

      if ( ncmap .eq. 0 ) return

!-----------------------------------------------------------------------
!     /*   main loop                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         do n = 1, ncmap

            if ( mod( n-1, nprocs_sub ) .ne. myrank_sub ) cycle

            do i1or2 = 1, 2

               i = i_cmap(i1or2,n)
               j = j_cmap(i1or2,n)
               k = k_cmap(i1or2,n)
               l = l_cmap(i1or2,n)

               xij = x(i,m) - x(j,m)
               yij = y(i,m) - y(j,m)
               zij = z(i,m) - z(j,m)

               call pbc_atom_MPI ( xij, yij, zij )

               xkj = x(k,m) - x(j,m)
               ykj = y(k,m) - y(j,m)
               zkj = z(k,m) - z(j,m)

               call pbc_atom_MPI ( xkj, ykj, zkj )

               xlj = x(l,m) - x(j,m)
               ylj = y(l,m) - y(j,m)
               zlj = z(l,m) - z(j,m)

               call pbc_atom_MPI ( xlj, ylj, zlj )

               xijk = yij*zkj - zij*ykj
               yijk = zij*xkj - xij*zkj
               zijk = xij*ykj - yij*xkj

               xjkl = ylj*zkj - zlj*ykj
               yjkl = zlj*xkj - xlj*zkj
               zjkl = xlj*ykj - ylj*xkj

               rijk2  = xijk*xijk + yijk*yijk + zijk*zijk
               rjkl2  = xjkl*xjkl + yjkl*yjkl + zjkl*zjkl

               rijkl2 = sqrt(rijk2*rjkl2)

               if ( abs(rijk2)  .lt. tiny_value ) cycle
               if ( abs(rjkl2)  .lt. tiny_value ) cycle
               if ( abs(rijkl2) .lt. tiny_value ) cycle

               rijk2inv  = 1.d0 / rijk2
               rjkl2inv  = 1.d0 / rjkl2
               rijkl2inv = 1.d0 / rijkl2

               cos_phi = ( xijk*xjkl + yijk*yjkl + zijk*zjkl ) &
     &                 * rijkl2inv

               cos_phi = max( cos_phi, -1.d0 )
               cos_phi = min( cos_phi,  1.d0 )

               phi(i1or2) = acos(cos_phi)

               sign_phi = ( yijk*zjkl - zijk*yjkl ) * xkj &
     &                  + ( zijk*xjkl - xijk*zjkl ) * ykj &
     &                  + ( xijk*yjkl - yijk*xjkl ) * zkj

               sign_phi = sign( 1.d0, sign_phi )

               phi(i1or2) = phi(i1or2) * sign_phi

            end do

            p1c = 0.5d0 * ( xgrid_cmap(1) + xgrid_cmap(mgrid_cmap) )
            p2c = 0.5d0 * ( ygrid_cmap(1) + ygrid_cmap(mgrid_cmap) )

            phi1 = phi(1) * 180.d0 / pi
            phi2 = phi(2) * 180.d0 / pi

            phi1 = phi1 - nint( (phi1-p1c) / 360.d0 ) * 360.d0
            phi2 = phi2 - nint( (phi2-p2c) / 360.d0 ) * 360.d0

            call splin2_cmap( xgrid_cmap(:), ygrid_cmap(:), &
     &                        vgrid_cmap(:,:,n), v2grid_cmap(:,:,n), &
     &                        mgrid_cmap, mgrid_cmap, &
     &                        phi1, phi2, v, dvdphi1, dvdphi2 )

            dvdphi(1) = dvdphi1 * 180.d0 / pi
            dvdphi(2) = dvdphi2 * 180.d0 / pi

            pot(m)  = pot(m) + v

            do i1or2 = 1, 2

               i = i_cmap(i1or2,n)
               j = j_cmap(i1or2,n)
               k = k_cmap(i1or2,n)
               l = l_cmap(i1or2,n)

               xij = x(i,m) - x(j,m)
               yij = y(i,m) - y(j,m)
               zij = z(i,m) - z(j,m)

               call pbc_atom_MPI ( xij, yij, zij )

               xkj = x(k,m) - x(j,m)
               ykj = y(k,m) - y(j,m)
               zkj = z(k,m) - z(j,m)

               call pbc_atom_MPI ( xkj, ykj, zkj )

               xlj = x(l,m) - x(j,m)
               ylj = y(l,m) - y(j,m)
               zlj = z(l,m) - z(j,m)

               call pbc_atom_MPI ( xlj, ylj, zlj )

               xijk = yij*zkj - zij*ykj
               yijk = zij*xkj - xij*zkj
               zijk = xij*ykj - yij*xkj

               xjkl = ylj*zkj - zlj*ykj
               yjkl = zlj*xkj - xlj*zkj
               zjkl = xlj*ykj - ylj*xkj

               rijk2  = xijk*xijk + yijk*yijk + zijk*zijk
               rjkl2  = xjkl*xjkl + yjkl*yjkl + zjkl*zjkl

               rijkl2 = sqrt(rijk2*rjkl2)

               if ( abs(rijk2)  .lt. tiny_value ) cycle
               if ( abs(rjkl2)  .lt. tiny_value ) cycle
               if ( abs(rijkl2) .lt. tiny_value ) cycle

               rijk2inv  = 1.d0 / rijk2
               rjkl2inv  = 1.d0 / rjkl2
               rijkl2inv = 1.d0 / rijkl2

               cos_phi = ( xijk*xjkl + yijk*yjkl + zijk*zjkl ) &
     &                 * rijkl2inv

               cos_phi = max( cos_phi, -1.d0 )
               cos_phi = min( cos_phi,  1.d0 )

               phi(i1or2) = acos(cos_phi)

               sign_phi = ( yijk*zjkl - zijk*yjkl ) * xkj &
     &                  + ( zijk*xjkl - xijk*zjkl ) * ykj &
     &                  + ( xijk*yjkl - yijk*xjkl ) * zkj

               sign_phi = sign( 1.d0, sign_phi )

               phi(i1or2) = phi(i1or2) * sign_phi

               if ( ( abs(phi(i1or2))    .gt. tiny_value ) .and. &
     &              ( abs(phi(i1or2)-pi) .gt. tiny_value ) .and. &
     &              ( abs(phi(i1or2)+pi) .gt. tiny_value ) ) then

                  sin_phi = sin( phi(i1or2) )

                  factor = dvdphi(i1or2) / sin_phi

                  fxi = factor * ( + ( ykj*zjkl - zkj*yjkl ) * rijkl2inv &
     &                    - ( ykj*zijk - zkj*yijk ) * cos_phi*rijk2inv )
                  fyi = factor * ( + ( zkj*xjkl - xkj*zjkl ) * rijkl2inv &
     &                    - ( zkj*xijk - xkj*zijk ) * cos_phi*rijk2inv )
                  fzi = factor * ( + ( xkj*yjkl - ykj*xjkl ) * rijkl2inv &
     &                    - ( xkj*yijk - ykj*xijk ) * cos_phi*rijk2inv )

                  fxl = factor * ( + ( ykj*zijk - zkj*yijk ) * rijkl2inv &
     &                    - ( ykj*zjkl - zkj*yjkl ) * cos_phi*rjkl2inv )
                  fyl = factor * ( + ( zkj*xijk - xkj*zijk ) * rijkl2inv &
     &                    - ( zkj*xjkl - xkj*zjkl ) * cos_phi*rjkl2inv )
                  fzl = factor * ( + ( xkj*yijk - ykj*xijk ) * rijkl2inv &
     &                    - ( xkj*yjkl - ykj*xjkl ) * cos_phi*rjkl2inv )

                  fxk = factor * ( - ( yij*zjkl - zij*yjkl ) * rijkl2inv &
     &                    - ( ylj*zijk - zlj*yijk ) * rijkl2inv &
     &                    + ( yij*zijk - zij*yijk ) * cos_phi*rijk2inv &
     &                    + ( ylj*zjkl - zlj*yjkl ) * cos_phi*rjkl2inv )
                  fyk = factor * ( - ( zij*xjkl - xij*zjkl ) * rijkl2inv &
     &                    - ( zlj*xijk - xlj*zijk ) * rijkl2inv &
     &                    + ( zij*xijk - xij*zijk ) * cos_phi*rijk2inv &
     &                    + ( zlj*xjkl - xlj*zjkl ) * cos_phi*rjkl2inv )
                  fzk = factor * ( - ( xij*yjkl - yij*xjkl ) * rijkl2inv &
     &                    - ( xlj*yijk - ylj*xijk ) * rijkl2inv &
     &                    + ( xij*yijk - yij*xijk ) * cos_phi*rijk2inv &
     &                    + ( xlj*yjkl - ylj*xjkl ) * cos_phi*rjkl2inv )

                  fxj = - ( fxi + fxk + fxl )
                  fyj = - ( fyi + fyk + fyl )
                  fzj = - ( fzi + fzk + fzl )

               else

                  xki = - xij + xkj
                  yki = - yij + ykj
                  zki = - zij + zkj

                  xkl = - xlj + xkj
                  ykl = - ylj + ykj
                  zkl = - zlj + zkj

                  ax = yijk*zjkl - zijk*yjkl
                  ay = zijk*xjkl - xijk*zjkl
                  az = xijk*yjkl - yijk*xjkl

                  a2 = ax*ax + ay*ay + az*az

                  a1 = sqrt( a2 )

                  sin_phi = a1 / rijkl2

                  sin_phi = max( sin_phi, -1.d0 )
                  sin_phi = min( sin_phi,  1.d0 )

                  phi = sign_phi * asin( sin_phi )

                  if ( cos_phi .lt. 0.d0 ) phi = pi - phi

                  daxdxi = - zjkl * zkj - ykj * yjkl
                  daxdyi = + yjkl * xkj
                  daxdzi = + zjkl * xkj

                  daydxi = + xjkl * ykj
                  daydyi = - xjkl * xkj - zkj * zjkl
                  daydzi = + zjkl * ykj

                  dazdxi = + xjkl * zkj
                  dazdyi = + yjkl * zkj
                  dazdzi = - yjkl * ykj - xkj * xjkl

                  daxdxj = - yijk * ykl + zjkl * zki &
     &                     + yjkl * yki - zijk * zkl
                  daxdyj = + yijk * xkl - yjkl * xki
                  daxdzj = + zijk * xkl - zjkl * xki

                  daydxj = + xijk * ykl - xjkl * yki
                  daydyj = - zijk * zkl + xjkl * xki &
     &                     + zjkl * zki - xijk * xkl
                  daydzj = + zijk * ykl - zjkl * yki

                  dazdxj = + xijk * zkl - xjkl * zki
                  dazdyj = + yijk * zkl - yjkl * zki
                  dazdzj = - xijk * xkl + yjkl * yki &
     &                     + xjkl * xki - yijk * ykl

!                  daxdxk = - yjkl * yij + zijk * zlj
!                           + yijk * ylj - zjkl * zij
!                  daxdyk = + yjkl * xij - yijk * xlj
!                  daxdzk = + zjkl * xij - zijk * xlj
!
!                  daydxk = + xjkl * yij - xijk * ylj
!                  daydyk = - zjkl * zij + xijk * xlj
!                           + zijk * zlj - xjkl * xij
!                  daydzk = + zjkl * yij - zijk * ylj
!
!                  dazdxk = + xjkl * zij - xijk * zlj
!                  dazdyk = + yjkl * zij - yijk * zlj
!                  dazdzk = - xjkl * xij + yijk * ylj
!                           + xijk * xlj - yjkl * yij

                  daxdxl = + zijk * zkj + ykj * yijk
                  daxdyl = - yijk * xkj
                  daxdzl = - zijk * xkj

                  daydxl = - xijk * ykj
                  daydyl = + xijk * xkj + zkj * zijk
                  daydzl = - zijk * ykj

                  dazdxl = - xijk * zkj
                  dazdyl = - yijk * zkj
                  dazdzl = + yijk * ykj + xkj * xijk

                  dadxi = ax/a1*daxdxi + ay/a1*daydxi + az/a1*dazdxi
                  dadyi = ax/a1*daxdyi + ay/a1*daydyi + az/a1*dazdyi
                  dadzi = ax/a1*daxdzi + ay/a1*daydzi + az/a1*dazdzi

                  dadxj = ax/a1*daxdxj + ay/a1*daydxj + az/a1*dazdxj
                  dadyj = ax/a1*daxdyj + ay/a1*daydyj + az/a1*dazdyj
                  dadzj = ax/a1*daxdzj + ay/a1*daydzj + az/a1*dazdzj

!                  dadxk = ax/a1*daxdxk + ay/a1*daydxk + az/a1*dazdxk
!                  dadyk = ax/a1*daxdyk + ay/a1*daydyk + az/a1*dazdyk
!                  dadzk = ax/a1*daxdzk + ay/a1*daydzk + az/a1*dazdzk

                  dadxl = ax/a1*daxdxl + ay/a1*daydxl + az/a1*dazdxl
                  dadyl = ax/a1*daxdyl + ay/a1*daydyl + az/a1*dazdyl
                  dadzl = ax/a1*daxdzl + ay/a1*daydzl + az/a1*dazdzl

                  f1 = - sign_phi / cos_phi * dvdphi(i1or2)

                  fxi = + f1 * ( dadxi / rijkl2 &
     &                + sin_phi * ( + yijk*zkj - zijk*ykj ) * rijk2inv )

                  fyi = + f1 * ( dadyi / rijkl2 &
     &                + sin_phi * ( + zijk*xkj - xijk*zkj ) * rijk2inv )

                  fzi = + f1 * ( dadzi / rijkl2 &
     &                + sin_phi * ( + xijk*ykj - yijk*xkj ) * rijk2inv )

                  fxj = + f1 * ( dadxj / rijkl2 &
     &                + sin_phi * ( - yijk*zki + zijk*yki ) * rijk2inv &
     &                - sin_phi * ( + yjkl*zkl - zjkl*ykl ) * rjkl2inv )

                  fyj = + f1 * ( dadyj / rijkl2 &
     &                + sin_phi * ( - zijk*xki + xijk*zki ) * rijk2inv &
     &                - sin_phi * ( + zjkl*xkl - xjkl*zkl ) * rjkl2inv )

                  fzj = + f1 * ( dadzj / rijkl2 &
     &                + sin_phi * ( - xijk*yki + yijk*xki ) * rijk2inv &
     &                - sin_phi * ( + xjkl*ykl - yjkl*xkl ) * rjkl2inv )

!                 fxk = - f1 * ( dadxk / rijkl2
!     &                - sin_phi * ( - yjkl*zlj + zjkl*ylj ) * rjkl2inv
!     &                + sin_phi * ( + yijk*zij - zijk*yij ) * rijk2inv )
!
!                 fyk = - f1 * ( dadyk / rijkl2
!     &                - sin_phi * ( - zjkl*xlj + xjkl*zlj ) * rjkl2inv
!     &                + sin_phi * ( + zijk*xij - xijk*zij ) * rijk2inv )
!
!                 fzk = - f1 * ( dadzk / rijkl2
!     &                - sin_phi * ( - xjkl*ylj + yjkl*xlj ) * rjkl2inv
!     &                + sin_phi * ( + xijk*yij - yijk*xij ) * rijk2inv )

                  fxl = + f1 * ( dadxl / rijkl2 &
     &                + sin_phi * ( + yjkl*zkj - zjkl*ykj ) * rjkl2inv )

                  fyl = + f1 * ( dadyl / rijkl2 &
     &                + sin_phi * ( + zjkl*xkj - xjkl*zkj ) * rjkl2inv )

                  fzl = + f1 * ( dadzl / rijkl2 &
     &                + sin_phi * ( + xjkl*ykj - yjkl*xkj ) * rjkl2inv )

                  fxk = - fxi - fxj - fxl
                  fyk = - fyi - fyj - fyl
                  fzk = - fzi - fzj - fzl

               end if

               fx(i,m) = fx(i,m) + fxi
               fx(j,m) = fx(j,m) + fxj
               fx(k,m) = fx(k,m) + fxk
               fx(l,m) = fx(l,m) + fxl

               fy(i,m) = fy(i,m) + fyi
               fy(j,m) = fy(j,m) + fyj
               fy(k,m) = fy(k,m) + fyk
               fy(l,m) = fy(l,m) + fyl

               fz(i,m) = fz(i,m) + fzi
               fz(j,m) = fz(j,m) + fzj
               fz(k,m) = fz(k,m) + fzk
               fz(l,m) = fz(l,m) + fzl

               vir_bead(1,1,m) = vir_bead(1,1,m)+fxi*xij+fxk*xkj+fxl*xlj
               vir_bead(1,2,m) = vir_bead(1,2,m)+fxi*yij+fxk*ykj+fxl*ylj
               vir_bead(1,3,m) = vir_bead(1,3,m)+fxi*zij+fxk*zkj+fxl*zlj
               vir_bead(2,1,m) = vir_bead(2,1,m)+fyi*xij+fyk*xkj+fyl*xlj
               vir_bead(2,2,m) = vir_bead(2,2,m)+fyi*yij+fyk*ykj+fyl*ylj
               vir_bead(2,3,m) = vir_bead(2,3,m)+fyi*zij+fyk*zkj+fyl*zlj
               vir_bead(3,1,m) = vir_bead(3,1,m)+fzi*xij+fzk*xkj+fzl*xlj
               vir_bead(3,2,m) = vir_bead(3,2,m)+fzi*yij+fzk*ykj+fzl*ylj
               vir_bead(3,3,m) = vir_bead(3,3,m)+fzi*zij+fzk*zkj+fzl*zlj

            end do

         end do

      end do

      return
      end



!***********************************************************************
      subroutine force_mm_ljpair_XMPI
!***********************************************************************
!=======================================================================
!
!     pairwise 12-6 Lennard-Jones energy, forces and virial
!
!     For every bead m in jstart_bead..jend_bead the routine adds to
!     pot(m), fx/fy/fz(:,m) and vir_bead(:,:,m) the contribution of
!     all atom pairs closer than rout_ljpair.  The bare pair energy
!     4*eps*( (sig/r)**12 - (sig/r)**6 ) is multiplied by the value
!     swf returned by getswf (presumably a switching function between
!     rin_ljpair and rout_ljpair, with dswf its radial derivative -
!     this is inferred from how they enter the force expression).
!
!     Work is shared between processes by the outer atom index i:
!     a process only handles the i with mod(i-1,nprocs_sub) equal to
!     myrank_sub, so the accumulated quantities are partial sums.
!
!     Three geometries are handled:
!       - iboundary = 0, or a periodic cell for which the number of
!         cell replicas nbox_ljpair is 1 in all directions: each pair
!         i < j is visited once, using pbc_atom_MPI on the separation
!       - otherwise: the pair sum runs explicitly over the
!         nbox_ljpair(1)*nbox_ljpair(2)*nbox_ljpair(3) cell replicas,
!         every term weighted by 1/2 because each pair image is
!         visited from both atoms.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   x, y, z, fx, fy, fz, pot, vir_bead, boxinv, box, iboundary, &
     &   natom, myrank_sub, nprocs_sub

      use mm_variables, only : &
     &   rout_ljpair, rin_ljpair, eps_ljpair, sig_ljpair, bigboxinv, &
     &   bigbox, nbox_ljpair, nljpair, epsrule_ljpair, sigrule_ljpair

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   weights of a pair term: visited once / visited twice   */
      real(8), parameter :: full_weight = 1.d0
      real(8), parameter :: half_weight = 0.5d0

      integer :: i, j, m, jx, jy, jz, j2

      real(8) :: rout_ljpair2, xij, yij, zij, rij2, absa, absb, absc

!     /*   true when one image per pair is sufficient   */
      logical :: single_image

!-----------------------------------------------------------------------
!     /*   initialize                                                 */
!-----------------------------------------------------------------------

      if ( nljpair .eq. 0 ) return

      rout_ljpair2 = rout_ljpair*rout_ljpair

!-----------------------------------------------------------------------
!     /*   extension of simulation box in real space sum              */
!-----------------------------------------------------------------------

      if ( ( iboundary .eq. 1 ) .or. ( iboundary .eq. 2 ) ) then

!        /*   norms of the rows of the inverse box matrix   */

         absa = sqrt ( boxinv(1,1)*boxinv(1,1) &
     &               + boxinv(1,2)*boxinv(1,2) &
     &               + boxinv(1,3)*boxinv(1,3) )
         absb = sqrt ( boxinv(2,1)*boxinv(2,1) &
     &               + boxinv(2,2)*boxinv(2,2) &
     &               + boxinv(2,3)*boxinv(2,3) )
         absc = sqrt ( boxinv(3,1)*boxinv(3,1) &
     &               + boxinv(3,2)*boxinv(3,2) &
     &               + boxinv(3,3)*boxinv(3,3) )

!        /*   number of cell replicas per direction   */

         nbox_ljpair(1) = int(2.d0*rout_ljpair*absa) + 1
         nbox_ljpair(2) = int(2.d0*rout_ljpair*absb) + 1
         nbox_ljpair(3) = int(2.d0*rout_ljpair*absc) + 1

      end if

!-----------------------------------------------------------------------
!     /*   choose the summation scheme                                */
!-----------------------------------------------------------------------

!     /*   nbox_ljpair is only inspected for a non-free boundary,   */
!     /*   exactly as in the if / else-if chain this replaces       */

      single_image = ( iboundary .eq. 0 )

      if ( .not. single_image ) then
         single_image = &
     &      ( nbox_ljpair(1)*nbox_ljpair(2)*nbox_ljpair(3) .eq. 1 )
      end if

!-----------------------------------------------------------------------
!     /*   main loop : free boundary, or periodic boundary with       */
!     /*   minimum image                                              */
!-----------------------------------------------------------------------

      if ( single_image ) then

         do m = jstart_bead, jend_bead

         do i = 1, natom-1

!           /*   atom i belongs to another process: skip all j   */
            if ( mod( i-1, nprocs_sub ) .ne. myrank_sub ) cycle

            do j = i+1, natom

               xij = x(i,m) - x(j,m)
               yij = y(i,m) - y(j,m)
               zij = z(i,m) - z(j,m)

               call pbc_atom_MPI ( xij, yij, zij )

               rij2 = xij*xij + yij*yij + zij*zij

               if ( rij2 .gt. rout_ljpair2 ) cycle

               call ljpair_accumulate &
     &            ( i, j, xij, yij, zij, rij2, full_weight )

            end do

         end do

         end do

!-----------------------------------------------------------------------
!     /*   main loop : periodic boundary                              */
!-----------------------------------------------------------------------

      else

!        /*   supercell made of nbox_ljpair replicas of the box   */

         bigbox(:,1) = dble(nbox_ljpair(1))*box(:,1)
         bigbox(:,2) = dble(nbox_ljpair(2))*box(:,2)
         bigbox(:,3) = dble(nbox_ljpair(3))*box(:,3)

         call inv3 ( bigbox, bigboxinv )

         do m = jstart_bead, jend_bead

         do i = 1, natom

!           /*   atom i belongs to another process: skip all j   */
            if ( mod( i-1, nprocs_sub ) .ne. myrank_sub ) cycle

            do j = i, natom

!              /*   images of the separation r(i) - r(j)   */

               do jx = 0, nbox_ljpair(1)-1
               do jy = 0, nbox_ljpair(2)-1
               do jz = 0, nbox_ljpair(3)-1

                  j2 = jx*jx + jy*jy + jz*jz

!                 /*   an atom does not interact with itself   */
                  if ( ( j2 .eq. 0 ) .and. ( i .eq. j ) ) cycle

                  call ljpair_image ( i, j, jx, jy, jz )

               end do
               end do
               end do

!              /*   self images were fully covered above   */
               if ( i .eq. j ) cycle

!              /*   images of the separation r(j) - r(i)   */

               do jx = 0, nbox_ljpair(1)-1
               do jy = 0, nbox_ljpair(2)-1
               do jz = 0, nbox_ljpair(3)-1

                  call ljpair_image ( j, i, jx, jy, jz )

               end do
               end do
               end do

            end do

         end do

         end do

      end if

      return

      contains

!***********************************************************************
      subroutine ljpair_image ( ia, ib, kx, ky, kz )
!***********************************************************************
!=======================================================================
!
!     one periodic image of the pair (ia,ib) of bead m:
!     the separation r(ia) - r(ib) is shifted by the lattice vector
!     kx*a + ky*b + kz*c, folded back into the supercell "bigbox",
!     and, if within the cutoff, accumulated with weight 1/2
!
!=======================================================================

      integer, intent(in) :: ia, ib, kx, ky, kz

      real(8) :: dx, dy, dz, da, db, dc, r2

      dx = x(ia,m) - x(ib,m)
      dy = y(ia,m) - y(ib,m)
      dz = z(ia,m) - z(ib,m)

      dx = dx - box(1,1)*kx - box(1,2)*ky - box(1,3)*kz
      dy = dy - box(2,1)*kx - box(2,2)*ky - box(2,3)*kz
      dz = dz - box(3,1)*kx - box(3,2)*ky - box(3,3)*kz

!     /*   fractional coordinates in the supercell   */

      da = bigboxinv(1,1)*dx + bigboxinv(1,2)*dy &
     &   + bigboxinv(1,3)*dz
      db = bigboxinv(2,1)*dx + bigboxinv(2,2)*dy &
     &   + bigboxinv(2,3)*dz
      dc = bigboxinv(3,1)*dx + bigboxinv(3,2)*dy &
     &   + bigboxinv(3,3)*dz

!     /*   nearest image with respect to the supercell   */

      da = da - nint(da)
      db = db - nint(db)
      dc = dc - nint(dc)

      dx = bigbox(1,1)*da + bigbox(1,2)*db + bigbox(1,3)*dc
      dy = bigbox(2,1)*da + bigbox(2,2)*db + bigbox(2,3)*dc
      dz = bigbox(3,1)*da + bigbox(3,2)*db + bigbox(3,3)*dc

      r2 = dx*dx + dy*dy + dz*dz

      if ( r2 .gt. rout_ljpair2 ) return

      call ljpair_accumulate ( ia, ib, dx, dy, dz, r2, half_weight )

      return
      end subroutine ljpair_image

!***********************************************************************
      subroutine ljpair_accumulate ( ia, ib, dx, dy, dz, r2, w )
!***********************************************************************
!=======================================================================
!
!     add one pair term, scaled by w, to the energy, forces and
!     virial of bead m (m is the bead index of the host routine)
!
!     ia, ib      atoms of the pair
!     dx, dy, dz  separation vector r(ia) - r(ib) (already imaged)
!     r2          squared length of the separation vector
!     w           weight of the term (1 or 1/2)
!
!=======================================================================

      integer, intent(in) :: ia, ib
      real(8), intent(in) :: dx, dy, dz, r2, w

      real(8) :: rij, rinv, eps, sig, sr, sr2, sr6, sr12, u6, u12, &
     &           uij, duij, swf, dswf, fxi, fyi, fzi

      rij  = sqrt(r2)

      rinv = 1.d0/rij

!     /*   pair parameters from the per-atom values   */

      call get_eps_ljpair &
     &   ( eps_ljpair(ia), eps_ljpair(ib), eps, epsrule_ljpair )

      call get_sig_ljpair &
     &   ( sig_ljpair(ia), sig_ljpair(ib), sig, sigrule_ljpair )

      sr   = sig*rinv
      sr2  = sr*sr
      sr6  = sr2*sr2*sr2
      sr12 = sr6*sr6

      u6   = - 4.d0*eps*sr6
      u12  = + 4.d0*eps*sr12

!     /*   switching function   */
      call getswf( rij, rin_ljpair, rout_ljpair, swf, dswf )

!     /*   bare potential   */
      uij  = + u6 + u12

!     /*   bare potential gradient   */
      duij = ( - 6.d0*u6*rinv - 12.d0*u12*rinv )*swf

!     /*   corrected potential   */
      pot(m) = pot(m) + uij*swf*w

!     /*   corrected forces (weighted)   */

      fxi = w*( - uij*dswf*dx*rinv - duij*dx*rinv )
      fyi = w*( - uij*dswf*dy*rinv - duij*dy*rinv )
      fzi = w*( - uij*dswf*dz*rinv - duij*dz*rinv )

      fx(ia,m) = fx(ia,m) + fxi
      fy(ia,m) = fy(ia,m) + fyi
      fz(ia,m) = fz(ia,m) + fzi

      fx(ib,m) = fx(ib,m) - fxi
      fy(ib,m) = fy(ib,m) - fyi
      fz(ib,m) = fz(ib,m) - fzi

!     /*   virial   */

      vir_bead(1,1,m) = vir_bead(1,1,m) + fxi*dx
      vir_bead(1,2,m) = vir_bead(1,2,m) + fxi*dy
      vir_bead(1,3,m) = vir_bead(1,3,m) + fxi*dz
      vir_bead(2,1,m) = vir_bead(2,1,m) + fyi*dx
      vir_bead(2,2,m) = vir_bead(2,2,m) + fyi*dy
      vir_bead(2,3,m) = vir_bead(2,3,m) + fyi*dz
      vir_bead(3,1,m) = vir_bead(3,1,m) + fzi*dx
      vir_bead(3,2,m) = vir_bead(3,2,m) + fzi*dy
      vir_bead(3,3,m) = vir_bead(3,3,m) + fzi*dz

      return
      end subroutine ljpair_accumulate

      end subroutine force_mm_ljpair_XMPI





!***********************************************************************
      subroutine my_XMPI_allreduce_md
!***********************************************************************
!=======================================================================
!
!     all-reduce communication in molecular dynamics
!
!     For each bead in jstart_bead..jend_bead the potential energy,
!     the 3x3 virial, the atomic forces and the dipole components
!     are passed to the my_mpi_allreduce_real_*_sub wrappers
!     (presumably a sum over the processes sharing that bead -
!     the wrappers are defined elsewhere).
!
!=======================================================================

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

      use common_variables, only : &
     &   pot, fx, fy, fz, dipx, dipy, dipz, vir_bead, natom

      implicit none

!     /*   bead index   */
      integer :: ibead

      bead_loop: do ibead = jstart_bead, jend_bead

!        /*   scalar: potential energy of this bead   */
         call my_mpi_allreduce_real_0_sub( pot(ibead) )

!        /*   3 x 3 array: virial of this bead   */
         call my_mpi_allreduce_real_2_sub( vir_bead(:,:,ibead), 3, 3 )

!        /*   natom-long vectors: force components   */
         call my_mpi_allreduce_real_1_sub( fx(:,ibead), natom )
         call my_mpi_allreduce_real_1_sub( fy(:,ibead), natom )
         call my_mpi_allreduce_real_1_sub( fz(:,ibead), natom )

!        /*   dipole components, each sent as a vector of length 1   */
         call my_mpi_allreduce_real_1_sub( dipx(ibead), 1 )
         call my_mpi_allreduce_real_1_sub( dipy(ibead), 1 )
         call my_mpi_allreduce_real_1_sub( dipz(ibead), 1 )

      end do bead_loop

      return
      end subroutine my_XMPI_allreduce_md





#ifdef nopme



!***********************************************************************
      subroutine force_pmeewald_fs_XMPI
!***********************************************************************
!=======================================================================
!
!     stand-in compiled when the preprocessor symbol "nopme" is
!     defined: the process with myrank = 0 prints an error message,
!     then every process calls my_mpi_finalize_2 and stops
!
!=======================================================================

      use common_variables, only : myrank

      implicit none

!     /*   output unit used for the message   */
      integer, parameter :: iounit = 6

!     /*   message is printed by one process only   */

      if ( myrank == 0 ) then

         write( iounit, '(a)' ) &
     &      'Error termination - PME is not compiled.'
         write( iounit, '(a)' )
         write( iounit, '(a)' ) &
     &      'Recompile pimd.mpi.x with -Dpme option.'
         write( iounit, '(a)' )

      end if

!     /*   executed by all processes   */

      call my_mpi_finalize_2

      stop
      end subroutine force_pmeewald_fs_XMPI



#else



!***********************************************************************
      subroutine force_pmeewald_fs_XMPI
!***********************************************************************
!=======================================================================
!
!     driver of the PME_* routines for the beads of this process
!
!     For every bead m in jstart_bead..jend_bead the PME_* routines
!     are called in a fixed order on a work array gridQ of
!     SizeGridQ elements; pot and vir_bead(:,:,m) are passed to
!     PME_calc_energy and fx, fy, fz to PME_calc_force.
!     (Judging from the routine names this is the particle mesh
!     Ewald reciprocal-space term: scaled coordinates, B-spline
!     coefficients, charge grid, FFT, energy, FFT, forces -
!     the routines themselves are defined elsewhere.)
!
!     The work array is allocatable rather than automatic, so that
!     a large mesh does not depend on the stack size limit.
!
!=======================================================================

!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : pot, vir_bead, fx, fy, fz

      use mm_variables, only : SizeGridQ

      use XMPI_variables, only : &
     &   jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

!     /*   work array shared by all PME steps of one bead   */
      real(8), dimension(:), allocatable :: gridQ

      integer :: m

!-----------------------------------------------------------------------
!     /*   memory allocation                                          */
!-----------------------------------------------------------------------

!     /*   without stat=, a failed allocation stops the program   */
      allocate( gridQ(SizeGridQ) )

!-----------------------------------------------------------------------
!     /*   main part                                                  */
!-----------------------------------------------------------------------

      do m = jstart_bead, jend_bead

         call PME_scaled_coord (m)

         call PME_bspline_coeffs

         call PME_charge_grid (gridQ)

         call PME_FFT_back (gridQ)

         call PME_calc_energy (gridQ, m, pot, vir_bead(:,:,m))

         call PME_FFT_forward (gridQ)

         call PME_calc_force (gridQ, m, fx, fy, fz)

      end do

!-----------------------------------------------------------------------
!     /*   memory release                                             */
!-----------------------------------------------------------------------

      deallocate( gridQ )

      return
      end subroutine force_pmeewald_fs_XMPI



#endif
