!///////////////////////////////////////////////////////////////////////
!
!      Author:          M. Shiga
!      Last updated:    Jan 23, 2025 by M. Shiga
!      Description:     extensive MPI parallelization
!
!///////////////////////////////////////////////////////////////////////
!***********************************************************************
      subroutine force_helium_aziz1992_XMPI
!***********************************************************************
!=======================================================================
!
!     HFD-B2 potential for helium:
!     Aziz, Slaman, Koide, Allnat, Meath, Mol. Phys. 77, 321 (1992)
!
!=======================================================================
!-----------------------------------------------------------------------
!     /*   shared variables                                           */
!-----------------------------------------------------------------------

      use common_variables, only : &
     &   au_length, boltz, fx, fy, fz, pi, pot, vir_bead, x, y, z, &
     &   boxinv, natom, iboundary

      use XMPI_variables, only : &
     &   jstart_atom, jend_atom, jstart_bead, jend_bead

!-----------------------------------------------------------------------
!     /*   local variables                                            */
!-----------------------------------------------------------------------

      implicit none

      real(8), parameter ::  coef6          =  1.34920045d0
      real(8), parameter ::  coef8          =  0.41365922d0
      real(8), parameter ::  coef10         =  0.17078164d0
      real(8), parameter ::  acoef          =  1.9221529d+5
      real(8), parameter ::  alphap         = 10.73520708d0
      real(8), parameter ::  betap          = -1.89296514d0
      real(8), parameter ::  dcoef          =  1.4135d0
      real(8), parameter ::  epsilon_kelvin = 10.94d0
      real(8), parameter ::  rm_angstrom    =  2.970d0

      integer :: ibead, i, j

      real(8) :: rxij, ryij, rzij, rij, rij2, rcut2, reducex, reducex2, &
     &           rxinv, rxinv2, rxinv6, rxinv7, rxinv8, rxinv9, rxinv10, &
     &           rxinv11, dummy, dumarg, pf, dfdx, pg, factf, dgdx, pe, &
     &           dedx, vij, dvdr, wij, fij, fxij, fyij, fzij, epsilon, &
     &           rm, absa, absb, absc, rcut

      real(8) :: pot_i, fx_i(natom), fy_i(natom), fz_i(natom), &
     &           vir_i(3,3)

!-----------------------------------------------------------------------
!     /*   cut off                                                    */
!-----------------------------------------------------------------------

      rcut = 0.d0

      if ( iboundary .ne. 0 ) then

         absa = sqrt ( boxinv(1,1)*boxinv(1,1) &
     &               + boxinv(1,2)*boxinv(1,2) &
     &               + boxinv(1,3)*boxinv(1,3) )
         absb = sqrt ( boxinv(2,1)*boxinv(2,1) &
     &               + boxinv(2,2)*boxinv(2,2) &
     &               + boxinv(2,3)*boxinv(2,3) )
         absc = sqrt ( boxinv(3,1)*boxinv(3,1) &
     &               + boxinv(3,2)*boxinv(3,2) &
     &               + boxinv(3,3)*boxinv(3,3) )

         rcut = 0.5d0 / max( absa, absb, absc )

      end if

      rcut2 = rcut * rcut

!-----------------------------------------------------------------------
!     /*   unit conversion                                            */
!-----------------------------------------------------------------------

      epsilon = epsilon_kelvin * boltz
      rm      = rm_angstrom / ( au_length * 1.d+10 )

      factf   = epsilon / rm

!-----------------------------------------------------------------------
!     /*   start main loop                                            */
!-----------------------------------------------------------------------

      do ibead = jstart_bead, jend_bead

         pot_i = 0.d0

         fx_i(:) = 0.d0
         fy_i(:) = 0.d0
         fz_i(:) = 0.d0

         vir_i(:,:) = 0.d0

         do i = jstart_atom, jend_atom
         do j = 1, natom

            if ( j .eq. i ) cycle

            rxij = x(i,ibead) - x(j,ibead)
            ryij = y(i,ibead) - y(j,ibead)
            rzij = z(i,ibead) - z(j,ibead)

            call pbc_atom_MPI( rxij, ryij, rzij )

            rij2 = rxij*rxij + ryij*ryij + rzij*rzij

            if ( ( iboundary .ne. 0 ) .and. ( rij2 .ge. rcut2 ) ) cycle

            rij = sqrt( rij2 )

            reducex  = rij / rm
            reducex2 = reducex * reducex
            rxinv    = 1.d0 / reducex
            rxinv2   = rxinv * rxinv
            rxinv6   = rxinv2 * rxinv2 * rxinv2
            rxinv7   = rxinv6 * rxinv
            rxinv8   = rxinv7 * rxinv
            rxinv9   = rxinv8 * rxinv
            rxinv10  = rxinv9 * rxinv
            rxinv11  = rxinv10 * rxinv

            pf = 1.d0
            dfdx = 0.d0

            if ( reducex .lt. dcoef ) then
               dummy = dcoef*rxinv - 1.d0
               dumarg = dummy*dummy
               pf = exp(-dumarg)
               dfdx = 2.d0*dcoef*rxinv2*dummy*pf
            end if

            pg = coef6*rxinv6 + coef8*rxinv8 + coef10*rxinv10

            dgdx = - 6.d0*coef6*rxinv7 - 8.d0*coef8*rxinv9 &
     &             - 10.d0*coef10*rxinv11

            dumarg = -alphap*reducex + betap*reducex2

            pe = acoef*exp(dumarg)

            dedx = (-alphap + 2.d0*betap*reducex)*pe

            vij = pe - pg*pf

            pot_i = pot_i + 0.5d0 * vij * epsilon

            dvdr = dedx - dgdx*pf - pg*dfdx

            wij = rij*dvdr * factf

            fij = - wij / rij2

            fxij = fij*rxij
            fyij = fij*ryij
            fzij = fij*rzij

            fx_i(i) = fx_i(i) + fxij
            fy_i(i) = fy_i(i) + fyij
            fz_i(i) = fz_i(i) + fzij

            vir_i(1,1) = vir_i(1,1) + 0.5d0*fxij*rxij
            vir_i(1,2) = vir_i(1,2) + 0.5d0*fxij*ryij
            vir_i(1,3) = vir_i(1,3) + 0.5d0*fxij*rzij
            vir_i(2,1) = vir_i(2,1) + 0.5d0*fyij*rxij
            vir_i(2,2) = vir_i(2,2) + 0.5d0*fyij*ryij
            vir_i(2,3) = vir_i(2,3) + 0.5d0*fyij*rzij
            vir_i(3,1) = vir_i(3,1) + 0.5d0*fzij*rxij
            vir_i(3,2) = vir_i(3,2) + 0.5d0*fzij*ryij
            vir_i(3,3) = vir_i(3,3) + 0.5d0*fzij*rzij

         end do
         end do

!        /*   sum potential energy   */
         call my_mpi_allreduce_real_0_sub( pot_i )

!        /*   sum virial   */
         call my_mpi_allreduce_real_2_sub( vir_i, 3, 3 )

!        /*   sum forces for local atoms   */
!         call my_mpi_reduce_xyz_XMPI( fx_i, fy_i, fz_i, 1 )
         call my_mpi_allreduce_real_1_sub( fx_i, natom )
         call my_mpi_allreduce_real_1_sub( fy_i, natom )
         call my_mpi_allreduce_real_1_sub( fz_i, natom )

         pot(ibead) = pot(ibead) + pot_i

         fx(:,ibead) = fx(:,ibead) + fx_i(:)
         fy(:,ibead) = fy(:,ibead) + fy_i(:)
         fz(:,ibead) = fz(:,ibead) + fz_i(:)

         vir_bead(:,:,ibead) = vir_i(:,:)

      end do

      return
      end
