#!/bin/bash
#SBATCH --mem-per-cpu=4000 --time=2:0:0 -c1 --ntasks=1024 --nodes=16 -A def-svassili
export OMP_NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"
module load StdEnv/2020 gcc/9.3.0 openmpi/4.0.3 gromacs/2021.4
srun gmx_mpi mdrun -s topol.tpr -cpi state.cpt
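
To run the benchmark, save the script above under any name (submit_gromacs.sh below is only a placeholder) and submit it with sbatch; squeue shows its state while it is queued or running.

sbatch submit_gromacs.sh
squeue -u $USER
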
grep Brand md.log
grep "The number of OpenMP threads" md.log
grep "MPI pro" md.log
echo ntasks=$SLURM_NTASKS
grep Performance: md.log
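
The checks above confirm the build, the thread and rank counts, and the achieved throughput. If the same benchmark is repeated at several task counts, the Performance lines can be collected in one loop for comparison; the sketch below assumes each run keeps its md.log in a directory named run_<ntasks>, which is not part of the setup above.

for d in run_*; do
    echo -n "$d: "                   # label each result with its run directory
    grep "Performance:" "$d/md.log"  # ns/day and hours/ns as reported by mdrun
done
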
NOTE: 8.6 % of the available CPU time was lost due to load imbalance
in the domain decomposition.
Dynamic load balancing was automatically disabled, but it might be beneficial
to manually turn it on (option -dlb on).
You can also consider manually changing the decomposition (option -dd);
e.g. by using fewer domains along the box dimension in which there is
considerable inhomogeneity in the simulated system.
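
Both suggestions translate into extra mdrun options in the job script. The sketch below keeps the input files from above; the -dlb value and the 8x8x10 domain grid are illustrations, not tuned settings (mdrun accepts auto, no or yes for -dlb, and -dd expects the number of domains along x, y and z).

srun gmx_mpi mdrun -s topol.tpr -cpi state.cpt -dlb yes                # force dynamic load balancing on
srun gmx_mpi mdrun -s topol.tpr -cpi state.cpt -dd 8 8 10 -npme 384    # or prescribe 8*8*10 = 640 PP domains + 384 PME ranks
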
NOTE: 19.3 % performance was lost because the PME ranks
had more work to do than the PP ranks.
You might want to increase the number of PME ranks
or increase the cut-off and the grid spacing.
Will use 640 particle-particle and 384 PME only ranks
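
The PP/PME split that mdrun guessed here can also be fixed by hand, which is the direct way to act on the note above about overloaded PME ranks. In the sketch below, 448 is only an illustration of "more PME ranks" for the same 1024-task job, not a tuned value.

srun gmx_mpi mdrun -s topol.tpr -cpi state.cpt -npme 448   # 576 PP + 448 PME ranks instead of 640 + 384
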
title = benchmark
; Run parameters
integrator = md
nsteps = 400000
dt = 0.001
; Output control
nstxout = 0
nstvout = 0
nstfout = 0
nstenergy = 10000
nstlog = 10000
nstxout-compressed = 50000
compressed-x-grps = System
; Bond parameters
continuation = yes
constraint_algorithm = lincs
constraints = h-bonds
; Neighborsearching
cutoff-scheme = Verlet
ns_type = grid
nstlist = 10
rcoulomb = 0.8
rvdw = 0.8
DispCorr = Ener ; analytic VDW correction
; Electrostatics
coulombtype = PME
pme_order = 4
fourier-nx = 144
fourier-ny = 144
fourier-nz = 144
; Temperature coupling is on
tcoupl = V-rescale
tc-grps = system
tau_t = 0.1
ref_t = 300
; Pressure coupling is on
pcoupl = Parrinello-Rahman
pcoupltype = isotropic
tau_p = 2.0
ref_p = 1.0
compressibility = 4.5e-5
; Periodic boundary conditions
pbc = xyz
; Velocity generation
gen_vel = no
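
These parameters produce the topol.tpr that the job script passes to mdrun -s. A minimal sketch of that preprocessing step, assuming the file above is saved as benchmark.mdp and that conf.gro and topol.top describe the benchmark system (both file names are assumptions):

module load StdEnv/2020 gcc/9.3.0 openmpi/4.0.3 gromacs/2021.4
gmx grompp -f benchmark.mdp -c conf.gro -p topol.top -o topol.tpr   # writes the run input read by mdrun
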