#!/bin/bash
#SBATCH --mem-per-cpu=2000 --time=1:0:0 -c12 --ntasks=1
#SBATCH --gpus-per-node=1 --nodes=1 -A def-svassili
#
# GROMACS GPU benchmark job.
#
# Copies topol.tpr from the submit directory to node-local scratch
# ($SLURM_TMPDIR), runs `gmx mdrun` there with one OpenMP thread per
# allocated CPU, prints a short summary extracted from md.log into the
# SLURM log, and copies md.log back to the submit directory.
#
# Required environment (set by SLURM): SLURM_TMPDIR, SLURM_SUBMIT_DIR.
# Optional: SLURM_CPUS_PER_TASK (defaults to 1 thread when unset).
#
# Exit status: non-zero if staging, module loading, mdrun, or the final
# copy of md.log fails.

# Abort early with a clear message when not running under SLURM.
td=${SLURM_TMPDIR:?SLURM_TMPDIR is not set}
wd=${SLURM_SUBMIT_DIR:?SLURM_SUBMIT_DIR is not set}

# Stage the input on local scratch. Each step is checked separately so a
# failed copy can never leave mdrun running in the submit directory.
cp -- topol.tpr "$td" || {
  printf 'error: cannot copy topol.tpr to %s\n' "$td" >&2
  exit 1
}
cd -- "$td" || {
  printf 'error: cannot cd to %s\n' "$td" >&2
  exit 1
}

module load StdEnv/2020 gcc/9.3.0 cuda/11.4 openmpi/4.0.3 gromacs/2021.4 || {
  printf 'error: module load failed\n' >&2
  exit 1
}

# Non-bonded, PME and update are offloaded to the GPU; bonded forces stay
# on the CPU.
gmx mdrun -ntomp "${SLURM_CPUS_PER_TASK:-1}" \
  -nb gpu -pme gpu -update gpu -bonded cpu -s topol.tpr
mdrun_rc=$?

# Print CPU info and timing into SLURM log.
# These are best-effort: a missing line in md.log must not abort the job,
# so the grep exit codes are deliberately ignored.
grep Brand md.log
grep -A1 "Number of GPUs detected:" md.log | tail -n1
grep "The number of OpenMP threads" md.log
grep Performance: md.log

# Keep the log even when mdrun failed; it is needed for diagnosis.
cp -- md.log "$wd"
cp_rc=$?

# Report an mdrun failure in preference to a copy failure. The explicit
# exit also ensures nothing after this point in the file is interpreted
# as shell.
if ((mdrun_rc != 0)); then
  exit "$mdrun_rc"
fi
exit "$cp_rc"
Inconsistent performance.
- Use 400,000 steps to minimize the contribution of the startup time.
- Use local scratch.
- Performance (ns/day) seems to be more consistent:
69.064, 80.399, 83.476, 82.780, 83.300, 85.565,
74.991, 66.509, 60.763, 74.863, 79.997, 78.514,
70.280, 76.229, 23.110, 72.572, 72.164, 78.360,
69.685, 84.491, 55.888
No, performance is still inconsistent.
See jobs 989213-15, which ran on one node at the same time.
Occasionally jobs run 4x slower!
For example, job 994479 on ng10103: the GPU was starved, with periods of only 2-3% usage.
It ran four times slower (23.110 ns/day)!
Running processes:
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
2479684 svassili 20 0 7085004 650700 185956 S 1076 0.1 179:16.48 gmx
3195197 akalan2 20 0 72.8g 61.8g 133080 R 798.0 12.3 27757:00 python
1925299 claudele 20 0 6753532 540424 151456 R 722.5 0.1 10320:02 namd2
642799 xiruzhu 20 0 62.7g 15.7g 132312 R 102.0 3.1 2568:52 python
; GROMACS run parameters (.mdp) for the benchmark simulation.
; NOTE(review): `title` may be an obsolete entry in recent GROMACS
; versions - confirm against the mdp reference.
title = benchmark
; Run parameters
integrator = md
; 400,000 steps, chosen to minimize the contribution of the startup time.
; With dt = 0.001 (ps, per the GROMACS mdp reference) this is 400 ps.
nsteps = 400000
dt = 0.001
; Output control
; Uncompressed coordinate, velocity and force output intervals are all 0.
nstxout = 0
nstvout = 0
nstfout = 0
; Energies and log entries are written every 10,000 steps.
nstenergy = 10000
nstlog = 10000
; Compressed coordinates of the whole system every 50,000 steps.
nstxout-compressed = 50000
compressed-x-grps = System
; Bond parameters
; The run is flagged as a continuation; consistent with gen_vel = no below.
continuation = yes
constraint_algorithm = lincs
constraints = h-bonds
; Neighborsearching
cutoff-scheme = Verlet
; NOTE(review): `ns_type` may be obsolete with the Verlet scheme in
; GROMACS 2021 - confirm against the mdp reference.
ns_type = grid
nstlist = 10
; Coulomb and van der Waals cut-offs are set to the same value.
rcoulomb = 0.8
rvdw = 0.8
DispCorr = Ener ; analytic VDW correction
; Electrostatics
coulombtype = PME
pme_order = 4
; The PME grid is fixed explicitly at 144 x 144 x 144 points.
fourier-nx = 144
fourier-ny = 144
fourier-nz = 144
; Temperature coupling is on
; A single coupling group covering the whole system.
tcoupl = V-rescale
tc-grps = system
tau_t = 0.1
ref_t = 300
; Pressure coupling is on
pcoupl = Parrinello-Rahman
pcoupltype = isotropic
tau_p = 2.0
ref_p = 1.0
compressibility = 4.5e-5
; Periodic boundary conditions
pbc = xyz
; Velocity generation
; No new velocities are generated.
gen_vel = no