The HPL benchmark solves a randomly generated dense linear system of equations in double-precision (IEEE 64-bit) floating-point arithmetic using MPI.
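HPL charges roughly 2/3*N^3 floating-point operations for the factorization and solve (lower-order terms are negligible at benchmark sizes) and reports that count divided by wall-clock time. A rough back-of-the-envelope check, with a hypothetical problem size and runtime:
# N and T below are placeholders, not measured results
N=45000; T=600
awk -v n=$N -v t=$T 'BEGIN { printf "~%.1f GFLOPS\n", (2*n^3/3) / t / 1e9 }'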
Install the compilers (GCC plus Intel Parallel Studio XE 2020 Cluster Edition) and download the HPL source code:
yum install gcc gcc-c++ make -y
tar xf parallel_studio_xe_2020_cluster_edition.tgz
cd parallel_studio_xe_2020_cluster_edition/
./install.sh
source /opt/intel/compilers_and_libraries_2020.0.166/linux/bin/compilervars.sh intel64
cd /root
curl -O https://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz
tar -xzf hpl-2.3.tar.gz
mv hpl-2.3 hpl
cd hpl
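Before building, it is worth checking that the Intel environment was actually picked up (a quick sanity check, assuming compilervars.sh was sourced as above):
which mpiicc
echo $MKLROOT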
Create a file Make.Linux_Intel64 with the following contents:
SHELL = /bin/sh
CD = cd
CP = cp
LN_S = ln -fs
MKDIR = mkdir -p
RM = /bin/rm -f
TOUCH = touch
ARCH = Linux_Intel64
TOPdir = $(HOME)/hpl
INCdir = $(TOPdir)/include
BINdir = $(TOPdir)/bin/$(ARCH)
LIBdir = $(TOPdir)/lib/$(ARCH)
HPLlib = $(LIBdir)/libhpl.a
LAdir = $(MKLROOT)
ifndef LAinc
LAinc = $(LAdir)/include
endif
ifndef LAlib
LAlib = -L$(LAdir)/lib/intel64 \
        -Wl,--start-group \
        $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
        $(LAdir)/lib/intel64/libmkl_intel_thread.a \
        $(LAdir)/lib/intel64/libmkl_core.a \
        -Wl,--end-group -lpthread -ldl
endif
F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc)
HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib)
HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
CC = mpiicc
CCNOOPT = $(HPL_DEFS)
OMP_DEFS = -qopenmp -qopenmp-link=static
CCFLAGS = $(HPL_DEFS) -axAVX -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall
LINKER = $(CC)
LINKFLAGS = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
ARCHIVER = ar
ARFLAGS = r
RANLIB = echo
Compile:
make arch=Linux_Intel64
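Optionally confirm that the benchmark binary was produced:
ls -l bin/Linux_Intel64/xhpl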
Create the HPL.dat input file. This online generator can help with tuning: https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/ (a quick sizing sketch also follows the file below).
HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out output file name (if any)
6 device out (6=stdout,7=stderr,file)
1 # of problems sizes (N)
45000 Ns
1 # of NBs
320 NBs
0 PMAP process mapping (0=Row-,1=Column-major)
1 # of process grids (P x Q)
4 Ps
4 Qs
16.0 threshold
1 # of panel fact
2 PFACTs (0=left, 1=Crout, 2=Right)
1 # of recursive stopping criterium
4 NBMINs (>= 1)
1 # of panels in recursion
2 NDIVs
1 # of recursive panel fact.
1 RFACTs (0=left, 1=Crout, 2=Right)
1 # of broadcast
1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1 # of lookahead depth
1 DEPTHs (>=0)
2 SWAP (0=bin-exch,1=long,2=mix)
64 swapping threshold
0 L1 in (0=transposed,1=no-transposed) form
0 U in (0=transposed,1=no-transposed) form
1 Equilibration (0=no,1=yes)
8 memory alignment in double (> 0)
##### This line (no. 32) is ignored (it serves as a separator). ######
0 Number of additional problem sizes for PTRANS
1200 10000 30000 values of N
0 number of additional blocking sizes for PTRANS
40 9 8 13 13 20 16 32 64 values of NB
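A common sizing rule of thumb (an assumption used here, not something HPL mandates) is to let the matrix fill about 80% of available memory: the matrix occupies 8*N*N bytes, so N is roughly sqrt(0.8 * total_RAM_in_bytes / 8), rounded down to a multiple of NB. For multi-node runs, use the combined memory of all nodes. A sketch for a single node:
# MemTotal is reported in kB, hence the *1024
MEM_BYTES=$(awk '/MemTotal/ {print $2*1024}' /proc/meminfo)
NB=320
N=$(awk -v m=$MEM_BYTES -v nb=$NB 'BEGIN { n=sqrt(0.8*m/8); print int(n/nb)*nb }')
echo "Suggested Ns: $N"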
The resulting HPL build still needs a few Intel MPI runtime files; copying them next to the binary makes it portable to nodes without a full Intel installation (a dependency check is sketched after the copy commands):
mkdir mpi
mkdir mpi_lib
mkdir mpi_prov
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/libfabric/lib/libfabric.so.1 mpi_lib/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/lib/libmpifort.so.12 mpi_lib/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/lib/release/libmpi.so.12 mpi_lib/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/bin/mpiexec mpi/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/bin/hydra_bstrap_proxy mpi/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/bin/hydra_pmi_proxy mpi/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/libfabric/lib/prov/libtcp-fi.so mpi_prov/
cp /opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64/libfabric/lib/prov/librxm-fi.so mpi_prov/
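The set of files to copy can be cross-checked against what the binary actually links (libfabric and its providers are loaded by libmpi at run time, so they do not show up here, but the two libmpi* libraries should):
ldd bin/Linux_Intel64/xhpl | grep -E 'mpi|not found'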
Create a start script run.sh:
#!/bin/bash
# Force the libfabric TCP provider and point libfabric at the bundled provider plugins
export FI_PROVIDER=tcp
export FI_PROVIDER_PATH=./mpi_prov/
# Preload the bundled Intel MPI / libfabric libraries so no system-wide install is needed
LD_PRELOAD=./mpi_lib/libfabric.so.1:./mpi_lib/libmpifort.so.12:./mpi_lib/libmpi.so.12 ./mpi/mpiexec ./bin/Linux_Intel64/xhpl
Run:
chmod +x run.sh
./run.sh
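With device out set to 6, results go to stdout. One way to capture a run and pull out the result and residual-check lines (a sketch based on the HPL.dat above):
./run.sh | tee hpl.log
grep -E '^WR|PASSED|FAILED' hpl.log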
To run on multiple nodes, the hpl directory should be located on a shared filesystem. Example:
LD_PRELOAD=./mpi_lib/libfabric.so.1:./mpi_lib/libmpifort.so.12:./mpi_lib/libmpi.so.12 ./mpi/mpiexec -hosts node-000,node-001,node-002,node-003,node-004 ./bin/Linux_Intel64/xhpl
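The grid in HPL.dat (P=4, Q=4) needs 16 MPI ranks in total; if the default rank count does not match, it can be set explicitly with the standard Hydra options -n and -ppn (hostnames are placeholders, and FI_PROVIDER/FI_PROVIDER_PATH are assumed to be exported as in run.sh):
LD_PRELOAD=./mpi_lib/libfabric.so.1:./mpi_lib/libmpifort.so.12:./mpi_lib/libmpi.so.12 ./mpi/mpiexec -n 16 -ppn 4 -hosts node-000,node-001,node-002,node-003 ./bin/Linux_Intel64/xhpl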