HPL benchmark on CentOS 8 with Intel compiler 2020

Posted by Pavlo Khmel on Sat 22 February 2020

HPL benchmark solves a randomly generated dense linear system of equations in double floating-point precision (IEEE 64-bit) arithmetic using MPI.

Install compilers and download HPL source code:

# Install the build tools required by the Intel installer and by HPL
yum install gcc gcc-c++ make -y
# Unpack and run the Intel Parallel Studio XE 2020 installer
# (interactive; provides the Intel C compiler, Intel MPI and MKL)
tar xf parallel_studio_xe_2020_cluster_edition.tgz 
cd parallel_studio_xe_2020_cluster_edition/
./install.sh 
# Load the Intel toolchain into the environment (sets PATH, MKLROOT, etc.)
source /opt/intel/compilers_and_libraries_2020.0.166/linux/bin/compilervars.sh intel64
# Download and unpack the HPL 2.3 sources into ~/hpl
cd /root
curl -O https://www.netlib.org/benchmark/hpl/hpl-2.3.tar.gz
tar -xzf hpl-2.3.tar.gz 
mv hpl-2.3 hpl
cd hpl

Create the file Make.Linux_Intel64:

# HPL architecture makefile for the Intel compiler + Intel MPI + MKL.
# Selected at build time with: make arch=Linux_Intel64

# Shell and basic file utilities used by the HPL build system
SHELL        = /bin/sh
CD           = cd
CP           = cp
LN_S         = ln -fs
MKDIR        = mkdir -p
RM           = /bin/rm -f
TOUCH        = touch
# Architecture name; must match the suffix of this file (Make.<ARCH>)
ARCH         = Linux_Intel64
# Layout of the HPL source tree
TOPdir       = $(HOME)/hpl
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
HPLlib       = $(LIBdir)/libhpl.a 
# Linear algebra library: Intel MKL. MKLROOT is exported by compilervars.sh.
# NOTE(review): LAinc/-L below append an extra "mkl/" to $(LAdir) while the
# static archives use $(LAdir)/lib/intel64 directly — verify which layout
# MKLROOT points at on your install; the full .a paths are what actually link.
LAdir        = $(MKLROOT)
ifndef  LAinc
LAinc        = $(LAdir)/mkl/include
endif
ifndef  LAlib
# Static MKL link line; --start-group/--end-group lets the linker resolve
# the circular dependencies between the three MKL archives.
LAlib        = -L$(LAdir)/mkl/lib/intel64 \
               -Wl,--start-group \
               $(LAdir)/lib/intel64/libmkl_intel_lp64.a \
               $(LAdir)/lib/intel64/libmkl_intel_thread.a \
               $(LAdir)/lib/intel64/libmkl_core.a \
               -Wl,--end-group -lpthread -ldl
endif
# Fortran-to-C name mangling / calling convention used for the BLAS interface
F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) -I$(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
# Enable per-phase timing and progress reporting in xhpl's output
HPL_OPTS     = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
# Intel MPI C compiler wrapper
CC       = mpiicc
CCNOOPT  = $(HPL_DEFS)
OMP_DEFS = -qopenmp -qopenmp-link=static
CCFLAGS  = $(HPL_DEFS) -axAVX -O3 -w -ansi-alias -i-static -z noexecstack -z relro -z now -nocompchk -Wall
LINKER       = $(CC)
# -mt_mpi links the thread-safe Intel MPI library (required with OpenMP)
LINKFLAGS    = $(CCFLAGS) $(OMP_DEFS) -mt_mpi
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo

Compile:

make arch=Linux_Intel64

Create the file HPL.dat. (This online generator can help: https://www.advancedclustering.com/act_kb/tune-hpl-dat-file/)

HPLinpack benchmark input file
Innovative Computing Laboratory, University of Tennessee
HPL.out      output file name (if any) 
6            device out (6=stdout,7=stderr,file)
1            # of problems sizes (N)
45000         Ns
1            # of NBs
320           NBs
0            PMAP process mapping (0=Row-,1=Column-major)
1            # of process grids (P x Q)
4            Ps
4            Qs
16.0         threshold
1            # of panel fact
2            PFACTs (0=left, 1=Crout, 2=Right)
1            # of recursive stopping criterium
4            NBMINs (>= 1)
1            # of panels in recursion
2            NDIVs
1            # of recursive panel fact.
1            RFACTs (0=left, 1=Crout, 2=Right)
1            # of broadcast
1            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
1            # of lookahead depth
1            DEPTHs (>=0)
2            SWAP (0=bin-exch,1=long,2=mix)
64           swapping threshold
0            L1 in (0=transposed,1=no-transposed) form
0            U  in (0=transposed,1=no-transposed) form
1            Equilibration (0=no,1=yes)
8            memory alignment in double (> 0)
##### This line (no. 32) is ignored (it serves as a separator). ######
0                               Number of additional problem sizes for PTRANS
1200 10000 30000                values of N
0                               number of additional blocking sizes for PTRANS
40 9 8 13 13 20 16 32 64        values of NB

To make the HPL binary portable (runnable on nodes without a full Intel installation), copy the required MPI runtime files:

# Bundle the Intel MPI runtime next to xhpl so it can run on machines
# without a full Intel Parallel Studio installation.
# Using a single variable avoids repeating (and mistyping) the long
# versioned prefix; mkdir -p is idempotent if the dirs already exist.
INTEL_MPI=/opt/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64
mkdir -p mpi mpi_lib mpi_prov
# Shared libraries preloaded by run.sh
cp "$INTEL_MPI/libfabric/lib/libfabric.so.1" mpi_lib/
cp "$INTEL_MPI/lib/libmpifort.so.12" mpi_lib/
cp "$INTEL_MPI/lib/release/libmpi.so.12" mpi_lib/
# Process launcher and the Hydra helper daemons it spawns
cp "$INTEL_MPI/bin/mpiexec" mpi/
cp "$INTEL_MPI/bin/hydra_bstrap_proxy" mpi/
cp "$INTEL_MPI/bin/hydra_pmi_proxy" mpi/
# libfabric providers; run.sh points FI_PROVIDER_PATH at this directory
cp "$INTEL_MPI/libfabric/lib/prov/libtcp-fi.so" mpi_prov/
cp "$INTEL_MPI/libfabric/lib/prov/librxm-fi.so" mpi_prov/

Create the start script run.sh:

#!/bin/bash
# Launch xhpl using the bundled Intel MPI runtime copied above.
# Force the plain TCP libfabric provider and point libfabric at the
# copied provider plugins instead of a system-wide installation.
export FI_PROVIDER=tcp
export FI_PROVIDER_PATH=./mpi_prov/
# Preload the bundled shared libraries so the dynamic linker resolves them
# without touching LD_LIBRARY_PATH or the system library cache.
LD_PRELOAD=./mpi_lib/libfabric.so.1:./mpi_lib/libmpifort.so.12:./mpi_lib/libmpi.so.12 ./mpi/mpiexec ./bin/Linux_Intel64/xhpl

Run:

# Make the launcher executable, then start the benchmark
chmod +x run.sh
./run.sh

To run on multiple nodes, the hpl directory should be located on a shared filesystem. Example:

LD_PRELOAD=./mpi_lib/libfabric.so.1:./mpi_lib/libmpifort.so.12:./mpi_lib/libmpi.so.12 ./mpi/mpiexec -hosts node-000,node-001,node-002,node-003,node-004 ./bin/Linux_Intel64/xhpl