Hello,
I am compiling VASP 6.5.1 with GPU support on an HPE/Cray system using the Cray programming environment (specifically, https://docs.ncsa.illinois.edu/systems/ ... index.html). The executables build successfully, but when I run them, stdout reports "CHECK_MPI: your MPI is not CUDA-aware, STOPPING". It is not immediately clear why.
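For reference, a minimal program along the following lines exercises the same requirement independently of VASP (a sketch only, not VASP's actual CHECK_MPI routine; the build and launch lines assume the Cray cc wrapper with the NVHPC backend and a Slurm-style launcher):
Code:
/* check_gpu_mpi.c -- CUDA-aware MPI smoke test (hypothetical helper).
 * Build (assumed):  cc -cuda check_gpu_mpi.c -o check_gpu_mpi
 * Run (assumed):    srun -n 2 ./check_gpu_mpi
 * A CUDA-aware MPI sums the device buffers and prints the rank sum;
 * a non-CUDA-aware MPI typically crashes or returns garbage when
 * handed a device pointer. */
#include <stdio.h>
#include <mpi.h>
#include <cuda_runtime.h>

int main(int argc, char **argv) {
    int rank, size, sum = -1;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Place this rank's ID in GPU memory. */
    int *d_buf;
    cudaMalloc((void **)&d_buf, sizeof(int));
    cudaMemcpy(d_buf, &rank, sizeof(int), cudaMemcpyHostToDevice);

    /* Reduce directly on the device pointer -- this is the kind of
       operation that requires a CUDA-aware MPI. */
    MPI_Allreduce(MPI_IN_PLACE, d_buf, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    cudaMemcpy(&sum, d_buf, sizeof(int), cudaMemcpyDeviceToHost);
    if (rank == 0)
        printf("sum of ranks = %d (expected %d)\n", sum, size * (size - 1) / 2);

    cudaFree(d_buf);
    MPI_Finalize();
    return 0;
}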
Below is my makefile.include:
Code:
# Default precompiler options
CPP_OPTIONS = -DHOST=\"LinuxNV\" \
-DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
-DscaLAPACK \
-DCACHE_SIZE=4000 \
-Davoidalloc \
-Dvasp6 \
-Dtbdyn \
-Dqd_emulate \
-Dfock_dblbuf \
-D_OPENMP \
-DACC_OFFLOAD \
-DNVCUDA \
-DUSENCCL
CPP = ftn -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
# N.B.: you might need to change the cuda-version here
# to one that comes with your NVIDIA-HPC SDK
CC = cc -acc -gpu=cc60,cc70,cc80,cuda12.3 -mp
FC = ftn -acc -gpu=cc60,cc70,cc80,cuda12.3 -mp
FCL = ftn -acc -gpu=cc60,cc70,cc80,cuda12.3 -mp -c++libs
FREE = -Mfree
FFLAGS = -Mbackslash -Mlarge_arrays
OFLAG = -fast
DEBUG = -Mfree -O0 -traceback
LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda
# Redefine the standard list of O1 and O2 objects
SOURCE_O1 := pade_fit.o minimax_dependence.o
SOURCE_O2 := pead.o
# For what used to be vasp.5.lib
CPP_LIB = $(CPP)
FC_LIB = $(FC)
CC_LIB = $(CC)
CFLAGS_LIB = -O -w
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB = $(FREE)
OBJECTS_LIB = linpack_double.o
# For the parser library
CXX_PARS = nvc++ --no_warnings
##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##
# When compiling on the target machine itself, change this to the
# relevant target when cross-compiling for another architecture
VASP_TARGET_CPU ?= -tp host
FFLAGS += $(VASP_TARGET_CPU)
# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically
NVROOT = $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
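# (awk uses "/compilers/bin/nvfortran" as the field separator, so $1 is
# everything before it, i.e. the SDK root such as /opt/nvidia/hpc_sdk/Linux_aarch64/24.3)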
# If the above fails, then NVROOT needs to be set manually
#NVHPC ?= /opt/nvidia/hpc_sdk
#NVVERSION = 21.11
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
OFLAG_IN = -fast -Mwarperf
SOURCE_IN := nonlr.o
# Software emulation of quadruple precision (mandatory)
QD ?= $(NVROOT)/compilers/extras/qd
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
# BLAS (mandatory)
BLAS = -lblas
# LAPACK (mandatory)
LAPACK = -llapack
# scaLAPACK (mandatory)
SCALAPACK = -Mscalapack
#LLIBS += $(SCALAPACK) $(LAPACK) $(BLAS)
# FFTW (mandatory)
#FFTW_ROOT ?= /path/to/your/fftw/installation
LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
INCS += -I$(FFTW_ROOT)/include
# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
# HDF5-support (optional but strongly recommended, and mandatory for some features)
CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT ?= /path/to/your/hdf5/installation
LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
INCS += -I$(HDF5_ROOT)/include
# For the VASP-2-Wannier90 interface (optional)
#CPP_OPTIONS += -DVASP2WANNIER90
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier
# For the fftlib library (hardly any benefit for the OpenACC GPU port)
#CPP_OPTIONS+= -Dsysv
#FCL += fftlib.o
#CXX_FFTLIB = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
#LIBS += fftlib
#LLIBS += -ldl
# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML =
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK >= 25.1
NVFORTRAN_VERSION := $(shell ftn --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')
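# (assumes the second line of `ftn --version` starts with "nvfortran <version>")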
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -q '$(2)' && echo true || echo false)
endef
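# Example: $(call greater_or_equal,24.3,25.1) yields false -- sort -V puts the
# smaller version first, so head -n1 matches $(2) only when $(1) >= $(2)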
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
CC += -gpu=tripcount:host
FC += -gpu=tripcount:host
endif
Calling ldd on the executables yields:
Code:
linux-vdso.so.1 (0x0000ffff87fa0000)
libqdmod.so.0 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/extras/qd/lib/libqdmod.so.0 (0x0000ffff87f40000)
libqd.so.0 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/extras/qd/lib/libqd.so.0 (0x0000ffff87ef0000)
libfftw3.so.mpi31.3 => /opt/cray/pe/lib64/libfftw3.so.mpi31.3 (0x0000ffff87d60000)
libfftw3_omp.so.mpi31.3 => /opt/cray/pe/lib64/libfftw3_omp.so.mpi31.3 (0x0000ffff87d30000)
libhdf5_fortran_nvidia.so.310 => /opt/cray/pe/lib64/libhdf5_fortran_nvidia.so.310 (0x0000ffff87cb0000)
libcuda.so.1 => /usr/lib64/libcuda.so.1 (0x0000ffff86100000)
libmpifort_nvidia.so.12 => /opt/cray/pe/lib64/libmpifort_nvidia.so.12 (0x0000ffff86090000)
libsci_nvidia_mpi_mp.so.6 => /opt/cray/pe/lib64/libsci_nvidia_mpi_mp.so.6 (0x0000ffff82cd0000)
libmpi_nvidia.so.12 => /opt/cray/pe/lib64/libmpi_nvidia.so.12 (0x0000ffff80950000)
libmpi_gtl_cuda.so.0 => /opt/cray/pe/lib64/libmpi_gtl_cuda.so.0 (0x0000ffff808e0000)
libsci_nvidia_mp.so.6 => /opt/cray/pe/lib64/libsci_nvidia_mp.so.6 (0x0000ffff78660000)
libdl.so.2 => /lib64/libdl.so.2 (0x0000ffff78630000)
libxpmem.so.0 => /usr/lib64/libxpmem.so.0 (0x0000ffff78600000)
libnvhpcwrapcufft.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvhpcwrapcufft.so (0x0000ffff785d0000)
libcufft.so.11 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/math_libs/12.3/lib64/libcufft.so.11 (0x0000ffff6db80000)
libcusolver.so.11 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/math_libs/12.3/lib64/libcusolver.so.11 (0x0000ffff66f90000)
libcudaforwrapnccl.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudaforwrapnccl.so (0x0000ffff66f60000)
libnccl.so.2 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/comm_libs/12.3/nccl/lib/libnccl.so.2 (0x0000ffff5a7b0000)
libcublas.so.12 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/math_libs/12.3/lib64/libcublas.so.12 (0x0000ffff53c60000)
libcublasLt.so.12 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/math_libs/12.3/lib64/libcublasLt.so.12 (0x0000ffff32840000)
libcudaforwrapblas.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudaforwrapblas.so (0x0000ffff327e0000)
libcudaforwrapblas117.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudaforwrapblas117.so (0x0000ffff327b0000)
libcudart.so.12 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/cuda/12.3/lib64/libcudart.so.12 (0x0000ffff326f0000)
libcudafor_120.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudafor_120.so (0x0000ffff2a800000)
libcudafor.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudafor.so (0x0000ffff2a7d0000)
libacchost.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libacchost.so (0x0000ffff2a740000)
libaccdevaux.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libaccdevaux.so (0x0000ffff2a6f0000)
libacccuda.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libacccuda.so (0x0000ffff2a590000)
libcudadevice.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudadevice.so (0x0000ffff2a550000)
libcudafor2.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libcudafor2.so (0x0000ffff2a520000)
libnvf.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvf.so (0x0000ffff29f10000)
libnvhpcatm.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvhpcatm.so (0x0000ffff29ee0000)
libatomic.so.1 => /usr/lib64/libatomic.so.1 (0x0000ffff29eb0000)
libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x0000ffff29e70000)
libstdc++.so.6 => /usr/lib64/libstdc++.so.6 (0x0000ffff29c30000)
libnvomp.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvomp.so (0x0000ffff29a10000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x0000ffff299d0000)
libnvcpumath.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvcpumath.so (0x0000ffff29840000)
libnvc.so => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/compilers/lib/libnvc.so (0x0000ffff297e0000)
librt.so.1 => /lib64/librt.so.1 (0x0000ffff297b0000)
libc.so.6 => /lib64/libc.so.6 (0x0000ffff29600000)
libm.so.6 => /lib64/libm.so.6 (0x0000ffff29540000)
libhdf5_nvidia.so.310 => /opt/cray/pe/lib64/libhdf5_nvidia.so.310 (0x0000ffff29030000)
libz.so.1 => /usr/lib64/libz.so.1 (0x0000ffff28ff0000)
/lib/ld-linux-aarch64.so.1 (0x0000ffff87fb0000)
libfabric.so.1 => /opt/cray/libfabric/1.20.1/lib64/libfabric.so.1 (0x0000ffff28ed0000)
libpmi.so.0 => /opt/cray/pe/lib64/libpmi.so.0 (0x0000ffff28e90000)
libpmi2.so.0 => /opt/cray/pe/lib64/libpmi2.so.0 (0x0000ffff28e50000)
libnvJitLink.so.12 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/cuda/12.3/lib64/libnvJitLink.so.12 (0x0000ffff25fe0000)
libcusparse.so.12 => /opt/nvidia/hpc_sdk/Linux_aarch64/24.3/math_libs/12.3/lib64/libcusparse.so.12 (0x0000ffff16400000)
libcxi.so.1 => /usr/lib64/libcxi.so.1 (0x0000ffff163c0000)
libpals.so.0 => /opt/cray/pals/1.4/lib/libpals.so.0 (0x0000ffff16390000)
libnl-3.so.200 => /usr/lib64/libnl-3.so.200 (0x0000ffff16340000)
I am a bit perplexed because everything appears to be linked correctly, including the Cray GPU Transport Layer for CUDA (libmpi_gtl_cuda.so.0). Do you have any suggestions?