Energy differs in each electronic iteration when a different number of CPUs is used
Posted: Wed May 12, 2010 3:57 pm
Dear all,
I compiled VASP 5.2 on an IBM P6. No errors were reported. However, if I use a different number of CPUs, I get a different energy in every electronic iteration, although the converged energy is the same.
In another case, if I use NPAR=1, the energy in every electronic iteration is the same with CPU=16 and CPU=32. It is also the same as the results obtained on another machine.
Details are as follows:
CPU=16
N E dE d eps ncg rms rms(c)
DAV: 1 0.232716856956E+03 0.23272E+03 -0.89381E+03 3136 0.599E+02
DAV: 2 -0.236401503629E+02 -0.25636E+03 -0.23380E+03 4336 0.136E+02
DAV: 3 -0.671131904802E+02 -0.43473E+02 -0.41508E+02 4496 0.714E+01
DAV: 4 -0.704422956238E+02 -0.33291E+01 -0.31854E+01 4448 0.235E+01
DAV: 5 -0.705016494094E+02 -0.59354E-01 -0.59234E-01 3952 0.274E+00 0.242E+01
RMM: 6 -0.411361821220E+02 0.29365E+02 -0.88317E+01 4009 0.450E+01 0.207E+01
RMM: 7 -0.446914358248E+02 -0.35553E+01 -0.11769E+01 3838 0.180E+01 0.879E+00
RMM: 8 -0.500817278724E+02 -0.53903E+01 -0.40095E+00 4146 0.852E+00 0.489E+00
RMM: 9 -0.585703381108E+02 -0.84886E+01 -0.55078E+00 4060 0.849E+00 0.463E+00
RMM: 10 -0.614972076296E+02 -0.29269E+01 -0.13642E+00 4020 0.490E+00 0.305E+00
RMM: 11 -0.660231748370E+02 -0.45260E+01 -0.32868E+00 3711 0.639E+00 0.146E+00
RMM: 12 -0.692155416121E+02 -0.31924E+01 -0.35814E+00 3805 0.636E+00 0.859E-01
RMM: 13 -0.699470611653E+02 -0.73152E+00 -0.87710E-01 3709 0.361E+00 0.511E-01
RMM: 14 -0.701503655437E+02 -0.20330E+00 -0.30599E-01 3803 0.167E+00 0.349E-01
RMM: 15 -0.707311919136E+02 -0.58083E+00 -0.10615E-01 3691 0.989E-01 0.240E-01
RMM: 16 -0.707793207041E+02 -0.48129E-01 -0.19877E-02 3930 0.660E-01 0.841E-02
RMM: 17 -0.708010203502E+02 -0.21700E-01 -0.71238E-03 3914 0.303E-01 0.540E-02
RMM: 18 -0.708053604985E+02 -0.43401E-02 -0.18363E-03 3805 0.152E-01 0.401E-02
RMM: 19 -0.708089950626E+02 -0.36346E-02 -0.64858E-04 3778 0.103E-01 0.209E-02
RMM: 20 -0.708107664552E+02 -0.17714E-02 -0.29666E-04 3767 0.590E-02 0.151E-02
RMM: 21 -0.708121654824E+02 -0.13990E-02 -0.83315E-05 3410 0.376E-02 0.100E-02
RMM: 22 -0.708136719370E+02 -0.15065E-02 -0.15889E-04 3140 0.322E-02 0.620E-03
RMM: 23 -0.708139065031E+02 -0.23457E-03 -0.45099E-05 3089 0.250E-02 0.468E-03
RMM: 24 -0.708139805268E+02 -0.74024E-04 -0.12668E-05 3033 0.123E-02 0.177E-03
RMM: 25 -0.708140043667E+02 -0.23840E-04 -0.49466E-06 2752 0.917E-03 0.846E-04
RMM: 26 -0.708140307477E+02 -0.26381E-04 -0.33394E-06 2827 0.596E-03 0.752E-04
RMM: 27 -0.708140333064E+02 -0.25587E-05 -0.15571E-06 2678 0.503E-03
1 F= -.70814033E+02 E0= -.70814033E+02 d E =0.000000E+00 mag= 1.7678
CPU=32
N E dE d eps ncg rms rms(c)
DAV: 1 0.219323415190E+03 0.21932E+03 -0.94528E+03 2976 0.599E+02
DAV: 2 -0.398297226521E+02 -0.25915E+03 -0.24460E+03 4640 0.109E+02
DAV: 3 -0.694713222076E+02 -0.29642E+02 -0.29534E+02 4384 0.560E+01
DAV: 4 -0.705003074032E+02 -0.10290E+01 -0.10281E+01 4640 0.119E+01
DAV: 5 -0.705043389887E+02 -0.40316E-02 -0.40316E-02 4128 0.601E-01 0.242E+01
RMM: 6 -0.411406842672E+02 0.29364E+02 -0.88237E+01 4000 0.450E+01 0.207E+01
RMM: 7 -0.446705931695E+02 -0.35299E+01 -0.11791E+01 3827 0.180E+01 0.881E+00
RMM: 8 -0.500530562495E+02 -0.53825E+01 -0.39889E+00 4123 0.851E+00 0.490E+00
RMM: 9 -0.585559993267E+02 -0.85029E+01 -0.55213E+00 4037 0.852E+00 0.462E+00
RMM: 10 -0.615046022519E+02 -0.29486E+01 -0.13702E+00 3986 0.494E+00 0.304E+00
RMM: 11 -0.660348262754E+02 -0.45302E+01 -0.32971E+00 3695 0.639E+00 0.145E+00
RMM: 12 -0.692201566275E+02 -0.31853E+01 -0.35737E+00 3794 0.638E+00 0.859E-01
RMM: 13 -0.699473661003E+02 -0.72721E+00 -0.86686E-01 3681 0.359E+00 0.510E-01
RMM: 14 -0.701528933137E+02 -0.20553E+00 -0.30887E-01 3793 0.167E+00 0.347E-01
RMM: 15 -0.707302809964E+02 -0.57739E+00 -0.10468E-01 3682 0.980E-01 0.237E-01
RMM: 16 -0.707797391456E+02 -0.49458E-01 -0.19945E-02 3939 0.659E-01 0.836E-02
RMM: 17 -0.708009733727E+02 -0.21234E-01 -0.70220E-03 3918 0.305E-01 0.534E-02
RMM: 18 -0.708053857529E+02 -0.44124E-02 -0.19497E-03 3784 0.152E-01 0.403E-02
RMM: 19 -0.708090746597E+02 -0.36889E-02 -0.62655E-04 3767 0.102E-01 0.212E-02
RMM: 20 -0.708107851698E+02 -0.17105E-02 -0.28730E-04 3794 0.586E-02 0.152E-02
RMM: 21 -0.708121319533E+02 -0.13468E-02 -0.80396E-05 3409 0.375E-02 0.101E-02
RMM: 22 -0.708136636507E+02 -0.15317E-02 -0.15951E-04 3132 0.322E-02 0.640E-03
RMM: 23 -0.708139089638E+02 -0.24531E-03 -0.47334E-05 3083 0.253E-02 0.466E-03
RMM: 24 -0.708139792482E+02 -0.70284E-04 -0.13335E-05 3054 0.121E-02 0.178E-03
RMM: 25 -0.708140033723E+02 -0.24124E-04 -0.46043E-06 2767 0.913E-03 0.904E-04
RMM: 26 -0.708140288486E+02 -0.25476E-04 -0.29860E-06 2855 0.596E-03 0.750E-04
RMM: 27 -0.708140332793E+02 -0.44306E-05 -0.17427E-06 2655 0.498E-03
1 F= -.70814033E+02 E0= -.70814033E+02 d E =0.000000E+00 mag= 1.7677
CPU=16 or CPU= 32 with NPAR=1
N E dE d eps ncg rms rms(c)
DAV: 1 0.259494536026E+03 0.25949E+03 -0.84729E+03 2840 0.685E+02
DAV: 2 0.302053548693E+01 -0.25647E+03 -0.23424E+03 3356 0.159E+02
DAV: 3 -0.629748254715E+02 -0.65995E+02 -0.64344E+02 3708 0.921E+01
DAV: 4 -0.702036995567E+02 -0.72289E+01 -0.68792E+01 4348 0.408E+01
DAV: 5 -0.704879029121E+02 -0.28420E+00 -0.25291E+00 3552 0.426E+00 0.242E+01
RMM: 6 -0.410015908934E+02 0.29486E+02 -0.98303E+01 3428 0.544E+01 0.204E+01
RMM: 7 -0.446947397083E+02 -0.36931E+01 -0.13255E+01 3355 0.219E+01 0.847E+00
RMM: 8 -0.506057331632E+02 -0.59110E+01 -0.46961E+00 3668 0.103E+01 0.473E+00
RMM: 9 -0.586720210090E+02 -0.80663E+01 -0.51103E+00 3576 0.934E+00 0.457E+00
RMM: 10 -0.613813945142E+02 -0.27094E+01 -0.12635E+00 3585 0.575E+00 0.298E+00
RMM: 11 -0.659061180267E+02 -0.45247E+01 -0.33864E+00 3306 0.747E+00 0.144E+00
RMM: 12 -0.691932525409E+02 -0.32871E+01 -0.38901E+00 3329 0.736E+00 0.846E-01
RMM: 13 -0.699578659428E+02 -0.76461E+00 -0.88284E-01 3181 0.410E+00 0.557E-01
RMM: 14 -0.701380829743E+02 -0.18022E+00 -0.33347E-01 3335 0.212E+00 0.354E-01
RMM: 15 -0.707189401268E+02 -0.58086E+00 -0.12695E-01 3192 0.118E+00 0.247E-01
RMM: 16 -0.707801940550E+02 -0.61254E-01 -0.23173E-02 3403 0.774E-01 0.783E-02
RMM: 17 -0.708010435408E+02 -0.20849E-01 -0.77493E-03 3479 0.358E-01 0.508E-02
RMM: 18 -0.708055782017E+02 -0.45347E-02 -0.25324E-03 3409 0.191E-01 0.342E-02
RMM: 19 -0.708091857312E+02 -0.36075E-02 -0.59540E-04 3426 0.120E-01 0.181E-02
RMM: 20 -0.708111723368E+02 -0.19866E-02 -0.30037E-04 3358 0.649E-02 0.139E-02
RMM: 21 -0.708124444972E+02 -0.12722E-02 -0.12134E-04 3023 0.493E-02 0.107E-02
RMM: 22 -0.708137050541E+02 -0.12606E-02 -0.15687E-04 2781 0.390E-02 0.642E-03
RMM: 23 -0.708139066596E+02 -0.20161E-03 -0.40121E-05 2786 0.287E-02 0.438E-03
RMM: 24 -0.708139610761E+02 -0.54417E-04 -0.11898E-05 2599 0.147E-02 0.207E-03
RMM: 25 -0.708139987255E+02 -0.37649E-04 -0.71577E-06 2363 0.112E-02 0.108E-03
RMM: 26 -0.708140263784E+02 -0.27653E-04 -0.32369E-06 2450 0.712E-03 0.710E-04
RMM: 27 -0.708140340162E+02 -0.76379E-05 -0.17588E-06 2276 0.574E-03
1 F= -.70814034E+02 E0= -.70814034E+02 d E =0.000000E+00 mag= 1.7678
Any help will be highly appreciated.
The following is my makefile for the IBM P6:
# Register the file suffixes used in this makefile: .F and .f take part in
# the suffix rules (.F.o, .F$(SUFFIX), $(SUFFIX).o); .inc files appear as
# prerequisites in the dependency list.
.SUFFIXES: .inc .f .F
#-----------------------------------------------------------------------
# Makefile for RS 6000/ SP2, SP3 and possibly SP4
# you might also try the makefile.hlrn supplied by Bernd Kallies <kallies@zib.de>
#
#
# Attention:
# =======================
# present default optimisation is -O3, but you might try -O4 as well
#
# several files are optimized less aggressively (see bottom of the makefile),
# since aggressive optimisation of those files caused VASP to bomb
# in one or the other compiler version.
# Because it was impossible to keep track of which file is not
# properly compiled by which version, all files that
# are problematic in one or the other version are compiled
# with lower optimisation
#
# ZHEEVX was not working properly with some versions;
# if you have problems remove
# #define USE_ZHEEVX
# from subrot.F, davidson.F and wavpre_noio.F
#
#-----------------------------------------------------------------------
# all CPP processed fortran files have the extension .f
# (SUFFIX is appended to the target stem in the $(FC) compile lines)
SUFFIX=.f
#-----------------------------------------------------------------------
# fortran compiler and linker
# FC is the compile command; FCL (used on the link lines) reuses the very
# same command, so compiling and linking share -qfree=f90 -q64.
#-----------------------------------------------------------------------
FC=mpxlf90_r -qfree=f90 -q64
FCL=$(FC)
#-----------------------------------------------------------------------
# C-preprocessor define any of the flags given below
# MPI generate parallel version
# NGZhalf charge density reduced in Z direction
# wNGZhalf gamma point only reduced in Z direction
# CACHE_SIZE 5001 for SP3 and Power 3
# 32768 for 550,590,3CT
# 8001 595/397 quad word systems
# scaLAPACK use scaLAPACK
#-----------------------------------------------------------------------
# CPP is a complete command line, not just the program name: it is
# expanded inside recipes, where $* is the stem of the current target,
# and preprocesses <stem>.F into <stem>$(SUFFIX).
# The output name is built from $(SUFFIX) rather than a literal ".f" so
# that it always names the file the following "$(FC) ... -c $*$(SUFFIX)"
# step reads, even if SUFFIX is changed.
CPP = /usr/ccs/lib/cpp -P -DHOST=\"SP2/3/4\" -DMPI -DNGZhalf \
      -Duse_collective \
      -DCACHE_SIZE=32768 $*.F >$*$(SUFFIX)
#-----------------------------------------------------------------------
# general fortran flags, none required
#-----------------------------------------------------------------------
# FFLAGS is empty here; it is passed on the $(FC) compile lines.
FFLAGS =
#-----------------------------------------------------------------------
# optimization:
# optimise for the machine on which the code is compiled
#-----------------------------------------------------------------------
# OFLAG: default optimisation used by the generic .F.o and $(SUFFIX).o rules.
OFLAG = -O3 -qstrict -qarch=pwr6 -qtune=pwr6
# OFLAG_HIGH: flags used by the $(OBJ_HIGH) rule; identical to OFLAG here.
OFLAG_HIGH = $(OFLAG)
# OBJ_HIGH / OBJ_NOOPT: targets of the $(OBJ_HIGH) and $(OBJ_NOOPT) rules.
# Both are set to the placeholder "none", so both rules get the target "none".
OBJ_HIGH = none
OBJ_NOOPT = none
# DEBUG: used in place of $(OFLAG) when compiling main.o and makeparam.o.
DEBUG = -g
# INCS: extra include options for the compile lines (empty here).
INCS =
# INLINE: used in place of $(OFLAG) for xcgrad.o and xcspin.o; same value here.
INLINE = $(OFLAG)
#-----------------------------------------------------------------------
# maybe one need to set an include path (usually not required)
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# options for linking
# the following option increases the size of the data frame
# the default makefile does not include support for scaLAPACK
# since problems with scaLAPACK were reported
#
#-----------------------------------------------------------------------
# LINK: linker option passed to $(FCL) on every link line.
LINK = -Wl,-bD:1000000000
# MPI: library path and library appended to LIB.
MPI = -L/usr/lpp/ppe.poe/lib/ -lmpi
#SCALAPACK = -lpesslsmp -lblacssmp -L /usr/local/lib -lscalapack -lblacsF77init
#SCALAPACK = -lscalapack -lpblas -lblacs -ltools
# SCALAPACK is left empty; CPP does not define -DscaLAPACK either.
SCALAPACK =
# LIB: everything appended to the link lines after the object files.
# NOTE(review): -lpesslsmp and -lblacssmp are linked although SCALAPACK is
# empty and -DscaLAPACK is not defined; presumably (by name) they are SMP
# builds combined with the non-SMP -lessl -- TODO confirm this mix is intended.
LIB = -L../vasp.5.lib -ldmy ../vasp.5.lib/linpack_double.o \
-L/usr/local/lib -llapack-essl -lessl -lpesslsmp -lblacssmp $(MPI) $(SCALAPACK)
#-----------------------------------------------------------------------
# specify 3d-fft to be used with VASP
#-----------------------------------------------------------------------
#FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o
# FFT3D mixes object files with the static FFTW3 archive; the whole list is
# used both as a prerequisite list and on the link lines.
FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o /usr/local/lib/libfftw3.a
#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# BASIC: helper objects that are spliced into the middle of SOURCE.
BASIC= symmetry.o symlib.o lattlib.o random.o
# SOURCE: the objects (other than main.o and the FFT objects) that the
# vasp, makeparam and kpoints targets depend on and link.
# NOTE(review): presumably the order of this list matters, because module
# dependencies are not fully declared in this makefile -- TODO confirm
# before reordering.
SOURCE= base.o mpi.o smart_allocate.o xml.o \
constant.o jacobi.o main_mpi.o scala.o \
asa.o lattice.o poscar.o ini.o xclib.o xclib_grad.o \
radial.o pseudo.o mgrid.o gridq.o ebs.o \
mkpoints.o wave.o wave_mpi.o wave_high.o \
$(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
metagga.o constrmag.o cl_shift.o relativistic.o LDApU.o \
paw_base.o egrad.o pawsym.o pawfock.o pawlhf.o paw.o \
mkpoints_full.o charge.o dipol.o pot.o \
dos.o elf.o tet.o tetweight.o hamil_rot.o \
steep.o chain.o dyna.o sphpro.o us.o core_rel.o \
aedens.o wavpre.o wavpre_noio.o broyden.o \
dynbr.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
brent.o stufak.o fileio.o opergrid.o stepver.o \
chgloc.o fast_aug.o fock.o mkpoints_change.o sym_grad.o \
mymath.o internals.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
hamil_high.o nmr.o force.o \
pead.o subrot.o subrot_scf.o pwlhf.o gw_model.o optreal.o davidson.o \
electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
linear_optics.o linear_response.o \
setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
ump2.o bse.o acfdt.o chi.o sydmat.o
# INC: extra prerequisites of the link targets (empty here).
INC=
# Link targets.  Every recipe links already compiled objects with $(FCL);
# $@ stands for the name of the executable being produced.

# vasp: main executable; any stale binary is removed before relinking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# makeparam: links makeparam.o against the full object list.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

# zgemmtest / dgemmtest: small test programs needing only base.o and random.o.
zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

# ffttest: test program for the FFT objects selected in FFT3D.
ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# kpoints: links makekpoints.o against the full object list.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
# clean: delete the *.g, *.f, *.o, *.L and *.mod files in this directory and
# touch every *.F so that the next build preprocesses and recompiles all
# sources.  The leading "-" lets make continue even if the line fails.
# Declared phony so that a file that happens to be named "clean" can never
# make this target look up to date.
.PHONY: clean
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F
# Objects with their own compile flags.  Each one is built from the already
# preprocessed source <name>$(SUFFIX), which is listed as the prerequisite.
#
# main.o and makeparam.o are compiled with $(DEBUG) instead of $(OFLAG).
# $(FFLAGS) and $(DEBUG) must stay separated by a space: written back to
# back, a non-empty FFLAGS would fuse with the debug flag into one bogus
# compiler option.
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

# xcgrad.o and xcspin.o are compiled with $(INLINE) instead of $(OFLAG).
xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)

xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# No recipe here: this only adds main.F as an extra prerequisite of the
# preprocessed makeparam source.
makeparam$(SUFFIX): makeparam.F main.F
#
# MIND: I do not have a full dependency list for the include
# and MODULES: here are only the minimal basic dependencies
# if one structure is changed then touch_dep must be called
# with the corresponding name of the structure
#
# Each line below only adds prerequisites (there is no recipe): the object
# is considered out of date when its .inc file or its .F source is newer.
# Note that setex.o is not part of the SOURCE list in this makefile.
base.o: base.inc base.F
mgrid.o: mgrid.inc mgrid.F
constant.o: constant.inc constant.F
lattice.o: lattice.inc lattice.F
setex.o: setexm.inc setex.F
pseudo.o: pseudo.inc pseudo.F
poscar.o: poscar.inc poscar.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o: wave.inc wave.F
nonl.o: nonl.inc nonl.F
nonlr.o: nonlr.inc nonlr.F
# Rule for the objects listed in OBJ_HIGH: preprocess with $(CPP), then
# compile with $(OFLAG_HIGH).
# NOTE(review): OBJ_HIGH and OBJ_NOOPT are both "none" in this makefile, so
# this rule and the next one both give a recipe for the same target "none";
# GNU make is expected to warn about an overridden recipe -- TODO confirm.
$(OBJ_HIGH):
$(CPP)
$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)
# Rule for the objects listed in OBJ_NOOPT: preprocess, then compile with
# no optimisation flags at all.
$(OBJ_NOOPT):
$(CPP)
$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)
# NOTE(review): F77 and FFLAGS_F77 are not assigned anywhere in the visible
# part of this makefile, and fft3dlib_f77.o is not part of FFT3D -- confirm
# both variables are provided before building this target.
fft3dlib_f77.o: fft3dlib_f77.F
$(CPP)
$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)
# Default suffix rule .F -> .o: preprocess <stem>.F, then compile the
# preprocessed file with $(OFLAG).
.F.o:
$(CPP)
$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
# Suffix rule .F -> $(SUFFIX): preprocess only.
.F$(SUFFIX):
$(CPP)
# Suffix rule $(SUFFIX) -> .o: compile an already preprocessed file.
$(SUFFIX).o:
$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
# special rules
#-----------------------------------------------------------------------
# Each object below gets an explicit recipe instead of the generic .F.o
# rule: the source is preprocessed with $(CPP) and then compiled with a
# fixed, lower optimisation level (-O2, -O or -O1).  $(OFLAG) is not used on
# these lines, so its -qstrict/-qarch/-qtune options are not passed either.
radial.o: radial.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
wave.o: wave.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
metagga.o: metagga.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
# nonl.o is the only one compiled with plain -O.
nonl.o: nonl.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O -c $*$(SUFFIX)
paw.o: paw.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)
pseudo.o: pseudo.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)
I compiled VASP 5.2 on an IBM P6. No errors were reported. However, if I use a different number of CPUs, I get a different energy in every electronic iteration, although the converged energy is the same.
In another case, if I use NPAR=1, the energy in every electronic iteration is the same with CPU=16 and CPU=32. It is also the same as the results obtained on another machine.
Details are as follows:
CPU=16
N E dE d eps ncg rms rms(c)
DAV: 1 0.232716856956E+03 0.23272E+03 -0.89381E+03 3136 0.599E+02
DAV: 2 -0.236401503629E+02 -0.25636E+03 -0.23380E+03 4336 0.136E+02
DAV: 3 -0.671131904802E+02 -0.43473E+02 -0.41508E+02 4496 0.714E+01
DAV: 4 -0.704422956238E+02 -0.33291E+01 -0.31854E+01 4448 0.235E+01
DAV: 5 -0.705016494094E+02 -0.59354E-01 -0.59234E-01 3952 0.274E+00 0.242E+01
RMM: 6 -0.411361821220E+02 0.29365E+02 -0.88317E+01 4009 0.450E+01 0.207E+01
RMM: 7 -0.446914358248E+02 -0.35553E+01 -0.11769E+01 3838 0.180E+01 0.879E+00
RMM: 8 -0.500817278724E+02 -0.53903E+01 -0.40095E+00 4146 0.852E+00 0.489E+00
RMM: 9 -0.585703381108E+02 -0.84886E+01 -0.55078E+00 4060 0.849E+00 0.463E+00
RMM: 10 -0.614972076296E+02 -0.29269E+01 -0.13642E+00 4020 0.490E+00 0.305E+00
RMM: 11 -0.660231748370E+02 -0.45260E+01 -0.32868E+00 3711 0.639E+00 0.146E+00
RMM: 12 -0.692155416121E+02 -0.31924E+01 -0.35814E+00 3805 0.636E+00 0.859E-01
RMM: 13 -0.699470611653E+02 -0.73152E+00 -0.87710E-01 3709 0.361E+00 0.511E-01
RMM: 14 -0.701503655437E+02 -0.20330E+00 -0.30599E-01 3803 0.167E+00 0.349E-01
RMM: 15 -0.707311919136E+02 -0.58083E+00 -0.10615E-01 3691 0.989E-01 0.240E-01
RMM: 16 -0.707793207041E+02 -0.48129E-01 -0.19877E-02 3930 0.660E-01 0.841E-02
RMM: 17 -0.708010203502E+02 -0.21700E-01 -0.71238E-03 3914 0.303E-01 0.540E-02
RMM: 18 -0.708053604985E+02 -0.43401E-02 -0.18363E-03 3805 0.152E-01 0.401E-02
RMM: 19 -0.708089950626E+02 -0.36346E-02 -0.64858E-04 3778 0.103E-01 0.209E-02
RMM: 20 -0.708107664552E+02 -0.17714E-02 -0.29666E-04 3767 0.590E-02 0.151E-02
RMM: 21 -0.708121654824E+02 -0.13990E-02 -0.83315E-05 3410 0.376E-02 0.100E-02
RMM: 22 -0.708136719370E+02 -0.15065E-02 -0.15889E-04 3140 0.322E-02 0.620E-03
RMM: 23 -0.708139065031E+02 -0.23457E-03 -0.45099E-05 3089 0.250E-02 0.468E-03
RMM: 24 -0.708139805268E+02 -0.74024E-04 -0.12668E-05 3033 0.123E-02 0.177E-03
RMM: 25 -0.708140043667E+02 -0.23840E-04 -0.49466E-06 2752 0.917E-03 0.846E-04
RMM: 26 -0.708140307477E+02 -0.26381E-04 -0.33394E-06 2827 0.596E-03 0.752E-04
RMM: 27 -0.708140333064E+02 -0.25587E-05 -0.15571E-06 2678 0.503E-03
1 F= -.70814033E+02 E0= -.70814033E+02 d E =0.000000E+00 mag= 1.7678
CPU=32
N E dE d eps ncg rms rms(c)
DAV: 1 0.219323415190E+03 0.21932E+03 -0.94528E+03 2976 0.599E+02
DAV: 2 -0.398297226521E+02 -0.25915E+03 -0.24460E+03 4640 0.109E+02
DAV: 3 -0.694713222076E+02 -0.29642E+02 -0.29534E+02 4384 0.560E+01
DAV: 4 -0.705003074032E+02 -0.10290E+01 -0.10281E+01 4640 0.119E+01
DAV: 5 -0.705043389887E+02 -0.40316E-02 -0.40316E-02 4128 0.601E-01 0.242E+01
RMM: 6 -0.411406842672E+02 0.29364E+02 -0.88237E+01 4000 0.450E+01 0.207E+01
RMM: 7 -0.446705931695E+02 -0.35299E+01 -0.11791E+01 3827 0.180E+01 0.881E+00
RMM: 8 -0.500530562495E+02 -0.53825E+01 -0.39889E+00 4123 0.851E+00 0.490E+00
RMM: 9 -0.585559993267E+02 -0.85029E+01 -0.55213E+00 4037 0.852E+00 0.462E+00
RMM: 10 -0.615046022519E+02 -0.29486E+01 -0.13702E+00 3986 0.494E+00 0.304E+00
RMM: 11 -0.660348262754E+02 -0.45302E+01 -0.32971E+00 3695 0.639E+00 0.145E+00
RMM: 12 -0.692201566275E+02 -0.31853E+01 -0.35737E+00 3794 0.638E+00 0.859E-01
RMM: 13 -0.699473661003E+02 -0.72721E+00 -0.86686E-01 3681 0.359E+00 0.510E-01
RMM: 14 -0.701528933137E+02 -0.20553E+00 -0.30887E-01 3793 0.167E+00 0.347E-01
RMM: 15 -0.707302809964E+02 -0.57739E+00 -0.10468E-01 3682 0.980E-01 0.237E-01
RMM: 16 -0.707797391456E+02 -0.49458E-01 -0.19945E-02 3939 0.659E-01 0.836E-02
RMM: 17 -0.708009733727E+02 -0.21234E-01 -0.70220E-03 3918 0.305E-01 0.534E-02
RMM: 18 -0.708053857529E+02 -0.44124E-02 -0.19497E-03 3784 0.152E-01 0.403E-02
RMM: 19 -0.708090746597E+02 -0.36889E-02 -0.62655E-04 3767 0.102E-01 0.212E-02
RMM: 20 -0.708107851698E+02 -0.17105E-02 -0.28730E-04 3794 0.586E-02 0.152E-02
RMM: 21 -0.708121319533E+02 -0.13468E-02 -0.80396E-05 3409 0.375E-02 0.101E-02
RMM: 22 -0.708136636507E+02 -0.15317E-02 -0.15951E-04 3132 0.322E-02 0.640E-03
RMM: 23 -0.708139089638E+02 -0.24531E-03 -0.47334E-05 3083 0.253E-02 0.466E-03
RMM: 24 -0.708139792482E+02 -0.70284E-04 -0.13335E-05 3054 0.121E-02 0.178E-03
RMM: 25 -0.708140033723E+02 -0.24124E-04 -0.46043E-06 2767 0.913E-03 0.904E-04
RMM: 26 -0.708140288486E+02 -0.25476E-04 -0.29860E-06 2855 0.596E-03 0.750E-04
RMM: 27 -0.708140332793E+02 -0.44306E-05 -0.17427E-06 2655 0.498E-03
1 F= -.70814033E+02 E0= -.70814033E+02 d E =0.000000E+00 mag= 1.7677
CPU=16 or CPU= 32 with NPAR=1
N E dE d eps ncg rms rms(c)
DAV: 1 0.259494536026E+03 0.25949E+03 -0.84729E+03 2840 0.685E+02
DAV: 2 0.302053548693E+01 -0.25647E+03 -0.23424E+03 3356 0.159E+02
DAV: 3 -0.629748254715E+02 -0.65995E+02 -0.64344E+02 3708 0.921E+01
DAV: 4 -0.702036995567E+02 -0.72289E+01 -0.68792E+01 4348 0.408E+01
DAV: 5 -0.704879029121E+02 -0.28420E+00 -0.25291E+00 3552 0.426E+00 0.242E+01
RMM: 6 -0.410015908934E+02 0.29486E+02 -0.98303E+01 3428 0.544E+01 0.204E+01
RMM: 7 -0.446947397083E+02 -0.36931E+01 -0.13255E+01 3355 0.219E+01 0.847E+00
RMM: 8 -0.506057331632E+02 -0.59110E+01 -0.46961E+00 3668 0.103E+01 0.473E+00
RMM: 9 -0.586720210090E+02 -0.80663E+01 -0.51103E+00 3576 0.934E+00 0.457E+00
RMM: 10 -0.613813945142E+02 -0.27094E+01 -0.12635E+00 3585 0.575E+00 0.298E+00
RMM: 11 -0.659061180267E+02 -0.45247E+01 -0.33864E+00 3306 0.747E+00 0.144E+00
RMM: 12 -0.691932525409E+02 -0.32871E+01 -0.38901E+00 3329 0.736E+00 0.846E-01
RMM: 13 -0.699578659428E+02 -0.76461E+00 -0.88284E-01 3181 0.410E+00 0.557E-01
RMM: 14 -0.701380829743E+02 -0.18022E+00 -0.33347E-01 3335 0.212E+00 0.354E-01
RMM: 15 -0.707189401268E+02 -0.58086E+00 -0.12695E-01 3192 0.118E+00 0.247E-01
RMM: 16 -0.707801940550E+02 -0.61254E-01 -0.23173E-02 3403 0.774E-01 0.783E-02
RMM: 17 -0.708010435408E+02 -0.20849E-01 -0.77493E-03 3479 0.358E-01 0.508E-02
RMM: 18 -0.708055782017E+02 -0.45347E-02 -0.25324E-03 3409 0.191E-01 0.342E-02
RMM: 19 -0.708091857312E+02 -0.36075E-02 -0.59540E-04 3426 0.120E-01 0.181E-02
RMM: 20 -0.708111723368E+02 -0.19866E-02 -0.30037E-04 3358 0.649E-02 0.139E-02
RMM: 21 -0.708124444972E+02 -0.12722E-02 -0.12134E-04 3023 0.493E-02 0.107E-02
RMM: 22 -0.708137050541E+02 -0.12606E-02 -0.15687E-04 2781 0.390E-02 0.642E-03
RMM: 23 -0.708139066596E+02 -0.20161E-03 -0.40121E-05 2786 0.287E-02 0.438E-03
RMM: 24 -0.708139610761E+02 -0.54417E-04 -0.11898E-05 2599 0.147E-02 0.207E-03
RMM: 25 -0.708139987255E+02 -0.37649E-04 -0.71577E-06 2363 0.112E-02 0.108E-03
RMM: 26 -0.708140263784E+02 -0.27653E-04 -0.32369E-06 2450 0.712E-03 0.710E-04
RMM: 27 -0.708140340162E+02 -0.76379E-05 -0.17588E-06 2276 0.574E-03
1 F= -.70814034E+02 E0= -.70814034E+02 d E =0.000000E+00 mag= 1.7678
Any help will be highly appreciated.
The following is my makefile for the IBM P6:
# Register the file suffixes used in this makefile: .F and .f take part in
# the suffix rules (.F.o, .F$(SUFFIX), $(SUFFIX).o); .inc files appear as
# prerequisites in the dependency list.
.SUFFIXES: .inc .f .F
#-----------------------------------------------------------------------
# Makefile for RS 6000/ SP2, SP3 and possibly SP4
# you might also try the makefile.hlrn supplied by Bernd Kallies <kallies@zib.de>
#
#
# Attention:
# =======================
# present default optimisation is -O3, but you might try -O4 as well
#
# several files are optimized less aggressively (see bottom of the makefile),
# since aggressive optimisation of those files caused VASP to bomb
# in one or the other compiler version.
# Because it was impossible to keep track of which file is not
# properly compiled by which version, all files that
# are problematic in one or the other version are compiled
# with lower optimisation
#
# ZHEEVX was not working properly with some versions;
# if you have problems remove
# #define USE_ZHEEVX
# from subrot.F, davidson.F and wavpre_noio.F
#
#-----------------------------------------------------------------------
# all CPP processed fortran files have the extension .f
# (SUFFIX is appended to the target stem in the $(FC) compile lines)
SUFFIX=.f
#-----------------------------------------------------------------------
# fortran compiler and linker
# FC is the compile command; FCL (used on the link lines) reuses the very
# same command, so compiling and linking share -qfree=f90 -q64.
#-----------------------------------------------------------------------
FC=mpxlf90_r -qfree=f90 -q64
FCL=$(FC)
#-----------------------------------------------------------------------
# C-preprocessor define any of the flags given below
# MPI generate parallel version
# NGZhalf charge density reduced in Z direction
# wNGZhalf gamma point only reduced in Z direction
# CACHE_SIZE 5001 for SP3 and Power 3
# 32768 for 550,590,3CT
# 8001 595/397 quad word systems
# scaLAPACK use scaLAPACK
#-----------------------------------------------------------------------
# CPP is a complete command line, not just the program name: it is
# expanded inside recipes, where $* is the stem of the current target,
# and preprocesses <stem>.F into <stem>$(SUFFIX).
# The output name is built from $(SUFFIX) rather than a literal ".f" so
# that it always names the file the following "$(FC) ... -c $*$(SUFFIX)"
# step reads, even if SUFFIX is changed.
CPP = /usr/ccs/lib/cpp -P -DHOST=\"SP2/3/4\" -DMPI -DNGZhalf \
      -Duse_collective \
      -DCACHE_SIZE=32768 $*.F >$*$(SUFFIX)
#-----------------------------------------------------------------------
# general fortran flags, none required
#-----------------------------------------------------------------------
# FFLAGS is empty here; it is passed on the $(FC) compile lines.
FFLAGS =
#-----------------------------------------------------------------------
# optimization:
# optimise for the machine on which the code is compiled
#-----------------------------------------------------------------------
# OFLAG: default optimisation used by the generic .F.o and $(SUFFIX).o rules.
OFLAG = -O3 -qstrict -qarch=pwr6 -qtune=pwr6
# OFLAG_HIGH: flags used by the $(OBJ_HIGH) rule; identical to OFLAG here.
OFLAG_HIGH = $(OFLAG)
# OBJ_HIGH / OBJ_NOOPT: targets of the $(OBJ_HIGH) and $(OBJ_NOOPT) rules.
# Both are set to the placeholder "none", so both rules get the target "none".
OBJ_HIGH = none
OBJ_NOOPT = none
# DEBUG: used in place of $(OFLAG) when compiling main.o and makeparam.o.
DEBUG = -g
# INCS: extra include options for the compile lines (empty here).
INCS =
# INLINE: used in place of $(OFLAG) for xcgrad.o and xcspin.o; same value here.
INLINE = $(OFLAG)
#-----------------------------------------------------------------------
# maybe one need to set an include path (usually not required)
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
# options for linking
# the following option increases the size of the data frame
# the default makefile does not include support for scaLAPACK
# since problems with scaLAPACK were reported
#
#-----------------------------------------------------------------------
# LINK: linker option passed to $(FCL) on every link line.
LINK = -Wl,-bD:1000000000
# MPI: library path and library appended to LIB.
MPI = -L/usr/lpp/ppe.poe/lib/ -lmpi
#SCALAPACK = -lpesslsmp -lblacssmp -L /usr/local/lib -lscalapack -lblacsF77init
#SCALAPACK = -lscalapack -lpblas -lblacs -ltools
# SCALAPACK is left empty; CPP does not define -DscaLAPACK either.
SCALAPACK =
# LIB: everything appended to the link lines after the object files.
# NOTE(review): -lpesslsmp and -lblacssmp are linked although SCALAPACK is
# empty and -DscaLAPACK is not defined; presumably (by name) they are SMP
# builds combined with the non-SMP -lessl -- TODO confirm this mix is intended.
LIB = -L../vasp.5.lib -ldmy ../vasp.5.lib/linpack_double.o \
-L/usr/local/lib -llapack-essl -lessl -lpesslsmp -lblacssmp $(MPI) $(SCALAPACK)
#-----------------------------------------------------------------------
# specify 3d-fft to be used with VASP
#-----------------------------------------------------------------------
#FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o
# FFT3D mixes object files with the static FFTW3 archive; the whole list is
# used both as a prerequisite list and on the link lines.
FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o /usr/local/lib/libfftw3.a
#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# BASIC: helper objects that are spliced into the middle of SOURCE.
BASIC= symmetry.o symlib.o lattlib.o random.o
# SOURCE: the objects (other than main.o and the FFT objects) that the
# vasp, makeparam and kpoints targets depend on and link.
# NOTE(review): presumably the order of this list matters, because module
# dependencies are not fully declared in this makefile -- TODO confirm
# before reordering.
SOURCE= base.o mpi.o smart_allocate.o xml.o \
constant.o jacobi.o main_mpi.o scala.o \
asa.o lattice.o poscar.o ini.o xclib.o xclib_grad.o \
radial.o pseudo.o mgrid.o gridq.o ebs.o \
mkpoints.o wave.o wave_mpi.o wave_high.o \
$(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
metagga.o constrmag.o cl_shift.o relativistic.o LDApU.o \
paw_base.o egrad.o pawsym.o pawfock.o pawlhf.o paw.o \
mkpoints_full.o charge.o dipol.o pot.o \
dos.o elf.o tet.o tetweight.o hamil_rot.o \
steep.o chain.o dyna.o sphpro.o us.o core_rel.o \
aedens.o wavpre.o wavpre_noio.o broyden.o \
dynbr.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
brent.o stufak.o fileio.o opergrid.o stepver.o \
chgloc.o fast_aug.o fock.o mkpoints_change.o sym_grad.o \
mymath.o internals.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
hamil_high.o nmr.o force.o \
pead.o subrot.o subrot_scf.o pwlhf.o gw_model.o optreal.o davidson.o \
electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
linear_optics.o linear_response.o \
setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
ump2.o bse.o acfdt.o chi.o sydmat.o
# INC: extra prerequisites of the link targets (empty here).
INC=
# Link targets.  Every recipe links already compiled objects with $(FCL);
# $@ stands for the name of the executable being produced.

# vasp: main executable; any stale binary is removed before relinking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# makeparam: links makeparam.o against the full object list.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

# zgemmtest / dgemmtest: small test programs needing only base.o and random.o.
zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

# ffttest: test program for the FFT objects selected in FFT3D.
ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# kpoints: links makekpoints.o against the full object list.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
# clean: delete the *.g, *.f, *.o, *.L and *.mod files in this directory and
# touch every *.F so that the next build preprocesses and recompiles all
# sources.  The leading "-" lets make continue even if the line fails.
# Declared phony so that a file that happens to be named "clean" can never
# make this target look up to date.
.PHONY: clean
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F
# Objects with their own compile flags.  Each one is built from the already
# preprocessed source <name>$(SUFFIX), which is listed as the prerequisite.
#
# main.o and makeparam.o are compiled with $(DEBUG) instead of $(OFLAG).
# $(FFLAGS) and $(DEBUG) must stay separated by a space: written back to
# back, a non-empty FFLAGS would fuse with the debug flag into one bogus
# compiler option.
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

# xcgrad.o and xcspin.o are compiled with $(INLINE) instead of $(OFLAG).
xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)

xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# No recipe here: this only adds main.F as an extra prerequisite of the
# preprocessed makeparam source.
makeparam$(SUFFIX): makeparam.F main.F
#
# MIND: I do not have a full dependency list for the include
# and MODULES: here are only the minimal basic dependencies
# if one structure is changed then touch_dep must be called
# with the corresponding name of the structure
#
# Each line below only adds prerequisites (there is no recipe): the object
# is considered out of date when its .inc file or its .F source is newer.
# Note that setex.o is not part of the SOURCE list in this makefile.
base.o: base.inc base.F
mgrid.o: mgrid.inc mgrid.F
constant.o: constant.inc constant.F
lattice.o: lattice.inc lattice.F
setex.o: setexm.inc setex.F
pseudo.o: pseudo.inc pseudo.F
poscar.o: poscar.inc poscar.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o: wave.inc wave.F
nonl.o: nonl.inc nonl.F
nonlr.o: nonlr.inc nonlr.F
# Rule for the objects listed in OBJ_HIGH: preprocess with $(CPP), then
# compile with $(OFLAG_HIGH).
# NOTE(review): OBJ_HIGH and OBJ_NOOPT are both "none" in this makefile, so
# this rule and the next one both give a recipe for the same target "none";
# GNU make is expected to warn about an overridden recipe -- TODO confirm.
$(OBJ_HIGH):
$(CPP)
$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)
# Rule for the objects listed in OBJ_NOOPT: preprocess, then compile with
# no optimisation flags at all.
$(OBJ_NOOPT):
$(CPP)
$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)
# NOTE(review): F77 and FFLAGS_F77 are not assigned anywhere in the visible
# part of this makefile, and fft3dlib_f77.o is not part of FFT3D -- confirm
# both variables are provided before building this target.
fft3dlib_f77.o: fft3dlib_f77.F
$(CPP)
$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)
# Default suffix rule .F -> .o: preprocess <stem>.F, then compile the
# preprocessed file with $(OFLAG).
.F.o:
$(CPP)
$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
# Suffix rule .F -> $(SUFFIX): preprocess only.
.F$(SUFFIX):
$(CPP)
# Suffix rule $(SUFFIX) -> .o: compile an already preprocessed file.
$(SUFFIX).o:
$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
# special rules
#-----------------------------------------------------------------------
# Each object below gets an explicit recipe instead of the generic .F.o
# rule: the source is preprocessed with $(CPP) and then compiled with a
# fixed, lower optimisation level (-O2, -O or -O1).  $(OFLAG) is not used on
# these lines, so its -qstrict/-qarch/-qtune options are not passed either.
radial.o: radial.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
wave.o: wave.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
metagga.o: metagga.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O2 -c $*$(SUFFIX)
# nonl.o is the only one compiled with plain -O.
nonl.o: nonl.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O -c $*$(SUFFIX)
paw.o: paw.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)
pseudo.o: pseudo.F
$(CPP)
$(FC) $(FFLAGS) $(INCS) -O1 -c $*$(SUFFIX)