diff -ruN as-is/vasp.5.2/makefile.open64 open64/vasp.5.2/makefile.open64
--- as-is/vasp.5.2/makefile.open64	1969-12-31 16:00:00.000000000 -0800
+++ open64/vasp.5.2/makefile.open64	2013-01-15 11:27:42.000000000 -0800
@@ -0,0 +1,262 @@
+.SUFFIXES: .inc .f .f90 .F
+#-----------------------------------------------------------------------
+# Makefile for AMD systems with the Open64 compiler and ACML library
+#
+# The makefile was created by Martin Hilgeman
+#-----------------------------------------------------------------------
+
+# all CPP processed fortran files have the extension .f90
+SUFFIX=.f90
+
+#-----------------------------------------------------------------------
+# START CUSTOMIZATION HERE
+#-----------------------------------------------------------------------
+
+#-----------------------------------------------------------------------
+# whereis CPP ?? (I need CPP, can't use gcc with proper options)
+# the following works almost on all systems
+# possible cpp is located in a different directory
+#-----------------------------------------------------------------------
+
+CPP_ = ./preprocess <$*.F | /usr/bin/cpp -P -C -traditional >$*$(SUFFIX)
+
+#-----------------------------------------------------------------------
+# f90 compiler
+#-----------------------------------------------------------------------
+
+# simple version, use mpif90 wrapper
+# this works only if mpif90 has been compiled using the exactly
+# same fortran compiler
+FC=mpif90
+FCL=$(FC)
+
+#-----------------------------------------------------------------------
+# general fortran flags (a trailing blank is no longer required: the
+# compile rules put a space between $(FFLAGS) and the next option)
+#-----------------------------------------------------------------------
+VSC=
+
+FFLAGS = -fno-second-underscore -freeform -ftpp -woff1278,1279,1615 -r8
+
+#-----------------------------------------------------------------------
+# optimization
+# for some files a lower optimization level is explicitly selected
+# at the bottom
+#-----------------------------------------------------------------------
+
+OFLAG = -O3 -OPT:Olimit=0 -march=bdver1 -mavx -mfma4
+
+OFLAG_HIGH = $(OFLAG)
+OFLAG_MED = -O2 -march=bdver1 -mavx -mfma4
+OFLAG_LOW = -O1 -march=bdver1
+OFLAG_NOOPT = -O0 -march=bdver1
+OBJ_HIGH =
+OBJ_NOOPT =
+DEBUG = -g
+INLINE = $(OFLAG)
+
+#-----------------------------------------------------------------------
+# the following lines specify the position of BLAS and LAPACK,
+# PBLAS and scaLAPACK
+#-----------------------------------------------------------------------
+
+# Use AMD ACML library for BLAS and LAPACK
+ACML_PATH=/home/martinh/opt/acml/5.2.0/open64_64_fma4
+BLAS=-L$(ACML_PATH)/lib -lacml -lfortran -Wl,-rpath,$(ACML_PATH)/lib
+
+LAPACK =
+
+# location of BLACS and SCALAPACK
+# optional only required if SCA is defined below
+#BLACSdir = $(VSC)/BLACS/BLACS/LIB
+#BLACS=$(BLACSdir)/blacsF77init_MPI-LINUX-OPENMPI-0.a $(BLACSdir)/blacs_MPI-LINUX-OPENMPI-0.a $(BLACSdir)/blacsF77init_MPI-LINUX-OPENMPI-0.a
+
+## For openmpi
+#BLACS=-lmkl_blacs_openmpi_lp64
+BLACS=
+
+## For mpich
+#BLACS=-lmkl_blacs_lp64
+
+## Just a test for qlogic
+#BLACS=-lmkl_blacs_intelmpi_lp64
+
+SCA_=
+
+# BLACS and SCALAPACK libraries if available
+# if SCA is defined SCALAPACK will be used
+
+#SCA=/home/martinh/opt/scalapack/mvapich2/2.0.2/lib/libscalapack.a
+SCA=/home/martinh/opt/scalapack/openmpi-1.5.5/2.0.2/lib/libscalapack.a
+
+LINK=
+#-----------------------------------------------------------------------
+# END CUSTOMIZATION
+#-----------------------------------------------------------------------
+
+#-----------------------------------------------------------------------
+# options for CPP in parallel version (see also above):
+# NGZhalf charge density reduced in Z direction
+# wNGZhalf gamma point only reduced in Z direction
+# scaLAPACK use scaLAPACK (usually slower on 100 Mbit Net)
+#
+# the blank inside the HOST string is backslash-escaped so that the shell
+# passes -DHOST="AMD Open64" to cpp as one single argument
+#-----------------------------------------------------------------------
+ifdef SCA
+CPP = $(CPP_) -DMPI -DHOST=\"AMD\ Open64\" \
+      -Dkind8 -DNGZhalf -DCACHE_SIZE=4000 -Davoidalloc \
+      -DscaLAPACK -Duse_collective
+else
+CPP = $(CPP_) -DMPI -DHOST=\"AMD\ Open64\" \
+      -Dkind8 -DNGZhalf -DCACHE_SIZE=4000 -Davoidalloc \
+      -Duse_collective
+endif
+
+#-----------------------------------------------------------------------
+# libraries for vasp
+#-----------------------------------------------------------------------
+ifdef SCA
+LIB = -L../vasp.5.lib.open64 -ldmy \
+      ../vasp.5.lib.open64/linpack_double.o \
+      $(SCA) $(LAPACK) $(BLAS)
+else
+LIB = -L../vasp.5.lib.open64 -ldmy \
+      ../vasp.5.lib.open64/linpack_double.o \
+      $(LAPACK) $(BLAS)
+endif
+
+# FFT: fftmpi.o with fft3dlib of Juergen Furthmueller
+# must be used for this benchmark
+FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o
+#rv,sgi FFT3D = fftmpi.o fftmpi_map.o fft3dlib.o
+#FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o
+#FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o ./libfftw3.a
+#FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o ./libfftw3xf_intel.a
+#FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o /opt/intel/Compiler/11.0/083/mkl/lib/em64t/libfftw3xf_intel.a
+# FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o $(VSC)/FFTW_Intel/lib/em64t/libfftw3xf_intel.a
+
+#-----------------------------------------------------------------------
+# general rules and compile lines
+#-----------------------------------------------------------------------
+OBJ_HIGH =
+OBJ_MED = relativistic.o LDApU.o paw_base.o
+OBJ_LOW = fftmpiw.o fftmpi.o fftw3d.o wave_high.o broyden.o
+OBJ_NOOPT = nmr.o
+
+BASIC= symmetry.o symlib.o lattlib.o random.o
+
+SOURCE= base.o mpi.o smart_allocate.o xml.o \
+        constant.o jacobi.o main_mpi.o scala.o \
+        asa.o lattice.o poscar.o ini.o xclib.o xclib_grad.o \
+        radial.o pseudo.o mgrid.o gridq.o ebs.o \
+        mkpoints.o wave.o wave_mpi.o wave_high.o \
+        $(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
+        mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
+        constrmag.o cl_shift.o relativistic.o LDApU.o \
+        paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o paw.o \
+        mkpoints_full.o charge.o dipol.o pot.o \
+        dos.o elf.o tet.o tetweight.o hamil_rot.o \
+        steep.o chain.o dyna.o sphpro.o us.o core_rel.o \
+        aedens.o wavpre.o wavpre_noio.o broyden.o \
+        dynbr.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
+        brent.o stufak.o fileio.o opergrid.o stepver.o \
+        chgloc.o fast_aug.o fock.o mkpoints_change.o sym_grad.o \
+        mymath.o internals.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
+        hamil_high.o nmr.o force.o \
+        pead.o mlwf.o subrot.o subrot_scf.o pwlhf.o gw_model.o optreal.o davidson.o \
+        electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
+        optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
+        hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
+        lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
+        linear_optics.o linear_response.o \
+        setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
+        ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
+        ump2.o bse.o acfdt.o chi.o sydmat.o
+
+INC=
+
+vasp: $(SOURCE) $(FFT3D) $(INC) main.o
+	rm -f vasp
+	$(FCL) -o vasp main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)
+vasp_: $(SOURCE) $(FFT3D) $(INC) main.o
+	rm -f vasp_
+	$(FCL) -o vasp_ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)
+makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
+	$(FCL) -o makeparam $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)
+zgemmtest: zgemmtest.o base.o random.o $(INC)
+	$(FCL) -o zgemmtest $(LINK) zgemmtest.o random.o base.o $(LIB)
+dgemmtest: dgemmtest.o base.o random.o $(INC)
+	$(FCL) -o dgemmtest $(LINK) dgemmtest.o random.o base.o $(LIB)
+ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
+	$(FCL) -o ffttest $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)
+kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
+	$(FCL) -o kpoints $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
+
+.PHONY: clean
+clean:
+	-rm -f *.mod *.f90 *.o *.L ; touch *.F
+
+main.o: main$(SUFFIX)
+	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)
+xcgrad.o: xcgrad$(SUFFIX)
+	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)
+xcspin.o: xcspin$(SUFFIX)
+	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)
+
+makeparam.o: makeparam$(SUFFIX)
+	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)
+
+makeparam$(SUFFIX): makeparam.F main.F
+#
+# MIND: I do not have a full dependency list for the include
+# and MODULES: here are only the minimal basic dependencies
+# if one structure is changed then touch_dep must be called
+# with the corresponding name of the structure
+#
+base.o: base.inc base.F
+mgrid.o: mgrid.inc mgrid.F
+constant.o: constant.inc constant.F
+lattice.o: lattice.inc lattice.F
+setex.o: setexm.inc setex.F
+pseudo.o: pseudo.inc pseudo.F
+poscar.o: poscar.inc poscar.F
+mkpoints.o: mkpoints.inc mkpoints.F
+wave.o: wave.inc wave.F
+nonl.o: nonl.inc nonl.F
+nonlr.o: nonlr.inc nonlr.F
+
+# NOTE: F77 and FFLAGS_F77 are not assigned anywhere in this makefile
+fft3dlib_f77.o: fft3dlib_f77.F
+	$(CPP)
+	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)
+
+.F.o:
+	$(CPP)
+	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
+.F$(SUFFIX):
+	$(CPP)
+$(SUFFIX).o:
+	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
+
+# special rules
+#-----------------------------------------------------------------------
+
+# -tpp5|6|7 P, PII-PIII, PIV
+# -xW use SIMD (does not pay off on PII, since fft3d uses double prec)
+# all other options do not affect the code performance since -O1 is used
+#
+# static pattern rules: every object listed in OBJ_* depends on its own .F
+# source, so editing the source triggers a rebuild of the object
+$(OBJ_HIGH): %.o: %.F
+	$(CPP)
+	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)
+$(OBJ_MED): %.o: %.F
+	$(CPP)
+	$(FC) $(FFLAGS) $(OFLAG_MED) $(INCS) -c $*$(SUFFIX)
+$(OBJ_LOW): %.o: %.F
+	$(CPP)
+	$(FC) $(FFLAGS) $(OFLAG_LOW) $(INCS) -c $*$(SUFFIX)
+$(OBJ_NOOPT): %.o: %.F
+	$(CPP)
+	$(FC) $(FFLAGS) $(OFLAG_NOOPT) $(INCS) -c $*$(SUFFIX)