Various fixes

NLeSC · Jul 31, 2013 · 5446dcd · 5446dcd
1 parent f3c2f84
commit 5446dcd
Show file tree

Hide file tree

Showing 10 changed files with 506 additions and 38 deletions.
diff --git a/build/compile.mk b/build/compile.mk
@@ -133,6 +133,9 @@ include $(DEPENDS)
 #----------------------------------------------------------------------------
 
 # Cancel the implicit gmake rules for compiling
+#
+# 	@cd $(POPEXEDIR)/compile && $(NVCC) $(CUFLAGS) -ptx $<
+
 %.o : %.f
 %.o : %.f90
 %.o : %.c

diff --git a/build/linuxDAS4.gnu b/build/linuxDAS4.gnu
@@ -12,18 +12,18 @@ MPILIB = -L/cm/shared/apps/openmpi/intel/64/1.4.4/lib64/
 #CUDALIB = -L/cm/shared/apps/cuda40/toolkit/4.0.17/lib64/
 CUDALIB = -L/cm/shared/apps/cuda50/toolkit/current/lib64/
 
-F77 = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90
-F90 = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90
-LD = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90 -lcurl $(CUDALIB) -lcudart -lstdc++   -shared-intel -i-dynamic
-CC = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpicc
+F77 = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90 -r8 -O3 
+F90 = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90 -r8 -O3 
+LD = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpif90 -r8 -O3  -lcurl $(CUDALIB) -lcudart -lstdc++   -shared-intel -i-dynamic
+CC = /cm/shared/apps/openmpi/intel/64/1.4.4/bin/mpicc -O3 
 
 Cp = /bin/cp
 Cpp = cpp -P
 AWK = /usr/bin/gawk
 ABI = 
 COMMDIR = mpi
 
-NVCC = nvcc 
+NVCC = nvcc -O3 
 
 
 
@@ -76,9 +76,9 @@ CFLAGS = $(ABI)
 
 ifeq ($(OPTIMIZE),yes)
 #  CFLAGS := $(CFLAGS) -O 
-  CFLAGS := $(CFLAGS) -O3
+  CFLAGS := $(CFLAGS) 
 else
-  CFLAGS := $(CFLAGS) -g -check all -ftrapuv
+  CFLAGS := $(CFLAGS) -O3 -check all -ftrapuv
 endif
 
 CFLAGS := $(CFLAGS)
@@ -98,17 +98,17 @@ ifeq ($(TRAP_FPE),yes)
 endif
 
 ifeq ($(OPTIMIZE),yes)
+  FFLAGS = $(FBASE) 
 #  FFLAGS = $(FBASE) -O3
-  FFLAGS = $(FBASE) -O2
 else
-  FFLAGS = $(FBASE) -g -check bounds
+  FFLAGS = $(FBASE) -O3 -check bounds
 endif
 
 #DAS4 specific
 FFLAGS := $(FFLAGS) -convert  big_endian
 FFLAGS := $(FFLAGS) -mcmodel=medium -shared-intel -i-dynamic
 #-i-dynamic
-#FFLAGS := $(FFLAGS) 
+
 
 
 #----------------------------------------------------------------------------
@@ -117,13 +117,19 @@ FFLAGS := $(FFLAGS) -mcmodel=medium -shared-intel -i-dynamic
 #
 #----------------------------------------------------------------------------
 
-CUFLAGS = -Xptxas=-v -arch=compute_20 -code=sm_20
+CUFLAGS = -gencode arch=compute_35,code=sm_35 -Xptxas=-v -maxrregcount=64
+
+#CUFLAGS = -gencode arch=compute_20,code=sm_20 -Xptxas=-v
+
 #-prec-sqrt=true -fmad=false
 
 ifeq ($(OPTIMIZE),yes)
-  CUFLAGS := $(CUFLAGS) -O3
+  CUFLAGS := $(CUFLAGS)
 endif
-
+
+CUFLAGS := $(CUFLAGS) 
+
+
 #----------------------------------------------------------------------------
 #
 #                           Loader Flags and Libraries

diff --git a/build/linuxDAS4gnu.gnu b/build/linuxDAS4gnu.gnu
@@ -11,17 +11,17 @@
 #CUDALIB = -L/cm/shared/apps/cuda40/toolkit/4.0.17/lib64/
 CUDALIB = -L/cm/shared/apps/cuda50/toolkit/current/lib64/
 
-F77 = mpif90 -p -O3
-F90 = mpif90 -p -O3
-LD = mpif90 -p -O3 -lcurl $(CUDALIB) -lcudart -lstdc++
-CC = gcc -p -O3
+F77 = mpif90 -O0 -g
+F90 = mpif90 -O0 -g
+LD = mpif90 -O0 -g  -lcurl $(CUDALIB) -lcudart -lstdc++
+CC = gcc -O0 -g
 Cp = /bin/cp
 Cpp = cpp -P
 AWK = /usr/bin/gawk
 ABI = 
 COMMDIR = mpi
 
-NVCC = nvcc 
+NVCC = nvcc -O0 -g
 
 #  Enable MPI library for parallel code, yes/no.
 
@@ -73,6 +73,7 @@ CFLAGS = $(ABI)
 
 ifeq ($(OPTIMIZE),yes)
 #  CFLAGS := $(CFLAGS) -O 
+  CFLAGS := $(CFLAGS)  
 else
   CFLAGS := $(CFLAGS) -check all -ftrapuv
 endif
@@ -94,7 +95,7 @@ ifeq ($(TRAP_FPE),yes)
 endif
 
 ifeq ($(OPTIMIZE),yes)
-#  FFLAGS = $(FBASE) -O3
+#  FFLAGS = $(FBASE) 
   FFLAGS = $(FBASE)
 else
   FFLAGS = $(FBASE) -check bounds
@@ -112,11 +113,16 @@ FFLAGS := $(FFLAGS)
 #
 #----------------------------------------------------------------------------
 
-CUFLAGS = -Xptxas=-v -arch=compute_20 -code=sm_20
+#CUFLAGS = -Xptxas=-v -arch=compute_20 -code=sm_20
+
+#CUFLAGS = -gencode arch=compute_35,code=sm_35 -Xptxas=-v -maxrregcount=64
+
+CUFLAGS = -gencode arch=compute_20,code=sm_20 -Xptxas=-v
+
 #-prec-sqrt=true -fmad=false
 
 ifeq ($(OPTIMIZE),yes)
-  CUFLAGS := $(CUFLAGS) -O3
+  CUFLAGS := $(CUFLAGS) 
 endif
 
 #----------------------------------------------------------------------------

diff --git a/build/linuxg95_mpi_gpu.gnu b/build/linuxg95_mpi_gpu.gnu
@@ -0,0 +1,134 @@
+
+#-----------------------------------------------------------------------
+#
+# File:  linuxg95_mpi_gpu.gnu
+#
+#  Contains compiler and loader options for a Linux system using the
+#  MPI compiler wrappers (mpif77/mpif90) with nvcc for the CUDA code,
+#  and specifies the mpi directory for communications modules.
+#
+#-----------------------------------------------------------------------
+# Compilers and loader: Fortran is built through the MPI wrapper
+# scripts, the loader is the Fortran 90 wrapper, and nvcc is the CUDA
+# compiler driver.
+CC   = cc
+F77  = mpif77
+F90  = mpif90
+NVCC = nvcc
+LD   = mpif90
+
+# Helper programs, the ABI flag (empty here) and the communication
+# directory.
+ABI     =
+AWK     = /usr/bin/gawk
+Cp      = /bin/cp
+Cpp     = cpp -P
+COMMDIR = mpi
+
+#  Enable MPI library for parallel code, yes/no.
+
+MPI = yes
+
+# Adjust these to point to where netcdf is installed
+
+# Explicit include and library paths for the gcc build of netcdf 4.1.1
+NETCDFINC = -I/cm/shared/apps/netcdf/gcc/64/4.1.1/include
+NETCDFLIB = -L/cm/shared/apps/netcdf/gcc/64/4.1.1/lib
+
+#  Enable trapping and traceback of floating point exceptions, yes/no.
+#  Note - Requires 'setenv TRAP_FPE "ALL=ABORT,TRACE"' for traceback.
+
+TRAP_FPE = no
+
+#------------------------------------------------------------------
+#  precompiler options
+#------------------------------------------------------------------
+
+# Optional preprocessor switches.  Uncomment a line to turn that -D
+# define on; a switch left commented out expands to nothing below.
+#DCOUPL              = -Dcoupled
+DHIRES               = -D_HIRES
+#PRINT                = -DJASON_PRINT 
+#PRINT_HALO           = -DJASON_PRINT_HALO
+#PRINT_REDIST         = -DJASON_PRINT_REDIST
+#PRINT_LOOP           = -DJASON_PRINT_LOOP
+#TIMER                = -DJASON_TIMER 
+#FIX_DATA             = -DJASON_FIX_DATA
+#LOG_FILE             = -DJASON_SIMPLE_LOG_FILENAME
+FLOW                 = -D_USE_FLOW_CONTROL
+#SEND                 = -DJASON_PRINT_SEND  
+FLUSH                = -DJASON_FLUSH
+GPU                  = -DBEN_GPU
+
+# Every switch listed above must appear here, otherwise enabling it has
+# no effect.  $(PRINT_HALO) was previously missing from this list.
+Cpp_opts =   \
+      $(DCOUPL) $(DHIRES) $(TIMER) $(PRINT) $(PRINT_HALO) $(PRINT_LOOP) $(LOG_FILE) $(FLOW) $(FIX_DATA) $(SEND) $(FLUSH) $(PRINT_REDIST) $(GPU)
+
+# -DPOSIX is always passed, independent of the optional switches.
+Cpp_opts := $(Cpp_opts) -DPOSIX 
+
+#----------------------------------------------------------------------------
+#
+#                           C Flags
+#
+#----------------------------------------------------------------------------
+
+CFLAGS = $(ABI) 
+
+# OPTIMIZE=yes selects the optimised build; anything else selects the
+# debug build.
+# NOTE(review): assumes cc resolves to GCC on this target - confirm.
+# The debug branch used the Intel-only switches "-check all -ftrapuv",
+# which GCC rejects; "-g -O0 -Wall" is accepted by both GCC and the
+# Intel compiler.
+ifeq ($(OPTIMIZE),yes)
+  CFLAGS := $(CFLAGS) -O3 -march=corei7
+# -mcmodel=medium
+else
+  CFLAGS := $(CFLAGS) -g -O0 -Wall
+endif
+
+#----------------------------------------------------------------------------
+#
+#                           FORTRAN Flags
+#
+#----------------------------------------------------------------------------
+
+# Flags common to every Fortran compile: ABI, netcdf and MPI include
+# flags, plus the dependency directory for module lookup.
+FBASE = $(ABI) $(NETCDFINC) $(MPI_COMPILE_FLAGS) -I$(DepDir) 
+MODSUF = mod
+
+# Placeholder: no extra Fortran flags are added when TRAP_FPE is enabled.
+ifeq ($(TRAP_FPE),yes)
+  FBASE := $(FBASE) 
+endif
+
+# -fconvert=swap is a GNU Fortran option, so bounds checking in the debug
+# branch uses the GNU spelling -fbounds-check rather than the Intel form
+# "-check bounds", which GNU Fortran does not accept.
+ifeq ($(OPTIMIZE),yes)
+  FFLAGS = $(FBASE) -O3 -march=corei7 -fconvert=swap 
+#-fmax-stack-var-size=536870912
+#-mcmodel=medium
+else
+  FFLAGS = $(FBASE) -g -fbounds-check -fconvert=swap
+endif
+
+#----------------------------------------------------------------------------
+#
+#                           CUDA Flags
+#
+#----------------------------------------------------------------------------
+
+# Device code is generated for two GPU targets (sm_35 and sm_20); ptxas
+# reports verbosely and register use is capped at 64 via -maxrregcount.
+CUFLAGS  = -gencode arch=compute_35,code=sm_35
+CUFLAGS += -Xptxas=-v -maxrregcount=64
+CUFLAGS += -gencode arch=compute_20,code=sm_20
+
+# Alternative: target sm_20 only.
+#CUFLAGS = -gencode arch=compute_20,code=sm_20 -Xptxas=-v
+
+# Further nvcc switches, currently not enabled:
+#-prec-sqrt=true -fmad=false
+
+# OPTIMIZE adds nothing extra for CUDA at present.
+ifeq ($(OPTIMIZE),yes)
+  CUFLAGS := $(CUFLAGS)
+endif
+
+# Re-assign with := so the final value is simply expanded.
+CUFLAGS := $(CUFLAGS)
+
+#----------------------------------------------------------------------------
+#
+#                           Loader Flags and Libraries
+#
+#----------------------------------------------------------------------------
+
+LDFLAGS = $(ABI) 
+
+# CUDA runtime library directory, kept in its own variable as in the
+# other DAS4 build files so the toolkit location is set in one place.
+CUDALIB = -L/cm/shared/apps/cuda50/toolkit/current/lib64/
+
+# Libraries needed by every build: netcdf, curl, the CUDA runtime and
+# the C++ runtime.
+LIBS = $(NETCDFLIB) $(CUDALIB) -lnetcdf -lcurl -lcudart -lstdc++ 
+
+# Add the MPI library when MPI is enabled.
+ifeq ($(MPI),yes)
+  LIBS := $(LIBS) $(MPI_LD_FLAGS) -lmpi 
+endif
+
+# Placeholder: no extra libraries are added when TRAP_FPE is enabled.
+ifeq ($(TRAP_FPE),yes)
+  LIBS := $(LIBS) 
+endif
+
+LDLIBS = $(LIBS)
+
+#----------------------------------------------------------------------------