forked from SPECFEM/specfem3d_globe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
flags.guess
262 lines (245 loc) · 10.8 KB
/
flags.guess
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/bin/sh
# Attempt to guess suitable flags for the Fortran compiler.
# one can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
# to the disk for one MPI slice after the other, and to make the solver do the same thing when reading the files back from disk.
# one can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
# range checking options at run time (for instance -check all for Intel ifort) and, more importantly, modern compilers
# vectorize the SPECFEM3D_GLOBE code very well and in practice this option only makes the code 3% to 5% faster in the best case,
# therefore we suggest not activating it.
# for the OpenMP version, one can add -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements
# for some critical loops instead of OpenMP CRITICAL regions.
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
# if you want to compile with OpenMP, add this to the flags listed below, depending on the compiler you use:
#
# -fopenmp for GNU gfortran
# -qopenmp for Intel ifort (-openmp with older ifort releases)
# -mp for Portland pgfortran
# -qsmp=omp for IBM xlf
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
# If you run very large meshes on a relatively small number
# of processors, the static memory size needed on each processor might become
# greater than 2 gigabytes, which is the upper limit for 32-bit addressing
# (dynamic memory allocation is always OK, even beyond the 2 GB limit; only static memory has a problem).
# In this case, on some compilers you may need to add -mcmodel=medium (if you do not use the Intel ifort / icc compiler)
# or -mcmodel=medium -shared-intel (if you use the Intel ifort / icc compiler)
# to the configure options of CFLAGS, FCFLAGS and LDFLAGS otherwise the compiler will display an error
# message (for instance 'relocation truncated to fit: R_X86_64_PC32 against .bss' or something similar);
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
# Resolve the compiler that actually sits behind $FC and store it in my_FC.
# The Cray Programming Environment exposes a generic "ftn" wrapper, yet the
# command-line options remain those of the wrapped compiler, which is named
# by PE_ENV.  Anything that is not the "ftn" wrapper is taken as is.
my_FC="$FC"
case "$FC" in
  ftn | */ftn)
    case "$PE_ENV" in
      CRAY)      my_FC=crayftn ;;
      GNU)       my_FC=gfortran ;;
      INTEL)     my_FC=ifort ;;
      PATHSCALE) my_FC=pathf90 ;;
      PGI)       my_FC=pgfortran ;;
      # An unrecognized PE_ENV leaves the wrapper name from $FC in place.
    esac
    ;;
esac
# Pick the flag sets for the compiler named by my_FC:
#   DEF_FFLAGS    flags that are always wanted
#   OPT_FFLAGS    extra flags for an optimized build
#   DEBUG_FFLAGS  extra flags for a debugging build
# Some arms also provide DEF_MPIFC / DEF_MPILIBS.  A compiler matching no arm
# leaves every one of these variables untouched.
# NOTE(review): the f90 arm compares host_os against the exact strings
# "Linux" and "irix"; confirm these are the spellings the caller provides.
case "$my_FC" in
  ftn | */ftn | crayftn | */crayftn)
    # ---------------- Cray Fortran ----------------
    DEF_FFLAGS="-M 1193 -M 1438"
    # Swapping in -Oaggress -Oipa4 would optimize even more aggressively.
    OPT_FFLAGS="-O3 -Onoaggress -Oipa0 -hfp2 -Ovector3 -Oscalar3 -Ocache2 -Ounroll2 -Ofusion2"
    DEBUG_FFLAGS="-eC -eD -ec -en -eI -ea -g -G0"
    ;;

  pgf95 | */pgf95 | pgf90 | */pgf90 | pgfortran | */pgfortran)
    # ---------------- Portland PGI ----------------
    DEF_FFLAGS="-Mdclchk -Minform=warn -mcmodel=medium"
    OPT_FFLAGS="-Mnobounds -fast"
    DEBUG_FFLAGS="-Mbounds"
    ;;

  ifort | */ifort)
    # ---------------- Intel ifort Fortran90 for Linux ----------------
    # Reference:
    # http://software.intel.com/sites/products/documentation/hpc/compilerpro/en-us/fortran/lin/compiler_f/index.htm
    #
    # "-assume buffered_io" matters especially on parallel file systems such
    # as SFS 3.2 / Lustre 1.8: without it I/O throughput lingers at 2.5 MB/s,
    # with it throughput can rise to ~44 MB/s.  It makes little difference on
    # NFS mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1.
    #
    # Possible additions for large static memory: -mcmodel=medium -shared-intel
    DEF_FFLAGS="-xHost -fpe0 -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -diag-disable 6477 -implicitnone -gen-interfaces -warn all"
    OPT_FFLAGS="-O3 -check nobounds"
    DEBUG_FFLAGS="-check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv"
    ;;

  gfortran | */gfortran | f95 | */f95)
    # ---------------- GNU gfortran ----------------
    # Possible addition for large static memory: -mcmodel=medium
    DEF_FFLAGS="-std=gnu -fimplicit-none -frange-check -fmax-errors=10 -pedantic -pedantic-errors -Waliasing -Wampersand -Wcharacter-truncation -Wline-truncation -Wsurprising -Wno-tabs -Wunderflow -ffpe-trap=invalid,zero,overflow -Wunused -Werror"
    OPT_FFLAGS="-O2"
    DEBUG_FFLAGS="-g -O0 -ggdb -fbacktrace -fbounds-check"
    # -Wconversion helps to track loss of accuracy caused by automatic double
    # to single precision conversion (it may generate many warnings...).
    ;;

  g95 | */g95)
    # ---------------- g95 (free f95 compiler from http://www.g95.org) ----------------
    DEF_FFLAGS="-fimplicit-none"
    OPT_FFLAGS="-O"
    DEBUG_FFLAGS="-g -O0 -fbounds-check -ftrace"
    ;;

  f90 | */f90)
    # The bare name "f90" is shared by several vendors; host_os tells them apart.
    case "$host_os" in
      Linux)
        # ---------------- AbSoft ----------------
        # Flags are only set for the CPU families listed below.
        case "$host_cpu" in
          i*86 | x86_64)
            DEF_FFLAGS="-W132 -s -cpu:p7 -v -YDEALLOC=ALL"
            OPT_FFLAGS="-O3"
            DEBUG_FFLAGS=""
            ;;
        esac
        ;;
      irix)
        # ---------------- SGI Irix ----------------
        DEF_MPIFC="$FC"
        DEF_MPILIBS="-lmpi -lfastm -lfpe"
        DEF_FFLAGS="-ansi -u -64 -OPT:Olimit=0 -OPT:roundoff=3 -OPT:IEEE_arithmetic=3 -r10000 -mips4"
        OPT_FFLAGS="-O3"
        DEBUG_FFLAGS="-check_bounds"
        ;;
      superux*)
        # ---------------- NEC SX ----------------
        DEF_MPIFC="$FC"
        # The escaped double quotes are part of the flag value itself.
        DEF_FFLAGS="-C hopt -R2 -Wf\" -L nostdout noinclist mrgmsg noeject -msg b -pvctl loopcnt=14000000 expand=10 fullmsg vecthreshold=20 -s\" -pi auto line=100 exp=swap_all,rank"
        OPT_FFLAGS=""
        DEBUG_FFLAGS=""
        ;;
    esac
    ;;

  lf95 | */lf95)
    # ---------------- Lahey f90 ----------------
    DEF_FFLAGS="--warn --wo --tpp --f95 --dal"
    OPT_FFLAGS="-O"
    DEBUG_FFLAGS="--chk"
    ;;

  # ---------------- IBM ----------------
  mpxlf* | */mpxlf*)
    # Listed before the generic xlf arm, so only DEF_MPIFC is set here.
    DEF_MPIFC="$FC"
    ;;

  *xlf* | */*xlf*)
    # On some (but not all) IBM machines -qsave may be needed: otherwise the
    # IBM compiler allocates the arrays on the stack and the code crashes when
    # the stack size is too small (sometimes the case, although less often
    # these days on large machines).
    #
    # You will probably need " module load bgq-xl " or similar in your
    # .bash_profile to load the compilers.
    #
    # Putting this in your .bash_profile could also help:
    #   export XLFRTEOPTS=aggressive_array_io=yes:buffering=enable
    #
    # When -q64 is used for the Fortran compiler, the C compiler on IBM with
    # xlf should be configured with either
    #
    #   CC = xlc_r
    #   CFLAGS = -O3 -q64
    #
    # or
    #
    #   CC = gcc
    #   CFLAGS = -O3 -m64
    #
    # With the IBM xlf90 compiler, errors of the form
    #   ...relocation truncated to fit: R_PPC_LOCAL24PC...
    # call for the additional setting
    #   CFLAGS = -Wl,-relax
    #
    # -qstrict prevents some minor differences of results between xlf and
    # other compilers.  The decrease in performance is generally small or
    # negligible compared to other issues like slow I/O.
    DEF_FFLAGS="-qassert=contig -qhot -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003std -g -qsuppress=1518-234 -qsuppress=1518-317 -qsuppress=1518-318 -qsuppress=1500-036"
    OPT_FFLAGS="-O4 -qstrict -Q -Wl,-relax"
    # -qreport -qsource -qlist would create a *.lst file with detailed
    # information about vectorization.
    DEBUG_FFLAGS="-g -O0 -C -qddim -qfullpath -qflttrap=overflow:zerodivide:invalid:enable -qfloat=nans -qinitauto=7FBFFFFF"
    # On IBM BlueGene at IDRIS (France) use
    #   -qtune=auto -qarch=450d -qsave
    # instead of
    #   -qtune=auto -qarch=auto
    ;;

  pathf90 | */pathf90)
    # ---------------- PathScale ----------------
    # One should also set
    #   CC = pathcc
    #   CFLAGS = -O2
    DEF_FFLAGS="-fno-math-errno -ffast-math -msse3 -march=auto -fno-second-underscore -align64"
    OPT_FFLAGS="-O3 -OPT:Ofast -LNO:fusion=2 -LNO:simd=2 -LNO:simd_verbose=ON"
    DEBUG_FFLAGS="-g2"
    ;;
esac
# SGI Irix build machines only.
# CAUTION: always define "setenv TRAP_FPE OFF" on SGI before compiling;
# FCENV records that setting.  On any other build_os, FCENV is left untouched.
if [ "$build_os" = "irix" ]; then
  FCENV="TRAP_FPE=OFF"
fi
# If you wish, set CFLAGS here instead of on the command-line.
# This will then be persistent across calls to configure.
# If you don't set it, then the default value will be determined by autoconf.
#DEF_CFLAGS="-g -O3"
# Supply FLAGS_CHECK unless the caller already provided a non-empty value:
# an empty COND_DEBUG_TRUE selects the debugging flag set, any other value
# selects the optimized one.
if [ -z "$FLAGS_CHECK" ]; then
  case "$COND_DEBUG_TRUE" in
    "") FLAGS_CHECK="$DEF_FFLAGS $DEBUG_FFLAGS" ;;
    *)  FLAGS_CHECK="$DEF_FFLAGS $OPT_FFLAGS" ;;
  esac
fi

# Fall back to the per-compiler MPI wrapper when MPIFC is empty.  The default
# MPI libraries are only considered in that same case, and only when MPILIBS
# is empty as well; a caller-supplied MPIFC leaves MPILIBS alone.
if [ -z "$MPIFC" ]; then
  MPIFC="$DEF_MPIFC"
  [ -n "$MPILIBS" ] || MPILIBS="$DEF_MPILIBS"
fi
# Print one setting on stdout in the form NAME="VALUE", with every '$' of the
# line preceded by a backslash.
#   $1 - variable name
#   $2 - value, written verbatim between the double quotes
# The value is passed quoted to printf, so whitespace runs, glob characters and
# backslashes reach the output unchanged (an unquoted echo would word-split and
# glob-expand it, and some /bin/sh echo builtins interpret backslashes).
# NOTE(review): the output format suggests the caller evaluates these lines as
# shell assignments - confirm.  Only '$' is escaped, so a value that contains
# a double quote is emitted as is.
flags_guess_emit() {
  printf '%s="%s"\n' "$1" "$2" | sed 's/\$/\\\$/g'
}

flags_guess_emit MPIFC "$MPIFC"
flags_guess_emit MPILIBS "$MPILIBS"
flags_guess_emit FLAGS_CHECK "$FLAGS_CHECK"
flags_guess_emit FCENV "$FCENV"

# CFLAGS is only reported when DEF_CFLAGS is set in this file and the user has
# not overridden CFLAGS on the command-line ("+set" tests set-ness, so an
# explicitly empty CFLAGS still counts as an override).
if [ -n "${DEF_CFLAGS+set}" ] && [ -z "${CFLAGS+set}" ]; then
  flags_guess_emit CFLAGS "$DEF_CFLAGS"
fi
# end of file