#!/bin/sh
#
# flags.guess

# Attempt to guess suitable flags for the Fortran compiler.

# one can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
# to the disk for one MPI slice after the other, and to make the solver do the same thing when reading the files back from disk.

# one can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
# range checking options at run time (for instance -check all for Intel ifort) and, more importantly, modern compilers
# vectorize the SPECFEM3D_GLOBE code very well and in practice this option only makes the code 3% to 5% faster in the best case,
# therefore we suggest not activating it.

# for the OpenMP version, one can add -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements
# for some critical loops instead of OpenMP CRITICAL regions.

###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# if you want to compile with OpenMP, add this to the flags listed below, depending on the compiler you use:
#
#   -fopenmp for GNU gfortran
#   -openmp for Intel ifort
#   -mp for Portland pgfortran
#   -qsmp=omp for IBM xlf

###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# If you run very large meshes on a relatively small number
# of processors, the static memory size needed on each processor might become
# greater than 2 gigabytes, which is the upper limit for 32-bit addressing
# (dynamic memory allocation is always OK, even beyond the 2 GB limit; only static memory has a problem).
# In this case, on some compilers you may need to add -mcmodel=medium (if you do not use the Intel ifort / icc compiler)
# or -mcmodel=medium -shared-intel (if you use the Intel ifort / icc compiler)
# to the configure options of CFLAGS, FCFLAGS and LDFLAGS; otherwise the compiler will display an error
# message (for instance 'relocation truncated to fit: R_X86_64_PC32 against .bss' or something similar).

###########################################################################################################################
###########################################################################################################################
###########################################################################################################################

# Work out which compiler really sits behind $FC and record it in my_FC.
# Cray's Programming Environment provides an "ftn" wrapper around several
# compilers, yet the command-line options remain those of the wrapped
# compiler, which is identified by $PE_ENV.
my_FC="$FC"    # anything other than the Cray wrapper is taken as-is
case "$FC" in
    ftn | */ftn)
        # An unrecognized PE_ENV leaves my_FC equal to the wrapper name.
        case "$PE_ENV" in
            CRAY)      my_FC=crayftn ;;
            GNU)       my_FC=gfortran ;;
            INTEL)     my_FC=ifort ;;
            PATHSCALE) my_FC=pathf90 ;;
            PGI)       my_FC=pgfortran ;;
        esac
        ;;
esac

# Pick the Fortran flag sets that go with the compiler named in my_FC:
#   DEF_FFLAGS   - base flags
#   OPT_FFLAGS   - optimization flags
#   DEBUG_FFLAGS - debugging flags
# A few arms also provide DEF_MPIFC and DEF_MPILIBS.  A compiler that matches
# no arm leaves every one of these variables untouched.
case "$my_FC" in

    ftn | */ftn | crayftn | */crayftn)
        # ---- Cray Fortran ----
        # (-Oaggress -Oipa4 would optimize even more aggressively)
        DEF_FFLAGS="-M 1193 -M 1438"
        OPT_FFLAGS="-O3 -Onoaggress -Oipa0 -hfp2 -Ovector3 -Oscalar3 -Ocache2 -Ounroll2 -Ofusion2"
        DEBUG_FFLAGS="-eC -eD -ec -en -eI -ea -g -G0"
        ;;

    pgf95 | */pgf95 | pgf90 | */pgf90 | pgfortran | */pgfortran)
        # ---- Portland PGI ----
        DEF_FFLAGS="-Mdclchk -Minform=warn -mcmodel=medium"
        OPT_FFLAGS="-Mnobounds -fast"
        DEBUG_FFLAGS="-Mbounds"
        ;;

    ifort | */ifort)
        # ---- Intel ifort Fortran90 for Linux ----
        # Reference: http://software.intel.com/sites/products/documentation/hpc/compilerpro/en-us/fortran/lin/compiler_f/index.htm
        #
        # "-assume buffered_io" matters a lot on parallel file systems such
        # as SFS 3.2 / Lustre 1.8: without it I/O throughput lingers at
        # 2.5 MB/s, with it throughput can rise to ~44 MB/s.  It makes little
        # difference on NFS-mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1.
        #
        # Optional additions to DEF_FFLAGS: -mcmodel=medium -shared-intel
        DEF_FFLAGS="-xHost -fpe0 -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -diag-disable 6477 -implicitnone -gen-interfaces -warn all"
        OPT_FFLAGS="-O3 -check nobounds"
        DEBUG_FFLAGS="-check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv"
        ;;

    gfortran | */gfortran | f95 | */f95)
        # ---- GNU gfortran ----
        # Optional addition to DEF_FFLAGS: -mcmodel=medium
        # -Wconversion helps track loss of accuracy caused by automatic
        # double to single precision conversion (it may generate many warnings).
        DEF_FFLAGS="-std=gnu -fimplicit-none -frange-check -fmax-errors=10 -pedantic -pedantic-errors -Waliasing -Wampersand -Wcharacter-truncation -Wline-truncation -Wsurprising -Wno-tabs -Wunderflow -ffpe-trap=invalid,zero,overflow -Wunused -Werror"
        OPT_FFLAGS="-O2"
        DEBUG_FFLAGS="-g -O0 -ggdb -fbacktrace -fbounds-check"
        ;;

    g95 | */g95)
        # ---- g95 (free f95 compiler from http://www.g95.org) ----
        DEF_FFLAGS="-fimplicit-none"
        OPT_FFLAGS="-O"
        DEBUG_FFLAGS="-g -O0 -fbounds-check -ftrace"
        ;;

    f90 | */f90)
        # The name "f90" is shared by several vendors; host_os (and, on
        # Linux, host_cpu) decides which set of flags is used.
        case "$host_os" in
            Linux)
                # ---- AbSoft (only for i*86 and x86_64 CPUs) ----
                case "$host_cpu" in
                    i*86 | x86_64)
                        DEF_FFLAGS="-W132 -s -cpu:p7 -v -YDEALLOC=ALL"
                        OPT_FFLAGS="-O3"
                        DEBUG_FFLAGS=""
                        ;;
                esac
                ;;
            irix)
                # ---- SGI Irix ----
                DEF_MPIFC="$FC"
                DEF_MPILIBS="-lmpi -lfastm -lfpe"
                DEF_FFLAGS="-ansi -u -64 -OPT:Olimit=0 -OPT:roundoff=3 -OPT:IEEE_arithmetic=3 -r10000 -mips4"
                OPT_FFLAGS="-O3"
                DEBUG_FFLAGS="-check_bounds"
                ;;
            superux*)
                # ---- NEC SX ----
                # The value contains literal double quotes around the -Wf
                # sub-options.
                DEF_MPIFC="$FC"
                DEF_FFLAGS='-C hopt -R2 -Wf" -L nostdout noinclist mrgmsg noeject -msg b -pvctl loopcnt=14000000 expand=10 fullmsg vecthreshold=20 -s" -pi auto line=100 exp=swap_all,rank'
                OPT_FFLAGS=""
                DEBUG_FFLAGS=""
                ;;
        esac
        ;;

    lf95 | */lf95)
        # ---- Lahey f90 ----
        DEF_FFLAGS="--warn --wo --tpp --f95 --dal"
        OPT_FFLAGS="-O"
        DEBUG_FFLAGS="--chk"
        ;;

    # ---- IBM ----
    # This arm has to stay ahead of the generic xlf arm below, whose
    # patterns would otherwise match the mpxlf names too.
    mpxlf* | */mpxlf*)
        DEF_MPIFC="$FC"
        ;;

    *xlf* | */*xlf*)
        # Notes for IBM xlf:
        #
        # * On some (but not all) IBM machines one might need to add -qsave,
        #   otherwise the compiler allocates the arrays on the stack and the
        #   code crashes if the stack size is too small (sometimes the case,
        #   but less often these days on large machines).
        #
        # * You will probably need to add " module load bgq-xl " or similar
        #   to your .bash_profile to load the compilers.
        #
        # * It could also help to put this in your .bash_profile:
        #     export XLFRTEOPTS=aggressive_array_io=yes:buffering=enable
        #
        # * When using -q64 for the Fortran compiler, the C compiler should
        #   be set accordingly, either
        #     CC = xlc_r
        #     CFLAGS = -O3 -q64
        #   or
        #     CC = gcc
        #     CFLAGS = -O3 -m64
        #
        # * With the xlf90 compiler, when encountering errors such as
        #   "...relocation truncated to fit: R_PPC_LOCAL24PC..." one should
        #   also use the additional flag
        #     CFLAGS = -Wl,-relax
        #
        # * -qstrict prevents some minor differences of results between xlf
        #   and other compilers.  It costs a little performance, but that is
        #   generally small or negligible compared to other issues like
        #   slow I/O.
        #
        # * Options -qreport -qsource -qlist create a *.lst file containing
        #   detailed information about vectorization.
        #
        # * On IBM BlueGene at IDRIS (France) use
        #     -qtune=auto -qarch=450d -qsave
        #   instead of
        #     -qtune=auto -qarch=auto
        DEF_FFLAGS="-qassert=contig -qhot -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003std -g -qsuppress=1518-234 -qsuppress=1518-317 -qsuppress=1518-318 -qsuppress=1500-036"
        OPT_FFLAGS="-O4 -qstrict -Q -Wl,-relax"
        DEBUG_FFLAGS="-g -O0 -C -qddim -qfullpath -qflttrap=overflow:zerodivide:invalid:enable -qfloat=nans -qinitauto=7FBFFFFF"
        ;;

    pathf90 | */pathf90)
        # ---- PathScale ----
        # One should also set
        #   CC = pathcc
        #   CFLAGS = -O2
        DEF_FFLAGS="-fno-math-errno -ffast-math -msse3 -march=auto -fno-second-underscore -align64"
        OPT_FFLAGS="-O3 -OPT:Ofast -LNO:fusion=2 -LNO:simd=2 -LNO:simd_verbose=ON"
        DEBUG_FFLAGS="-g2"
        ;;

esac

# SGI Irix only.
# CAUTION: on SGI, TRAP_FPE must always be set to OFF in the environment
# before compiling; that setting is recorded in FCENV.
if test "x$build_os" = "xirix"; then
    FCENV="TRAP_FPE=OFF"
fi

# If you wish, set CFLAGS here instead of on the command-line.
# This will then be persistent across calls to configure.
# If you don't set it, then the default value will be determined by autoconf.
#DEF_CFLAGS="-g -O3"

# Print one shell assignment, NAME="VALUE", on stdout.
#   $1 - variable name
#   $2 - value
# Every character that keeps a special meaning inside double quotes
# (backslash, double quote, dollar sign, backquote) is backslash-escaped, so
# a shell evaluating the printed line assigns VALUE verbatim (presumably the
# caller evals this output -- confirm against configure).  The value is
# passed quoted to printf, so it is neither word-split nor glob-expanded on
# its way out.
flags_guess_emit() {
    printf '%s="%s"\n' "$1" "$(printf '%s\n' "$2" |
        sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/\$/\\$/g' -e 's/`/\\`/g')"
}

# Unless FLAGS_CHECK was supplied from outside, build it from the base flags
# plus either the debugging flags (COND_DEBUG_TRUE is empty) or the
# optimization flags (COND_DEBUG_TRUE is non-empty).
if test "x$FLAGS_CHECK" = "x"; then
    if test "x$COND_DEBUG_TRUE" = "x"; then
        FLAGS_CHECK="$DEF_FFLAGS $DEBUG_FFLAGS"
    else
        FLAGS_CHECK="$DEF_FFLAGS $OPT_FFLAGS"
    fi
fi

# Fall back to the guessed MPI compiler when none was given.  The guessed MPI
# libraries are only considered in that case, and only if MPILIBS is empty.
if test "x$MPIFC" = "x"; then
    MPIFC="$DEF_MPIFC"
    if test "x$MPILIBS" = "x"; then
        MPILIBS="$DEF_MPILIBS"
    fi
fi

flags_guess_emit MPIFC "$MPIFC"
flags_guess_emit MPILIBS "$MPILIBS"
flags_guess_emit FLAGS_CHECK "$FLAGS_CHECK"
flags_guess_emit FCENV "$FCENV"

# CFLAGS is only printed when DEF_CFLAGS is set in this file and the user
# has not overridden CFLAGS (an empty but set CFLAGS counts as an override).
if [ -n "${DEF_CFLAGS+set}" ] && [ -z "${CFLAGS+set}" ]; then
    flags_guess_emit CFLAGS "$DEF_CFLAGS"
fi

# end of file