# Patch summary: optional non-AVX code generation for scripts/djb_generator.
#
# builder.py (hunk in main()):
#   * adds the "-n" / "--no-avx" option (action="store_false", dest="AVX");
#   * when args.AVX is false, args.ws is overwritten with args.size and
#     k.AVX is set to False before k.WS is assigned from args.ws.
# constants.py:
#   * adds the module-level flag AVX = True next to WS.
# generator.py (GF2X_MUL):
#   * the "gf2x_mul_<na>_avx(...)" call is now emitted only when k.AVX is
#     true and na < k.CHUNK_SIZE; otherwise the generic "GF2X_MUL(...)" call
#     string is returned.
# src/benchmarking/CMakeLists.txt:
#   * CMAKE_C_FLAGS switches from -O0 to -O3.
#
# NOTE(review): CMAKE_C_FLAGS_DEBUG is built from ${CMAKE_C_FLAGS} on the
#   following line, so it presumably picks up -O3 as well -- confirm that
#   this is intended for debug builds.
# NOTE(review): with --no-avx an explicitly passed -ws value is replaced by
#   the digit size without any message -- confirm this is the desired
#   behaviour.
# NOTE(review): the line breaks and indentation of this patch appear to
#   have been collapsed; the hunks would need their original line structure
#   restored before the patch can be applied -- verify against the original.
diff --git a/scripts/djb_generator/builder.py b/scripts/djb_generator/builder.py index 68d66c5..9b50e6e 100644 --- a/scripts/djb_generator/builder.py +++ b/scripts/djb_generator/builder.py @@ -134,9 +134,14 @@ def main(): argParser = argparse.ArgumentParser() argParser.add_argument("-ws", "--wordsize", help="The number of bits addressable by the CPU at once (default: 256)", default=256, type=int, dest="ws") argParser.add_argument("-s", "--digit-size", help="The number of bits per digit (default: 64)", default=64, type=int, dest="size") + argParser.add_argument("-n", "--no-avx", help="Generates code for machines not supporting AVX intrinsics", action="store_false", dest="AVX") args = argParser.parse_args() # Setting memory word size related parameters + if not args.AVX: + args.ws = args.size + k.AVX = False + k.WS = args.ws k.DIGIT_SIZE_b = args.size k.DIGIT_SIZE_B = k.DIGIT_SIZE_b >> 3 diff --git a/scripts/djb_generator/constants.py b/scripts/djb_generator/constants.py index 86c2686..18423c7 100644 --- a/scripts/djb_generator/constants.py +++ b/scripts/djb_generator/constants.py @@ -18,6 +18,7 @@ DIGIT_SIZE_B = 0 DIGIT_SIZE_b =0 WS = 0 +AVX = True # Maximum size for direct multiplications CHUNK_SIZE = 9 diff --git a/scripts/djb_generator/generator.py b/scripts/djb_generator/generator.py index bcd58ce..43fec2a 100644 --- a/scripts/djb_generator/generator.py +++ b/scripts/djb_generator/generator.py @@ -257,7 +257,7 @@ def scalarprod(nr, res, na, a0, a1, nb, b0, b1, offset=0): # Generates the code for a base multiplication def GF2X_MUL(nr, res, na, a, nb, b): - if na < k.CHUNK_SIZE: + if k.AVX and na < k.CHUNK_SIZE: return "gf2x_mul_" + str(na) + "_avx(" + res + ", " + a + ", " + b +");" return "GF2X_MUL(" + str(nr) + ", " + res + ", " + str(na) + ", " + a + ", " + str(nb) + ", " + b + ");" diff --git a/src/benchmarking/CMakeLists.txt b/src/benchmarking/CMakeLists.txt index 5c8e2fa..8883c88 100644 --- a/src/benchmarking/CMakeLists.txt +++ 
b/src/benchmarking/CMakeLists.txt @@ -7,7 +7,7 @@ set(OPTIMIZATION_FLAGS "-O2 -fipa-cp-clone -floop-interchange -fgcse-after-reloa -fsplit-loops -fsplit-paths -ftree-loop-distribution -ftree-loop-vectorize \ -ftree-partial-pre -funswitch-loops -fvect-cost-model -fvect-cost-model=dynamic -fversion-loops-for-strides") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -pedantic -Wuninitialized -fno-var-tracking-assignments -march=native -O0 -g3 -Wno-unused-function") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -pedantic -Wuninitialized -fno-var-tracking-assignments -march=native -O3 -g3 -Wno-unused-function") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -Wall -pedantic -Wuninitialized -march=native -g3") message("FLAGS" ${CMAKE_C_FLAGS})