diff --git a/config/macros.m4 b/config/macros.m4 index 88db6877..b3bb2ef6 100644 --- a/config/macros.m4 +++ b/config/macros.m4 @@ -433,6 +433,7 @@ dnl AX_FLAGS_SAVE() dnl AX_FLAGS_RESTORE() ]) + # AX_SELECT_BINARY_TYPE # --------------------- # Check the binary type the user wants to build and verify whether it can be successfully built @@ -1143,6 +1144,7 @@ AC_DEFUN([AX_PROG_COUNTERS], [ AC_REQUIRE([AX_PROG_PMAPI]) AC_REQUIRE([AX_PROG_PAPI]) + AC_REQUIRE([AX_PROG_L4STAT]) if test "${papi_paths}" = "not_set" ; then if test "${target_os}" = "aix" ; then @@ -1154,7 +1156,7 @@ AC_DEFUN([AX_PROG_COUNTERS], fi fi - if test "${PMAPI_ENABLED}" = "yes" -o "${PAPI_ENABLED}" = "yes" ; then + if test "${PMAPI_ENABLED}" = "yes" -o "${PAPI_ENABLED}" = "yes" -o "${L4STAT_ENABLED}" = "yes"; then AC_DEFINE([USE_HARDWARE_COUNTERS], 1, [Enable HWC support]) use_hw_counters="1" else @@ -1167,6 +1169,26 @@ AC_DEFUN([AX_PROG_COUNTERS], fi ]) +# AX_PROG_L4STAT +# ------------- +AC_DEFUN([AX_PROG_L4STAT], +[ + AC_ARG_ENABLE(l4stat, + AC_HELP_STRING( + [--enable-l4stat], + [Enable L4STAT driver to gather CPU performance counters] + ), + [enable_l4stat="${enableval}"], + [enable_l4stat="not_set"] + ) + + if test "${enable_l4stat}" = "yes" ; then + L4STAT_ENABLED="yes" + AC_DEFINE([L4STAT], [1], [L4STAT is used as API to gain access to CPU hwc]) + fi + + AM_CONDITIONAL(L4STAT, test "${L4STAT_ENABLED}" = "yes") +]) # AX_PROG_PMAPI # ------------- diff --git a/config/openmp.m4 b/config/openmp.m4 index dc0933df..d15e2e90 100644 --- a/config/openmp.m4 +++ b/config/openmp.m4 @@ -90,15 +90,20 @@ AC_DEFUN([AX_CHECK_OPENMP], AC_DEFUN([AX_HAVE_SYNC_FETCH_AND_ADD], [ AC_MSG_CHECKING([for __sync_fetch_and_add availability]) - AC_TRY_LINK( - [ ], - [ volatile int i; __sync_fetch_and_add(&i,1); ], - [ have_sync_fetch_and_add="yes" ] - ) - - if test "${have_sync_fetch_and_add}" = "yes" ; then - AC_DEFINE([HAVE__SYNC_FETCH_AND_ADD], 1, [Define if __sync_fetch_and_add is available]) - AC_MSG_RESULT([yes]) 
+ # GR740 has a non-functional __sync_fetch_and_add; probe on every other target (also when IS_GR740_MACHINE is unset) + if test "${IS_GR740_MACHINE}" != "yes"; then + AC_TRY_LINK( + [ ], + [ volatile int i; __sync_fetch_and_add(&i,1); ], + [ have_sync_fetch_and_add="yes" ] + ) + + if test "${have_sync_fetch_and_add}" = "yes"; then + AC_DEFINE([HAVE__SYNC_FETCH_AND_ADD], 1, [Define if __sync_fetch_and_add is available]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi else AC_MSG_RESULT([no]) fi diff --git a/config/posix-clock-macros.m4 b/config/posix-clock-macros.m4 index e93edf36..ab9b7dfb 100644 --- a/config/posix-clock-macros.m4 +++ b/config/posix-clock-macros.m4 @@ -2,7 +2,7 @@ # ------------------- AC_DEFUN([AX_CHECK_POSIX_CLOCK], [ - if test "${Architecture}" = "arm" -o "${Architecture}" = "arm64" ; then + if test "${Architecture}" = "arm" -o "${Architecture}" = "arm64" -o "${Architecture}" = "sparc"; then USE_POSIX_CLOCK="yes" else USE_POSIX_CLOCK="no" diff --git a/config/show-config.m4 b/config/show-config.m4 index a25e5917..cd695e9e 100644 --- a/config/show-config.m4 +++ b/config/show-config.m4 @@ -41,14 +41,16 @@ AC_DEFUN([AX_SHOW_CONFIGURATION], AX_JAVA_SHOW_CONFIGURATION echo - if test "${PMAPI_ENABLED}" = "yes" -o "${PAPI_ENABLED}" = "yes" ; then + if test "${PMAPI_ENABLED}" = "yes" -o "${PAPI_ENABLED}" = "yes" -o "${L4STAT_ENABLED}" = "yes"; then echo Performance counters: yes if test "${PMAPI_ENABLED}" = "yes" ; then echo -e \\\tPerformance API: PMAPI - else + elif test "${PAPI_ENABLED}" = "yes"; then echo -e \\\tPerformance API: PAPI echo -e \\\tPAPI home: ${PAPI_HOME} echo -e \\\tSampling support: ${PAPI_SAMPLING_ENABLED} + else + echo -e \\\tPerformance API: L4STAT fi else echo Performance counters: no diff --git a/config/system.m4 b/config/system.m4 index a0cfd62f..0509455f 100644 --- a/config/system.m4 +++ b/config/system.m4 @@ -82,6 +82,21 @@ AC_DEFUN([AX_SYSTEM_TYPE], target_os="linux" fi + AC_ARG_ENABLE(gr740, + AC_HELP_STRING( + [--enable-gr740], + [Enable compilation for the GR740 board] + ), + 
[enable_gr740="${enableval}"], + [enable_gr740="no"] + ) + IS_GR740_MACHINE=${enable_gr740} + if test "${IS_GR740_MACHINE}" = "yes" ; then + target_cpu="sparc" + target_os="rtems" + fi + AM_CONDITIONAL(IS_GR740_MACHINE, test "${IS_GR740_MACHINE}" = "yes") + # Check if this is an Altix machine and if it has an /dev/mmtimer device # (which is a global clock!) AC_ARG_ENABLE(check-altix, @@ -138,6 +153,8 @@ AC_DEFUN([AX_SYSTEM_TYPE], AC_DEFINE([ARCH_MIPS], [1], [Define if architecture is MIPS]) ;; sparc64 ) Architecture="sparc64" AC_DEFINE([ARCH_SPARC64], [1], [Define if architecture is SPARC64]) ;; + sparc ) Architecture="sparc" + AC_DEFINE([ARCH_SPARC], [1], [Define if architecture is SPARC]) ;; riscv64 ) Architecture="riscv" if test "${target_cpu}" == "riscv64" ; then @@ -163,6 +180,8 @@ AC_DEFUN([AX_SYSTEM_TYPE], AC_DEFINE([OS_SOLARIS], [1], [Define if operating system is Solaris]) ;; darwin* ) OperatingSystem="darwin" AC_DEFINE([OS_DARWIN], [1], [Define if operating system is Darwin]) ;; + rtems* ) OperatingSystem="rtems" + AC_DEFINE([OS_RTEMS], [1], [Define if operating system is RTEMS]) ;; esac # Publish these defines for conditional compilation @@ -180,8 +199,9 @@ AC_DEFUN([AX_SYSTEM_TYPE], AM_CONDITIONAL(OS_DEC, test "${OperatingSystem}" = "dec" ) AM_CONDITIONAL(OS_IRIX, test "${OperatingSystem}" = "irix" ) AM_CONDITIONAL(OS_FREEBSD, test "${OperatingSystem}" = "freebsd" ) - AM_CONDITIONAL(OS_DARWIN, test "${OperatingSystem}" = "darwin" ) + AM_CONDITIONAL(OS_DARWIN, test "${OperatingSystem}" = "darwin" ) AM_CONDITIONAL(OS_SOLARIS, test "${OperatingSystem}" = "solaris" ) + AM_CONDITIONAL(OS_RTEMS, test "${OperatingSystem}" = "rtems" ) # Special flags for specific systems or architectures if test "${OperatingSystem}" = "freebsd" ; then diff --git a/config/ucontext.m4 b/config/ucontext.m4 index 7d7cc102..bea76a4d 100644 --- a/config/ucontext.m4 +++ b/config/ucontext.m4 @@ -33,7 +33,12 @@ AC_DEFUN([AX_CHECK_UCONTEXT], AC_MSG_RESULT([${STRUCT_UCONTEXT_TYPE}]) if 
test "${STRUCT_UCONTEXT_TYPE}" = "unknown"; then + if test "${IS_GR740_MACHINE}" = "yes"; then + AC_MSG_WARN([Ucontext not available for GR740, sampling is driven by hardware timers]) + else AC_MSG_ERROR([Unknown definition of struct ucontext. Please check the definition in sys/ucontext.h or libc's ucontext.h and extend the configure macro]) + fi + else AC_DEFINE_UNQUOTED([STRUCT_UCONTEXT], ${STRUCT_UCONTEXT_TYPE}, [Definition of struct ucontext]) fi diff --git a/configure.ac b/configure.ac index 17180b2d..a254047a 100644 --- a/configure.ac +++ b/configure.ac @@ -122,7 +122,7 @@ AC_PROG_FC(${wanted_F_Compilers}) AM_CONDITIONAL(HAVE_FC, test "$FC" != "") -if test "${IS_SPARC64_MACHINE}" != "yes" ; then +if test "${IS_SPARC64_MACHINE}" != "yes" -a "${IS_GR740_MACHINE}" != "yes" ; then AC_FC_WRAPPERS fi AX_JAVA @@ -472,10 +472,13 @@ AC_CHECK_SIZEOF(ssize_t,${size_t_size}) AC_CHECK_SIZEOF(size_t,${size_t_size}) AC_CHECK_SIZEOF(void*,${voidp_size}) +# GR740 has a non-functional mallinfo structure +if test "${IS_GR740_MACHINE}" != "yes" ; then AC_CHECK_MEMBER(struct mallinfo.arena, [AC_DEFINE([HAVE_MALLINFO], [1], [Whether the system supports mallinfo structure])], [], [#include <malloc.h>]) +fi AC_CHECK_MEMBER(union perf_mem_data_src.val, [AC_DEFINE([HAVE_PERF_MEM_DATA_SRC], [1], [Whether the system includes perf_mem_data_src/val])], diff --git a/src/common/num_hwc.h b/src/common/num_hwc.h index f32358b0..cd453a72 100644 --- a/src/common/num_hwc.h +++ b/src/common/num_hwc.h @@ -36,6 +36,8 @@ # define MAX_HWC 8 # elif defined (ARCH_PPC) # define MAX_HWC 8 +# elif defined (OS_RTEMS) +# define MAX_HWC 4 # else # define MAX_HWC 8 # endif diff --git a/src/common/utils.c b/src/common/utils.c index 8a41194d..989e166a 100644 --- a/src/common/utils.c +++ b/src/common/utils.c @@ -228,6 +228,12 @@ int __Extrae_Utils_append_from_to_file (const char *source, const char *destinat int __Extrae_Utils_rename_or_copy (char *origen, char *desti) { +#if defined (OS_RTEMS) + /* + * Renaming to 
an already existing filename using NFS on GR740 caused errors so we delete the destination file first + */ + remove(desti); +#endif if (rename (origen, desti) == -1) { if (errno == EXDEV) diff --git a/src/common/utils.h b/src/common/utils.h index c247f89d..ef8e9fab 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -60,6 +60,7 @@ int xtr_random(void); #endif #define STRINGIFY(s) #s +#define TOSTRING(x) STRINGIFY(x) #if DEBUG # define DBG(x, ...) fprintf(stderr, "DEBUG: " x, ##__VA_ARGS__) diff --git a/src/loader/Makefile.am b/src/loader/Makefile.am index cdcf8990..1627e209 100644 --- a/src/loader/Makefile.am +++ b/src/loader/Makefile.am @@ -5,4 +5,7 @@ bin_PROGRAMS = extrae-loader extrae_loader_SOURCES = \ extrae-loader.c +if !IS_GR740_MACHINE extrae_loader_LDFLAGS = -ldl +endif + diff --git a/src/merger/paraver/HardwareCounters.c b/src/merger/paraver/HardwareCounters.c index 57d79af2..6a1b71c4 100644 --- a/src/merger/paraver/HardwareCounters.c +++ b/src/merger/paraver/HardwareCounters.c @@ -619,13 +619,13 @@ int HardwareCounters_Emit (int ptask, int task, int thread, /* If using PAPI, they can be stored in absolute or relative manner, * depending whether sampling was activated or not */ -# if defined(SAMPLING_SUPPORT) +# if defined(SAMPLING_SUPPORT) && !defined(L4STAT) if (SetHWCIds[cnt].local_id != NO_COUNTER && SetHWCIds[cnt].local_id != SAMPLE_COUNTER) # else if (SetHWCIds[cnt].local_id != NO_COUNTER) # endif { -# if defined(SAMPLING_SUPPORT) +# if defined(SAMPLING_SUPPORT) && !defined(L4STAT) // Protect when counters are incorrect (major timestamp, lower counter value) if (Event->HWCValues[cnt] >= Sthread->counters[cnt]) # endif diff --git a/src/merger/paraver/HardwareCounters.h b/src/merger/paraver/HardwareCounters.h index 55f19456..da4cbf5b 100644 --- a/src/merger/paraver/HardwareCounters.h +++ b/src/merger/paraver/HardwareCounters.h @@ -122,6 +122,10 @@ int check_if_uncore_in_PFM(char *event_name); #elif defined(PMAPI_COUNTERS) # define 
GET_PARAVER_CODE_FOR_HWC(x, name) (HWC_BASE_PMAPI + x) # define LEGACY_HWC_COUNTER_TYPE(x) (HWC_BASE_PMAPI + x) +#elif defined(L4STAT) +# define HWC_L4STAT_BASE HWC_BASE_PAPI_PRESET +# define LEGACY_HWC_COUNTER_TYPE(x) (HWC_L4STAT_BASE + ((x) & 0x000000FF)) +# define GET_PARAVER_CODE_FOR_HWC(x, name) LEGACY_HWC_COUNTER_TYPE(x) #endif void HardwareCounters_AssignGlobalID (int ptask, int local_id, char *definition); diff --git a/src/merger/paraver/labels.c b/src/merger/paraver/labels.c index d93027dc..c9fca225 100644 --- a/src/merger/paraver/labels.c +++ b/src/merger/paraver/labels.c @@ -1093,6 +1093,19 @@ int Labels_GeneratePCFfile (char *name, long long options) return 0; } +#if defined(OS_RTEMS) +void Labels_loadRTEMSSymbols(char *executable_path, struct input_t * IFiles) +{ + /* Derives the binary path by stripping the trailing ".prv" from executable_path and registers it in the object table */ + char path_binary2[256]; + snprintf(path_binary2, sizeof(path_binary2), "%s", executable_path); /* bounded copy: the path comes from the environment and may exceed the buffer */ + if (strlen(path_binary2) >= strlen(".prv")) path_binary2[strlen(path_binary2)-strlen(".prv")] = (char) 0; + ObjectTable_AddBinaryObject (FALSE, IFiles[0].ptask, IFiles[0].task, + 0, 0xFFFFFFFF, 0, path_binary2); + +} +#endif + void Labels_loadLocalSymbols (int taskid, unsigned long nfiles, struct input_t * IFiles, UINT64 **io_StartingTimes, UINT64 **io_SynchronizationTimes) { diff --git a/src/merger/paraver/trace_to_prv.c b/src/merger/paraver/trace_to_prv.c index bc0e5216..098a789e 100644 --- a/src/merger/paraver/trace_to_prv.c +++ b/src/merger/paraver/trace_to_prv.c @@ -227,6 +227,30 @@ int Paraver_ProcessTraceFiles (unsigned long nfiles, fset = Create_FS (nfiles, files, taskid, PRV_SEMANTICS); error = (fset == NULL); +#if defined(OS_RTEMS) + // As there's no support to read XML from RTEMS we need to specify the program name and the destination folder as environment variables + char *prvfile = NULL; + env_program_name = getenv ("EXTRAE_PROGRAM_NAME"); + env_final_dir = getenv ("EXTRAE_FINAL_DIR"); + + prvfile = xmalloc(6 + ((env_final_dir != NULL) ? strlen(env_final_dir) : 1) + ((env_program_name != NULL) ? 
strlen(env_program_name) : 5)); /* 6 covers '/', ".prv" and the NUL; 1 and 5 are the lengths of the "." and "TRACE" fallbacks */ + sprintf (prvfile, "%s/%s.prv", (env_final_dir != NULL) ? env_final_dir : ".", (env_program_name != NULL) ? env_program_name : "TRACE"); + +#if defined(HAVE_BFD) + if (!__Extrae_Utils_file_exists(prvfile)){ + fprintf (stdout, "mpi2prv: WARNING binary file can not be found at the NFS mounted folder, calltrace info (function names) will be empty \n"); + set_option_dump_Addresses(FALSE); + } +#endif + + set_merge_OutputTraceName (prvfile); + set_merge_GivenTraceName (TRUE); + + if (taskid == 0) Labels_loadRTEMSSymbols(prvfile, files); + + xfree(prvfile); +#else + Labels_loadLocalSymbols (taskid, nfiles, files, &StartingTimes, &SynchronizationTimes); /* If no actual filename is given, use the binary name if possible */ @@ -249,6 +273,8 @@ int Paraver_ProcessTraceFiles (unsigned long nfiles, } } +#endif + if (__Extrae_Utils_file_exists(get_merge_OutputTraceName()) && !get_option_merge_TraceOverwrite()) { diff --git a/src/tracer/calltrace.c b/src/tracer/calltrace.c index a894f92d..98529baf 100644 --- a/src/tracer/calltrace.c +++ b/src/tracer/calltrace.c @@ -374,7 +374,7 @@ static int ValidAddress (void * Addr) { #endif /* OS_AIX */ -#if defined (OS_SOLARIS) +#if defined (OS_SOLARIS) || defined(OS_RTEMS) void Extrae_trace_callers (iotimer_t time, int offset, int type) { /* TODO */ diff --git a/src/tracer/clocks/Makefile.am b/src/tracer/clocks/Makefile.am index 00fc1b41..eeda4355 100644 --- a/src/tracer/clocks/Makefile.am +++ b/src/tracer/clocks/Makefile.am @@ -32,4 +32,6 @@ endif noinst_LTLIBRARIES = libclock.la libclock_la_SOURCES = $(CLOCKS) libclock_la_CFLAGS = -I$(CLOCKS_INC) -I$(COMMON_INC) -I$(TRACER_INC) +if !IS_GR740_MACHINE libclock_la_LDFLAGS = -lrt +endif diff --git a/src/tracer/defines.h b/src/tracer/defines.h index 6bd92817..08b11398 100644 --- a/src/tracer/defines.h +++ b/src/tracer/defines.h @@ -54,7 +54,9 @@ # if defined(FC_FUNC) # define CtoF77(x) FC_FUNC(x,x) # else -# error "Error! 
Not defined FC_FUNC, how do we deal with Fortran symbols?" +# if !defined(OS_RTEMS) +# error "Error! Not defined FC_FUNC, how do we deal with Fortran symbols?" +# endif # endif #endif /* HAVE_MPI */ diff --git a/src/tracer/hwc/Makefile.am b/src/tracer/hwc/Makefile.am index e6ed637a..c5915150 100644 --- a/src/tracer/hwc/Makefile.am +++ b/src/tracer/hwc/Makefile.am @@ -16,9 +16,14 @@ if HAVE_PMAPI HWC += common_hwc.c common_hwc.h \ pmapi_hwc.c pmapi_hwc.h else +if L4STAT +HWC += common_hwc.c common_hwc.h \ + l4stat_hwc.c l4stat_hwc.h +else HWC += fake_hwc.c endif endif +endif noinst_LTLIBRARIES = libhwc.la diff --git a/src/tracer/hwc/common_hwc.c b/src/tracer/hwc/common_hwc.c index 021ffd4f..4d698b70 100644 --- a/src/tracer/hwc/common_hwc.c +++ b/src/tracer/hwc/common_hwc.c @@ -55,7 +55,7 @@ /*------------------------------------------------ Global Variables ---------*/ int HWCEnabled = FALSE; /* Have the HWC been started? */ -#if !defined(SAMPLING_SUPPORT) +#if !defined(SAMPLING_SUPPORT) || defined(OS_RTEMS) int Reset_After_Read = TRUE; #else int Reset_After_Read = FALSE; @@ -564,6 +564,13 @@ void HWC_Parse_Env_Config (int task_id) * \param store_buffer Buffer where the counters will be stored. * \return 1 if counters were read successfully, 0 otherwise. */ +#if defined(OS_RTEMS) +void HWC_update_sampling(unsigned int tid){ + + HWCBE_UPDATE_SAMPLING(tid); + +} +#endif int HWC_Read (unsigned int tid, UINT64 time, long long *store_buffer) { int read_ok = FALSE, reset_ok = FALSE; @@ -579,6 +586,18 @@ int HWC_Read (unsigned int tid, UINT64 time, long long *store_buffer) } return (HWCEnabled && read_ok && reset_ok); } +#if defined(OS_RTEMS) +int HWC_Read_Sampling (unsigned int tid, UINT64 time, uint32_t *store_buffer) +{ + int read_ok = FALSE; + if (HWCEnabled) + { + TOUCH_LASTFIELD( store_buffer ); + read_ok = HWCBE_READ_Sampling (tid, store_buffer); + } + return (HWCEnabled && read_ok); +} +#endif /** * Resets the counters of the given thread. 
diff --git a/src/tracer/hwc/common_hwc.h b/src/tracer/hwc/common_hwc.h index da36ae8b..e2291287 100644 --- a/src/tracer/hwc/common_hwc.h +++ b/src/tracer/hwc/common_hwc.h @@ -180,6 +180,51 @@ extern int * HWC_current_set; #define HWCBE_GET_COUNTER_DEFINITIONS(count) \ HWCBE_PMAPI_GetCounterDefinitions(count) + +#elif defined(L4STAT) + +# include "l4stat_hwc.h" + +# define HWCBE_INITIALIZE(options) \ + HWCBE_L4STAT_Initialize (options) + +# define HWCBE_START_COUNTERS_THREAD(time, tid, forked) \ + HWCBE_L4STAT_Init_Thread(time, tid, forked) + +# define HWCBE_START_SET(glops, time, current_set, thread_id) \ + HWCBE_L4STAT_Start_Set(glops, time, current_set, thread_id) + +# define HWCBE_STOP_SET(time, current_set, thread_id) \ + HWCBE_L4STAT_Stop_Set(time, current_set, thread_id) + +# define HWCBE_ADD_SET(pretended_set, rank, ncounters, counters, domain, \ + change_at_globalops, change_at_time, num_overflows, \ + overflow_counters, overflow_values) \ + HWCBE_L4STAT_Add_Set(pretended_set, rank, ncounters, counters, domain, \ + change_at_globalops, change_at_time, num_overflows, \ + overflow_counters, overflow_values) + +# define HWCBE_READ(thread_id, store_buffer) \ + HWCBE_L4STAT_Read(thread_id, store_buffer) + +# define HWCBE_READ_Sampling(thread_id, store_buffer) \ + HWCBE_L4STAT_Read_Sampling(thread_id, store_buffer) + +# define HWCBE_RESET(thread_id) \ + HWCBE_L4STAT_Reset(thread_id) + +#define HWCBE_UPDATE_SAMPLING(thread_id) \ + HWCBE_L4STAT_Update_Sampling_Cores(thread_id) + +# define HWCBE_ACCUM(thread_id, store_buffer) \ + HWCBE_L4STAT_Accum(thread_id, store_buffer) + +# define HWCBE_CLEANUP_COUNTERS_THREAD(nthreads) \ + HWCBE_L4STAT_CleanUp(nthreads) + +#define HWCBE_GET_COUNTER_DEFINITIONS(count) \ + HWCBE_L4STAT_GetCounterDefinitions(count) + #endif #endif /* __COMMON_HWC_H__ */ diff --git a/src/tracer/hwc/l4stat_hwc.c b/src/tracer/hwc/l4stat_hwc.c new file mode 100644 index 00000000..8ca720ac --- /dev/null +++ b/src/tracer/hwc/l4stat_hwc.c @@ -0,0 +1,393 
@@ +/*****************************************************************************\ + * ANALYSIS PERFORMANCE TOOLS * + * Extrae * + * Instrumentation package for parallel applications * + ***************************************************************************** + * ___ This library is free software; you can redistribute it and/or * + * / __ modify it under the terms of the GNU LGPL as published * + * / / _____ by the Free Software Foundation; either version 2.1 * + * / / / \ of the License, or (at your option) any later version. * + * ( ( ( B S C ) * + * \ \ \_____/ This library is distributed in hope that it will be * + * \ \__ useful but WITHOUT ANY WARRANTY; without even the * + * \___ implied warranty of MERCHANTABILITY or FITNESS FOR A * + * PARTICULAR PURPOSE. See the GNU LGPL for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License * + * along with this library; if not, write to the Free Software Foundation, * + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * + * The GNU LEsser General Public License is contained in the file COPYING. 
* + * --------- * + * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion * +\*****************************************************************************/ + +#include "common.h" + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_STRING_H +#include <string.h> +#endif +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "utils.h" +#include "events.h" +#include "clock.h" +#include "threadid.h" +#include "record.h" +#include "trace_macros.h" +#include "wrapper.h" +#include "stdio.h" +#include "common_hwc.h" +#include "l4stat_hwc.h" +#include /* for device driver prototypes */ +#include + +int sw_threads[8]; + + +/*------------------------------------------------ Static Variables ---------*/ + +static HWC_Definition_t *hwc_used = NULL; +static unsigned num_hwc_used = 0; + +static void HWCBE_L4STAT_AddDefinition(unsigned event_code, char *description) +{ + int found = FALSE; + unsigned u; + for (u = 0; !found && (u < num_hwc_used); u++) + found = hwc_used[u].event_code == event_code; + + if (!found) + { + HWC_Definition_t *grown = (HWC_Definition_t *)realloc(hwc_used, + sizeof(HWC_Definition_t) * (num_hwc_used + 1)); + if (grown == NULL) + { + fprintf(stderr, "ERROR! 
Cannot allocate memory to add definitions for hardware counters\n"); + return; + } + hwc_used = grown; /* commit only after realloc succeeded so the existing table is never lost */ + hwc_used[num_hwc_used].event_code = event_code; + snprintf(hwc_used[num_hwc_used].description, + MAX_HWC_DESCRIPTION_LENGTH, "[%s]", description); + num_hwc_used++; + } +} + +HWC_Definition_t *HWCBE_L4STAT_GetCounterDefinitions(unsigned *count) +{ + *count = num_hwc_used; + return hwc_used; +} + +int HWCBE_L4STAT_Add_Set(int pretended_set, int rank, int ncounters, char **counters, + char *domain, char *change_at_globalops, char *change_at_time, + int num_overflows, char **overflow_counters, unsigned long long *overflow_values) +{ + int i, j, ret, num_set = HWC_num_sets; + int counter; + char *info; + + UNREFERENCED_PARAMETER(num_overflows); + UNREFERENCED_PARAMETER(overflow_counters); + UNREFERENCED_PARAMETER(overflow_values); + + if (ncounters == 0 || counters == NULL) + return 0; + + if (ncounters > MAX_HWC) + { + fprintf(stderr, PACKAGE_NAME ": You cannot provide more HWC counters than %d (see set %d)\n", MAX_HWC, pretended_set); + ncounters = MAX_HWC; + } + + HWC_sets = (struct HWC_Set_t *)realloc(HWC_sets, sizeof(struct HWC_Set_t) * (HWC_num_sets + 1)); + if (HWC_sets == NULL) + { + fprintf(stderr, PACKAGE_NAME ": Cannot allocate memory for HWC_set (rank %d)\n", rank); + return 0; + } + + /* Initialize this set */ + HWC_sets[num_set].num_counters = 0; + + for (i = 0; i < ncounters; i++) + { + /* counter_last_position will hold the address of the end of the + counter[i] string + This shall be compared with strtoul_check to know if the hex + is correct or not + */ + char *counter_last_position = &(counters[i][strlen(counters[i])]); + char *strtoul_check; + + HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = + strtoul(counters[i], &strtoul_check, 16); + + if (strtoul_check != counter_last_position) + { + int EventCode; + + if (rank == 0) + fprintf(stderr, PACKAGE_NAME ": Currently name translation of counters is disabled, please specify the counter number \n"); + 
continue; + } + else if (HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] > 0x9F || HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] < 0x0) + { + if (rank == 0) + fprintf(stderr, PACKAGE_NAME ": Wrong counter number %s, please specify a counter in the range 0x0 to 0x9F\n", counters[i]); + continue; + } + + info = l4stat_event_names[HWC_sets[num_set].counters[HWC_sets[num_set].num_counters]]; + if (strcmp(info, L4STAT_BAD_CMD) == 0) + { + if (rank == 0) + fprintf(stderr, PACKAGE_NAME ": Error! Cannot query information for hardware counter %s (0x%08x). Check set %d.\n", counters[i], HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], pretended_set); + + HWC_sets[num_set].counters[HWC_sets[num_set].num_counters] = NO_COUNTER; + } + else + { + if (rank == 0) + HWCBE_L4STAT_AddDefinition(HWC_sets[num_set].counters[HWC_sets[num_set].num_counters], info); + + HWC_sets[num_set].num_counters++; + } + } + + if (HWC_sets[num_set].num_counters == 0) + { + if (rank == 0) + fprintf(stderr, PACKAGE_NAME ": Set %d of counters seems to be empty/invalid, skipping\n", pretended_set); + return 0; + } + + HWC_sets[num_set].change_type = CHANGE_NEVER; + + // Initializes reading of counters here as it has to be done just once instead of in Start_Set that is called per thread + for (i = 0; i < 4; i++) + { + counter = HWC_sets[0].num_counters * i; + + for (j = 0; j < HWC_sets[0].num_counters; j++) + { + //printf("Enabling counter %i, cpu %i event: %i \n", counter, i, HWC_sets[0].counters[j] ); + ret = l4stat_counter_enable(counter, HWC_sets[0].counters[j], i, 0); + if (ret != L4STAT_ERR_OK) + { + printf("Error: %s failed on l4stat_counter_enable!\n", __func__); + exit(1); + } + ret = l4stat_counter_set(counter, 0); + if (ret != L4STAT_ERR_OK) + { + printf("Error: %s failed on l4stat_counter_set!\n", __func__); + return -1; + } + counter++; + } + } + + /* We validate this set */ + HWC_num_sets++; + + if (rank == 0) + { + fprintf(stdout, PACKAGE_NAME ": HWC set %d 
contains following counters < ", pretended_set); + for (i = 0; i < HWC_sets[num_set].num_counters; i++) + { + if (HWC_sets[num_set].counters[i] != NO_COUNTER) + { + fprintf(stdout, "%s (0x%08x) ", l4stat_event_names[HWC_sets[num_set].counters[i]], HWC_sets[num_set].counters[i]); + } + } + fprintf(stdout, ">"); + + if (HWC_sets[num_set].change_type == CHANGE_TIME) + fprintf(stdout, " - changing every %lld nanoseconds\n", HWC_sets[num_set].change_at); + else if (HWC_sets[num_set].change_type == CHANGE_GLOPS) + fprintf(stdout, " - changing every %lld global operations\n", HWC_sets[num_set].change_at); + else + fprintf(stdout, " - never changes\n"); + + fflush(stdout); + } + + return HWC_sets[num_set].num_counters; +} + +int HWCBE_L4STAT_Start_Set(UINT64 countglops, UINT64 time, int numset, int threadid) +{ + int rc; + + /* The given set is a valid one? */ + if (numset < 0 || numset >= HWC_num_sets) + return FALSE; + + HWC_current_changeat = HWC_sets[numset].change_at; + HWC_current_changetype = HWC_sets[numset].change_type; + HWC_current_timebegin[threadid] = time; + HWC_current_glopsbegin[threadid] = countglops; + + TRACE_EVENT(time, HWC_CHANGE_EV, numset); + + return TRUE; +} + +int HWCBE_L4STAT_Stop_Set(UINT64 time, int numset, int threadid) +{ + //Not required currently + return 1; +} + +void HWCBE_L4STAT_CleanUp(unsigned nthreads) +{ + UNREFERENCED_PARAMETER(nthreads); + + int ret; + int i; + + for (i = 0; i < 16; i++) + { + ret = l4stat_counter_clear(i); + if (ret != L4STAT_ERR_OK) + { + printf("Error: %s failed in l4stat_counter_clear!\n", __func__); + exit(1); + } + ret = l4stat_counter_disable(i); + if (ret != L4STAT_ERR_OK) + { + printf("Error: %s failed in l4stat_counter_disable!\n", __func__); + exit(1); + } + } +} + +/****************************************************************************** + ** Function name : L4STAT_Initialize + ** + ** Description : + ******************************************************************************/ + +void 
HWCBE_L4STAT_Initialize(int TRCOptions) +{ + UNREFERENCED_PARAMETER(TRCOptions); + HWCBE_L4STAT_CleanUp(0); +} + +int HWCBE_L4STAT_Init_Thread(UINT64 time, int threadid, int forked) +{ + int cpu_self, i, ret; + cpu_self = (int)rtems_get_current_processor(); + sw_threads[threadid] = cpu_self; + HWC_Thread_Initialized[threadid] = HWCBE_L4STAT_Start_Set(0, time, HWC_current_set[threadid], threadid); + + return HWC_Thread_Initialized[threadid]; +} + +int HWCBE_L4STAT_Read(unsigned int tid, long long *store_buffer) +{ + int cpu_self, i, ret; + uint32_t value; + cpu_self = (int)rtems_get_current_processor(); + int counter = HWC_sets[0].num_counters * cpu_self; + for (i = 0; i < HWC_sets[0].num_counters; i++) + { + ret = l4stat_counter_get(counter, &value); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat read thread!\n"); + return -1; + } + counter++; + store_buffer[i] = (long long)value; + } + return TRUE; +} + +int HWCBE_L4STAT_Read_Sampling(unsigned int tid, long long *store_buffer) +{ + int cpu_self, i, ret; + uint32_t value; + cpu_self = sw_threads[tid]; + int counter = HWC_sets[0].num_counters * cpu_self; + for (i = 0; i < HWC_sets[0].num_counters; i++) + { + ret = l4stat_counter_get(counter, &value); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat read thread!\n"); + return -1; + } + + store_buffer[i] = value; + ret = l4stat_counter_set(counter, 0); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat reset thread!\n"); + return -1; + } + counter++; + } + + return TRUE; +} + +int HWCBE_L4STAT_Reset(unsigned int tid) +{ + int cpu_self, i, ret; + uint32_t value; + cpu_self = (int)rtems_get_current_processor(); + int counter = HWC_sets[0].num_counters * cpu_self; + for (i = 0; i < HWC_sets[0].num_counters; i++) + { + ret = l4stat_counter_set(counter, 0); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat reset thread!\n"); + return -1; + } + counter++; + } + return TRUE; +} + +void HWCBE_L4STAT_Update_Sampling_Cores(unsigned int tid) +{ + 
sw_threads[tid] = (int)rtems_get_current_processor(); +} + +int HWCBE_L4STAT_Accum(unsigned int tid, long long *store_buffer) +{ + int cpu_self, i, ret; + uint32_t value; + cpu_self = (int)rtems_get_current_processor(); + int counter = HWC_sets[0].num_counters * cpu_self; + for (i = 0; i < HWC_sets[0].num_counters; i++) + { + ret = l4stat_counter_get(counter, &value); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat read accum thread!\n"); + return -1; + } + ret = l4stat_counter_set(counter, 0); + if (ret != L4STAT_ERR_OK) + { + printf("Error: l4stat reset accum thread!\n"); + return -1; + } + counter++; + store_buffer[i] += value; + } + return TRUE; +} diff --git a/src/tracer/hwc/l4stat_hwc.h b/src/tracer/hwc/l4stat_hwc.h new file mode 100644 index 00000000..3e0f466b --- /dev/null +++ b/src/tracer/hwc/l4stat_hwc.h @@ -0,0 +1,244 @@ +/*****************************************************************************\ + * ANALYSIS PERFORMANCE TOOLS * + * Extrae * + * Instrumentation package for parallel applications * + ***************************************************************************** + * ___ This library is free software; you can redistribute it and/or * + * / __ modify it under the terms of the GNU LGPL as published * + * / / _____ by the Free Software Foundation; either version 2.1 * + * / / / \ of the License, or (at your option) any later version. * + * ( ( ( B S C ) * + * \ \ \_____/ This library is distributed in hope that it will be * + * \ \__ useful but WITHOUT ANY WARRANTY; without even the * + * \___ implied warranty of MERCHANTABILITY or FITNESS FOR A * + * PARTICULAR PURPOSE. See the GNU LGPL for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License * + * along with this library; if not, write to the Free Software Foundation, * + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * + * The GNU LEsser General Public License is contained in the file COPYING. 
* + * --------- * + * Barcelona Supercomputing Center - Centro Nacional de Supercomputacion * +\*****************************************************************************/ + +#ifndef __L4STAT_HWC_H__ +#define __L4STAT_HWC_H__ + +#include "num_hwc.h" + +/*------------------------------------------------ Prototypes ---------------*/ + +void HWCBE_L4STAT_Initialize(int TRCOptions); +int HWCBE_L4STAT_Init_Thread(UINT64 time, int threadid, int forked); +int HWCBE_L4STAT_Allocate_eventsets_per_thread(int num_set, int old_thread_num, int new_thread_num); + +int HWCBE_L4STAT_Start_Set(UINT64 countglops, UINT64 time, int numset, int threadid); +int HWCBE_L4STAT_Stop_Set(UINT64 time, int numset, int threadid); +int HWCBE_L4STAT_Add_Set(int pretended_set, int rank, int ncounters, char **counters, char *domain, + char *change_at_globalops, char *change_at_time, int num_overflows, + char **overflow_counters, unsigned long long *overflow_values); + +int HWCBE_L4STAT_Read(unsigned int tid, long long *store_buffer); +int HWCBE_L4STAT_Read_Sampling(unsigned int tid, long long *store_buffer); +int HWCBE_L4STAT_Reset(unsigned int tid); +int HWCBE_L4STAT_Accum(unsigned int tid, long long *store_buffer); +void HWCBE_L4STAT_Update_Sampling_Cores(unsigned int tid); + +void HWCBE_L4STAT_CleanUp(unsigned nthreads); + +HWC_Definition_t *HWCBE_L4STAT_GetCounterDefinitions(unsigned *count); + +#define L4STAT_BAD_CMD "N/A. 
Wrong event" /* placeholder text for event codes with no event assigned */ + +#define L4STAT_NULL -1 + +/* Description of each L4STAT event; an entry's position in the array matches the hex code in its trailing comment, and codes without an event hold L4STAT_BAD_CMD. */ static const char *l4stat_event_names[] = { + "Instruction cache miss", /* 0x00 */ + "Instruction MMU TLB miss", /* 0x01 */ + "Instruction cache hold", /* 0x02 */ + "Instruction MMU hold", /* 0x03 */ + L4STAT_BAD_CMD, /* 0x04 */ + L4STAT_BAD_CMD, /* 0x05 */ + L4STAT_BAD_CMD, /* 0x06 */ + L4STAT_BAD_CMD, /* 0x07 */ + "Data cache (read) miss", /* 0x08 */ + "Data MMU TLB miss", /* 0x09 */ + "Data cache hold", /* 0x0a */ + "Data MMU hold", /* 0x0b */ + L4STAT_BAD_CMD, /* 0x0c */ + L4STAT_BAD_CMD, /* 0x0d */ + L4STAT_BAD_CMD, /* 0x0e */ + L4STAT_BAD_CMD, /* 0x0f */ + "Data write buffer hold", /* 0x10 */ + "Total instruction count", /* 0x11 */ + "Integer instruction count", /* 0x12 */ + "Floating-point unit instruction count", /* 0x13 */ + "Branch prediction miss", /* 0x14 */ + "Execution time, exluding debug mode", /* 0x15; NOTE(review): the string spells excluding as exluding */ + L4STAT_BAD_CMD, /* 0x16 */ + "AHB utilization (per AHB master)", /* 0x17 */ + "AHB utilization (total)", /* 0x18 */ + L4STAT_BAD_CMD, /* 0x19 */ + L4STAT_BAD_CMD, /* 0x1a */ + L4STAT_BAD_CMD, /* 0x1b */ + L4STAT_BAD_CMD, /* 0x1c */ + L4STAT_BAD_CMD, /* 0x1d */ + L4STAT_BAD_CMD, /* 0x1e */ + L4STAT_BAD_CMD, /* 0x1f */ + L4STAT_BAD_CMD, /* 0x20 */ + L4STAT_BAD_CMD, /* 0x21 */ + "Integer branches", /* 0x22 */ + L4STAT_BAD_CMD, /* 0x23 */ + L4STAT_BAD_CMD, /* 0x24 */ + L4STAT_BAD_CMD, /* 0x25 */ + L4STAT_BAD_CMD, /* 0x26 */ + L4STAT_BAD_CMD, /* 0x27 */ + "CALL instructions", /* 0x28 */ + L4STAT_BAD_CMD, /* 0x29 */ + L4STAT_BAD_CMD, /* 0x2a */ + L4STAT_BAD_CMD, /* 0x2b */ + L4STAT_BAD_CMD, /* 0x2c */ + L4STAT_BAD_CMD, /* 0x2d */ + L4STAT_BAD_CMD, /* 0x2e */ + L4STAT_BAD_CMD, /* 0x2f */ + "Regular type 2 instructions", /* 0x30 */ + L4STAT_BAD_CMD, /* 0x31 */ + L4STAT_BAD_CMD, /* 0x32 */ + L4STAT_BAD_CMD, /* 0x33 */ + L4STAT_BAD_CMD, /* 0x34 */ + L4STAT_BAD_CMD, /* 0x35 */ + L4STAT_BAD_CMD, /* 0x36 */ + L4STAT_BAD_CMD, /* 0x37 */ + "LOAD and STORE instructions", /* 0x38 */ + "LOAD instructions", /* 0x39 */ + 
"STORE instructions", /* 0x3a */ + L4STAT_BAD_CMD, /* 0x3b */ + L4STAT_BAD_CMD, /* 0x3c */ + L4STAT_BAD_CMD, /* 0x3d */ + L4STAT_BAD_CMD, /* 0x3e */ + L4STAT_BAD_CMD, /* 0x3f */ + "AHB IDLE cycles", /* 0x40 */ + "AHB BUSY cycles", /* 0x41 */ + "AHB Non-Seq. transfers", /* 0x42 */ + "AHB Seq. transfers", /* 0x43 */ + "AHB read accesses", /* 0x44 */ + "AHB write accesses", /* 0x45 */ + "AHB byte accesses", /* 0x46 */ + "AHB half-word accesses", /* 0x47 */ + "AHB word accesses", /* 0x48 */ + "AHB double word accesses", /* 0x49 */ + "AHB quad word accesses", /* 0x4A */ + "AHB eight word accesses", /* 0x4B */ + "AHB waitstates", /* 0x4C */ + "AHB RETRY responses", /* 0x4D */ + "AHB SPLIT responses", /* 0x4E */ + "AHB SPLIT delay", /* 0x4F */ + "AHB bus locked", /* 0x50 */ + L4STAT_BAD_CMD, /* 0x51 */ + L4STAT_BAD_CMD, /* 0x52 */ + L4STAT_BAD_CMD, /* 0x53 */ + L4STAT_BAD_CMD, /* 0x54 */ + L4STAT_BAD_CMD, /* 0x55 */ + L4STAT_BAD_CMD, /* 0x56 */ + L4STAT_BAD_CMD, /* 0x57 */ + L4STAT_BAD_CMD, /* 0x58 */ + L4STAT_BAD_CMD, /* 0x59 */ + L4STAT_BAD_CMD, /* 0x5a */ + L4STAT_BAD_CMD, /* 0x5b */ + L4STAT_BAD_CMD, /* 0x5c */ + L4STAT_BAD_CMD, /* 0x5d */ + L4STAT_BAD_CMD, /* 0x5e */ + L4STAT_BAD_CMD, /* 0x5f */ + "external event 0", /* 0x60 */ + "external event 1", /* 0x61 */ + "external event 2", /* 0x62 */ + "external event 3", /* 0x63 */ + "external event 4", /* 0x64 */ + "external event 5", /* 0x65 */ + "external event 6", /* 0x66 */ + "external event 7", /* 0x67 */ + "external event 8", /* 0x68 */ + "external event 9", /* 0x69 */ + "external event 10", /* 0x6A */ + "external event 11", /* 0x6B */ + "external event 12", /* 0x6C */ + "external event 13", /* 0x6D */ + "external event 14", /* 0x6E */ + "external event 15", /* 0x6F */ + "AHB IDLE cycles (2)", /* 0x70 */ + "AHB BUSY cycles (2)", /* 0x71 */ + "AHB Non-Seq. transfers (2)", /* 0x72 */ + "AHB Seq. 
transfers (2)", /* 0x73 */ + "AHB read accesses (2)", /* 0x74 */ + "AHB write accesses (2)", /* 0x75 */ + "AHB byte accesses (2)", /* 0x76 */ + "AHB half-word accesses (2)", /* 0x77 */ + "AHB word accesses (2)", /* 0x78 */ + "AHB double word accesses (2)", /* 0x79 */ + "AHB quad word accesses (2)", /* 0x7A */ + "AHB eight word accesses (2)", /* 0x7B */ + "AHB waitstates (2)", /* 0x7C */ + "AHB RETRY responses (2)", /* 0x7D */ + "AHB SPLIT responses (2)", /* 0x7E */ + "AHB SPLIT delay (2)", /* 0x7F */ + "PMC: master 0 has grant", /* 0x80 */ + "PMC: master 1 has grant", /* 0x81 */ + "PMC: master 2 has grant", /* 0x82 */ + "PMC: master 3 has grant", /* 0x83 */ + "PMC: master 4 has grant", /* 0x84 */ + "PMC: master 5 has grant", /* 0x85 */ + "PMC: master 6 has grant", /* 0x86 */ + "PMC: master 7 has grant", /* 0x87 */ + "PMC: master 8 has grant", /* 0x88 */ + "PMC: master 9 has grant", /* 0x89 */ + "PMC: master 10 has grant", /* 0x8A */ + "PMC: master 11 has grant", /* 0x8B */ + "PMC: master 12 has grant", /* 0x8C */ + "PMC: master 13 has grant", /* 0x8D */ + "PMC: master 14 has grant", /* 0x8E */ + "PMC: master 15 has grant", /* 0x8F */ + "PMC: master 0 lacks grant", /* 0x90 */ + "PMC: master 1 lacks grant", /* 0x91 */ + "PMC: master 2 lacks grant", /* 0x92 */ + "PMC: master 3 lacks grant", /* 0x93 */ + "PMC: master 4 lacks grant", /* 0x94 */ + "PMC: master 5 lacks grant", /* 0x95 */ + "PMC: master 6 lacks grant", /* 0x96 */ + "PMC: master 7 lacks grant", /* 0x97 */ + "PMC: master 8 lacks grant", /* 0x98 */ + "PMC: master 9 lacks grant", /* 0x99 */ + "PMC: master 10 lacks grant", /* 0x9A */ + "PMC: master 11 lacks grant", /* 0x9B */ + "PMC: master 12 lacks grant", /* 0x9C */ + "PMC: master 13 lacks grant", /* 0x9D */ + "PMC: master 14 lacks grant", /* 0x9E */ + "PMC: master 15 lacks grant", /* 0x9F */ + ""}; /* NOTE(review): extra empty entry at index 0xA0, presumably an end-of-table marker - confirm against the code that walks this table */ + +/*------------------------------------------------ Useful Macros ------------*/ + +/** + * For each of the nc entries of counters, stores SAMPLE_COUNTER in values_ptr when that entry equals one of the no entries of counters_ovf, and NO_COUNTER otherwise; the slots from nc up to MAX_HWC are set to NO_COUNTER. 
+ */ +#define HARDWARE_COUNTERS_OVERFLOW(nc, counters, no, counters_ovf, values_ptr) \ + { \ + int found, cc, co; \ + \ + for (cc = 0; cc < nc; cc++) \ + { \ + for (co = 0, found = 0; co < no; co++) \ + found |= counters[cc] == counters_ovf[co]; \ + if (found) \ + values_ptr[cc] = (long long)(SAMPLE_COUNTER); \ + else \ + values_ptr[cc] = (long long)(NO_COUNTER); \ + } \ + for (cc = nc; cc < MAX_HWC; cc++) \ + values_ptr[cc] = (long long)(NO_COUNTER); \ + } + +/** + * NOTE(review): no definition follows this comment in this header; its original text, Returns the EventSet of the given thread for the current set, looks like a leftover - confirm and remove. + */ + +#endif /* __L4STAT_HWC_H__ */ diff --git a/src/tracer/sampling/timer/sampling-timer.c b/src/tracer/sampling/timer/sampling-timer.c index 8d429780..aad5ccac 100644 --- a/src/tracer/sampling/timer/sampling-timer.c +++ b/src/tracer/sampling/timer/sampling-timer.c @@ -56,6 +56,8 @@ #include "utils.h" #include "xalloc.h" +#if !defined(OS_RTEMS) + #if defined(SAMPLING_SUPPORT) int SamplingSupport = FALSE; static int SamplingRunning = FALSE; @@ -352,3 +354,49 @@ void unsetTimeSampling (void) SamplingRunning = FALSE; } } +#else +#include /* for device driver prototypes */ +#include + +rtems_id sampling_timer; /* timer created by setTimeSampling */ +int sampling_period; /* interval handed to rtems_timer_fire_after */ + +void sample() /* inserts one SAMPLING_EV event (value 1, time TIME) into the sampling buffer of each thread id below Extrae_get_num_threads() */ +{ + Backend_setInSampling(THREADID, TRUE); + for (unsigned int thread_id = 0; thread_id < Extrae_get_num_threads(); thread_id++) + { + event_t evt; + evt.time = TIME; + evt.event = SAMPLING_EV; + evt.value = 1; +#if defined (PAPI_COUNTERS) /* NOTE(review): counters are read only when PAPI_COUNTERS is defined - confirm this is intended for L4STAT builds */ + HARDWARE_COUNTERS_READ_SAMPLING(thread_id, evt, TRUE); +#endif + BUFFER_INSERT(thread_id, SAMPLING_BUFFER(thread_id), evt); + } + Backend_setInSampling(THREADID, FALSE); +} + +rtems_timer_service_routine hwc_sampling( + rtems_id timer, + void *arg) /* timer callback: takes a sample, then re-arms the same timer for another sampling_period; arg is not used */ +{ + sample(); + rtems_timer_fire_after(timer, sampling_period, hwc_sampling, NULL); +} + +void setTimeSampling (int sampling_p) /* stores the period, creates the timer and arms it with hwc_sampling as callback */ +{ + sampling_period=sampling_p; + rtems_timer_create(1, &sampling_timer); /* NOTE(review): the returned status is not checked */ + rtems_timer_fire_after(sampling_timer, sampling_period, hwc_sampling, NULL); +} + +void unsetTimeSampling (void) /* cancels the sampling timer */ 
+{ + rtems_timer_cancel(sampling_timer); +} +void setTimeSampling_postfork (void){}; /* empty on RTEMS; NOTE(review): the ';' after the body is a stray empty declaration */ +#endif + diff --git a/src/tracer/sampling/timer/sampling-timer.h b/src/tracer/sampling/timer/sampling-timer.h index e7d51a89..80e89059 100644 --- a/src/tracer/sampling/timer/sampling-timer.h +++ b/src/tracer/sampling/timer/sampling-timer.h @@ -24,6 +24,7 @@ #ifndef _SAMPLING_H_INCLUDED_ #define _SAMPLING_H_INCLUDED_ +#if !defined(OS_RTEMS) enum { SAMPLING_TIMING_REAL, @@ -37,6 +38,10 @@ void setTimeSampling_postfork (void); void Extrae_SamplingHandler (void* address); void Extrae_SamplingHandler_PAPI (void *address); +void unsetTimeSampling (void); +#else /* OS_RTEMS; NOTE(review): setTimeSampling_postfork, defined for RTEMS in sampling-timer.c, has no prototype in this branch */ +void setTimeSampling (int sampling_p); void unsetTimeSampling (void); #endif +#endif diff --git a/src/tracer/trace_hwc.h b/src/tracer/trace_hwc.h index 142bb139..bee5e8db 100644 --- a/src/tracer/trace_hwc.h +++ b/src/tracer/trace_hwc.h @@ -50,6 +50,20 @@ MARK_SET_READ(tid, evt, read_ok); \ } +/* RTEMS only: when filter is set and HWC_IsEnabled(), read the counters of thread tid into evt.HWCValues through HWC_Read_Sampling; the event is then marked through MARK_SET_READ with the read status, which stays FALSE when nothing was read. */ +#if defined(OS_RTEMS) +# define HARDWARE_COUNTERS_READ_SAMPLING(tid, evt, filter) \ +{ \ + int read_ok = FALSE; \ + if (filter && HWC_IsEnabled()) \ + { \ + read_ok = HWC_Read_Sampling (tid, evt.time, evt.HWCValues); \ + } \ + /* We write the counters even if there are errors while reading */ \ + MARK_SET_READ(tid, evt, read_ok); \ +} +#endif + # define HARDWARE_COUNTERS_ACCUMULATE(tid, evt, filter) \ { \ if (filter && HWC_IsEnabled()) \ diff --git a/src/tracer/wrappers/API/buffers.c b/src/tracer/wrappers/API/buffers.c index 619f9cd8..18352ac2 100644 --- a/src/tracer/wrappers/API/buffers.c +++ b/src/tracer/wrappers/API/buffers.c @@ -500,7 +500,7 @@ int Buffer_Flush(Buffer_t *buffer) DataBlocks_t *db = new_DataBlocks (buffer); event_t *head = NULL, *tail = NULL; int num_flushed, overflow; -#if defined(ARCH_SPARC64) +#if defined(ARCH_SPARC64) || defined(ARCH_SPARC) ssize_t r; #endif @@ -514,7 +514,7 @@ int Buffer_Flush(Buffer_t *buffer) num_flushed = Buffer_GetFillCount(buffer); 
CIRCULAR_STEP (tail, num_flushed, buffer->FirstEvt, buffer->LastEvt, &overflow); -#if !defined(ARCH_SPARC64) +#if !defined(ARCH_SPARC64) && !defined(ARCH_SPARC) # if defined(HAVE_ONLINE) /* Select events depending on the mask */ @@ -536,11 +536,30 @@ int Buffer_Flush(Buffer_t *buffer) DataBlocks_Free(db); #else /* ARCH_SPARC64 */ - - r = write (buffer->fd, head, buffer->FillCount*sizeof(event_t)); - if (r != buffer->FillCount*sizeof(event_t)) + #if !defined(OS_RTEMS) /* ARCH_SPARC64 */ + r = write (buffer->fd, head, buffer->FillCount*sizeof(event_t)); + if (r != buffer->FillCount*sizeof(event_t)) fprintf (stderr, "ERROR! Wrote %ld bytes instead of %ld bytes\n", r, buffer->FillCount*sizeof(event_t)); - + #else /* OS_RTEMS */ + /* NFS V2 has a maximum of 8192 bytes per write burst. + * To avoid errors the buffer is written to the file in parts of at most 60 events. + * sizeof(event_t)=112 + * 60*112=6720 bytes written per iteration + * */ + int lines2Write=buffer->FillCount; + int iterationWrite=(lines2Write<60)? lines2Write : 60; + while (lines2Write>0){ + + r = write (buffer->fd, head, iterationWrite*sizeof(event_t)); + if (r != iterationWrite*sizeof(event_t)) + fprintf (stderr, "ERROR! 
Wrote %ld bytes instead of %ld bytes\n", (long) r, (long) (iterationWrite*sizeof(event_t))); /* report the size of the chunk that was attempted, not of the whole buffer; casts match the %ld conversions */ + + lines2Write-=iterationWrite; + head+=iterationWrite; + + iterationWrite=(lines2Write +#endif + #include #include "config.h" #include "ompt-wrapper.h" +#include "utils.h" + +#if defined (OS_RTEMS) + #define GET_REAL_FUNCTION(f) __real_##f /* presumably resolved through the linker's --wrap option - confirm in the build flags */ + #define LINK_WRAP(f) __wrap_##f +#else + #define GET_REAL_FUNCTION(f) dlsym (RTLD_NEXT, TOSTRING(f)) /* no trailing ';' so both variants expand to an expression; callers end the statement themselves */ + #define LINK_WRAP(f) f +#endif #define INSTRUMENT_OMP_WRAPPER(func) ((func != NULL) && (EXTRAE_INITIALIZED()) && (EXTRAE_ON())) diff --git a/src/tracer/wrappers/OMP/omp-common_c.c b/src/tracer/wrappers/OMP/omp-common_c.c index 151b8b3c..583c52f5 100644 --- a/src/tracer/wrappers/OMP/omp-common_c.c +++ b/src/tracer/wrappers/OMP/omp-common_c.c @@ -42,33 +42,40 @@ } \ } -#if defined(PIC) +#if defined(PIC) || defined (OS_RTEMS) int (*omp_get_thread_num_real)(void) = NULL; void (*omp_set_num_threads_real)(int) = NULL; void (*omp_set_lock_real)(omp_lock_t *) = NULL; void (*omp_unset_lock_real)(omp_lock_t *) = NULL; #endif /* PIC */ +#if defined (OS_RTEMS) /* weak declarations of the __real_ symbols that GET_REAL_FUNCTION(f) expands to on RTEMS */ +extern int __real_omp_get_thread_num (void ) __attribute__((weak)); +extern void __real_omp_set_num_threads(int num) __attribute__((weak)); +extern void __real_omp_set_lock (omp_lock_t *lock) __attribute__((weak)); +extern void __real_omp_unset_lock (omp_lock_t *lock) __attribute__((weak)); +#endif + void omp_common_get_hook_points_c (int rank) { UNREFERENCED_PARAMETER(rank); -#if defined(PIC) +#if defined(PIC) || defined (OS_RTEMS) /* Obtain @ for omp_get_thread_num_real */ omp_get_thread_num_real = - (int(*)(void)) dlsym (RTLD_NEXT, "omp_get_thread_num"); + (int(*)(void)) GET_REAL_FUNCTION(omp_get_thread_num); /* Obtain @ for omp_set_num_threads */ omp_set_num_threads_real = - (void(*)(int)) dlsym (RTLD_NEXT, "omp_set_num_threads"); + (void(*)(int)) GET_REAL_FUNCTION(omp_set_num_threads); /* Obtain @ for omp_set_lock */ omp_set_lock_real = - (void(*)(omp_lock_t*)) dlsym (RTLD_NEXT, "omp_set_lock"); 
+ (void(*)(omp_lock_t*)) GET_REAL_FUNCTION(omp_set_lock); /* Obtain @ for omp_unset_lock */ omp_unset_lock_real = - (void(*)(omp_lock_t*)) dlsym (RTLD_NEXT, "omp_unset_lock"); + (void(*)(omp_lock_t*)) GET_REAL_FUNCTION(omp_unset_lock); #endif /* PIC */ } @@ -79,9 +86,8 @@ void omp_common_get_hook_points_c (int rank) * * \******************************************************************************/ -#if defined(PIC) - -int omp_get_thread_num (void) +#if defined(PIC) || defined (OS_RTEMS) /* the wrappers below are built for PIC and for RTEMS; NOTE(review): the matching #endif comments still mention PIC only */ +int LINK_WRAP(omp_get_thread_num) (void) /* LINK_WRAP(f) is __wrap_f on OS_RTEMS and plain f otherwise */ { static int shown = FALSE; int res = 0; @@ -118,7 +124,7 @@ int omp_get_thread_num (void) } void -omp_set_num_threads(int num_threads) +LINK_WRAP(omp_set_num_threads) (int num_threads) { #if defined(DEBUG) fprintf(stderr, PACKAGE_NAME @@ -161,7 +167,7 @@ omp_set_num_threads(int num_threads) #endif } -void omp_set_lock (omp_lock_t *lock) +void LINK_WRAP(omp_set_lock) (omp_lock_t *lock) { void *lock_ptr = (void *)lock; @@ -197,7 +203,7 @@ void omp_set_lock (omp_lock_t *lock) #endif } -void omp_unset_lock (omp_lock_t *lock) +void LINK_WRAP(omp_unset_lock) (omp_lock_t *lock) { void *lock_ptr = (void *)lock; @@ -234,3 +240,4 @@ void omp_unset_lock (omp_lock_t *lock) } #endif /* PIC */ +