diff --git a/3rd-party/openpmix b/3rd-party/openpmix index 9edbb692131..bf92f149513 160000 --- a/3rd-party/openpmix +++ b/3rd-party/openpmix @@ -1 +1 @@ -Subproject commit 9edbb692131b20ec189a8a547beab88d42adf9e4 +Subproject commit bf92f1495130ee0cbe2faaecf6f75f8d089f7756 diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index a7f57231ada..9ed1192ccd4 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -146,7 +146,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[ [#include ]) AC_CHECK_DECLS([UCP_ATTR_FIELD_MEMORY_TYPES], [AC_DEFINE([HAVE_UCP_ATTR_MEMORY_TYPES], [1], - [have memory types attribute])], [], + [have memory types attribute])], + [AC_DEFINE([HAVE_UCP_ATTR_MEMORY_TYPES], [0],[])], [#include ]) AC_CHECK_DECLS([ucp_tag_send_nbx, ucp_tag_send_sync_nbx, diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index bf32627137e..e8dd9e6a25a 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -20,6 +20,8 @@ dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved. dnl Copyright (c) 2015 Research Organization for Information Science dnl and Technology (RIST). All rights reserved. +dnl Copyright (c) 2019-2021 Triad National Security, LLC. All rights +dnl reserved. dnl dnl $COPYRIGHT$ dnl @@ -517,6 +519,13 @@ OPAL_WITH_OPTION_MIN_MAX_VALUE(port_name, 1024, 255, 2048) # Min length accroding to MPI-2.1, p. 418 OPAL_WITH_OPTION_MIN_MAX_VALUE(datarep_string, 128, 64, 256) +OPAL_WITH_OPTION_MIN_MAX_VALUE(pset_name_len, 512, 512, 4096) + +OPAL_WITH_OPTION_MIN_MAX_VALUE(stringtag_len, 1024, 256, 2048) + +AC_DEFINE_UNQUOTED([OPAL_ENABLE_CRDEBUG], [0], + [Whether we want checkpoint/restart enabled debugging functionality or not]) + # some systems don't want/like getpwuid AC_MSG_CHECKING([if want getpwuid support]) AC_ARG_ENABLE([getpwuid], diff --git a/examples/ring_c.c b/examples/ring_c.c index 8551e4f7015..86c205b3d21 100644 --- a/examples/ring_c.c +++ b/examples/ring_c.c @@ -7,8 +7,8 @@ * Simple ring test program in C. */ -#include "mpi.h" #include +#include "mpi.h" int main(int argc, char *argv[]) { @@ -34,8 +34,8 @@ int main(int argc, char *argv[]) if (0 == rank) { message = 10; - printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n", message, next, tag, - size); + printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n", + message, next, tag, size); MPI_Send(&message, 1, MPI_INT, next, tag, MPI_COMM_WORLD); printf("Process 0 sent to %d\n", next); } @@ -49,7 +49,8 @@ int main(int argc, char *argv[]) and can quit normally. */ while (1) { - MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); if (0 == rank) { --message; @@ -67,7 +68,8 @@ int main(int argc, char *argv[]) to be received before the program can exit */ if (0 == rank) { - MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } /* All done */ diff --git a/ompi/Makefile.am b/ompi/Makefile.am index b3952e375a5..d0bf58e69ea 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -200,6 +200,7 @@ include patterns/net/Makefile.am include patterns/comm/Makefile.am include mca/Makefile.am include util/Makefile.am +include instance/Makefile.am distclean-local: rm -f mpiext/static-components.h diff --git a/ompi/attribute/attribute.c b/ompi/attribute/attribute.c index aaf04a685e0..ffd6a967d00 100644 --- a/ompi/attribute/attribute.c +++ b/ompi/attribute/attribute.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -14,6 +15,8 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -240,8 +243,10 @@ #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" /* ompi_communicator_t generated in [COPY|DELETE]_ATTR_CALLBACKS */ #include "ompi/win/win.h" /* ompi_win_t generated in [COPY|DELETE]_ATTR_CALLBACKS */ +#include "ompi/instance/instance.h" #include "ompi/mpi/fortran/base/fint_2_int.h" + /* * Macros */ @@ -255,10 +260,11 @@ #define attr_communicator_f c_f_to_c_index #define attr_datatype_f d_f_to_c_index #define attr_win_f w_f_to_c_index +#define attr_instance_f i_f_to_c_index -#define CREATE_KEY(key) opal_bitmap_find_and_set_first_unset_bit(key_bitmap, (key)) +#define CREATE_KEY(key) opal_bitmap_find_and_set_first_unset_bit(attr_subsys->key_bitmap, (key)) -#define FREE_KEY(key) opal_bitmap_clear_bit(key_bitmap, (key)) +#define FREE_KEY(key) opal_bitmap_clear_bit(attr_subsys->key_bitmap, (key)) /* Not checking for NULL_DELETE_FN here, since according to the @@ -406,6 +412,15 @@ typedef struct attribute_value_t { int av_sequence; } attribute_value_t; +/* + * struct to hold state of attr subsys + */ + +typedef struct attr_subsys_t { + opal_object_t super; + opal_hash_table_t *keyval_hash; + opal_bitmap_t *key_bitmap; + } attr_subsys_t; /* * Local functions @@ -413,6 +428,8 @@ typedef struct attribute_value_t { static void attribute_value_construct(attribute_value_t *item); static void ompi_attribute_keyval_construct(ompi_attribute_keyval_t *keyval); static void ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval); +static void attr_subsys_construct(attr_subsys_t *subsys); +static void attr_subsys_destruct(attr_subsys_t *subsys); static int set_value(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, attribute_value_t *new_attr, @@ -425,6 +442,13 @@ static MPI_Aint translate_to_aint(attribute_value_t *val); static int compare_attr_sequence(const void *attr1, const void *attr2); +/* + * attribute_subsys_t class + */ +static OBJ_CLASS_INSTANCE(attr_subsys_t, + opal_object_t, + attr_subsys_construct, + attr_subsys_destruct); /* * attribute_value_t class @@ -448,19 +472,17 @@ static OBJ_CLASS_INSTANCE(ompi_attribute_keyval_t, * Static variables */ -static opal_hash_table_t *keyval_hash; -static opal_bitmap_t *key_bitmap; -static int attr_sequence; +static attr_subsys_t *attr_subsys = NULL; static unsigned int int_pos = 12345; static unsigned int integer_pos = 12345; +static int attr_sequence; /* * MPI attributes are *not* high performance, so just use a One Big Lock * approach. However, this lock is released before a user provided callback is * triggered and acquired right after, allowing for recursive behaviors. */ -static opal_mutex_t attribute_lock; - +static opal_mutex_t attribute_lock = OPAL_MUTEX_STATIC_INIT; /* * attribute_value_t constructor function @@ -507,33 +529,63 @@ ompi_attribute_keyval_destruct(ompi_attribute_keyval_t *keyval) free(keyval->bindings_extra_state); } - opal_hash_table_remove_value_uint32(keyval_hash, keyval->key); + opal_hash_table_remove_value_uint32(attr_subsys->keyval_hash, keyval->key); FREE_KEY(keyval->key); } } -/* - * This will initialize the main list to store key- attribute - * items. This will be called one time, during MPI_INIT(). - */ -int ompi_attr_init(void) +int ompi_attr_get_ref(void) +{ + int ret = OMPI_SUCCESS; + + OPAL_THREAD_LOCK(&attribute_lock); + + if (NULL == attr_subsys) { + attr_subsys = OBJ_NEW(attr_subsys_t); + if (NULL == attr_subsys) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto fn_exit; + } + } else { + OBJ_RETAIN(attr_subsys); + } + +fn_exit: + OPAL_THREAD_UNLOCK(&attribute_lock); + + return ret; +} + +int ompi_attr_put_ref(void) +{ + if (NULL != attr_subsys) { + OBJ_RELEASE(attr_subsys); + } + return OMPI_SUCCESS; +} + +static void attr_subsys_construct(attr_subsys_t *subsys) { int ret; void *bogus = (void*) 1; int *p = (int *) &bogus; - keyval_hash = OBJ_NEW(opal_hash_table_t); - if (NULL == keyval_hash) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - key_bitmap = OBJ_NEW(opal_bitmap_t); + subsys->keyval_hash = OBJ_NEW(opal_hash_table_t); + assert (NULL != subsys->keyval_hash); + + subsys->key_bitmap = OBJ_NEW(opal_bitmap_t); + /* * Set the max size to OMPI_FORTRAN_HANDLE_MAX to enforce bound */ - opal_bitmap_set_max_size (key_bitmap, OMPI_FORTRAN_HANDLE_MAX); - if (0 != opal_bitmap_init(key_bitmap, 32)) { - return OMPI_ERR_OUT_OF_RESOURCE; + opal_bitmap_set_max_size (subsys->key_bitmap, + OMPI_FORTRAN_HANDLE_MAX); + ret = opal_bitmap_init(subsys->key_bitmap, 32); + assert(OPAL_SUCCESS == ret); + + for (int i = 0; i < MPI_ATTR_KEY_LAST; i++) { + opal_bitmap_set_bit(subsys->key_bitmap, i); } for (int_pos = 0; int_pos < (sizeof(void*) / sizeof(int)); @@ -550,31 +602,21 @@ int ompi_attr_init(void) } } - OBJ_CONSTRUCT(&attribute_lock, opal_mutex_t); + ret = opal_hash_table_init(subsys->keyval_hash, ATTR_TABLE_SIZE); + assert (OPAL_SUCCESS == ret); - if (OMPI_SUCCESS != (ret = opal_hash_table_init(keyval_hash, - ATTR_TABLE_SIZE))) { - return ret; - } - if (OMPI_SUCCESS != (ret = ompi_attr_create_predefined())) { - return ret; - } - - return OMPI_SUCCESS; + attr_sequence = 0; } /* - * Cleanup everything during MPI_Finalize(). + * Cleanup everything when no more refs to the attr subsys */ -int ompi_attr_finalize(void) +static void attr_subsys_destruct(attr_subsys_t *subsys) { ompi_attr_free_predefined(); - OBJ_DESTRUCT(&attribute_lock); - OBJ_RELEASE(keyval_hash); - OBJ_RELEASE(key_bitmap); - - return OMPI_SUCCESS; + OBJ_RELEASE(subsys->keyval_hash); + OBJ_RELEASE(subsys->key_bitmap); } /*****************************************************************************/ @@ -609,10 +651,15 @@ static int ompi_attr_create_keyval_impl(ompi_attribute_type_t type, /* Create a new unique key and fill the hash */ OPAL_THREAD_LOCK(&attribute_lock); - ret = CREATE_KEY(key); + ret = MPI_SUCCESS; + if (!(flags & OMPI_KEYVAL_PREDEFINED)) { + ret = CREATE_KEY(key); + } + if (OMPI_SUCCESS == ret) { keyval->key = *key; - ret = opal_hash_table_set_value_uint32(keyval_hash, *key, keyval); + ret = opal_hash_table_set_value_uint32(attr_subsys->keyval_hash, + *key, keyval); } if (OMPI_SUCCESS != ret) { @@ -635,11 +682,22 @@ int ompi_attr_create_keyval(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } es_tmp.c_ptr = extra_state; - return ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, - key, &es_tmp, flags, - bindings_extra_state); + rc = ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, + key, &es_tmp, flags, + bindings_extra_state); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_mpi_instance_release (); + } + + return rc; } int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, @@ -651,6 +709,12 @@ int ompi_attr_create_keyval_fint(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } es_tmp.f_integer = extra_state; #if SIZEOF_INT == OMPI_SIZEOF_FORTRAN_INTEGER @@ -670,6 +734,12 @@ int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, void *bindings_extra_state) { ompi_attribute_fortran_ptr_t es_tmp; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } es_tmp.f_address = extra_state; return ompi_attr_create_keyval_impl(type, copy_attr_fn, delete_attr_fn, @@ -687,7 +757,7 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, /* Find the key-value pair */ OPAL_THREAD_LOCK(&attribute_lock); - ret = opal_hash_table_get_value_uint32(keyval_hash, *key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, *key, (void **) &keyval); if ((OMPI_SUCCESS != ret) || (NULL == keyval) || (keyval->attr_type != type) || @@ -707,6 +777,9 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, opal_atomic_wmb(); OPAL_THREAD_UNLOCK(&attribute_lock); + /* balance out retain in keyval_create */ + ompi_mpi_instance_release (); + return MPI_SUCCESS; } @@ -720,7 +793,7 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, opal_hash_table_t **attr_hash, int key, void *attribute, bool predefined) { - int ret; + int ret = MPI_SUCCESS; attribute_value_t *new_attr = OBJ_NEW(attribute_value_t); if (NULL == new_attr) { return OMPI_ERR_OUT_OF_RESOURCE; @@ -942,7 +1015,7 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, /* Get the keyval in the main keyval hash - so that we know what the copy_attr_fn is */ - err = opal_hash_table_get_value_uint32(keyval_hash, key, + err = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &hash_value); if (OMPI_SUCCESS != err) { /* This should not happen! */ @@ -1037,7 +1110,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, attribute_value_t *attr; /* Check if the key is valid in the master keyval hash */ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); if ((OMPI_SUCCESS != ret) || (NULL == keyval) || @@ -1053,7 +1126,7 @@ static int ompi_attr_delete_impl(ompi_attribute_type_t type, void *object, goto exit; } - /* Check if the key is valid for the communicator/window/dtype. If + /* Check if the key is valid for the communicator/window/dtype/instance. If yes, then delete the attribute and key entry from the object's hash */ ret = opal_hash_table_get_value_uint32(attr_hash, key, (void**) &attr); @@ -1198,7 +1271,7 @@ static int set_value(ompi_attribute_type_t type, void *object, /* Note that this function can be invoked by ompi_attr_copy_all() to set attributes on the new object (in addition to the top-level MPI_* functions that set attributes). */ - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); /* If key not found */ @@ -1242,7 +1315,7 @@ static int set_value(ompi_attribute_type_t type, void *object, had_old = true; } - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void **) &keyval); if ((OMPI_SUCCESS != ret ) || (NULL == keyval)) { /* Keyval has disappeared underneath us -- this shouldn't @@ -1288,7 +1361,7 @@ static int get_value(opal_hash_table_t *attr_hash, int key, with the key, then the call is valid and returns FALSE in the flag argument */ *flag = 0; - ret = opal_hash_table_get_value_uint32(keyval_hash, key, + ret = opal_hash_table_get_value_uint32(attr_subsys->keyval_hash, key, (void**) &keyval); if (OMPI_ERR_NOT_FOUND == ret) { return MPI_KEYVAL_INVALID; diff --git a/ompi/attribute/attribute.h b/ompi/attribute/attribute.h index 2bec4387dad..cac5a04b590 100644 --- a/ompi/attribute/attribute.h +++ b/ompi/attribute/attribute.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +58,7 @@ enum ompi_attribute_type_t { * with 1 so that we can have it initialized to 0 * using memset in the constructor */ TYPE_ATTR, /**< The attribute belongs to datatype object */ - WIN_ATTR /**< The attribute belongs to a win object */ + WIN_ATTR, /**< The attribute belongs to a win object */ }; typedef enum ompi_attribute_type_t ompi_attribute_type_t; @@ -119,6 +122,7 @@ union ompi_attribute_fn_ptr_union_t { MPI_Comm_delete_attr_function *attr_communicator_delete_fn; MPI_Type_delete_attr_function *attr_datatype_delete_fn; MPI_Win_delete_attr_function *attr_win_delete_fn; + MPI_Session_delete_attr_function *attr_instance_delete_fn; MPI_Comm_internal_copy_attr_function *attr_communicator_copy_fn; MPI_Type_internal_copy_attr_function *attr_datatype_copy_fn; @@ -155,7 +159,7 @@ typedef union ompi_attribute_fortran_ptr_t ompi_attribute_fortran_ptr_t; struct ompi_attribute_keyval_t { opal_object_t super; - ompi_attribute_type_t attr_type; /**< One of COMM/WIN/DTYPE. This + ompi_attribute_type_t attr_type; /**< One of COMM/WIN/DTYPE/INSTANCE. This will be used to cast the copy/delete attribute functions properly and error checking */ @@ -201,24 +205,28 @@ int ompi_attr_hash_init(opal_hash_table_t **hash) } /** - * Initialize the main attribute hash that stores the keyvals and meta data + * Increase the reference count on the attributes subsystem. Instantiate subsys if + * not yet instantiated. * * @return OMPI return code */ -int ompi_attr_init(void); +int ompi_attr_get_ref(void); + /** - * Destroy the main attribute hash that stores the keyvals and meta data + * Decrease the reference count on the attributes subsystem. Attributes subsystem + * resources are released when the count drops to zero. + * + * @return OMPI return code */ -int ompi_attr_finalize(void); - +int ompi_attr_put_ref(void); /** - * Create a new key for use by attribute of Comm/Win/Datatype + * Create a new key for use by attribute of Comm/Win/Datatype/Instance * - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) * @param copy_attr_fn Union variable containing the function pointer * to be used in order to copy the attribute (IN) * @param delete_attr_fn Function pointer to be used for deleting the @@ -277,7 +285,7 @@ OMPI_DECLSPEC int ompi_attr_create_keyval_aint(ompi_attribute_type_t type, /** * Free an attribute keyval - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) * @param key key, which is set to MPI_KEY_INVALID (IN/OUT) * @return OMPI error code */ @@ -286,10 +294,10 @@ int ompi_attr_free_keyval(ompi_attribute_type_t type, int *key, bool predefined); /** - * Set an attribute on the comm/win/datatype in a form valid for C. + * Set an attribute on the comm/win/datatype/instance in a form valid for C. * - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -317,8 +325,8 @@ int ompi_attr_set_c(ompi_attribute_type_t type, void *object, /** * Set an int predefined attribute in a form valid for C. * - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute value (IN) @@ -344,11 +352,11 @@ int ompi_attr_set_int(ompi_attribute_type_t type, void *object, int key, int attribute, bool predefined); /** - * Set an attribute on the comm/win/datatype in a form valid for + * Set an attribute on the comm/win/datatype/instance in a form valid for * Fortran MPI-1. * - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -375,11 +383,11 @@ OMPI_DECLSPEC int ompi_attr_set_fint(ompi_attribute_type_t type, void *object, bool predefined); /** - * Set an attribute on the comm/win/datatype in a form valid for + * Set an attribute on the comm/win/datatype/instance in a form valid for * Fortran MPI-2. * - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN/OUT) * @param key Key val for the attribute (IN) * @param attribute The actual attribute pointer (IN) @@ -406,7 +414,7 @@ OMPI_DECLSPEC int ompi_attr_set_aint(ompi_attribute_type_t type, void *object, bool predefined); /** - * Get an attribute on the comm/win/datatype in a form valid for C. + * Get an attribute on the comm/win/datatype/instance in a form valid for C. * * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) @@ -432,7 +440,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, /** - * Get an attribute on the comm/win/datatype in a form valid for + * Get an attribute on the comm/win/datatype/instance in a form valid for * Fortran MPI-1. * * @param attr_hash The attribute hash table hanging on the object(IN) @@ -459,7 +467,7 @@ int ompi_attr_get_c(opal_hash_table_t *attr_hash, int key, /** - * Get an attribute on the comm/win/datatype in a form valid for + * Get an attribute on the comm/win/datatype/instance in a form valid for * Fortran MPI-2. * * @param attr_hash The attribute hash table hanging on the object(IN) @@ -486,9 +494,9 @@ OMPI_DECLSPEC int ompi_attr_get_aint(opal_hash_table_t *attr_hash, int key, /** - * Delete an attribute on the comm/win/datatype - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The actual Comm/Win/Datatype object (IN) + * Delete an attribute on the comm/win/datatype/instance + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The actual Comm/Win/Datatype/Instance object (IN) * @param attr_hash The attribute hash table hanging on the object(IN) * @param key Key val for the attribute (IN) * @param predefined Whether the key is predefined or not 0/1 (IN) @@ -503,7 +511,7 @@ int ompi_attr_delete(ompi_attribute_type_t type, void *object, /** * This to be used from functions like MPI_*_DUP in order to copy all - * the attributes from the old Comm/Win/Dtype object to a new + * the attributes from the old Comm/Win/Dtype/Instance object to a new * object. * @param type Type of attribute (COMM/WIN/DTYPE) (IN) * @param old_object The old COMM/WIN/DTYPE object (IN) @@ -512,6 +520,7 @@ int ompi_attr_delete(ompi_attribute_type_t type, void *object, * @param newattr_hash The attribute hash table hanging on new object(IN) * @return OMPI error code * + * Note: not valid for instance objects as they can not be copied */ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, @@ -520,10 +529,10 @@ int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, /** - * This to be used to delete all the attributes from the Comm/Win/Dtype + * This to be used to delete all the attributes from the Comm/Win/Dtype/Instance * object in one shot - * @param type Type of attribute (COMM/WIN/DTYPE) (IN) - * @param object The COMM/WIN/DTYPE object (IN) + * @param type Type of attribute (COMM/WIN/DTYPE/INSTANCE) (IN) + * @param object The COMM/WIN/DTYPE/INSTANCE object (IN) * @param attr_hash The attribute hash table hanging on the object(IN) * @return OMPI error code * diff --git a/ompi/attribute/attribute_predefined.c b/ompi/attribute/attribute_predefined.c index df948378bb7..0582b598eeb 100644 --- a/ompi/attribute/attribute_predefined.c +++ b/ompi/attribute/attribute_predefined.c @@ -193,10 +193,10 @@ static int create_comm(int target_keyval, bool want_inherit) ompi_attribute_fn_ptr_union_t copy; ompi_attribute_fn_ptr_union_t del; - keyval = -1; copy.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) (want_inherit ? MPI_COMM_DUP_FN : MPI_COMM_NULL_COPY_FN); del.attr_communicator_delete_fn = MPI_COMM_NULL_DELETE_FN; + keyval = target_keyval; err = ompi_attr_create_keyval(COMM_ATTR, copy, del, &keyval, NULL, OMPI_KEYVAL_PREDEFINED, NULL); if (MPI_SUCCESS != err) { @@ -223,9 +223,9 @@ static int create_win(int target_keyval) ompi_attribute_fn_ptr_union_t copy; ompi_attribute_fn_ptr_union_t del; - keyval = -1; copy.attr_win_copy_fn = (MPI_Win_internal_copy_attr_function*)MPI_WIN_NULL_COPY_FN; del.attr_win_delete_fn = MPI_WIN_NULL_DELETE_FN; + keyval = target_keyval; err = ompi_attr_create_keyval(WIN_ATTR, copy, del, &keyval, NULL, OMPI_KEYVAL_PREDEFINED, NULL); if (MPI_SUCCESS != err) { diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index c31a698b88e..9b007ddaaf5 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -24,6 +24,8 @@ * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -53,6 +55,8 @@ #include "ompi/mca/pml/pml.h" #include "ompi/request/request.h" +#include "ompi/runtime/params.h" + /* ** sort-function for MPI_Comm_split */ @@ -91,6 +95,10 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro opal_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req); +static int ompi_comm_get_rprocs (ompi_communicator_t *local_comm, ompi_communicator_t *bridge_comm, + int local_leader, int remote_leader, int tag, int rsize, + ompi_proc_t ***rprocs); + /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ @@ -107,15 +115,15 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, - ompi_group_t *remote_group ) + ompi_group_t *remote_group, + uint32_t flags) { ompi_request_t *req; int rc; rc = ompi_comm_set_nb (ncomm, oldcomm, local_size, local_ranks, remote_size, remote_ranks, - attr, errh, copy_topocomponent, local_group, remote_group, &req); + attr, errh, local_group, remote_group, flags, &req); if (OMPI_SUCCESS != rc) { return rc; } @@ -127,23 +135,25 @@ int ompi_comm_set ( ompi_communicator_t **ncomm, return rc; } +static int ompi_comm_set_simple (ompi_communicator_t **ncomm, ompi_errhandler_t *errhandler, + ompi_group_t *local_group) +{ + return ompi_comm_set (ncomm, NULL, local_group->grp_proc_count, NULL, 0, NULL, NULL, errhandler, + local_group, NULL, 0); +} + + /* * if remote_group == &ompi_mpi_group_null, then the new communicator * is forced to be an inter communicator. */ -int ompi_comm_set_nb ( ompi_communicator_t **ncomm, - ompi_communicator_t *oldcomm, - int local_size, - int *local_ranks, - int remote_size, - int *remote_ranks, - opal_hash_table_t *attr, - ompi_errhandler_t *errh, - bool copy_topocomponent, - ompi_group_t *local_group, - ompi_group_t *remote_group, - ompi_request_t **req ) +int ompi_comm_set_nb (ompi_communicator_t **ncomm, ompi_communicator_t *oldcomm, int local_size, + int *local_ranks, int remote_size, int *remote_ranks, opal_hash_table_t *attr, + ompi_errhandler_t *errh, ompi_group_t *local_group, ompi_group_t *remote_group, + uint32_t flags, ompi_request_t **req) { + bool copy_topocomponent = !!(flags & OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); + bool dup_comm = !(flags & OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP); ompi_communicator_t *newcomm = NULL; int ret; @@ -165,8 +175,6 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, newcomm->super.s_info = NULL; /* fill in the inscribing hyper-cube dimensions */ newcomm->c_cube_dim = opal_cube_dim(local_size); - newcomm->c_id_available = MPI_UNDEFINED; - newcomm->c_id_start_index = MPI_UNDEFINED; if (NULL == local_group) { /* determine how the list of local_rank can be stored most @@ -181,6 +189,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, OBJ_RETAIN(newcomm->c_local_group); } newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank; + newcomm->c_assertions = 0; /* Set remote group and duplicate the local comm, if applicable */ if ( NULL != remote_group ) { @@ -198,15 +207,45 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } newcomm->c_flags |= OMPI_COMM_INTER; + newcomm->c_index_vec = malloc(remote_size * sizeof(int)); + if (NULL == newcomm->c_index_vec) { + OBJ_RELEASE(newcomm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (int i = 0; i < remote_size; i++) { + if (OMPI_COMM_IS_GLOBAL_INDEX(newcomm)) { + newcomm->c_index_vec[i] = newcomm->c_index; + } else { + newcomm->c_index_vec[i] = -2; + } + } - old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? oldcomm : oldcomm->c_local_comm; + if (dup_comm) { + old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? oldcomm : oldcomm->c_local_comm; - /* NTH: use internal idup function that takes a local group argument */ - ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL, - &newcomm->c_local_comm, req); + /* NTH: use internal idup function that takes a local group argument */ + ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL, + &newcomm->c_local_comm, req); + } else { + /* take ownership of the old communicator (it must be an intracommunicator) */ + assert (OMPI_COMM_IS_INTRA(oldcomm)); + newcomm->c_local_comm = oldcomm; + } } else { newcomm->c_remote_group = newcomm->c_local_group; OBJ_RETAIN(newcomm->c_remote_group); + newcomm->c_index_vec = malloc(local_size * sizeof(int)); + if (NULL == newcomm->c_index_vec) { + OBJ_RELEASE(newcomm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + for (int i = 0; i < local_size; i++) { + if (OMPI_COMM_IS_GLOBAL_INDEX(newcomm)) { + newcomm->c_index_vec[i] = newcomm->c_index; + } else { + newcomm->c_index_vec[i] = -2; + } + } } /* Check how many different jobids are represented in this communicator. @@ -221,7 +260,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, OBJ_RETAIN ( newcomm->error_handler ); /* Set Topology, if required and if available */ - if ( copy_topocomponent && (NULL != oldcomm->c_topo) ) { + if (NULL != oldcomm && copy_topocomponent && (NULL != oldcomm->c_topo) ) { /** * The MPI standard is pretty clear on this, the topology information * behave as info keys, and is copied only on MPI_Comm_dup. @@ -233,7 +272,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } /* Copy attributes and call according copy functions, if required */ - if (NULL != oldcomm->c_keyhash) { + if (NULL != oldcomm && NULL != oldcomm->c_keyhash) { if (NULL != attr) { ompi_attr_hash_init(&newcomm->c_keyhash); if (OMPI_SUCCESS != (ret = ompi_attr_copy_all (COMM_ATTR, oldcomm, @@ -245,6 +284,10 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, } } + if (NULL != oldcomm) { + newcomm->instance = oldcomm->instance; + } + *ncomm = newcomm; return (OMPI_SUCCESS); } @@ -271,8 +314,8 @@ int ompi_comm_group ( ompi_communicator_t* comm, ompi_group_t **group ) /* ** Counterpart to MPI_Comm_create. To be used within OMPI. */ -int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, - ompi_communicator_t **newcomm ) +int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opal_info_t *info, + ompi_communicator_t **newcomm) { ompi_communicator_t *newcomp = NULL; int rsize; @@ -350,9 +393,9 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, rranks, /* remote_ranks */ NULL, /* attrs */ comm->error_handler, /* error handler */ - false, /* dont copy the topo */ group, /* local group */ - remote_group); /* remote group */ + remote_group, /* remote group */ + 0); /* flags */ if ( OMPI_SUCCESS != rc ) { goto exit; @@ -364,9 +407,19 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, goto exit; } + if (OMPI_COMM_IS_INTRA(comm)) { + newcomp->c_index_vec[newcomp->c_my_rank] = newcomp->c_index; + } + + /* Copy info if there is one. */ + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d CREATE FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %s CREATE FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* Activate the communicator and init coll-component */ rc = ompi_comm_activate (&newcomp, comm, NULL, NULL, NULL, false, mode); @@ -397,6 +450,11 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, return ( rc ); } +int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group, + ompi_communicator_t **newcomm ) +{ + return ompi_comm_create_w_info (comm, group, NULL, newcomm); +} /**********************************************************************/ /**********************************************************************/ @@ -574,9 +632,9 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, rranks, /* remote_ranks */ NULL, /* attrs */ comm->error_handler,/* error handler */ - pass_on_topo, - local_group, /* local group */ - remote_group); /* remote group */ + local_group, /* local group */ + remote_group, /* remote group */ + pass_on_topo ? OMPI_COMM_SET_FLAG_COPY_TOPOLOGY : 0); /* flags */ if ( OMPI_SUCCESS != rc ) { goto exit; @@ -586,9 +644,8 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, OBJ_RELEASE(local_group); if (NULL != newcomp->c_local_comm) { snprintf(newcomp->c_local_comm->c_name, MPI_MAX_OBJECT_NAME, - "MPI COMMUNICATOR %d SPLIT FROM %d", - newcomp->c_local_comm->c_contextid, - comm->c_local_comm->c_contextid ); + "MPI COMM %s SPLIT FROM %s", ompi_comm_print_cid (newcomp), + ompi_comm_print_cid (comm)); } } @@ -606,9 +663,13 @@ int ompi_comm_split_with_info( ompi_communicator_t* comm, int color, int key, goto exit; } + if (!inter) { + newcomp->c_index_vec[newcomp->c_my_rank] = newcomp->c_index; + } + /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s SPLIT FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* Copy info if there is one */ if (info) { @@ -915,8 +976,7 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, do { rc = ompi_comm_set (&newcomp, comm, my_size, lranks, my_rsize, - rranks, NULL, comm->error_handler, false, - NULL, NULL); + rranks, NULL, comm->error_handler, NULL, NULL, 0); if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { break; } @@ -927,10 +987,10 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, break; } - // Copy info if there is one. - newcomp->super.s_info = OBJ_NEW(opal_info_t); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL); if (info) { - opal_info_dup(info, &(newcomp->super.s_info)); + opal_infosubscribe_change_info(&newcomp->super, info); } /* Activate the communicator and init coll-component */ @@ -954,8 +1014,8 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key, *newcomm = newcomp; /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SPLIT_TYPE FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s SPLIT_TYPE FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); break; } @@ -1009,9 +1069,9 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ comm->c_local_group, /* local group */ - remote_group ); /* remote group */ + remote_group, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); /* flags */ if ( OMPI_SUCCESS != rc) { return rc; } @@ -1024,13 +1084,14 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, opal_info_t *info, omp } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s DUP FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); // Copy info if there is one. - newcomp->super.s_info = OBJ_NEW(opal_info_t); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_LAZY_BARRIER); + ompi_comm_assert_subscribe (newcomp, OMPI_COMM_ASSERT_ACTIVE_POLL); if (info) { - opal_info_dup(info, &(newcomp->super.s_info)); + opal_infosubscribe_change_info(&newcomp->super, info); } /* activate communicator and init coll-module */ @@ -1106,9 +1167,9 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ group, /* local group */ remote_group, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY, /* flags */ subreq); /* new subrequest */ if (OMPI_SUCCESS != rc) { ompi_comm_request_return (request); @@ -1177,8 +1238,8 @@ static int ompi_comm_idup_with_info_activate (ompi_comm_request_t *request) } /* Set name for debugging purposes */ - snprintf(context->newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d DUP FROM %d", - context->newcomp->c_contextid, context->comm->c_contextid ); + snprintf(context->newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s DUP FROM %s", + ompi_comm_print_cid (context->newcomp), ompi_comm_print_cid (context->comm)); /* activate communicator and init coll-module */ rc = ompi_comm_activate_nb (&context->newcomp, context->comm, NULL, NULL, NULL, false, mode, subreq); @@ -1216,9 +1277,9 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int NULL, /* remote_procs */ comm->c_keyhash, /* attrs */ comm->error_handler, /* error handler */ - true, /* copy the topo */ group, /* local group */ - NULL); /* remote group */ + NULL, /* remote group */ + OMPI_COMM_SET_FLAG_COPY_TOPOLOGY); /* flags */ if ( OMPI_SUCCESS != rc) { return rc; } @@ -1231,8 +1292,8 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int } /* Set name for debugging purposes */ - snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d GROUP FROM %d", - newcomp->c_contextid, comm->c_contextid ); + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s GROUP FROM %s", + ompi_comm_print_cid (newcomp), ompi_comm_print_cid (comm)); /* activate communicator and init coll-module */ rc = ompi_comm_activate (&newcomp, comm, NULL, &tag, NULL, false, mode); @@ -1245,6 +1306,337 @@ int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int return MPI_SUCCESS; } +int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info_t *info, + ompi_errhandler_t *errhandler, ompi_communicator_t **newcomm) +{ + ompi_communicator_t *newcomp = NULL; + int rc; + + *newcomm = MPI_COMM_NULL; + + rc = ompi_comm_set_simple (&newcomp, errhandler, group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* Determine context id. It is identical to f_2_c_handle */ + rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, NULL, false, + OMPI_COMM_CID_GROUP_NEW); + if ( OMPI_SUCCESS != rc ) { + return rc; + } + + /* Set name for debugging purposes */ + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s FROM GROUP", + ompi_comm_print_cid (newcomp)); + + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (NULL == newcomp->super.s_info) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* activate communicator and init coll-module. use the group allreduce implementation as + * no collective module has yet been selected. the tag does not matter as any tag will + * be unique on the new communicator. */ + rc = ompi_comm_activate (&newcomp, newcomp, NULL, &(int) {0xfeed}, NULL, + false, OMPI_COMM_CID_GROUP); + if ( OMPI_SUCCESS != rc ) { + return rc; + } + + newcomp->instance = group->grp_instance; + + *newcomm = newcomp; + return MPI_SUCCESS; +} + +int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, ompi_communicator_t *bridge_comm, + int remote_leader, int tag, ompi_communicator_t **newintercomm) +{ + int local_size = 0, local_rank = 0, lleader = 0, rleader = 0, rc, rsize = 0; + struct ompi_proc_t **rprocs; + ompi_communicator_t *newcomp; + ompi_group_t *new_group_pointer; + + *newintercomm = MPI_COMM_NULL; + + local_size = ompi_comm_size ( local_comm ); + local_rank = ompi_comm_rank ( local_comm ); + lleader = local_leader; + rleader = remote_leader; + + if ( MPI_PARAM_CHECK ) { + if ( (0 > local_leader) || (local_leader >= local_size) ) { + return OMPI_ERR_BAD_PARAM; + } + + /* remember that the remote_leader and bridge_comm arguments + just have to be valid at the local_leader */ + if ( local_rank == local_leader ) { + if (ompi_comm_invalid (bridge_comm) || (bridge_comm->c_flags & OMPI_COMM_INTER)) { + return MPI_ERR_COMM; + } + + if ((remote_leader < 0) || (remote_leader >= ompi_comm_size(bridge_comm))) { + return OMPI_ERR_BAD_PARAM; + } + } /* if ( local_rank == local_leader ) */ + } + + if (local_rank == local_leader) { + MPI_Request req; + + /* local leader exchange group sizes lists */ + rc = MCA_PML_CALL(irecv (&rsize, 1, MPI_INT, rleader, tag, bridge_comm, &req)); + if ( rc != MPI_SUCCESS ) { + return rc; + } + rc = MCA_PML_CALL(send (&local_size, 1, MPI_INT, rleader, tag, + MCA_PML_BASE_SEND_STANDARD, bridge_comm)); + if ( rc != MPI_SUCCESS ) { + return rc; + } + rc = ompi_request_wait (&req, MPI_STATUS_IGNORE); + if ( rc != MPI_SUCCESS ) { + return rc; + } + } + + /* bcast size and list of remote processes to all processes in local_comm */ + rc = local_comm->c_coll->coll_bcast (&rsize, 1, MPI_INT, lleader, local_comm, + local_comm->c_coll->coll_bcast_module); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + rc = ompi_comm_get_rprocs (local_comm, bridge_comm, lleader, remote_leader, tag, rsize, &rprocs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + /* put group elements in the list */ + new_group_pointer = ompi_group_allocate_plist_w_procs (rprocs, rsize); + if (OPAL_UNLIKELY(NULL == new_group_pointer)) { + free (rprocs); + return MPI_ERR_GROUP; + } + + if (MPI_PARAM_CHECK) { + bool overlap = ompi_group_overlap (local_comm->c_local_group, new_group_pointer); + if (overlap && MPI_THREAD_MULTIPLE != ompi_mpi_thread_provided) { + ompi_group_free (&new_group_pointer); + return OMPI_ERR_BAD_PARAM; + } + } + + rc = ompi_comm_set (&newcomp, /* new comm */ + local_comm, /* old comm */ + local_comm->c_local_group->grp_proc_count, /* local_size */ + NULL, /* local_procs*/ + rsize, /* remote_size */ + NULL, /* remote_procs */ + NULL, /* attrs */ + local_comm->error_handler, /* error handler*/ + local_comm->c_local_group, /* local group */ + new_group_pointer, /* remote group */ + 0); /* flags */ + + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_group_free (&new_group_pointer); + return rc; + } + + /* Determine context id. It is identical to f_2_c_handle */ + rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader, + &rleader, false, OMPI_COMM_CID_INTRA_BRIDGE); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&newcomp); + return rc; + } + + /* activate comm and init coll-module */ + rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader, + false, OMPI_COMM_CID_INTRA_BRIDGE); + if ( MPI_SUCCESS != rc ) { + ompi_comm_free (&newcomp); + return rc; + } + + *newintercomm = newcomp; + + return OMPI_SUCCESS; +} + +int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_leader, + ompi_group_t *remote_group, int remote_leader, const char *tag, + opal_info_t *info, ompi_errhandler_t *errhandler, + ompi_communicator_t **newintercomm) +{ + ompi_communicator_t *newcomp = NULL, *local_comm, *leader_comm = MPI_COMM_NULL; + ompi_comm_extended_cid_block_t new_block; + bool i_am_leader = local_leader == local_group->grp_my_rank; + ompi_proc_t **rprocs; + uint64_t data[4]; + int leader_comm_remote_leader; + char *sub_tag = NULL; + size_t rsize; + int rc; + + *newintercomm = MPI_COMM_NULL; + + /* create a local communicator first. create a unique tag for this communicator */ + asprintf (&sub_tag, "%s-OMPIi-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (local_group, local_leader))); + if (OPAL_UNLIKELY(NULL == sub_tag)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + rc = ompi_comm_create_from_group (local_group, sub_tag, info, errhandler, &local_comm); + free (sub_tag); + sub_tag = NULL; + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + if (i_am_leader) { + /* create a bridge communicator for the leaders (so we can use the existing collectives + * for activation). there are probably more efficient ways to do this but for intercommunicator + * creation is not considered a performance critical operation. */ + ompi_proc_t **leader_procs, *my_proc; + ompi_group_t *leader_group; + + leader_procs = calloc (2, sizeof (*leader_procs)); + + my_proc = leader_procs[0] = ompi_group_get_proc_ptr (local_group, local_leader, true); + leader_procs[1] = ompi_group_get_proc_ptr (remote_group, remote_leader, true); + + if (leader_procs[0] != leader_procs[1]) { + /* NTH: they are definitely different (can the ever be the same) */ + if (leader_procs[0]->super.proc_name.jobid > leader_procs[1]->super.proc_name.jobid || + (leader_procs[0]->super.proc_name.jobid == leader_procs[1]->super.proc_name.jobid && + leader_procs[0]->super.proc_name.vpid > leader_procs[1]->super.proc_name.vpid)) { + ompi_proc_t *tmp = leader_procs[0]; + leader_procs[0] = leader_procs[1]; + leader_procs[1] = tmp; + } + + /* create a unique tag for allocating the leader communicator. we can eliminate this step + * if we take a CID from the newly allocated block belonging to local_comm. this is + * a note to make this change at a later time. */ + asprintf (&sub_tag, "%s-OMPIi-LC", tag); + if (OPAL_UNLIKELY(NULL == sub_tag)) { + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + leader_group = ompi_group_allocate_plist_w_procs (leader_procs, 2); + ompi_set_group_rank (leader_group, my_proc); + if (OPAL_UNLIKELY(NULL == leader_group)) { + free (sub_tag); + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* remote leader is whichever rank I am not */ + leader_comm_remote_leader = !(leader_group->grp_my_rank); + + rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm); + OBJ_RELEASE(leader_group); + free (sub_tag); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* grab a CID for the intercomm while we are at it */ + ompi_comm_extended_cid_block_new (&leader_comm->c_contextidb, &new_block, false); + + data[0] = remote_group->grp_proc_count; + /* store the relevant new_block data */ + data[1] = new_block.block_cid.cid_base; + data[2] = new_block.block_cid.cid_sub.u64; + data[3] = new_block.block_level; + } else { + free (leader_procs); + } + + rsize = remote_group->grp_proc_count; + } + + /* bcast size and list of remote processes to all processes in local_comm */ + rc = local_comm->c_coll->coll_bcast (data, 4, MPI_UINT64_T, local_leader, local_comm, + local_comm->c_coll->coll_bcast_module); + rsize = data[0]; + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* using 0 for the tag because we control both local_comm and leader_comm */ + rc = ompi_comm_get_rprocs (local_comm, leader_comm, local_leader, leader_comm_remote_leader, 0, rsize, &rprocs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + if (!i_am_leader) { + /* create a new group containing the remote processes for non-leader ranks */ + remote_group = ompi_group_allocate_plist_w_procs (rprocs, rsize); + if (OPAL_UNLIKELY(NULL == remote_group)) { + free (rprocs); + ompi_comm_free (&local_comm); + return OMPI_ERR_OUT_OF_RESOURCE; + } + } else { + OBJ_RETAIN(remote_group); + } + + rc = ompi_comm_set (&newcomp, local_comm, local_group->grp_proc_count, NULL, remote_group->grp_proc_count, + NULL, NULL, errhandler, local_group, remote_group, OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP); + OBJ_RELEASE(remote_group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&local_comm); + return rc; + } + + /* will be using a communicator ID derived from the bridge communicator to save some time */ + new_block.block_cid.cid_base = data[1]; + new_block.block_cid.cid_sub.u64 = data[2]; + new_block.block_nextsub = 0; + new_block.block_nexttag = 0; + new_block.block_level = (int8_t) data[3]; + + rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, &new_block, false, OMPI_COMM_CID_GROUP_NEW); + if ( OMPI_SUCCESS != rc ) { + OBJ_RELEASE(newcomp); + return rc; + } + + /* Set name for debugging purposes */ + snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI INTERCOMM %s FROM GROUP", ompi_comm_print_cid (newcomp)); + + // Copy info if there is one. + newcomp->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &(newcomp->super.s_info)); + } + + /* activate communicator and init coll-module */ + rc = ompi_comm_activate (&newcomp, local_comm, leader_comm, &local_leader, &leader_comm_remote_leader, + false, OMPI_COMM_CID_INTRA_BRIDGE); + if (MPI_COMM_NULL != leader_comm) { + ompi_comm_free (&leader_comm); + } + + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + ompi_comm_free (&newcomp); + return rc; + } + + *newintercomm = newcomp; + + return MPI_SUCCESS; +} + /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ @@ -1255,10 +1647,15 @@ int ompi_comm_compare(ompi_communicator_t *comm1, ompi_communicator_t *comm2, in int lresult, rresult=MPI_CONGRUENT; int cmp_result; + if (comm1->instance != comm2->instance) { + printf("Tried to compare two comms from different sessions\n"); + return OMPI_ERR_BAD_PARAM; + } + comp1 = (ompi_communicator_t *) comm1; comp2 = (ompi_communicator_t *) comm2; - if ( comp1->c_contextid == comp2->c_contextid ) { + if ( comp1->c_index == comp2->c_index ) { *result = MPI_IDENT; return MPI_SUCCESS; } @@ -1452,7 +1849,7 @@ static int ompi_comm_allgather_emulate_intra( void *inbuf, int incount, int ompi_comm_free( ompi_communicator_t **comm ) { int ret; - int cid = (*comm)->c_contextid; + int cid = (*comm)->c_index; int is_extra_retain = OMPI_COMM_IS_EXTRA_RETAIN(*comm); /* Release attributes. We do this now instead of during the @@ -1522,7 +1919,7 @@ int ompi_comm_free( ompi_communicator_t **comm ) * makes sure that the pointer to the dependent communicator * still contains a valid object. */ - ompi_communicator_t *tmpcomm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_mpi_communicators, cid); + ompi_communicator_t *tmpcomm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_comm_array, cid); if ( NULL != tmpcomm ){ ompi_comm_free(&tmpcomm); } @@ -1535,13 +1932,13 @@ int ompi_comm_free( ompi_communicator_t **comm ) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, - ompi_communicator_t *bridge_comm, - int local_leader, - int remote_leader, - int tag, - int rsize, - ompi_proc_t ***prprocs ) +/** + * This is a short-hand routine used in intercomm_create. + * The routine makes sure, that all processes have afterwards + * a list of ompi_proc_t pointers for the remote group. + */ +int ompi_comm_get_rprocs (ompi_communicator_t *local_comm, ompi_communicator_t *bridge_comm, + int local_leader, int remote_leader, int tag, int rsize, ompi_proc_t ***prprocs) { MPI_Request req; int rc = OMPI_SUCCESS; @@ -1731,31 +2128,6 @@ int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -/** - * This routine verifies, whether local_group and remote group are overlapping - * in intercomm_create - */ -int ompi_comm_overlapping_groups (int size, ompi_proc_t **lprocs, - int rsize, ompi_proc_t ** rprocs) - -{ - int rc=OMPI_SUCCESS; - int i,j; - - for (i=0; ic_contextid); + opal_output(0, "Dumping information for comm_cid %s\n", ompi_comm_print_cid (comm)); opal_output(0," f2c index:%d cube_dim: %d\n", comm->c_f_to_c_index, comm->c_cube_dim); opal_output(0," Local group: size = %d my_rank = %d\n", @@ -2012,8 +2384,8 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, /* there is no cid at this stage ... make this right and make edgars * code call this function and remove dupli cde */ - snprintf (comm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMMUNICATOR %d", - comm->c_contextid); + snprintf (comm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMMUNICATOR %s", + ompi_comm_print_cid (comm)); /* determine the cube dimensions */ comm->c_cube_dim = opal_cube_dim(comm->c_local_group->grp_proc_count); @@ -2032,3 +2404,28 @@ static int ompi_comm_copy_topo(ompi_communicator_t *oldcomm, newcomm->c_flags |= newcomm->c_topo->type; return OMPI_SUCCESS; } + +char *ompi_comm_print_cid (const ompi_communicator_t *comm) +{ +#if OPAL_HAVE_THREAD_LOCAL + static opal_thread_local char cid_buffer[2][20]; + static opal_thread_local int cid_buffer_index = 0; +#else + /* no thread local == you get what you get. upgrade your compiler */ + static char cid_buffer[2][20]; + static int cid_buffer_index = 0; +#endif + int bindex = cid_buffer_index; + + if (mca_pml_base_supports_extended_cid () && !OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + snprintf (cid_buffer[bindex], sizeof (cid_buffer[0]), "0x%" PRIx64 "%08" PRIx64, + comm->c_contextid.cid_base, + comm->c_contextid.cid_sub.u64); + } else { + snprintf (cid_buffer[bindex], sizeof (cid_buffer[0]), "%d", comm->c_index); + } + + cid_buffer_index = cid_buffer_index ? 0 : 1; + + return cid_buffer[bindex]; +} diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 9015f26bbeb..c1d482cb16c 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -24,6 +24,8 @@ * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,9 +33,11 @@ * $HEADER$ */ + #include "ompi_config.h" #include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/pmix-internal.h" #include "opal/util/printf.h" #include "ompi/proc/proc.h" @@ -47,6 +51,15 @@ #include "ompi/mca/coll/base/base.h" #include "ompi/request/request.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/ompi_rte.h" + +#include "pmix.h" + +/* for use when we don't have a PMIx that supports CID generation */ +opal_atomic_int64_t ompi_comm_next_base_cid = 1; + +/* A macro comparing two CIDs */ +#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_index < (comm2)->c_index)? 1:0) struct ompi_comm_cid_context_t; @@ -216,6 +229,7 @@ static ompi_comm_cid_context_t *mca_comm_cid_context_alloc (ompi_communicator_t context->allreduce_fn = ompi_comm_allreduce_inter_nb; break; case OMPI_COMM_CID_GROUP: + case OMPI_COMM_CID_GROUP_NEW: context->allreduce_fn = ompi_comm_allreduce_group_nb; context->pml_tag = ((int *) arg0)[0]; break; @@ -287,12 +301,137 @@ static volatile int64_t ompi_comm_cid_lowest_id = INT64_MAX; static int ompi_comm_cid_epoch = INT_MAX; #endif /* OPAL_ENABLE_FT_MPI */ +static int ompi_comm_ext_cid_new_block (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_comm_extended_cid_block_t *new_block, + const void *arg0, const void *arg1, bool send_first, int mode, + ompi_request_t **req) +{ + pmix_info_t pinfo, *results = NULL; + size_t nresults; + opal_process_name_t *name_array; + char *tag = NULL; + size_t proc_count, cid_base = 0UL; + int rc, leader_rank; + pmix_proc_t *procs; + + rc = ompi_group_to_proc_name_array (newcomm->c_local_group, &name_array, &proc_count); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + switch (mode) { + case OMPI_COMM_CID_GROUP_NEW: + tag = (char *) arg0; + break; + case OMPI_COMM_CID_GROUP: + ompi_group_translate_ranks (newcomm->c_local_group, 1, &(int){0}, + comm->c_local_group, &leader_rank); + + tag = ompi_comm_extended_cid_get_unique_tag (&comm->c_contextidb, *((int *) arg0), leader_rank); + break; + case OMPI_COMM_CID_INTRA: + tag = ompi_comm_extended_cid_get_unique_tag (&comm->c_contextidb, -1, 0); + break; + } + + PMIX_INFO_LOAD(&pinfo, PMIX_GROUP_ASSIGN_CONTEXT_ID, NULL, PMIX_BOOL); + + PMIX_PROC_CREATE(procs, proc_count); + for (size_t i = 0 ; i < proc_count; ++i) { + OPAL_PMIX_CONVERT_NAME(&procs[i],&name_array[i]); + } + + rc = PMIx_Group_construct(tag, procs, proc_count, &pinfo, 1, &results, &nresults); + PMIX_INFO_DESTRUCT(&pinfo); + + if (NULL != results) { + PMIX_VALUE_GET_NUMBER(rc, &results[0].value, cid_base, size_t); + PMIX_INFO_FREE(results, nresults); + } + + PMIX_PROC_FREE(procs, proc_count); + free (name_array); + + rc = PMIx_Group_destruct (tag, NULL, 0); + + ompi_comm_extended_cid_block_initialize (new_block, cid_base, 0, 0); + + return OMPI_SUCCESS; +} + +static int ompi_comm_nextcid_ext_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, + ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, + bool send_first, int mode, ompi_request_t **req) +{ + ompi_comm_extended_cid_block_t *block; + bool is_new_block = false; + int rc; + + if (OMPI_COMM_CID_GROUP == mode || OMPI_COMM_CID_GROUP_NEW == mode) { + /* new block belongs to the new communicator */ + block = &newcomm->c_contextidb; + } else { + block = &comm->c_contextidb; + } + + if (NULL == arg1) { + if (OMPI_COMM_CID_GROUP == mode || OMPI_COMM_CID_GROUP_NEW == mode || + !ompi_comm_extended_cid_block_available (&comm->c_contextidb)) { + /* need a new block. it will be either assigned the the new communicator (MPI_Comm_create*_group) + * or the parent (which has no more CIDs in its block) */ + rc = ompi_comm_ext_cid_new_block (newcomm, comm, block, arg0, arg1, send_first, mode, req); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + return rc; + } + + is_new_block = true; + } + } else { + /* got a block already */ + *block = *((ompi_comm_extended_cid_block_t *) arg1); + is_new_block = true; + } + + if (block != &newcomm->c_contextidb) { + (void) ompi_comm_extended_cid_block_new (block, &newcomm->c_contextidb, is_new_block); + } + + for (unsigned int i = ompi_comm_array.lowest_free ; i < mca_pml.pml_max_contextid ; ++i) { + bool flag = opal_pointer_array_test_and_set_item (&ompi_comm_array, i, newcomm); + if (true == flag) { + newcomm->c_index = i; + break; + } + } + + newcomm->c_contextid = newcomm->c_contextidb.block_cid; + + opal_hash_table_set_value_ptr (&ompi_comm_hash, &newcomm->c_contextid, + sizeof (newcomm->c_contextid), (void *) newcomm); + *req = &ompi_request_empty; + /* nothing more to do here */ + return OMPI_SUCCESS; +} + int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm, ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req) { ompi_comm_cid_context_t *context; ompi_comm_request_t *request; + if (mca_pml_base_supports_extended_cid() && OMPI_COMM_CID_INTER != mode && + OMPI_COMM_CID_INTRA_BRIDGE != mode && OMPI_COMM_CID_INTRA_PMIX != mode && + OMPI_COMM_CID_INTRA_FT != mode && OMPI_COMM_CID_INTER_FT != mode && + OMPI_COMM_CID_INTRA_PMIX_FT != mode) { + return ompi_comm_nextcid_ext_nb (newcomm, comm, bridgecomm, arg0, arg1, send_first, mode, req); + } + + /* old CID algorighm */ + + /* need to add support for MPI_Comm_create_from_group for these configurations */ + assert (NULL != comm); + + newcomm->c_flags |= OMPI_COMM_GLOBAL_INDEX; context = mca_comm_cid_context_alloc (newcomm, comm, bridgecomm, arg0, arg1, "nextcid", send_first, mode); @@ -300,7 +439,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com return OMPI_ERR_OUT_OF_RESOURCE; } - context->start = ompi_mpi_communicators.lowest_free; + context->start = ompi_comm_array.lowest_free; request = ompi_comm_request_get (); if (NULL == request) { @@ -332,9 +471,11 @@ int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, return rc; } - ompi_request_wait_completion (req); - rc = req->req_status.MPI_ERROR; - ompi_comm_request_return ((ompi_comm_request_t *) req); + if (&ompi_request_empty != req) { + ompi_request_wait_completion (req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + } return rc; } @@ -342,7 +483,7 @@ int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm, static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) { ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; - int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag); + int64_t my_id = ((int64_t) ompi_comm_get_local_cid (context->comm) << 32 | context->pml_tag); ompi_request_t *subreq; bool flag = false; int ret = OMPI_SUCCESS; @@ -366,7 +507,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) flag = false; context->nextlocal_cid = mca_pml.pml_max_contextid; for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { - flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, + flag = opal_pointer_array_test_and_set_item (&ompi_comm_array, i, context->comm); if (true == flag) { context->nextlocal_cid = i; @@ -407,7 +548,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request) return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1); err_exit: if (participate && flag) { - opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + opal_pointer_array_test_and_set_item(&ompi_comm_array, context->nextlocal_cid, NULL); } ompi_comm_cid_lowest_id = INT64_MAX; OPAL_THREAD_UNLOCK(&ompi_cid_lock); @@ -424,7 +565,7 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request) if (OMPI_SUCCESS != request->super.req_status.MPI_ERROR) { if (participate) { - opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + opal_pointer_array_set_item(&ompi_comm_array, context->nextlocal_cid, NULL); } return request->super.req_status.MPI_ERROR; } @@ -438,9 +579,9 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request) } else { context->flag = (context->nextcid == context->nextlocal_cid); if ( participate && !context->flag) { - opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + opal_pointer_array_set_item(&ompi_comm_array, context->nextlocal_cid, NULL); - context->flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, + context->flag = opal_pointer_array_test_and_set_item (&ompi_comm_array, context->nextcid, context->comm); } } @@ -458,7 +599,7 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request) ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1); } else { if (participate && context->flag ) { - opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL); + opal_pointer_array_test_and_set_item(&ompi_comm_array, context->nextlocal_cid, NULL); } ompi_comm_cid_lowest_id = INT64_MAX; } @@ -474,7 +615,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) if (OMPI_SUCCESS != request->super.req_status.MPI_ERROR) { if (participate) { - opal_pointer_array_set_item(&ompi_mpi_communicators, context->nextcid, NULL); + opal_pointer_array_set_item(&ompi_comm_array, context->nextcid, NULL); } return request->super.req_status.MPI_ERROR; } @@ -492,7 +633,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) context->nextlocal_cid = mca_pml.pml_max_contextid; for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) { bool flag; - flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i, + flag = opal_pointer_array_test_and_set_item (&ompi_comm_array, i, context->comm); if (true == flag) { context->nextlocal_cid = i; @@ -503,12 +644,19 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) } /* set the according values to the newcomm */ - context->newcomm->c_contextid = context->nextcid; #if OPAL_ENABLE_FT_MPI context->newcomm->c_epoch = INT_MAX - context->rflag; /* reorder for simpler debugging */ ompi_comm_cid_epoch -= 1; /* protected by the cid_lock */ #endif /* OPAL_ENABLE_FT_MPI */ - opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm); + context->newcomm->c_index = context->nextcid; + if (OMPI_COMM_IS_INTRA(context->newcomm)) { + context->newcomm->c_index_vec[context->newcomm->c_my_rank] = context->newcomm->c_index; + } + /* to simplify coding always set the global CID even if it isn't used by the + * active PML */ + context->newcomm->c_contextid.cid_base = 0; + context->newcomm->c_contextid.cid_sub.u64 = context->nextcid; + opal_pointer_array_set_item (&ompi_comm_array, context->nextcid, context->newcomm); /* unlock the cid generator */ ompi_comm_cid_lowest_id = INT64_MAX; @@ -520,7 +668,7 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) if (participate && (0 != context->flag)) { /* we could use this cid, but other don't agree */ - opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, NULL); + opal_pointer_array_set_item (&ompi_comm_array, context->nextcid, NULL); context->start = context->nextcid + 1; /* that's where we can start the next round */ } @@ -553,6 +701,74 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request) /* Non-blocking version of ompi_comm_activate */ static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request); +static int ompi_comm_activate_complete (ompi_communicator_t **newcomm, ompi_communicator_t *comm) +{ + int ret; + + /** + * Check to see if this process is in the new communicator. + * + * Specifically, this function is invoked by all proceses in the + * old communicator, regardless of whether they are in the new + * communicator or not. This is because it is far simpler to use + * MPI collective functions on the old communicator to determine + * some data for the new communicator (e.g., remote_leader) than + * to kludge up our own pseudo-collective routines over just the + * processes in the new communicator. Hence, *all* processes in + * the old communicator need to invoke this function. + * + * That being said, only processes in the new communicator need to + * select a coll module for the new communicator. More + * specifically, proceses who are not in the new communicator + * should *not* select a coll module -- for example, + * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who + * are not in the new communicator. This can cause errors in the + * selection / initialization of a coll module. Plus, it's + * wasteful -- processes in the new communicator will end up + * freeing the new communicator anyway, so we might as well leave + * the coll selection as NULL (the coll base comm unselect code + * handles that case properly). + */ + if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) { + return OMPI_SUCCESS; + } + + /* Let the collectives components fight over who will do + collective on this new comm. */ + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) { + OBJ_RELEASE(*newcomm); + *newcomm = MPI_COMM_NULL; + return ret; + } + + /* For an inter communicator, we have to deal with the potential + * problem of what is happening if the local_comm that we created + * has a lower CID than the parent comm. This is not a problem + * as long as the user calls MPI_Comm_free on the inter communicator. + * However, if the communicators are not freed by the user but released + * by Open MPI in MPI_Finalize, we walk through the list of still available + * communicators and free them one by one. Thus, local_comm is freed before + * the actual inter-communicator. However, the local_comm pointer in the + * inter communicator will still contain the 'previous' address of the local_comm + * and thus this will lead to a segmentation violation. In order to prevent + * that from happening, we increase the reference counter local_comm + * by one if its CID is lower than the parent. We cannot increase however + * its reference counter if the CID of local_comm is larger than + * the CID of the inter communicators, since a regular MPI_Comm_free would + * leave in that the case the local_comm hanging around and thus we would not + * recycle CID's properly, which was the reason and the cause for this trouble. + */ + if (OMPI_COMM_IS_INTER(*newcomm)) { + if (OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) { + OMPI_COMM_SET_EXTRA_RETAIN (*newcomm); + OBJ_RETAIN (*newcomm); + } + } + + /* done */ + return OMPI_SUCCESS; +} + int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *comm, ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req) @@ -562,6 +778,8 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c ompi_request_t *subreq; int ret = 0; + /* the caller should not pass NULL for comm (it may be the same as *newcomm) */ + assert (NULL != comm); context = mca_comm_cid_context_alloc (*newcomm, comm, bridgecomm, arg0, arg1, "activate", send_first, mode); if (NULL == context) { @@ -605,7 +823,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c *req = &request->super; - return OMPI_SUCCESS; + return ret; } int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm, @@ -620,9 +838,11 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm return rc; } - ompi_request_wait_completion (req); - rc = req->req_status.MPI_ERROR; - ompi_comm_request_return ((ompi_comm_request_t *) req); + if (&ompi_request_empty != req) { + ompi_request_wait_completion (req); + rc = req->req_status.MPI_ERROR; + ompi_comm_request_return ((ompi_comm_request_t *) req); + } return rc; } @@ -630,70 +850,7 @@ int ompi_comm_activate (ompi_communicator_t **newcomm, ompi_communicator_t *comm static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request) { ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context; - int ret; - - /** - * Check to see if this process is in the new communicator. - * - * Specifically, this function is invoked by all proceses in the - * old communicator, regardless of whether they are in the new - * communicator or not. This is because it is far simpler to use - * MPI collective functions on the old communicator to determine - * some data for the new communicator (e.g., remote_leader) than - * to kludge up our own pseudo-collective routines over just the - * processes in the new communicator. Hence, *all* processes in - * the old communicator need to invoke this function. - * - * That being said, only processes in the new communicator need to - * select a coll module for the new communicator. More - * specifically, proceses who are not in the new communicator - * should *not* select a coll module -- for example, - * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who - * are not in the new communicator. This can cause errors in the - * selection / initialization of a coll module. Plus, it's - * wasteful -- processes in the new communicator will end up - * freeing the new communicator anyway, so we might as well leave - * the coll selection as NULL (the coll base comm unselect code - * handles that case properly). - */ - if (MPI_UNDEFINED == (context->newcomm)->c_local_group->grp_my_rank) { - return OMPI_SUCCESS; - } - - /* Let the collectives components fight over who will do - collective on this new comm. */ - if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(context->newcomm))) { - OBJ_RELEASE(context->newcomm); - *context->newcommp = MPI_COMM_NULL; - return ret; - } - - /* For an inter communicator, we have to deal with the potential - * problem of what is happening if the local_comm that we created - * has a lower CID than the parent comm. This is not a problem - * as long as the user calls MPI_Comm_free on the inter communicator. - * However, if the communicators are not freed by the user but released - * by Open MPI in MPI_Finalize, we walk through the list of still available - * communicators and free them one by one. Thus, local_comm is freed before - * the actual inter-communicator. However, the local_comm pointer in the - * inter communicator will still contain the 'previous' address of the local_comm - * and thus this will lead to a segmentation violation. In order to prevent - * that from happening, we increase the reference counter local_comm - * by one if its CID is lower than the parent. We cannot increase however - * its reference counter if the CID of local_comm is larger than - * the CID of the inter communicators, since a regular MPI_Comm_free would - * leave in that the case the local_comm hanging around and thus we would not - * recycle CID's properly, which was the reason and the cause for this trouble. - */ - if (OMPI_COMM_IS_INTER(context->newcomm)) { - if (OMPI_COMM_CID_IS_LOWER(context->newcomm, context->comm)) { - OMPI_COMM_SET_EXTRA_RETAIN (context->newcomm); - OBJ_RETAIN (context->newcomm); - } - } - - /* done */ - return OMPI_SUCCESS; + return ompi_comm_activate_complete (context->newcommp, context->comm); } /**************************************************************************/ diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index bdd3499a801..df46b9cdd99 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -23,6 +23,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,6 +50,7 @@ #include "ompi/attribute/attribute.h" #include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" +#include "ompi/instance/instance.h" /* ** Table for Fortran <-> C communicator handle conversion @@ -55,14 +58,17 @@ ** on cid. ** */ -opal_pointer_array_t ompi_mpi_communicators = {{0}}; +opal_pointer_array_t ompi_comm_array = {{0}}; opal_pointer_array_t ompi_comm_f_to_c_table = {{0}}; +opal_hash_table_t ompi_comm_hash = {{0}}; ompi_predefined_communicator_t ompi_mpi_comm_world = {{{{0}}}}; ompi_predefined_communicator_t ompi_mpi_comm_self = {{{{0}}}}; ompi_predefined_communicator_t ompi_mpi_comm_null = {{{{0}}}}; ompi_communicator_t *ompi_mpi_comm_parent = NULL; +static bool ompi_comm_intrinsic_init; + ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = &ompi_mpi_comm_world; ompi_predefined_communicator_t *ompi_mpi_comm_self_addr = @@ -82,70 +88,135 @@ OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_infosubscriber_t, shortcut for finalize and abort. */ int ompi_comm_num_dyncomm=0; +static int ompi_comm_finalize (void); + /* * Initialize comm world/self/null/parent. */ int ompi_comm_init(void) { - ompi_group_t *group; - size_t size; - /* Setup communicator array */ - OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 16, + OBJ_CONSTRUCT(&ompi_comm_array, opal_pointer_array_t); + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_array, 16, OMPI_FORTRAN_HANDLE_MAX, 64) ) { return OMPI_ERROR; } + OBJ_CONSTRUCT(&ompi_comm_hash, opal_hash_table_t); + if (OPAL_SUCCESS != opal_hash_table_init (&ompi_comm_hash, 1024)) { + return OMPI_ERROR; + } + /* Setup f to c table (we can no longer use the cid as the fortran handle) */ OBJ_CONSTRUCT(&ompi_comm_f_to_c_table, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_f_to_c_table, 8, - OMPI_FORTRAN_HANDLE_MAX, 32) ) { + if( OPAL_SUCCESS != opal_pointer_array_init (&ompi_comm_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 32) ) { + return OMPI_ERROR; + } + + /* + * reserve indices in the F to C table for: + * MPI_COMM_WORLD + * MPI_COMM_SELF + * MPI_COMM_NULL + */ + + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 0, + (void *)-1L)) { return OMPI_ERROR; } + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 1, + (void *)-1L)) { + return OMPI_ERROR; + } + + if (OPAL_SUCCESS != opal_pointer_array_set_item(&ompi_comm_f_to_c_table, + 2, + (void *)-1L)) { + return OMPI_ERROR; + } + + /* Setup MPI_COMM_NULL */ + OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t); + assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2); + ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; + ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; + OBJ_RETAIN(&ompi_mpi_group_null.group); + OBJ_RETAIN(&ompi_mpi_group_null.group); + + (void) ompi_comm_extended_cid_block_new (&ompi_mpi_comm_world.comm.c_contextidb, + &ompi_mpi_comm_null.comm.c_contextidb, false); + ompi_mpi_comm_null.comm.c_contextid = ompi_mpi_comm_null.comm.c_contextidb.block_cid; + ompi_mpi_comm_null.comm.c_index = 2; + ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL; + + ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; + OBJ_RETAIN( &ompi_mpi_errors_are_fatal.eh ); + opal_pointer_array_set_item (&ompi_comm_array, 2, &ompi_mpi_comm_null); + + opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", + sizeof(ompi_mpi_comm_null.comm.c_name)); + ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; + + /* Initialize the parent communicator to MPI_COMM_NULL */ + ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; + OBJ_RETAIN(&ompi_mpi_comm_null); + OBJ_RETAIN(&ompi_mpi_group_null.group); + + /* initialize communicator requests (for ompi_comm_idup) */ + ompi_comm_request_init (); + + /* get a reference on the attributes subsys */ + ompi_attr_get_ref(); + + ompi_mpi_instance_append_finalize (ompi_comm_finalize); + + return OMPI_SUCCESS; +} + +int ompi_comm_init_mpi3 (void) +{ + ompi_group_t *group; + int ret; + + /* the intrinsic communicators have been initialized */ + ompi_comm_intrinsic_init = true; + /* Setup MPI_COMM_WORLD */ OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t); assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0); - group = OBJ_NEW(ompi_group_t); - - size = ompi_process_info.num_procs; - group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *)); - group->grp_proc_count = size; - - for (size_t i = 0 ; i < size ; ++i) { - opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; - /* look for existing ompi_proc_t that matches this name */ - group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name); - if (NULL == group->grp_proc_pointers[i]) { - /* set sentinel value */ - group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); - } else { - OBJ_RETAIN (group->grp_proc_pointers[i]); - } + + ret = ompi_group_from_pset (ompi_mpi_instance_default, "mpi://WORLD", &group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; } OMPI_GROUP_SET_INTRINSIC (group); - OMPI_GROUP_SET_DENSE (group); - ompi_set_group_rank(group, ompi_proc_local()); - - ompi_mpi_comm_world.comm.c_contextid = 0; - ompi_mpi_comm_world.comm.c_id_start_index = 4; - ompi_mpi_comm_world.comm.c_id_available = 4; + ompi_comm_extended_cid_block_initialize (&ompi_mpi_comm_world.comm.c_contextidb, 0, 0, 0); + ompi_mpi_comm_world.comm.c_contextid = ompi_mpi_comm_world.comm.c_contextidb.block_cid; + ompi_mpi_comm_world.comm.c_index = 0; ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank; + ompi_mpi_comm_world.comm.c_index_vec = malloc(group->grp_proc_count * sizeof(int)); + for (int i = 0; i < group->grp_proc_count; i++) { + ompi_mpi_comm_world.comm.c_index_vec[i] = 0; + } ompi_mpi_comm_world.comm.c_local_group = group; ompi_mpi_comm_world.comm.c_remote_group = group; OBJ_RETAIN(ompi_mpi_comm_world.comm.c_remote_group); - ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int)size); + ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim ((int) group->grp_proc_count); ompi_mpi_comm_world.comm.error_handler = ompi_initial_error_handler_eh; OBJ_RETAIN( ompi_mpi_comm_world.comm.error_handler ); OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm); - opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world); + opal_pointer_array_set_item (&ompi_comm_array, 0, &ompi_mpi_comm_world); opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD", sizeof(ompi_mpi_comm_world.comm.c_name)); - ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_INTRINSIC; + ompi_mpi_comm_world.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; /* We have to create a hash (although it is legal to leave this filed NULL -- the attribute accessor functions will intepret @@ -176,68 +247,48 @@ int ompi_comm_init(void) /* Setup MPI_COMM_SELF */ OBJ_CONSTRUCT(&ompi_mpi_comm_self, ompi_communicator_t); assert(ompi_mpi_comm_self.comm.c_f_to_c_index == 1); - group = OBJ_NEW(ompi_group_t); - group->grp_proc_pointers = ompi_proc_self(&size); - group->grp_my_rank = 0; - group->grp_proc_count = (int)size; + + ret = ompi_group_from_pset (ompi_mpi_instance_default, "mpi://SELF", &group); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + OMPI_GROUP_SET_INTRINSIC (group); - OMPI_GROUP_SET_DENSE (group); - ompi_mpi_comm_self.comm.c_contextid = 1; - ompi_mpi_comm_self.comm.c_id_start_index = 20; - ompi_mpi_comm_self.comm.c_id_available = 20; + (void) ompi_comm_extended_cid_block_new (&ompi_mpi_comm_world.comm.c_contextidb, + &ompi_mpi_comm_self.comm.c_contextidb, false); + ompi_mpi_comm_self.comm.c_contextid = ompi_mpi_comm_self.comm.c_contextidb.block_cid; + ompi_mpi_comm_self.comm.c_index = 1; ompi_mpi_comm_self.comm.c_my_rank = group->grp_my_rank; + ompi_mpi_comm_self.comm.c_index_vec = malloc(1 * sizeof(int)); + ompi_mpi_comm_self.comm.c_index_vec[0] = 1; ompi_mpi_comm_self.comm.c_local_group = group; ompi_mpi_comm_self.comm.c_remote_group = group; OBJ_RETAIN(ompi_mpi_comm_self.comm.c_remote_group); ompi_mpi_comm_self.comm.error_handler = ompi_initial_error_handler_eh; OBJ_RETAIN( ompi_mpi_comm_self.comm.error_handler ); OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm); - opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self); + opal_pointer_array_set_item (&ompi_comm_array, 1, &ompi_mpi_comm_self); opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF", sizeof(ompi_mpi_comm_self.comm.c_name)); - ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_INTRINSIC; + ompi_mpi_comm_self.comm.c_flags |= OMPI_COMM_NAMEISSET | OMPI_COMM_INTRINSIC | + OMPI_COMM_GLOBAL_INDEX; /* We can set MPI_COMM_SELF's keyhash to NULL because it has no predefined attributes. If a user defines an attribute on MPI_COMM_SELF, the keyhash will automatically be created. */ ompi_mpi_comm_self.comm.c_keyhash = NULL; - /* Setup MPI_COMM_NULL */ - OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t); - assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2); - ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group; - ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group; - OBJ_RETAIN(&ompi_mpi_group_null.group); - OBJ_RETAIN(&ompi_mpi_group_null.group); - - ompi_mpi_comm_null.comm.c_contextid = 2; - ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL; - - /* unlike world, self, and parent, comm_null does not inherit the initial error - * handler */ - ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh; - OBJ_RETAIN( ompi_mpi_comm_null.comm.error_handler ); - opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null); - - opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL", - sizeof(ompi_mpi_comm_null.comm.c_name)); - ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_NAMEISSET; - ompi_mpi_comm_null.comm.c_flags |= OMPI_COMM_INTRINSIC; + /* + * finally here we set the predefined attribute keyvals + */ + ompi_attr_create_predefined(); - /* Initialize the parent communicator to MPI_COMM_NULL */ - ompi_mpi_comm_parent = &ompi_mpi_comm_null.comm; - OBJ_RETAIN(&ompi_mpi_comm_null); - OBJ_RETAIN(&ompi_mpi_group_null.group); OBJ_RETAIN(&ompi_mpi_errors_are_fatal.eh); /* During dyn_init, the comm_parent error handler will be set to the same * as comm_world (thus, the initial error handler). */ - /* initialize communicator requests (for ompi_comm_idup) */ - ompi_comm_request_init (); - return OMPI_SUCCESS; } @@ -264,32 +315,37 @@ ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size ) /* fill in the inscribing hyper-cube dimensions */ new_comm->c_cube_dim = opal_cube_dim(local_size); + new_comm->c_index_vec = malloc(new_comm->c_local_group->grp_proc_count * sizeof(int)); + printf("proc_count: %d\n", new_comm->c_local_group->grp_proc_count); + fflush(stdout); return new_comm; } -int ompi_comm_finalize(void) +static int ompi_comm_finalize (void) { - int max, i; + int max, i, ret = OMPI_SUCCESS; ompi_communicator_t *comm; - /* Shut down MPI_COMM_SELF */ - OBJ_DESTRUCT( &ompi_mpi_comm_self ); - /* disconnect all dynamic communicators */ ompi_dpm_dyn_finalize(); - /* Free the attributes on comm world. This is not done in the - * destructor as we delete attributes in ompi_comm_free (which - * is not called for comm world) */ - if (NULL != ompi_mpi_comm_world.comm.c_keyhash) { - /* Ignore errors when deleting attributes on comm_world */ - (void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm, ompi_mpi_comm_world.comm.c_keyhash); - OBJ_RELEASE(ompi_mpi_comm_world.comm.c_keyhash); - } + if (ompi_comm_intrinsic_init) { + /* tear down MPI-3 predefined communicators (not initialized unless using MPI_Init) */ + /* Free the attributes on comm world. This is not done in the + * destructor as we delete attributes in ompi_comm_free (which + * is not called for comm world) */ + if (NULL != ompi_mpi_comm_world.comm.c_keyhash) { + /* Ignore errors when deleting attributes on comm_world */ + (void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm, ompi_mpi_comm_world.comm.c_keyhash); + OBJ_RELEASE(ompi_mpi_comm_world.comm.c_keyhash); + } - /* Shut down MPI_COMM_WORLD */ - OBJ_DESTRUCT( &ompi_mpi_comm_world ); + /* Shut down MPI_COMM_SELF */ + OBJ_DESTRUCT( &ompi_mpi_comm_self ); + /* Shut down MPI_COMM_WORLD */ + OBJ_DESTRUCT( &ompi_mpi_comm_world ); + } /* Shut down the parent communicator, if it exists */ if( ompi_mpi_comm_parent != &ompi_mpi_comm_null.comm ) { @@ -324,13 +380,13 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT( &ompi_mpi_comm_null ); /* Check whether we have some communicators left */ - max = opal_pointer_array_get_size(&ompi_mpi_communicators); + max = opal_pointer_array_get_size(&ompi_comm_array); for ( i=3; ic_f_to_c_index = opal_pointer_array_add(&ompi_comm_f_to_c_table, comm); + int idx; comm->c_name[0] = '\0'; - comm->c_contextid = MPI_UNDEFINED; - comm->c_id_available = MPI_UNDEFINED; - comm->c_id_start_index = MPI_UNDEFINED; + comm->c_index = MPI_UNDEFINED; comm->c_flags = 0; comm->c_my_rank = 0; comm->c_cube_dim = 0; + comm->c_index_vec = NULL; comm->c_local_group = NULL; comm->c_remote_group = NULL; comm->error_handler = NULL; @@ -385,6 +444,21 @@ static void ompi_comm_construct(ompi_communicator_t* comm) comm->c_topo = NULL; comm->c_coll = NULL; comm->c_nbc_tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE; + comm->instance = NULL; + + /* + * magic numerology - see TOPDIR/ompi/include/mpif-values.pl + */ + idx = (comm == (ompi_communicator_t*)ompi_mpi_comm_world_addr) ? 0 : + (comm == (ompi_communicator_t*)ompi_mpi_comm_self_addr) ? 1 : + (comm == (ompi_communicator_t*)ompi_mpi_comm_null_addr) ? 2 : -1; + if (-1 == idx) { + comm->c_f_to_c_index = opal_pointer_array_add(&ompi_comm_f_to_c_table, + comm); + } else { + opal_pointer_array_set_item(&ompi_comm_f_to_c_table, idx, comm); + comm->c_f_to_c_index = idx; + } /* A keyhash will be created if/when an attribute is cached on this communicator */ @@ -473,11 +547,15 @@ static void ompi_comm_destruct(ompi_communicator_t* comm) #endif /* OPAL_ENABLE_FT_MPI */ /* mark this cid as available */ - if ( MPI_UNDEFINED != (int)comm->c_contextid && - NULL != opal_pointer_array_get_item(&ompi_mpi_communicators, - comm->c_contextid)) { - opal_pointer_array_set_item ( &ompi_mpi_communicators, - comm->c_contextid, NULL); + if ( MPI_UNDEFINED != (int)comm->c_index && + NULL != opal_pointer_array_get_item(&ompi_comm_array, + comm->c_index)) { + opal_pointer_array_set_item ( &ompi_comm_array, + comm->c_index, NULL); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + opal_hash_table_remove_value_ptr (&ompi_comm_hash, &comm->c_contextid, + sizeof (comm->c_contextid)); + } } /* reset the ompi_comm_f_to_c_table entry */ @@ -509,6 +587,8 @@ OMPI_COMM_SET_INFO_FN(no_any_source, OMPI_COMM_ASSERT_NO_ANY_SOURCE) OMPI_COMM_SET_INFO_FN(no_any_tag, OMPI_COMM_ASSERT_NO_ANY_TAG) OMPI_COMM_SET_INFO_FN(allow_overtake, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) OMPI_COMM_SET_INFO_FN(exact_length, OMPI_COMM_ASSERT_EXACT_LENGTH) +OMPI_COMM_SET_INFO_FN(lazy_barrier, OMPI_COMM_ASSERT_LAZY_BARRIER) +OMPI_COMM_SET_INFO_FN(active_poll, OMPI_COMM_ASSERT_ACTIVE_POLL) void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag) { @@ -525,5 +605,11 @@ void ompi_comm_assert_subscribe (ompi_communicator_t *comm, int32_t assert_flag) case OMPI_COMM_ASSERT_EXACT_LENGTH: opal_infosubscribe_subscribe (&comm->super, "mpi_assert_exact_length", "false", ompi_comm_set_exact_length); break; + case OMPI_COMM_ASSERT_LAZY_BARRIER: + opal_infosubscribe_subscribe (&comm->super, "ompi_assert_lazy_barrier", "false", ompi_comm_set_lazy_barrier); + break; + case OMPI_COMM_ASSERT_ACTIVE_POLL: + opal_infosubscribe_subscribe (&comm->super, "ompi_assert_active_poll", "true", ompi_comm_set_active_poll); + break; } } diff --git a/ompi/communicator/comm_request.c b/ompi/communicator/comm_request.c index e1092deb400..d89c77097c7 100644 --- a/ompi/communicator/comm_request.c +++ b/ompi/communicator/comm_request.c @@ -269,6 +269,10 @@ ompi_comm_request_t *ompi_comm_request_get (void) void ompi_comm_request_return (ompi_comm_request_t *request) { + if ((void *) &ompi_request_empty == (void *) request) { + return; + } + if (request->context) { OBJ_RELEASE (request->context); request->context = NULL; diff --git a/ompi/communicator/communicator.h b/ompi/communicator/communicator.h index d1e82a7dcfd..2386539bdeb 100644 --- a/ompi/communicator/communicator.h +++ b/ompi/communicator/communicator.h @@ -16,12 +16,14 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Inria. All rights reserved. * Copyright (c) 2011-2013 Universite Bordeaux 1 - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,6 +49,8 @@ #include "ompi/info/info.h" #include "ompi/proc/proc.h" +#include "opal/util/printf.h" + BEGIN_C_DECLS OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); @@ -63,6 +67,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_PML_ADDED 0x00001000 #define OMPI_COMM_EXTRA_RETAIN 0x00004000 #define OMPI_COMM_MAPBY_NODE 0x00008000 +#define OMPI_COMM_GLOBAL_INDEX 0x00010000 /* some utility #defines */ #define OMPI_COMM_IS_INTER(comm) ((comm)->c_flags & OMPI_COMM_INTER) @@ -80,6 +85,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); OMPI_COMM_IS_GRAPH((comm)) || \ OMPI_COMM_IS_DIST_GRAPH((comm))) #define OMPI_COMM_IS_MAPBY_NODE(comm) ((comm)->c_flags & OMPI_COMM_MAPBY_NODE) +#define OMPI_COMM_IS_GLOBAL_INDEX(comm) ((comm)->c_flags & OMPI_COMM_GLOBAL_INDEX) #define OMPI_COMM_SET_DYNAMIC(comm) ((comm)->c_flags |= OMPI_COMM_DYNAMIC) #define OMPI_COMM_SET_INVALID(comm) ((comm)->c_flags |= OMPI_COMM_INVALID) @@ -92,12 +98,16 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_ASSERT_NO_ANY_SOURCE 0x00000002 #define OMPI_COMM_ASSERT_EXACT_LENGTH 0x00000004 #define OMPI_COMM_ASSERT_ALLOW_OVERTAKE 0x00000008 +#define OMPI_COMM_ASSERT_LAZY_BARRIER 0x00000010 +#define OMPI_COMM_ASSERT_ACTIVE_POLL 0x00000020 #define OMPI_COMM_CHECK_ASSERT(comm, flag) !!((comm)->c_assertions & flag) #define OMPI_COMM_CHECK_ASSERT_NO_ANY_TAG(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_TAG) #define OMPI_COMM_CHECK_ASSERT_NO_ANY_SOURCE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_NO_ANY_SOURCE) #define OMPI_COMM_CHECK_ASSERT_EXACT_LENGTH(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_EXACT_LENGTH) #define OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE) +#define OMPI_COMM_CHECK_ASSERT_LAZY_BARRIER(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_LAZY_BARRIER) +#define OMPI_COMM_CHECK_ASSERT_ACTIVE_POLL(comm) OMPI_COMM_CHECK_ASSERT(comm, OMPI_COMM_ASSERT_ACTIVE_POLL) /** * Modes required for acquiring the new comm-id. @@ -111,10 +121,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_CID_INTRA_BRIDGE 0x00000080 #define OMPI_COMM_CID_INTRA_PMIX 0x00000100 #define OMPI_COMM_CID_GROUP 0x00000200 +#define OMPI_COMM_CID_GROUP_NEW 0x00000400 #if OPAL_ENABLE_FT_MPI -#define OMPI_COMM_CID_INTRA_FT 0x00000400 -#define OMPI_COMM_CID_INTER_FT 0x00000800 -#define OMPI_COMM_CID_INTRA_PMIX_FT 0x00001000 +#define OMPI_COMM_CID_INTRA_FT 0x00000800 +#define OMPI_COMM_CID_INTER_FT 0x00001000 +#define OMPI_COMM_CID_INTRA_PMIX_FT 0x00002000 #endif /* OPAL_ENABLE_FT_MPI */ /** @@ -125,10 +136,103 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t); #define OMPI_COMM_BLOCK_OTHERS 8 /* A macro comparing two CIDs */ -#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_contextid < (comm2)->c_contextid)? 1:0) +#define OMPI_COMM_CID_IS_LOWER(comm1,comm2) ( ((comm1)->c_index < (comm2)->c_index)? 1:0) -OMPI_DECLSPEC extern opal_pointer_array_t ompi_mpi_communicators; +OMPI_DECLSPEC extern opal_hash_table_t ompi_comm_hash; +OMPI_DECLSPEC extern opal_pointer_array_t ompi_comm_array; OMPI_DECLSPEC extern opal_pointer_array_t ompi_comm_f_to_c_table; + +struct ompi_comm_extended_cid_t { + uint64_t cid_base; + union { + uint64_t u64; + uint8_t u8[8]; + } cid_sub; +}; +typedef struct ompi_comm_extended_cid_t ompi_comm_extended_cid_t; + +struct ompi_comm_extended_cid_block_t { + ompi_comm_extended_cid_t block_cid; + /** can be used to get a unique string tag for pmix context creation */ + uint64_t block_nexttag; + uint8_t block_nextsub; + uint8_t block_level; +}; +typedef struct ompi_comm_extended_cid_block_t ompi_comm_extended_cid_block_t; + +static inline void ompi_comm_extended_cid_block_initialize (ompi_comm_extended_cid_block_t *block, uint64_t cid_base, + uint64_t cid_sub, uint8_t block_level) +{ + block->block_cid.cid_base = cid_base; + block->block_cid.cid_sub.u64 = cid_sub; + block->block_level = block_level; + block->block_nextsub = 0; + block->block_nexttag = 0; +} + +static inline bool ompi_comm_extended_cid_block_available (ompi_comm_extended_cid_block_t *block) +{ + return (4 > block->block_level && 0xff > block->block_nextsub); +} + +static inline char *ompi_comm_extended_cid_get_unique_tag (ompi_comm_extended_cid_block_t *block, int tag, + int leader) +{ + char *id; + + /* create a unique ID for this */ + if (-1 == tag) { + opal_asprintf (&id, "ALL:%" PRIx64 "-%" PRIx64 "-%" PRIx64, block->block_cid.cid_base, + block->block_cid.cid_sub.u64, ++block->block_nexttag); + } else { + opal_asprintf (&id, "GROUP:%" PRIx64 "-%" PRIx64 "-%d-%d", block->block_cid.cid_base, + block->block_cid.cid_sub.u64, tag, leader); + } + + return id; +} + +/** + * Create a new sub-block from an existing block + * + * @param[in] block block + * @param[out] new_block new CID block + * @param[in] use_current use the current CID of the existing block as the base + * + * This function creates a new CID block from an existing block. The use_current flag + * can be used to indicate that the new block should use the existing CID. This can + * be used to assign the first CID in a new block. + */ +static inline int ompi_comm_extended_cid_block_new (ompi_comm_extended_cid_block_t *block, + ompi_comm_extended_cid_block_t *new_block, + bool use_current) +{ + if (!ompi_comm_extended_cid_block_available (block)) { + /* a new block is needed */ + return OMPI_ERR_OUT_OF_RESOURCE; + } + + new_block->block_cid = block->block_cid; + if (!use_current) { + new_block->block_cid.cid_sub.u8[3 - block->block_level] = ++block->block_nextsub; + } + + new_block->block_level = block->block_level + 1; + new_block->block_nextsub = 0; + + return OMPI_SUCCESS; +} + +struct ompi_comm_cid_t { + opal_object_t super; + ompi_group_t cid_group; + ompi_comm_extended_cid_t cid_value; + uint8_t cid_sublevel; +}; +typedef struct ompi_comm_cid_t ompi_comm_cid_t; + +OBJ_CLASS_DECLARATION(ompi_comm_cid_t); + #if OPAL_ENABLE_FT_MPI /** * This array holds the number of time each id has been used. In the case where a communicator @@ -158,12 +262,14 @@ struct ompi_communicator_t { opal_mutex_t c_lock; /* mutex for name and potentially attributes */ char c_name[MPI_MAX_OBJECT_NAME]; - uint32_t c_contextid; - int c_my_rank; - uint32_t c_flags; /* flags, e.g. intercomm, - topology, etc. */ - uint32_t c_assertions; /* info assertions */ - + ompi_comm_extended_cid_t c_contextid; + ompi_comm_extended_cid_block_t c_contextidb; + int c_index; + int *c_index_vec; + int c_my_rank; + uint32_t c_flags; /* flags, e.g. intercomm, + topology, etc. */ + uint32_t c_assertions; /* info assertions */ int c_id_available; /* the currently available Cid for allocation to a child*/ int c_id_start_index; /* the starting index of the block of cids @@ -219,6 +325,9 @@ struct ompi_communicator_t { */ opal_atomic_int32_t c_nbc_tag; + /* instance that this comm belongs to */ + ompi_instance_t* instance; + #if OPAL_ENABLE_FT_MPI /** MPI_ANY_SOURCE Failed Group Offset - OMPI_Comm_failure_get_acked */ int any_source_offset; @@ -326,7 +435,7 @@ typedef struct ompi_communicator_t ompi_communicator_t; * the PREDEFINED_COMMUNICATOR_PAD macro? * A: Most likely not, but it would be good to check. */ -#define PREDEFINED_COMMUNICATOR_PAD 512 +#define PREDEFINED_COMMUNICATOR_PAD 1024 struct ompi_predefined_communicator_t { struct ompi_communicator_t comm; @@ -372,7 +481,7 @@ OMPI_DECLSPEC extern ompi_predefined_communicator_t *ompi_mpi_comm_null_addr; * ompi_comm_invalid() but also explictily checks to see if the * handle is MPI_COMM_NULL. */ -static inline int ompi_comm_invalid(ompi_communicator_t* comm) +static inline int ompi_comm_invalid (const ompi_communicator_t* comm) { if ((NULL == comm) || (MPI_COMM_NULL == comm) || (OMPI_COMM_IS_FREED(comm)) || (OMPI_COMM_IS_INVALID(comm)) ) @@ -384,7 +493,7 @@ static inline int ompi_comm_invalid(ompi_communicator_t* comm) /** * rank w/in the communicator */ -static inline int ompi_comm_rank(ompi_communicator_t* comm) +static inline int ompi_comm_rank (const ompi_communicator_t* comm) { return comm->c_my_rank; } @@ -392,7 +501,7 @@ static inline int ompi_comm_rank(ompi_communicator_t* comm) /** * size of the communicator */ -static inline int ompi_comm_size(ompi_communicator_t* comm) +static inline int ompi_comm_size (const ompi_communicator_t* comm) { return comm->c_local_group->grp_proc_count; } @@ -401,7 +510,7 @@ static inline int ompi_comm_size(ompi_communicator_t* comm) * size of the remote group for inter-communicators. * returns zero for an intra-communicator */ -static inline int ompi_comm_remote_size(ompi_communicator_t* comm) +static inline int ompi_comm_remote_size (const ompi_communicator_t* comm) { return (comm->c_flags & OMPI_COMM_INTER ? comm->c_remote_group->grp_proc_count : 0); } @@ -410,20 +519,41 @@ static inline int ompi_comm_remote_size(ompi_communicator_t* comm) * Context ID for the communicator, suitable for passing to * ompi_comm_lookup for getting the communicator back */ -static inline uint32_t ompi_comm_get_cid(ompi_communicator_t* comm) +static inline uint32_t ompi_comm_get_local_cid (const ompi_communicator_t* comm) +{ + return comm->c_index; +} + +/** + * Get the extended context ID for the communicator, suitable for passing + * to ompi_comm_lookup_cid for getting the communicator back + */ +static inline ompi_comm_extended_cid_t ompi_comm_get_extended_cid (const ompi_communicator_t *comm) { return comm->c_contextid; } +static inline bool ompi_communicator_cid_compare (const ompi_communicator_t *comm, const ompi_comm_extended_cid_t cid) +{ + return comm->c_contextid.cid_base == cid.cid_base && comm->c_contextid.cid_sub.u64 == cid.cid_sub.u64; +} + /* return pointer to communicator associated with context id cid, * No error checking is done*/ -static inline ompi_communicator_t *ompi_comm_lookup(uint32_t cid) +static inline ompi_communicator_t *ompi_comm_lookup (const uint32_t c_index) { /* array of pointers to communicators, indexed by context ID */ - return (ompi_communicator_t*)opal_pointer_array_get_item(&ompi_mpi_communicators, cid); + return (ompi_communicator_t *) opal_pointer_array_get_item (&ompi_comm_array, c_index); +} + +static inline ompi_communicator_t *ompi_comm_lookup_cid (const ompi_comm_extended_cid_t cid) +{ + ompi_communicator_t *comm = NULL; + (void) opal_hash_table_get_value_ptr (&ompi_comm_hash, &cid, sizeof (cid), (void *) &comm); + return comm; } -static inline struct ompi_proc_t* ompi_comm_peer_lookup(ompi_communicator_t* comm, int peer_id) +static inline struct ompi_proc_t* ompi_comm_peer_lookup (const ompi_communicator_t* comm, const int peer_id) { #if OPAL_ENABLE_DEBUG if(peer_id >= comm->c_remote_group->grp_proc_count) { @@ -616,7 +746,7 @@ OMPI_DECLSPEC int ompi_comm_revoke_finalize(void); #endif /* OPAL_ENABLE_FT_MPI */ -static inline bool ompi_comm_peer_invalid(ompi_communicator_t* comm, int peer_id) +static inline bool ompi_comm_peer_invalid (const ompi_communicator_t* comm, const int peer_id) { if(peer_id < 0 || peer_id >= comm->c_remote_group->grp_proc_count) { return true; @@ -624,12 +754,18 @@ static inline bool ompi_comm_peer_invalid(ompi_communicator_t* comm, int peer_id return false; } +char *ompi_comm_print_cid (const ompi_communicator_t *comm); /** - * Initialise MPI_COMM_WORLD and MPI_COMM_SELF + * @brief Initialize the communicator subsystem as well as MPI_COMM_NULL. */ int ompi_comm_init(void); +/** + * Initialise MPI_COMM_WORLD and MPI_COMM_SELF + */ +int ompi_comm_init_mpi3 (void); + /** * extract the local group from a communicator */ @@ -641,6 +777,9 @@ OMPI_DECLSPEC int ompi_comm_group (ompi_communicator_t *comm, ompi_group_t **gro int ompi_comm_create (ompi_communicator_t* comm, ompi_group_t *group, ompi_communicator_t** newcomm); +int ompi_comm_create_w_info (ompi_communicator_t *comm, ompi_group_t *group, opal_info_t *info, + ompi_communicator_t **newcomm); + /** * Non-collective create communicator based on a group @@ -648,6 +787,26 @@ int ompi_comm_create (ompi_communicator_t* comm, ompi_group_t *group, int ompi_comm_create_group (ompi_communicator_t *comm, ompi_group_t *group, int tag, ompi_communicator_t **newcomm); +/** + * Non-collective create communicator based on a group with no base communicator + */ +int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info_t *info, + ompi_errhandler_t *errhandler, ompi_communicator_t **newcomm); + +/** + * create an intercommunicator + */ +int ompi_intercomm_create (ompi_communicator_t *local_comm, int local_leader, ompi_communicator_t *bridge_comm, + int remote_leader, int tag, ompi_communicator_t **newintercomm); + +/** + * Non-collective create intercommunicator based on a group with no base communicator + */ +int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_leader, + ompi_group_t *remote_group, int remote_leader, const char *tag, + opal_info_t *info, ompi_errhandler_t *errhandler, + ompi_communicator_t **newintercomm); + /** * Take an almost complete communicator and reserve the CID as well * as activate it (initialize the collective and the topologies). @@ -821,11 +980,6 @@ OMPI_DECLSPEC int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_commu ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1, bool send_first, int mode, ompi_request_t **req); -/** - * shut down the communicator infrastructure. - */ -int ompi_comm_finalize (void); - /** * This is THE routine, where all the communicator stuff * is really set. @@ -841,6 +995,7 @@ int ompi_comm_finalize (void); * @param[in] copy_topocomponent whether to copy the topology * @param[in] local_group local process group (may be NULL if local_ranks array supplied) * @param[in] remote_group remote process group (may be NULL) + * @param[in] flags flags to control the behavior of ompi_comm_set_nb */ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, ompi_communicator_t* oldcomm, @@ -850,9 +1005,20 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, - ompi_group_t *remote_group ); + ompi_group_t *remote_group, + uint32_t flags); + +/** + * @brief Don't duplicate the local communicator. just reference it directly. This + * flag passes ownership to the new communicator. + */ +#define OMPI_COMM_SET_FLAG_LOCAL_COMM_NODUP 0x00000001 + +/** + * @brief Copy the topology from the old communicator + */ +#define OMPI_COMM_SET_FLAG_COPY_TOPOLOGY 0x00000002 /** * This is THE routine, where all the communicator stuff @@ -869,6 +1035,7 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm, * @param[in] copy_topocomponent whether to copy the topology * @param[in] local_group local process group (may be NULL if local_ranks array supplied) * @param[in] remote_group remote process group (may be NULL) + * @param[in] flags flags to control the behavior of ompi_comm_set_nb * @param[out] req ompi_request_t object for tracking completion */ OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm, @@ -879,30 +1046,10 @@ OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm, int *remote_ranks, opal_hash_table_t *attr, ompi_errhandler_t *errh, - bool copy_topocomponent, ompi_group_t *local_group, ompi_group_t *remote_group, - ompi_request_t **req ); - -/** - * This is a short-hand routine used in intercomm_create. - * The routine makes sure, that all processes have afterwards - * a list of ompi_proc_t pointers for the remote group. - */ -int ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, - ompi_communicator_t *bridge_comm, - int local_leader, - int remote_leader, - int tag, - int rsize, - struct ompi_proc_t ***prprocs ); - -/** - * This routine verifies, whether local_group and remote group are overlapping - * in intercomm_create - */ -int ompi_comm_overlapping_groups (int size, struct ompi_proc_t ** lprocs, - int rsize, struct ompi_proc_t ** rprocs); + uint32_t flags, + ompi_request_t **req); /** * This is a routine determining whether the local or the diff --git a/ompi/communicator/ft/comm_ft.c b/ompi/communicator/ft/comm_ft.c index 3532954f06b..3d49ef3a64b 100644 --- a/ompi/communicator/ft/comm_ft.c +++ b/ompi/communicator/ft/comm_ft.c @@ -246,7 +246,8 @@ int ompi_comm_shrink_internal(ompi_communicator_t* comm, ompi_communicator_t** n /* --------------------------------------------------------- */ /* Set name for debugging purposes */ snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMMUNICATOR %d SHRUNK FROM %d", - newcomp->c_contextid, comm->c_contextid ); + ompi_comm_get_local_cid(newcomp), + ompi_comm_get_local_cid(comm)); start = PMPI_Wtime(); /* activate communicator and init coll-module */ ret = ompi_comm_activate( &newcomp, /* new communicator */ diff --git a/ompi/communicator/ft/comm_ft_detector.c b/ompi/communicator/ft/comm_ft_detector.c index 03eccda908b..a3e0d2e0196 100644 --- a/ompi/communicator/ft/comm_ft_detector.c +++ b/ompi/communicator/ft/comm_ft_detector.c @@ -380,7 +380,7 @@ static int fd_heartbeat_request(comm_detector_t* detector) { detector->hb_observing = rank; ompi_comm_heartbeat_req_t* msg = calloc(sizeof(*msg)+regsize, 1); - msg->super.cid = comm->c_contextid; + msg->super.cid = comm->c_index; msg->super.epoch = comm->c_epoch; msg->super.type = comm_heartbeat_request_cb_type; msg->from = comm->c_my_rank; @@ -674,7 +674,7 @@ static int fd_heartbeat_send(comm_detector_t* detector) { /* send the heartbeat with eager send */ ompi_comm_heartbeat_message_t msg; - msg.super.cid = comm->c_contextid; + msg.super.cid = comm->c_index; msg.super.epoch = comm->c_epoch; msg.super.type = comm_heartbeat_recv_cb_type; msg.from = detector->hb_rdma_rank; /* comm->c_my_rank; except during finalize when it is equal to detector->hb_observer */ diff --git a/ompi/communicator/ft/comm_ft_propagator.c b/ompi/communicator/ft/comm_ft_propagator.c index dbb20275d53..389b47346a7 100644 --- a/ompi/communicator/ft/comm_ft_propagator.c +++ b/ompi/communicator/ft/comm_ft_propagator.c @@ -73,7 +73,7 @@ int ompi_comm_failure_propagate(ompi_communicator_t* comm, ompi_proc_t* proc, in ompi_comm_failure_propagator_message_t msg; /* Broadcast the 'failure_propagator' signal to all other processes. */ - msg.rbcast_msg.cid = comm->c_contextid; + msg.rbcast_msg.cid = comm->c_index; msg.rbcast_msg.epoch = comm->c_epoch; msg.rbcast_msg.type = comm_failure_propagator_cb_type; msg.proc_name = proc->super.proc_name; diff --git a/ompi/communicator/ft/comm_ft_reliable_bcast.c b/ompi/communicator/ft/comm_ft_reliable_bcast.c index 7302737f1bd..06cd7ccc892 100644 --- a/ompi/communicator/ft/comm_ft_reliable_bcast.c +++ b/ompi/communicator/ft/comm_ft_reliable_bcast.c @@ -181,7 +181,7 @@ static void ompi_comm_rbcast_bml_recv_cb( OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->cid, msg->epoch)); return; } - if(OPAL_UNLIKELY( msg->cid != comm->c_contextid )) { + if(OPAL_UNLIKELY( msg->cid != comm->c_index )) { OPAL_OUTPUT_VERBOSE((2, ompi_ftmpi_output_handle, "%s %s: Info: received a late rbcast message with CID %3d:%d during an MPI_COMM_DUP that is trying to reuse that CID (thus increasing the epoch) - ignoring, nothing to do", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __func__, msg->cid, msg->epoch)); diff --git a/ompi/communicator/ft/comm_ft_revoke.c b/ompi/communicator/ft/comm_ft_revoke.c index 027e659814a..159bf542932 100644 --- a/ompi/communicator/ft/comm_ft_revoke.c +++ b/ompi/communicator/ft/comm_ft_revoke.c @@ -56,7 +56,7 @@ int ompi_comm_revoke_internal(ompi_communicator_t* comm) if( ompi_comm_revoke_local(comm, NULL) ) { /* Broadcast the 'revoke' signal to all other processes. */ ompi_comm_rbcast_message_t msg; - msg.cid = comm->c_contextid; + msg.cid = comm->c_index; msg.epoch = comm->c_epoch; msg.type = comm_revoke_cb_type; ret = ompi_comm_rbcast(comm, &msg, sizeof(msg)); diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index 26978d0867e..97f87d53bdf 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -118,7 +118,6 @@ OMPI_DECLSPEC extern opal_convertor_t* ompi_mpi_local_convertor; extern struct opal_pointer_array_t ompi_datatype_f_to_c_table; OMPI_DECLSPEC int32_t ompi_datatype_init( void ); -OMPI_DECLSPEC int32_t ompi_datatype_finalize( void ); OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_init( void ); OMPI_DECLSPEC int32_t ompi_datatype_default_convertors_fini( void ); diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 5a9a0aa9110..6ab1da0712b 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -18,6 +18,8 @@ * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,9 +38,13 @@ #include "opal/class/opal_pointer_array.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/datatype/ompi_datatype_internal.h" +#include "ompi/instance/instance.h" +#include "ompi/attribute/attribute.h" #include "mpi.h" +static int ompi_datatype_finalize (void); + /** * This is the number of predefined datatypes. It is different than the MAX_PREDEFINED * as it include all the optional datatypes (such as MPI_INTEGER?, MPI_REAL?). @@ -472,6 +478,7 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; int32_t ompi_datatype_init( void ) { int32_t i; + int ret = OMPI_SUCCESS; opal_datatype_init(); @@ -670,23 +677,26 @@ int32_t ompi_datatype_init( void ) } } ompi_datatype_default_convertors_init(); + + /* get a reference to the attributes subsys */ + ret = ompi_attr_get_ref(); + if (OMPI_SUCCESS != ret) { + return ret; + } + + ompi_mpi_instance_append_finalize (ompi_datatype_finalize); return OMPI_SUCCESS; } -int32_t ompi_datatype_finalize( void ) +static int ompi_datatype_finalize (void) { + int ret = OMPI_SUCCESS; + /* As the synonyms are just copies of the internal data we should not free them. * Anyway they are over the limit of OMPI_DATATYPE_MPI_MAX_PREDEFINED so they will never get freed. */ - /* As they are statically allocated they cannot be released. - * But we can call OBJ_DESTRUCT, just to free all internally allocated ressources. - */ - for( int i = 0; i < ompi_datatype_number_of_predefined_data; i++ ) { - opal_datatype_t* datatype = (opal_datatype_t*)opal_pointer_array_get_item(&ompi_datatype_f_to_c_table, i ); - OBJ_DESTRUCT(datatype); - } /* Get rid of the Fortran2C translation table */ OBJ_DESTRUCT(&ompi_datatype_f_to_c_table); @@ -697,7 +707,10 @@ int32_t ompi_datatype_finalize( void ) /* don't call opal_datatype_finalize () as it no longer exists. the function will be called * opal_finalize_util (). */ - return OMPI_SUCCESS; + /* release a reference to the attributes subsys */ + ret = ompi_attr_put_ref(); + + return ret; } diff --git a/ompi/debuggers/ompi_common_dll.c b/ompi/debuggers/ompi_common_dll.c index 9395d93241a..65a388a68b2 100644 --- a/ompi/debuggers/ompi_common_dll.c +++ b/ompi/debuggers/ompi_common_dll.c @@ -324,6 +324,10 @@ int ompi_fill_in_type_info(mqs_image *image, char **message) qh_type, ompi_communicator_t, c_name); ompi_field_offset(i_info->ompi_communicator_t.offset.c_contextid, qh_type, ompi_communicator_t, c_contextid); + ompi_field_offset(i_info->ompi_communicator_t.offset.c_contextidb, + qh_type, ompi_communicator_t, c_contextidb); + ompi_field_offset(i_info->ompi_communicator_t.offset.c_index, + qh_type, ompi_communicator_t, c_index); ompi_field_offset(i_info->ompi_communicator_t.offset.c_my_rank, qh_type, ompi_communicator_t, c_my_rank); ompi_field_offset(i_info->ompi_communicator_t.offset.c_local_group, diff --git a/ompi/debuggers/ompi_common_dll_defs.h b/ompi/debuggers/ompi_common_dll_defs.h index 6f4e6b89381..5fe11d3986e 100644 --- a/ompi/debuggers/ompi_common_dll_defs.h +++ b/ompi/debuggers/ompi_common_dll_defs.h @@ -198,6 +198,8 @@ typedef struct struct { int c_name; int c_contextid; + int c_contextidb; + int c_index; int c_my_rank; int c_local_group; int c_remote_group; diff --git a/ompi/debuggers/ompi_msgq_dll.c b/ompi/debuggers/ompi_msgq_dll.c index f5afab5d2e7..5f1f28999d9 100644 --- a/ompi/debuggers/ompi_msgq_dll.c +++ b/ompi/debuggers/ompi_msgq_dll.c @@ -650,11 +650,13 @@ static int rebuild_communicator_list (mqs_process *proc) if( 0 == comm_ptr ) continue; commcount++; /* Now let's grab the data we want from inside */ + /* NTH: XXXXXXXXXXXXX FIXME!!!!!!!!!!!!!! c_index is local but MSGQ needs a global identifier + * that is sizeof (void *) or smaller. */ DEBUG(VERBOSE_GENERAL, ("Retrieve context_id from 0x%llx and local_rank from 0x%llx\n", - (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_contextid), + (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_index), (long long)(comm_ptr + i_info->ompi_communicator_t.offset.c_my_rank))); context_id = ompi_fetch_int( proc, - comm_ptr + i_info->ompi_communicator_t.offset.c_contextid, + comm_ptr + i_info->ompi_communicator_t.offset.c_index, p_info ); /* Do we already have this communicator ? */ old = find_communicator(p_info, context_id); diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 59c3a0e715c..39d8bcfcc57 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -21,7 +21,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -491,10 +491,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, NULL , /* remote_procs */ NULL, /* attrs */ comm->error_handler, /* error handler */ - NULL, /* topo component */ group, /* local group */ - new_group_pointer /* remote group */ - ); + new_group_pointer, /* remote group */ + 0); /* flags */ if (OMPI_SUCCESS != rc) { goto exit; } @@ -689,7 +688,7 @@ static int dpm_convert(opal_list_t *infos, char *ck, *ptr, *help_str = NULL; int rc; char **tmp; - dpm_conflicts_t *modifiers; + dpm_conflicts_t *modifiers = NULL; const char *attr; /* pick the modifiers to be checked */ @@ -1711,15 +1710,6 @@ int ompi_dpm_dyn_init(void) return OMPI_SUCCESS; } - -/* - * finalize the module - */ -int ompi_dpm_finalize(void) -{ - return OMPI_SUCCESS; -} - static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int count) { for(int i = 0; i < count; i++) { @@ -1749,9 +1739,9 @@ int ompi_dpm_dyn_finalize(void) return OMPI_ERR_OUT_OF_RESOURCE; } - max = opal_pointer_array_get_size(&ompi_mpi_communicators); + max = opal_pointer_array_get_size(&ompi_comm_array); for (i=3; i0) + * @returns OMPI_ERROR otherwise + * + */ +int ompi_mpi_errcode_add (int errclass); + +/** + * Add an error class + * + * @param: none + * + * @returns the new error class on SUCCESS (>0) + * @returns OMPI_ERROR otherwise + * + */ +int ompi_mpi_errclass_add (void); + +/** + * Add an error string to an error code + * + * @param: error code for which the string is defined + * @param: error string to add + * @param: length of the string + * + * @returns OMPI_SUCCESS on success + * @returns OMPI_ERROR on error + */ +int ompi_mpi_errnum_add_string (int errnum, const char* string, int len); + END_C_DECLS #endif /* OMPI_MPI_ERRCODE_H */ diff --git a/ompi/errhandler/errhandler.c b/ompi/errhandler/errhandler.c index 3a2d2eab162..bea40f93412 100644 --- a/ompi/errhandler/errhandler.c +++ b/ompi/errhandler/errhandler.c @@ -17,6 +17,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,7 +38,7 @@ #include "opal/mca/pmix/pmix-internal.h" #include "opal/util/string_copy.h" #include "opal/mca/backtrace/backtrace.h" - +#include "ompi/runtime/mpiruntime.h" /* * Table for Fortran <-> C errhandler handle conversion @@ -137,56 +139,60 @@ int ompi_initial_errhandler_init(void) { return OMPI_SUCCESS; } +static int ompi_errhandler_finalize (void); + /* * Initialize OMPI errhandler infrastructure */ int ompi_errhandler_init(void) { - /* initialize ompi_errhandler_f_to_c_table */ + OBJ_CONSTRUCT( &ompi_errhandler_f_to_c_table, opal_pointer_array_t); + if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_errhandler_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 16) ) { + return OMPI_ERROR; + } - OBJ_CONSTRUCT( &ompi_errhandler_f_to_c_table, opal_pointer_array_t); - if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_errhandler_f_to_c_table, 8, - OMPI_FORTRAN_HANDLE_MAX, 16) ) { - return OMPI_ERROR; - } + /* Initialize the predefined error handlers */ + OBJ_CONSTRUCT( &ompi_mpi_errhandler_null.eh, ompi_errhandler_t ); + if( ompi_mpi_errhandler_null.eh.eh_f_to_c_index != OMPI_ERRHANDLER_NULL_FORTRAN ) { + return OMPI_ERROR; + } - /* Initialize the predefined error handlers */ - OBJ_CONSTRUCT( &ompi_mpi_errhandler_null.eh, ompi_errhandler_t ); - if( ompi_mpi_errhandler_null.eh.eh_f_to_c_index != OMPI_ERRHANDLER_NULL_FORTRAN ) - return OMPI_ERROR; - ompi_mpi_errhandler_null.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errhandler_null.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errhandler_null.eh.eh_comm_fn = NULL; - ompi_mpi_errhandler_null.eh.eh_file_fn = NULL; - ompi_mpi_errhandler_null.eh.eh_win_fn = NULL ; - ompi_mpi_errhandler_null.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", - sizeof(ompi_mpi_errhandler_null.eh.eh_name)); - - OBJ_CONSTRUCT( &ompi_mpi_errors_are_fatal.eh, ompi_errhandler_t ); - if( ompi_mpi_errors_are_fatal.eh.eh_f_to_c_index != OMPI_ERRORS_ARE_FATAL_FORTRAN ) - return OMPI_ERROR; - ompi_mpi_errors_are_fatal.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errors_are_fatal.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errors_are_fatal.eh.eh_comm_fn = ompi_mpi_errors_are_fatal_comm_handler; - ompi_mpi_errors_are_fatal.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; - ompi_mpi_errors_are_fatal.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler ; - ompi_mpi_errors_are_fatal.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errors_are_fatal.eh.eh_name, - "MPI_ERRORS_ARE_FATAL", - sizeof(ompi_mpi_errors_are_fatal.eh.eh_name)); - - OBJ_CONSTRUCT( &ompi_mpi_errors_return.eh, ompi_errhandler_t ); - if( ompi_mpi_errors_return.eh.eh_f_to_c_index != OMPI_ERRORS_RETURN_FORTRAN ) - return OMPI_ERROR; - ompi_mpi_errors_return.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; - ompi_mpi_errors_return.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; - ompi_mpi_errors_return.eh.eh_comm_fn = ompi_mpi_errors_return_comm_handler; - ompi_mpi_errors_return.eh.eh_file_fn = ompi_mpi_errors_return_file_handler; - ompi_mpi_errors_return.eh.eh_win_fn = ompi_mpi_errors_return_win_handler; - ompi_mpi_errors_return.eh.eh_fort_fn = NULL; - opal_string_copy(ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", - sizeof(ompi_mpi_errors_return.eh.eh_name)); + ompi_mpi_errhandler_null.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errhandler_null.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errhandler_null.eh.eh_comm_fn = NULL; + ompi_mpi_errhandler_null.eh.eh_file_fn = NULL; + ompi_mpi_errhandler_null.eh.eh_win_fn = NULL ; + ompi_mpi_errhandler_null.eh.eh_fort_fn = NULL; + opal_string_copy (ompi_mpi_errhandler_null.eh.eh_name, "MPI_ERRHANDLER_NULL", + sizeof(ompi_mpi_errhandler_null.eh.eh_name)); + + OBJ_CONSTRUCT( &ompi_mpi_errors_are_fatal.eh, ompi_errhandler_t ); + if( ompi_mpi_errors_are_fatal.eh.eh_f_to_c_index != OMPI_ERRORS_ARE_FATAL_FORTRAN ) + return OMPI_ERROR; + ompi_mpi_errors_are_fatal.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errors_are_fatal.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errors_are_fatal.eh.eh_comm_fn = ompi_mpi_errors_are_fatal_comm_handler; + ompi_mpi_errors_are_fatal.eh.eh_file_fn = ompi_mpi_errors_are_fatal_file_handler; + ompi_mpi_errors_are_fatal.eh.eh_win_fn = ompi_mpi_errors_are_fatal_win_handler; + ompi_mpi_errors_are_fatal.eh.eh_instance_fn = ompi_mpi_errors_are_fatal_instance_handler; + ompi_mpi_errors_are_fatal.eh.eh_fort_fn = NULL; + opal_string_copy(ompi_mpi_errors_are_fatal.eh.eh_name, + "MPI_ERRORS_ARE_FATAL", + sizeof(ompi_mpi_errors_are_fatal.eh.eh_name)); + + OBJ_CONSTRUCT( &ompi_mpi_errors_return.eh, ompi_errhandler_t ); + if( ompi_mpi_errors_return.eh.eh_f_to_c_index != OMPI_ERRORS_RETURN_FORTRAN ) + return OMPI_ERROR; + ompi_mpi_errors_return.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED; + ompi_mpi_errors_return.eh.eh_lang = OMPI_ERRHANDLER_LANG_C; + ompi_mpi_errors_return.eh.eh_comm_fn = ompi_mpi_errors_return_comm_handler; + ompi_mpi_errors_return.eh.eh_file_fn = ompi_mpi_errors_return_file_handler; + ompi_mpi_errors_return.eh.eh_win_fn = ompi_mpi_errors_return_win_handler; + ompi_mpi_errors_return.eh.eh_instance_fn = ompi_mpi_errors_return_instance_handler; + ompi_mpi_errors_return.eh.eh_fort_fn = NULL; + opal_string_copy(ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN", + sizeof(ompi_mpi_errors_return.eh.eh_name)); OBJ_CONSTRUCT( &ompi_mpi_errors_abort.eh, ompi_errhandler_t ); if( ompi_mpi_errors_abort.eh.eh_f_to_c_index != OMPI_ERRORS_ABORT_FORTRAN ) @@ -206,14 +212,24 @@ int ompi_errhandler_init(void) if( NULL != env ) { ompi_process_info.initial_errhandler = strndup(env, MPI_MAX_INFO_VAL); } - return ompi_initial_errhandler_init(); + +/* HPP TODO CHECK */ + ompi_initial_errhandler_init(); + ompi_mpi_instance_append_finalize (ompi_errhandler_finalize); + + return OMPI_SUCCESS; } -/* - * Clean up the errorhandler resources +/** + * Finalize the error handler interface. + * + * @returns OMPI_SUCCESS Always + * + * Invoked on instance teardown if ompi_errhandler_init() was called; tears down the error handler + * interface, and destroys the F2C translation table. */ -int ompi_errhandler_finalize(void) +static int ompi_errhandler_finalize (void) { OBJ_DESTRUCT(&ompi_mpi_errhandler_null.eh); OBJ_DESTRUCT(&ompi_mpi_errors_return.eh); @@ -232,46 +248,62 @@ int ompi_errhandler_finalize(void) return OMPI_SUCCESS; } +void ompi_errhandler_free (ompi_errhandler_t *errhandler) +{ + OBJ_RELEASE(errhandler); + ompi_mpi_instance_release (); +} ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, - ompi_errhandler_generic_handler_fn_t *func, + ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t lang) { - ompi_errhandler_t *new_errhandler; - - /* Create a new object and ensure that it's valid */ - - new_errhandler = OBJ_NEW(ompi_errhandler_t); - if (NULL != new_errhandler) { - if (0 > new_errhandler->eh_f_to_c_index) { - OBJ_RELEASE(new_errhandler); - new_errhandler = NULL; - } else { - - /* We cast the user's callback function to any one of the - function pointer types in the union; it doesn't matter which. - It only matters that we dereference/use the right member when - invoking the callback. */ - - new_errhandler->eh_mpi_object_type = object_type; - new_errhandler->eh_lang = lang; - switch (object_type ) { - case (OMPI_ERRHANDLER_TYPE_COMM): - new_errhandler->eh_comm_fn = (MPI_Comm_errhandler_function *)func; - break; - case (OMPI_ERRHANDLER_TYPE_FILE): - new_errhandler->eh_file_fn = (ompi_file_errhandler_function *)func; - break; - case (OMPI_ERRHANDLER_TYPE_WIN): - new_errhandler->eh_win_fn = (MPI_Win_errhandler_function *)func; - break; - default: - break; - } - - new_errhandler->eh_fort_fn = (ompi_errhandler_fortran_handler_fn_t *)func; + ompi_errhandler_t *new_errhandler; + int ret; + + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return NULL; + } + + /* Create a new object and ensure that it's valid */ + + new_errhandler = OBJ_NEW(ompi_errhandler_t); + if (NULL != new_errhandler) { + if (0 > new_errhandler->eh_f_to_c_index) { + OBJ_RELEASE(new_errhandler); + new_errhandler = NULL; + } else { + + /* We cast the user's callback function to any one of the + function pointer types in the union; it doesn't matter which. + It only matters that we dereference/use the right member when + invoking the callback. */ + + new_errhandler->eh_mpi_object_type = object_type; + new_errhandler->eh_lang = lang; + switch (object_type ) { + case OMPI_ERRHANDLER_TYPE_COMM: + new_errhandler->eh_comm_fn = (MPI_Comm_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_FILE: + new_errhandler->eh_file_fn = (ompi_file_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_WIN: + new_errhandler->eh_win_fn = (MPI_Win_errhandler_function *)func; + break; + case OMPI_ERRHANDLER_TYPE_INSTANCE: + new_errhandler->eh_instance_fn = (MPI_Session_errhandler_function *)func; + break; + default: + break; + } + } + + new_errhandler->eh_fort_fn = (ompi_errhandler_fortran_handler_fn_t *)func; + } - } /* All done */ @@ -322,9 +354,9 @@ int ompi_errhandler_proc_failed_internal(ompi_proc_t* ompi_proc, int status, boo /* Communicator State: * Let them know about the failure. */ - max_num_comm = opal_pointer_array_get_size(&ompi_mpi_communicators); + max_num_comm = opal_pointer_array_get_size(&ompi_comm_array); for( i = 0; i < max_num_comm; ++i ) { - comm = (ompi_communicator_t *)opal_pointer_array_get_item(&ompi_mpi_communicators, i); + comm = (ompi_communicator_t *)opal_pointer_array_get_item(&ompi_comm_array, i); if( NULL == comm ) { continue; } diff --git a/ompi/errhandler/errhandler.h b/ompi/errhandler/errhandler.h index 572deeb9bf3..97305dec541 100644 --- a/ompi/errhandler/errhandler.h +++ b/ompi/errhandler/errhandler.h @@ -17,6 +17,8 @@ * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,7 +85,8 @@ enum ompi_errhandler_type_t { OMPI_ERRHANDLER_TYPE_PREDEFINED, OMPI_ERRHANDLER_TYPE_COMM, OMPI_ERRHANDLER_TYPE_WIN, - OMPI_ERRHANDLER_TYPE_FILE + OMPI_ERRHANDLER_TYPE_FILE, + OMPI_ERRHANDLER_TYPE_INSTANCE, }; typedef enum ompi_errhandler_type_t ompi_errhandler_type_t; @@ -109,6 +112,7 @@ struct ompi_errhandler_t { MPI_Comm_errhandler_function *eh_comm_fn; ompi_file_errhandler_function *eh_file_fn; MPI_Win_errhandler_function *eh_win_fn; + MPI_Session_errhandler_function *eh_instance_fn; ompi_errhandler_fortran_handler_fn_t *eh_fort_fn; /* index in Fortran <-> C translation array */ @@ -188,6 +192,10 @@ OMPI_DECLSPEC extern void (*ompi_initial_error_handler)(struct ompi_communicator struct ompi_request_t; +/* declared here because we can't include instance.h from this header + * because it would create a circular dependency */ +extern opal_atomic_int32_t ompi_instance_count; + /** * This is the macro to check the state of MPI and determine whether * it was properly initialized and not yet finalized. @@ -203,15 +211,13 @@ struct ompi_request_t; * potentially-performance-critical code paths) before reading the * variable. */ -#define OMPI_ERR_INIT_FINALIZE(name) \ - { \ - int32_t state = ompi_mpi_state; \ - if (OPAL_UNLIKELY(state < OMPI_MPI_STATE_INIT_COMPLETED || \ - state > OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT)) { \ - ompi_errhandler_invoke(NULL, NULL, -1, \ +#define OMPI_ERR_INIT_FINALIZE(name) \ + { \ + if (OPAL_UNLIKELY(0 == ompi_instance_count)) { \ + ompi_errhandler_invoke(NULL, NULL, -1, \ ompi_errcode_get_mpi_code(MPI_ERR_ARG), \ - name); \ - } \ + name); \ + } \ } /** @@ -327,16 +333,6 @@ struct ompi_request_t; */ int ompi_errhandler_init(void); - /** - * Finalize the error handler interface. - * - * @returns OMPI_SUCCESS Always - * - * Invokes from ompi_mpi_finalize(); tears down the error handler - * interface, and destroys the F2C translation table. - */ - int ompi_errhandler_finalize(void); - /** * \internal * @@ -382,8 +378,9 @@ struct ompi_request_t; /** * Create a ompi_errhandler_t * - * @param object_type Enum of the type of MPI object - * @param func Function pointer of the error handler + * @param[in] object_type Enum of the type of MPI object + * @param[in] func Function pointer of the error handler + * @param[in] language Calling language * * @returns errhandler Pointer to the ompi_errorhandler_t that will be * created and returned @@ -402,9 +399,11 @@ struct ompi_request_t; * same as sizeof(void(*)). */ OMPI_DECLSPEC ompi_errhandler_t *ompi_errhandler_create(ompi_errhandler_type_t object_type, - ompi_errhandler_generic_handler_fn_t *func, + ompi_errhandler_generic_handler_fn_t *func, ompi_errhandler_lang_t language); + OMPI_DECLSPEC void ompi_errhandler_free (ompi_errhandler_t *errhandler); + /** * Callback function to alert the MPI layer of an error or notification * from the internal RTE and/or the resource manager. diff --git a/ompi/errhandler/errhandler_predefined.c b/ompi/errhandler/errhandler_predefined.c index 4c9353465f2..7ec6f24b5c7 100644 --- a/ompi/errhandler/errhandler_predefined.c +++ b/ompi/errhandler/errhandler_predefined.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -17,6 +18,8 @@ * All rights reserved. * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,8 +44,10 @@ #include "ompi/communicator/communicator.h" #include "ompi/file/file.h" #include "ompi/win/win.h" +#include "ompi/instance/instance.h" #include "opal/util/printf.h" #include "opal/util/output.h" +#include "ompi/runtime/mpiruntime.h" /* * Local functions @@ -172,6 +177,25 @@ void ompi_mpi_errors_abort_win_handler(struct ompi_win_t **win, va_end(arglist); } +void ompi_mpi_errors_are_fatal_instance_handler (struct ompi_instance_t **instance, + int *error_code, ...) +{ + char *name; + struct ompi_communicator_t *abort_comm = NULL; + va_list arglist; + + va_start(arglist, error_code); + + if (NULL != instance) { + name = (*instance)->i_name; + } else { + name = NULL; + } + /* NTH: for now we still call these "sessions" */ + backend_abort(true, "session", abort_comm, name, error_code, arglist); + va_end(arglist); +} + void ompi_mpi_errors_return_comm_handler(struct ompi_communicator_t **comm, int *error_code, ...) { @@ -208,6 +232,18 @@ void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win, } +void ompi_mpi_errors_return_instance_handler (struct ompi_instance_t **instance, + int *error_code, ...) +{ + /* Don't need anything more -- just need this function to exist */ + /* Silence some compiler warnings */ + + va_list arglist; + va_start(arglist, error_code); + va_end(arglist); +} + + static void out(char *str, char *arg) { if (ompi_rte_initialized && diff --git a/ompi/errhandler/errhandler_predefined.h b/ompi/errhandler/errhandler_predefined.h index 07e306e9a08..c663d962220 100644 --- a/ompi/errhandler/errhandler_predefined.h +++ b/ompi/errhandler/errhandler_predefined.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -9,6 +10,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +27,7 @@ struct ompi_communicator_t; struct ompi_file_t; struct ompi_win_t; +struct ompi_instance_t; /** * Handler function for MPI_ERRORS_ARE_FATAL @@ -34,6 +38,8 @@ OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **f int *error_code, ...); OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win, int *error_code, ...); +OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_instance_handler(struct ompi_instance_t **win, + int *error_code, ...); /** * Handler function for MPI_ERRORS_ABORT @@ -54,6 +60,8 @@ OMPI_DECLSPEC void ompi_mpi_errors_return_file_handler(struct ompi_file_t **file int *error_code, ...); OMPI_DECLSPEC void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win, int *error_code, ...); +OMPI_DECLSPEC void ompi_mpi_errors_return_instance_handler(struct ompi_instance_t **win, + int *error_code, ...); #endif /* OMPI_ERRHANDLER_PREDEFINED_H */ diff --git a/ompi/file/file.c b/ompi/file/file.c index bf546a55694..a0b466e3910 100644 --- a/ompi/file/file.c +++ b/ompi/file/file.c @@ -16,6 +16,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -54,6 +56,7 @@ ompi_predefined_file_t *ompi_mpi_file_null_addr = &ompi_mpi_file_null; */ static void file_constructor(ompi_file_t *obj); static void file_destructor(ompi_file_t *obj); +static int ompi_file_finalize (void); /* @@ -89,6 +92,7 @@ int ompi_file_init(void) &ompi_mpi_file_null.file); /* All done */ + ompi_mpi_instance_append_finalize (ompi_file_finalize); return OMPI_SUCCESS; } @@ -160,10 +164,14 @@ int ompi_file_close(ompi_file_t **file) } -/* - * Shut down the MPI_File bookkeeping +/** + * Tear down MPI_File handling. + * + * @retval OMPI_SUCCESS Always. + * + * Invoked during instance teardown if ompi_file_init() is called. */ -int ompi_file_finalize(void) +static int ompi_file_finalize (void) { int i, max; size_t num_unnamed; diff --git a/ompi/file/file.h b/ompi/file/file.h index bb50903ae5d..cb90c56fa6c 100644 --- a/ompi/file/file.h +++ b/ompi/file/file.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -16,6 +16,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 University of Houston. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -185,15 +187,6 @@ int ompi_file_set_name(ompi_file_t *file, char *name); */ int ompi_file_close(ompi_file_t **file); -/** - * Tear down MPI_File handling. - * - * @retval OMPI_SUCCESS Always. - * - * Invoked during ompi_mpi_finalize(). - */ -int ompi_file_finalize(void); - /** * Check to see if an MPI_File handle is valid. * diff --git a/ompi/group/group.c b/ompi/group/group.c index ad60a0d7ea7..dff579aba43 100644 --- a/ompi/group/group.c +++ b/ompi/group/group.c @@ -18,6 +18,7 @@ * reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights * $COPYRIGHT$ * * Additional copyrights may follow @@ -606,3 +607,36 @@ int ompi_group_count_local_peers (ompi_group_t *group) return local_peers; } + +int ompi_group_to_proc_name_array (ompi_group_t *group, opal_process_name_t **name_array, size_t *name_array_size) +{ + opal_process_name_t *array = calloc (group->grp_proc_count, sizeof (array[0])); + + if (NULL == array) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (int i = 0 ; i < group->grp_proc_count ; ++i) { + array[i] = ompi_group_get_proc_name (group, i); + } + + *name_array = array; + *name_array_size = group->grp_proc_count; + + return OMPI_SUCCESS; +} + +bool ompi_group_overlap (const ompi_group_t *group1, const ompi_group_t *group2) +{ + for (int i = 0 ; i < group1->grp_proc_count ; ++i) { + opal_process_name_t proc1 = ompi_group_get_proc_name (group1, i); + for (int j = 0 ; j < group2->grp_proc_count ; ++j) { + opal_process_name_t proc2 = ompi_group_get_proc_name (group2, j); + if (0 == opal_compare_proc (proc1, proc2)) { + return true; + } + } + } + + return false; +} diff --git a/ompi/group/group.h b/ompi/group/group.h index 966ab5f8306..1e87ecd8556 100644 --- a/ompi/group/group.h +++ b/ompi/group/group.h @@ -14,10 +14,12 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,6 +41,7 @@ #include "opal/class/opal_pointer_array.h" #include "opal/mca/threads/threads.h" #include "opal/util/output.h" +#include "ompi/instance/instance.h" BEGIN_C_DECLS @@ -98,6 +101,8 @@ struct ompi_group_t { struct ompi_group_strided_data_t grp_strided; struct ompi_group_bitmap_data_t grp_bitmap; } sparse_data; + + ompi_instance_t *grp_instance; /**< instance this group was allocated within */ }; typedef struct ompi_group_t ompi_group_t; @@ -168,6 +173,19 @@ ompi_group_t *ompi_group_allocate_sporadic(int group_size); ompi_group_t *ompi_group_allocate_strided(void); ompi_group_t *ompi_group_allocate_bmap(int orig_group_size, int group_size); +/** + * @brief Allocate a dense group from a group + * + * @param[in] group group + * + * @returns new group pointer on success + * @returns NULL on error + * + * This function duplicates a group. The new group will have a dense process + * table. + */ +ompi_group_t *ompi_group_flatten (ompi_group_t *group, int max_procs); + /** * Increment the reference count of the proc structures. * @@ -193,14 +211,6 @@ OMPI_DECLSPEC void ompi_group_decrement_proc_count(ompi_group_t *group); int ompi_group_init(void); -/** - * Clean up OMPI group infrastructure. - * - * @return Error code - */ -int ompi_group_finalize(void); - - /** * Get group size. * @@ -384,15 +394,15 @@ static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int ran #if OMPI_GROUP_SPARSE do { if (OMPI_GROUP_IS_DENSE(group)) { - return ompi_group_dense_lookup (group, rank, allocate); + break; } int ranks1 = rank; ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); group = group->grp_parent_group_ptr; } while (1); -#else - return ompi_group_dense_lookup (group, rank, allocate); #endif + + return ompi_group_dense_lookup (group, rank, allocate); } /** @@ -402,9 +412,23 @@ static inline ompi_proc_t *ompi_group_get_proc_ptr (ompi_group_t *group, int ran * or cached in the proc hash table) or a sentinel value representing the proc. This * differs from ompi_group_get_proc_ptr() which returns the ompi_proc_t or NULL. */ -ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank); +static inline ompi_proc_t *ompi_group_get_proc_ptr_raw (const ompi_group_t *group, int rank) +{ +#if OMPI_GROUP_SPARSE + do { + if (OMPI_GROUP_IS_DENSE(group)) { + break; + } + int ranks1 = rank; + ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); + group = group->grp_parent_group_ptr; + } while (1); +#endif -static inline opal_process_name_t ompi_group_get_proc_name (ompi_group_t *group, int rank) + return group->grp_proc_pointers[rank]; +} + +static inline opal_process_name_t ompi_group_get_proc_name (const ompi_group_t *group, int rank) { ompi_proc_t *proc = ompi_group_get_proc_ptr_raw (group, rank); if (ompi_proc_is_sentinel (proc)) { @@ -472,6 +496,17 @@ bool ompi_group_have_remote_peers (ompi_group_t *group); */ int ompi_group_count_local_peers (ompi_group_t *group); +/** + * @brief Check if groups overlap + * + * @param[in] group1 ompi group + * @param[in] group2 ompi group + * + * @returns true if any proc in group1 is also in group2 + * @returns false otherwise + */ +bool ompi_group_overlap (const ompi_group_t *group1, const ompi_group_t *group2); + /** * Function to print the group info */ @@ -482,5 +517,19 @@ int ompi_group_dump (ompi_group_t* group); */ int ompi_group_div_ceil (int num, int den); +/** + * Create a process name array from a group + */ +int ompi_group_to_proc_name_array (ompi_group_t *group, opal_process_name_t **name_array, size_t *name_array_size); + +/** + * Return instance from a group + */ + +static inline ompi_instance_t *ompi_group_get_instance(ompi_group_t *group) +{ + return group->grp_instance; +} + END_C_DECLS #endif /* OMPI_GROUP_H */ diff --git a/ompi/group/group_init.c b/ompi/group/group_init.c index fed47997218..3a92d888f2e 100644 --- a/ompi/group/group_init.c +++ b/ompi/group/group_init.c @@ -16,6 +16,8 @@ * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,6 +34,8 @@ static void ompi_group_construct(ompi_group_t *); static void ompi_group_destruct(ompi_group_t *); +static int ompi_group_finalize (void); + OBJ_CLASS_INSTANCE(ompi_group_t, opal_object_t, ompi_group_construct, @@ -227,6 +231,71 @@ ompi_group_t *ompi_group_allocate_bmap(int orig_group_size , int group_size) return new_group; } +/** + * @brief Allocate a dense group from a group + * + * @param[in] group group + * + * @returns new group pointer on success + * @returns NULL on error + * + * This function duplicates a group. The new group will have a dense process + * table. + */ +ompi_group_t *ompi_group_flatten (ompi_group_t *group, int max_procs) +{ + int proc_count = (max_procs > group->grp_proc_count) ? group->grp_proc_count : max_procs; + size_t proc_pointer_array_size = proc_count * sizeof (group->grp_proc_pointers[0]); + ompi_group_t *new_group = OBJ_NEW(ompi_group_t);; + if (NULL == new_group) { + return NULL; + } + + if (0 > new_group->grp_f_to_c_index) { + OBJ_RELEASE (new_group); + return NULL; + } + + if (0 != proc_count) { + new_group->grp_proc_pointers = malloc (proc_pointer_array_size); + if (OPAL_UNLIKELY(NULL == new_group->grp_proc_pointers)) { + OBJ_RELEASE(new_group); + return NULL; + } + + /* + * Allocate array of (ompi_proc_t *)'s, one for each + * process in the group. + */ + if (!OMPI_GROUP_IS_DENSE(group)) { + for (int i = 0 ; i < proc_count ; i++) { + new_group->grp_proc_pointers[i] = ompi_group_peer_lookup (group, i); + } + } else { + memcpy (new_group->grp_proc_pointers, group->grp_proc_pointers, proc_pointer_array_size); + } + } + + /* set the group size */ + new_group->grp_proc_count = proc_count; + + if (group->grp_my_rank >= max_procs) { + /* initialize our rank to MPI_UNDEFINED */ + new_group->grp_my_rank = MPI_UNDEFINED; + } else { + /* rank is the same as in the old group */ + new_group->grp_my_rank = group->grp_my_rank; + } + + new_group->grp_instance = group->grp_instance; + + OMPI_GROUP_SET_DENSE(new_group); + + ompi_group_increment_proc_count (new_group); + + return new_group; +} + /* * increment the reference count of the proc structures */ @@ -363,6 +432,8 @@ int ompi_group_init(void) ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_DENSE; ompi_mpi_group_empty.group.grp_flags |= OMPI_GROUP_INTRINSIC; + ompi_mpi_instance_append_finalize (ompi_group_finalize); + return OMPI_SUCCESS; } @@ -370,7 +441,7 @@ int ompi_group_init(void) /* * Clean up group infrastructure */ -int ompi_group_finalize(void) +static int ompi_group_finalize (void) { ompi_mpi_group_null.group.grp_flags = 0; OBJ_DESTRUCT(&ompi_mpi_group_null); diff --git a/ompi/group/group_plist.c b/ompi/group/group_plist.c index 16816a20659..771bd921efa 100644 --- a/ompi/group/group_plist.c +++ b/ompi/group/group_plist.c @@ -61,38 +61,6 @@ static int ompi_group_dense_overlap (ompi_group_t *group1, ompi_group_t *group2, return overlap_count; } -static struct ompi_proc_t *ompi_group_dense_lookup_raw (ompi_group_t *group, const int peer_id) -{ - if (OPAL_UNLIKELY(ompi_proc_is_sentinel (group->grp_proc_pointers[peer_id]))) { - ompi_proc_t *proc = - (ompi_proc_t *) ompi_proc_lookup (ompi_proc_sentinel_to_name ((uintptr_t) group->grp_proc_pointers[peer_id])); - if (NULL != proc) { - /* replace sentinel value with an actual ompi_proc_t */ - group->grp_proc_pointers[peer_id] = proc; - /* retain the proc */ - OBJ_RETAIN(group->grp_proc_pointers[peer_id]); - } - } - - return group->grp_proc_pointers[peer_id]; -} - -ompi_proc_t *ompi_group_get_proc_ptr_raw (ompi_group_t *group, int rank) -{ -#if OMPI_GROUP_SPARSE - do { - if (OMPI_GROUP_IS_DENSE(group)) { - return ompi_group_dense_lookup_raw (group, rank); - } - int ranks1 = rank; - ompi_group_translate_ranks (group, 1, &ranks1, group->grp_parent_group_ptr, &rank); - group = group->grp_parent_group_ptr; - } while (1); -#else - return ompi_group_dense_lookup_raw (group, rank); -#endif -} - int ompi_group_calc_plist ( int n , const int *ranks ) { return sizeof(char *) * n ; } diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index 66e0c51e3fb..5b2ec70b708 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -22,6 +22,10 @@ * Copyright (c) 2017-2019 IBM Corporation. All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -99,6 +103,12 @@ /* Maximum length of processor names (default is 256) */ #undef OPAL_MAX_PROCESSOR_NAME +/* Maximum length of processor names (default is 1024) */ +#undef OPAL_MAX_PSET_NAME_LEN + +/* Maximum length of from group tag (default is 256) */ +#undef OPAL_MAX_STRINGTAG_LEN + /* The number or Fortran INTEGER in MPI Status */ #undef OMPI_FORTRAN_STATUS_SIZE @@ -418,6 +428,8 @@ typedef struct mca_base_var_enum_t *MPI_T_enum; typedef struct ompi_mpit_cvar_handle_t *MPI_T_cvar_handle; typedef struct mca_base_pvar_handle_t *MPI_T_pvar_handle; typedef struct mca_base_pvar_session_t *MPI_T_pvar_session; +typedef struct ompi_instance_t *MPI_Session; + /* * MPI_Status @@ -457,6 +469,7 @@ typedef int (MPI_Datarep_extent_function)(MPI_Datatype, MPI_Aint *, void *); typedef int (MPI_Datarep_conversion_function)(void *, MPI_Datatype, int, void *, MPI_Offset, void *); typedef void (MPI_Comm_errhandler_function)(MPI_Comm *, int *, ...); +typedef void (MPI_Session_errhandler_function) (MPI_Session *, int *, ...); /* This is a little hackish, but errhandler.h needs space for a MPI_File_errhandler_function. While it could just be removed, this @@ -475,6 +488,7 @@ typedef int (MPI_Type_delete_attr_function)(MPI_Datatype, int, typedef int (MPI_Win_copy_attr_function)(MPI_Win, int, void *, void *, void *, int *); typedef int (MPI_Win_delete_attr_function)(MPI_Win, int, void *, void *); +typedef int (MPI_Session_delete_attr_function)(MPI_Session, int, void *, void *); typedef int (MPI_Grequest_query_function)(void *, MPI_Status *); typedef int (MPI_Grequest_free_function)(void *); typedef int (MPI_Grequest_cancel_function)(void *, int); @@ -529,6 +543,8 @@ typedef MPI_Win_errhandler_function MPI_Win_errhandler_fn #define MPI_DISTRIBUTE_CYCLIC 1 /* cyclic distribution */ #define MPI_DISTRIBUTE_NONE 2 /* not distributed */ #define MPI_DISTRIBUTE_DFLT_DARG (-1) /* default distribution arg */ +#define MPI_MAX_PSET_NAME_LEN OPAL_MAX_PSET_NAME_LEN /* max pset name len */ +#define MPI_MAX_STRINGTAG_LEN OPAL_MAX_STRINGTAG_LEN /* max length of string arg to comm from group funcs*/ /* * Since these values are arbitrary to Open MPI, we might as well make @@ -599,6 +615,7 @@ enum { /* MPI-4 */ MPI_FT, /* used by OPAL_ENABLE_FT_MPI */ + MPI_ATTR_KEY_LAST, }; /* @@ -692,6 +709,15 @@ enum { error codes without breaking ABI. */ #define MPI_ERR_LASTCODE 92 +/* + * Session flags valid for MPI_Session_init + */ + +#define MPI_FLAG_THREAD_NONCONCURRENT_SINGLE 0x00000001ul +#define MPI_FLAG_THREAD_NONCONCURRENT_FUNNELED 0x00000002ul +#define MPI_FLAG_THREAD_NONCONCURRENT_SERIALIZED 0x00000004ul +#define MPI_FLAG_THREAD_CONCURRENT 0x00000008ul + /* * Comparison results. Don't change the order of these, the group * comparison functions rely on it. @@ -827,6 +853,7 @@ enum { /* * NULL handles */ +#define MPI_SESSION_NULL OMPI_PREDEFINED_GLOBAL(MPI_Session, ompi_mpi_instance_null) #define MPI_GROUP_NULL OMPI_PREDEFINED_GLOBAL(MPI_Group, ompi_mpi_group_null) #define MPI_COMM_NULL OMPI_PREDEFINED_GLOBAL(MPI_Comm, ompi_mpi_comm_null) #define MPI_REQUEST_NULL OMPI_PREDEFINED_GLOBAL(MPI_Request, ompi_request_null) @@ -960,6 +987,8 @@ OMPI_DECLSPEC extern struct ompi_predefined_communicator_t ompi_mpi_comm_null; OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_empty; OMPI_DECLSPEC extern struct ompi_predefined_group_t ompi_mpi_group_null; +OMPI_DECLSPEC extern struct ompi_predefined_instance_t ompi_mpi_instance_null; + OMPI_DECLSPEC extern struct ompi_predefined_request_t ompi_request_null; OMPI_DECLSPEC extern struct ompi_predefined_message_t ompi_message_null; @@ -1285,6 +1314,11 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub; #define PMPI_Aint_add(base, disp) MPI_Aint_add(base, disp) #define PMPI_Aint_diff(addr1, addr2) MPI_Aint_diff(addr1, addr2) +/* + * Predefined info keys + */ +#define MPI_INFO_KEY_SESSION_PSET_SIZE "size" + /* * MPI API */ @@ -1372,6 +1406,8 @@ OMPI_DECLSPEC int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_copy MPI_Comm_delete_attr_function *comm_delete_attr_fn, int *comm_keyval, void *extra_state); OMPI_DECLSPEC int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm); +OMPI_DECLSPEC int MPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info, + MPI_Errhandler errhandler, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); OMPI_DECLSPEC int MPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval); OMPI_DECLSPEC int MPI_Comm_disconnect(MPI_Comm *comm); @@ -1592,6 +1628,7 @@ OMPI_DECLSPEC int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC MPI_Group MPI_Group_f2c(MPI_Fint group); OMPI_DECLSPEC int MPI_Group_free(MPI_Group *group); +OMPI_DECLSPEC int MPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup); OMPI_DECLSPEC int MPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, @@ -1633,6 +1670,9 @@ OMPI_DECLSPEC int MPI_Init_thread(int *argc, char ***argv, int required, OMPI_DECLSPEC int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +OMPI_DECLSPEC int MPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm); OMPI_DECLSPEC int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); OMPI_DECLSPEC int MPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, @@ -1793,6 +1833,25 @@ OMPI_DECLSPEC int MPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatype OMPI_DECLSPEC int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC MPI_Fint MPI_Session_c2f (const MPI_Session session); +OMPI_DECLSPEC int MPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int MPI_Session_create_keyval (MPI_Session_delete_attr_function *session_delete_attr_fn, + int *session_keyval, void *extra_state); +OMPI_DECLSPEC int MPI_Session_delete_attr (MPI_Session session, int session_keyval); +OMPI_DECLSPEC int MPI_Session_finalize (MPI_Session *session); +OMPI_DECLSPEC int MPI_Session_free_keyval(int *session_keyval); +OMPI_DECLSPEC int MPI_Session_get_attr (MPI_Session session, int session_keyval, + void *attribute_val, int *flag); +OMPI_DECLSPEC int MPI_Session_get_info (MPI_Session session, MPI_Info *info_used); +OMPI_DECLSPEC int MPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names); +OMPI_DECLSPEC int MPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name); +OMPI_DECLSPEC int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used); +OMPI_DECLSPEC int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, + MPI_Session *session); +OMPI_DECLSPEC MPI_Session MPI_Session_f2c (MPI_Fint session); +OMPI_DECLSPEC int MPI_Session_set_attr (MPI_Session session, int session_keyval, void *attribute_val); +OMPI_DECLSPEC int MPI_Session_set_info (MPI_Session session, MPI_Info info); OMPI_DECLSPEC int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); @@ -2067,6 +2126,8 @@ OMPI_DECLSPEC int PMPI_Comm_create_keyval(MPI_Comm_copy_attr_function *comm_cop MPI_Comm_delete_attr_function *comm_delete_attr_fn, int *comm_keyval, void *extra_state); OMPI_DECLSPEC int PMPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm); +OMPI_DECLSPEC int PMPI_Comm_create_from_group(MPI_Group group, const char *tag, MPI_Info info, + MPI_Errhandler errhandler, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm); OMPI_DECLSPEC int PMPI_Comm_delete_attr(MPI_Comm comm, int comm_keyval); OMPI_DECLSPEC int PMPI_Comm_disconnect(MPI_Comm *comm); @@ -2268,6 +2329,7 @@ OMPI_DECLSPEC int PMPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC MPI_Group PMPI_Group_f2c(MPI_Fint group); OMPI_DECLSPEC int PMPI_Group_free(MPI_Group *group); +OMPI_DECLSPEC int PMPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup); OMPI_DECLSPEC int PMPI_Group_incl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); OMPI_DECLSPEC int PMPI_Group_intersection(MPI_Group group1, MPI_Group group2, @@ -2309,6 +2371,9 @@ OMPI_DECLSPEC int PMPI_Init_thread(int *argc, char ***argv, int required, OMPI_DECLSPEC int PMPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +OMPI_DECLSPEC int PMPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm); OMPI_DECLSPEC int PMPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); OMPI_DECLSPEC int PMPI_Iprobe(int source, int tag, MPI_Comm comm, int *flag, @@ -2469,6 +2534,25 @@ OMPI_DECLSPEC int PMPI_Sendrecv(const void *sendbuf, int sendcount, MPI_Datatyp OMPI_DECLSPEC int PMPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status *status); +OMPI_DECLSPEC MPI_Fint PMPI_Session_c2f (const MPI_Session session); +OMPI_DECLSPEC int PMPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, + MPI_Errhandler *errhandler); +OMPI_DECLSPEC int PMPI_Session_create_keyval (MPI_Session_delete_attr_function *session_delete_attr_fn, + int *session_keyval, void *extra_state); +OMPI_DECLSPEC int PMPI_Session_delete_attr (MPI_Session session, int session_keyval); +OMPI_DECLSPEC int PMPI_Session_finalize (MPI_Session *session); +OMPI_DECLSPEC int PMPI_Session_free_keyval(int *session_keyval); +OMPI_DECLSPEC int PMPI_Session_get_attr (MPI_Session datatype, int session_keyval, + void *attribute_val, int *flag); +OMPI_DECLSPEC int PMPI_Session_get_info (MPI_Session session, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names); +OMPI_DECLSPEC int PMPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name); +OMPI_DECLSPEC int PMPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used); +OMPI_DECLSPEC int PMPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, + MPI_Session *session); +OMPI_DECLSPEC MPI_Session PMPI_Session_f2c (MPI_Fint session); +OMPI_DECLSPEC int PMPI_Session_set_attr (MPI_Session session, int session_keyval, void *attribute_val); +OMPI_DECLSPEC int PMPI_Session_set_info (MPI_Session session, MPI_Info info); OMPI_DECLSPEC int PMPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); diff --git a/ompi/include/mpif-config.h.in b/ompi/include/mpif-config.h.in index a3a6d7b0c1e..fc2054df637 100644 --- a/ompi/include/mpif-config.h.in +++ b/ompi/include/mpif-config.h.in @@ -13,6 +13,8 @@ ! Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved ! Copyright (c) 2013 Los Alamos National Security, LLC. All rights ! reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -60,6 +62,8 @@ integer MPI_MAX_INFO_VAL integer MPI_MAX_PORT_NAME integer MPI_MAX_DATAREP_STRING + integer MPI_MAX_PSET_NAME_LEN + integer MPI_MAX_STRINGTAG_LEN parameter (MPI_MAX_PROCESSOR_NAME=@OPAL_MAX_PROCESSOR_NAME@-1) parameter (MPI_MAX_ERROR_STRING=@OPAL_MAX_ERROR_STRING@-1) parameter (MPI_MAX_OBJECT_NAME=@OPAL_MAX_OBJECT_NAME@-1) @@ -68,6 +72,8 @@ parameter (MPI_MAX_INFO_VAL=@OPAL_MAX_INFO_VAL@-1) parameter (MPI_MAX_PORT_NAME=@OPAL_MAX_PORT_NAME@-1) parameter (MPI_MAX_DATAREP_STRING=@OPAL_MAX_DATAREP_STRING@-1) + parameter (MPI_MAX_PSET_NAME_LEN=@OPAL_MAX_PSET_NAME_LEN@-1) + parameter (MPI_MAX_STRINGTAG_LEN=@OPAL_MAX_STRINGTAG_LEN@-1) ! ! MPI F08 conformance diff --git a/ompi/info/info.c b/ompi/info/info.c index 6785fde5dfa..da9500bb6df 100644 --- a/ompi/info/info.c +++ b/ompi/info/info.c @@ -17,7 +17,7 @@ * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -54,6 +54,7 @@ #include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/params.h" #include "ompi/runtime/ompi_rte.h" +#include "ompi/instance/instance.h" /* * Global variables @@ -86,9 +87,9 @@ opal_pointer_array_t ompi_info_f_to_c_table = {{0}}; * fortran to C translation table. It also fills in the values * for the MPI_INFO_GET_ENV object */ + int ompi_mpiinfo_init(void) { - char *cptr, **tmp; /* initialize table */ @@ -102,10 +103,26 @@ int ompi_mpiinfo_init(void) OBJ_CONSTRUCT(&ompi_mpi_info_null.info, ompi_info_t); assert(ompi_mpi_info_null.info.i_f_to_c_index == 0); - /* Create MPI_INFO_ENV */ + /* Create MPI_INFO_ENV - we create here for the f_to_c. Can't fill in + here because most info needed is only available after a call to + ompi_rte_init. */ OBJ_CONSTRUCT(&ompi_mpi_info_env.info, ompi_info_t); assert(ompi_mpi_info_env.info.i_f_to_c_index == 1); + ompi_mpi_instance_append_finalize (ompi_mpiinfo_finalize); + + /* All done */ + + return OMPI_SUCCESS; +} + +/* + * Fill in the MPI_INFO_ENV if using MPI3 initialization + */ +int ompi_mpiinfo_init_mpi3(void) +{ + char *cptr, **tmp; + /* fill the env info object */ /* command for this app_context */ @@ -368,6 +385,31 @@ static void info_destructor(ompi_info_t *info) } +ompi_info_t *ompi_info_allocate (void) +{ + ompi_info_t *new_info; + int rc; + + rc = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { + /* NTH: seriously, what can we do other than abort () or return? we failed to + * set up the most basic infrastructure! */ + return NULL; + } + + /* + * Call the object create function. This function not only + * allocates the space for MPI_Info, but also calls all the + * relevant init functions. Should I check if the fortran + * handle is valid + */ + new_info = OBJ_NEW(ompi_info_t); + if (NULL == new_info) { + return NULL; + } + + return new_info; +} /* * Free an info handle and all of its keys and values. @@ -377,5 +419,9 @@ int ompi_info_free (ompi_info_t **info) (*info)->i_freed = true; OBJ_RELEASE(*info); *info = MPI_INFO_NULL; + + /* release the retain() from info create/dup */ + ompi_mpi_instance_release (); + return MPI_SUCCESS; } diff --git a/ompi/info/info.h b/ompi/info/info.h index 4fffe6df14c..87df44c03e2 100644 --- a/ompi/info/info.h +++ b/ompi/info/info.h @@ -16,6 +16,9 @@ * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -83,11 +86,17 @@ OMPI_DECLSPEC extern ompi_predefined_info_t *ompi_mpi_info_null_addr; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_info_t); /** - * This function is invoked during ompi_mpi_init() and sets up + * This function is invoked during ompi_instance_retain() and sets up * MPI_Info handling. */ int ompi_mpiinfo_init(void); +/** + * This function is invoked during ompi_mpi_init() and sets up + * the MPI_INFO_ENV object + */ +int ompi_mpiinfo_init_mpi3(void); + /** * This function is used to free a ompi level info */ @@ -155,6 +164,15 @@ OMPI_DECLSPEC int ompi_info_value_to_bool(char *value, bool *interp); OMPI_DECLSPEC int ompi_info_get_nkeys(ompi_info_t *info, int *nkeys); +/** + * @brief Allocate a new info object + * + * This helper function ensures that the minimum infrastructure is initialized + * for creation/modification/destruction of an info object. Do not call + * OBJ_NEW(opal_info_t) directly. + */ +OMPI_DECLSPEC ompi_info_t *ompi_info_allocate (void); + END_C_DECLS /** diff --git a/ompi/instance/Makefile.am b/ompi/instance/Makefile.am new file mode 100644 index 00000000000..2ee7f5d59a3 --- /dev/null +++ b/ompi/instance/Makefile.am @@ -0,0 +1,26 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2016 IBM Corporation. All rights reserved. +# Copyright (c) 2018 Triad National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from ompi/Makefile.am + +headers += instance/instance.h + +lib@OMPI_LIBMPI_NAME@_la_SOURCES += instance/instance.c diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c new file mode 100644 index 00000000000..104ac40f69a --- /dev/null +++ b/ompi/instance/instance.c @@ -0,0 +1,1266 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "instance.h" + +#include "opal/util/arch.h" + +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/runtime/opal_params.h" + +#include "ompi/mca/pml/pml.h" +#include "ompi/runtime/params.h" + +#include "ompi/interlib/interlib.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/errhandler/errcode.h" +#include "ompi/message/message.h" +#include "ompi/info/info.h" +#include "ompi/attribute/attribute.h" +#include "ompi/op/op.h" +#include "ompi/dpm/dpm.h" +#include "ompi/file/file.h" +#include "ompi/mpiext/mpiext.h" + +#include "ompi/mca/hook/base/base.h" +#include "ompi/mca/op/base/base.h" +#include "opal/mca/allocator/base/base.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/mpool/base/base.h" +#include "ompi/mca/bml/base/base.h" +#include "ompi/mca/pml/base/base.h" +#include "ompi/mca/coll/base/base.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/io/base/base.h" +#include "ompi/mca/topo/base/base.h" +#include "opal/mca/pmix/base/base.h" + +#include "opal/mca/mpool/base/mpool_base_tree.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/util/timings.h" +#include "opal/mca/pmix/pmix-internal.h" + +ompi_predefined_instance_t ompi_mpi_instance_null = {{{{0}}}}; + +static opal_recursive_mutex_t instance_lock = OPAL_RECURSIVE_MUTEX_STATIC_INIT; + +/** MPI_Init instance */ +ompi_instance_t *ompi_mpi_instance_default = NULL; + +enum { + OMPI_INSTANCE_INITIALIZING = -1, + OMPI_INSTANCE_FINALIZING = -2, +}; + +opal_atomic_int32_t ompi_instance_count = 0; + +static const char *ompi_instance_builtin_psets[] = { + "mpi://WORLD", + "mpi://SELF", + "mpix://SHARED", +}; + +static const int32_t ompi_instance_builtin_count = 3; + +/** finalization functions that need to be called on teardown */ +static opal_finalize_domain_t ompi_instance_basic_domain; +static opal_finalize_domain_t ompi_instance_common_domain; + +static void ompi_instance_construct (ompi_instance_t *instance) +{ + instance->i_f_to_c_index = opal_pointer_array_add (&ompi_instance_f_to_c_table, instance); + instance->i_name[0] = '\0'; + instance->i_flags = 0; + instance->i_keyhash = NULL; + instance->errhandler_type = OMPI_ERRHANDLER_TYPE_INSTANCE; +} + +OBJ_CLASS_INSTANCE(ompi_instance_t, opal_infosubscriber_t, ompi_instance_construct, NULL); + +/* NTH: frameworks needed by MPI */ +static mca_base_framework_t *ompi_framework_dependencies[] = { + &ompi_hook_base_framework, &ompi_op_base_framework, + &opal_allocator_base_framework, &opal_rcache_base_framework, &opal_mpool_base_framework, + &ompi_bml_base_framework, &ompi_pml_base_framework, &ompi_coll_base_framework, + &ompi_osc_base_framework, NULL, +}; + +static mca_base_framework_t *ompi_lazy_frameworks[] = { + &ompi_io_base_framework, &ompi_topo_base_framework, NULL, +}; + + +static int ompi_mpi_instance_finalize_common (void); + +/* + * Hash tables for MPI_Type_create_f90* functions + */ +opal_hash_table_t ompi_mpi_f90_integer_hashtable = {{0}}; +opal_hash_table_t ompi_mpi_f90_real_hashtable = {{0}}; +opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}}; + +static size_t ompi_mpi_instance_num_pmix_psets; +static char **ompi_mpi_instance_pmix_psets; +/* + * Per MPI-2:9.5.3, MPI_REGISTER_DATAREP is a memory leak. There is + * no way to *de*register datareps once they've been registered. So + * we have to track all registrations here so that they can be + * de-registered during MPI_FINALIZE so that memory-tracking debuggers + * don't show Open MPI as leaking memory. + */ +opal_list_t ompi_registered_datareps = {{0}}; + +opal_pointer_array_t ompi_instance_f_to_c_table = {{0}}; + +static int ompi_instance_print_error (const char *error, int ret) +{ + /* Only print a message if one was not already printed */ + if (NULL != error && OMPI_ERR_SILENT != ret) { + const char *err_msg = opal_strerror(ret); + opal_show_help("help-mpi-runtime.txt", + "mpi_init:startup:internal-failure", true, + "MPI_INIT", "MPI_INIT", error, err_msg, ret); + } + + return ret; +} + +static int ompi_mpi_instance_cleanup_pml (void) +{ + /* call del_procs on all allocated procs even though some may not be known + * to the pml layer. the pml layer is expected to be resilient and ignore + * any unknown procs. */ + size_t nprocs = 0; + ompi_proc_t **procs; + + procs = ompi_proc_get_allocated (&nprocs); + MCA_PML_CALL(del_procs(procs, nprocs)); + free(procs); + + return OMPI_SUCCESS; +} + +/** + * Static functions used to configure the interactions between the OPAL and + * the runtime. + */ +static char *_process_name_print_for_opal (const opal_process_name_t procname) +{ + ompi_process_name_t *rte_name = (ompi_process_name_t*)&procname; + return OMPI_NAME_PRINT(rte_name); +} + +static int _process_name_compare (const opal_process_name_t p1, const opal_process_name_t p2) +{ + ompi_process_name_t *o1 = (ompi_process_name_t *) &p1; + ompi_process_name_t *o2 = (ompi_process_name_t *) &p2; + return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); +} + +static int _convert_string_to_process_name (opal_process_name_t *name, const char* name_string) +{ + return ompi_rte_convert_string_to_process_name(name, name_string); +} + +static int _convert_process_name_to_string (char **name_string, const opal_process_name_t *name) +{ + return ompi_rte_convert_process_name_to_string(name_string, name); +} + +static int32_t ompi_mpi_instance_init_basic_count; +static bool ompi_instance_basic_init; + +void ompi_mpi_instance_release (void) +{ + opal_mutex_lock (&instance_lock); + + if (0 != --ompi_mpi_instance_init_basic_count) { + opal_mutex_unlock (&instance_lock); + return; + } + + opal_argv_free (ompi_mpi_instance_pmix_psets); + ompi_mpi_instance_pmix_psets = NULL; + + opal_finalize_cleanup_domain (&ompi_instance_basic_domain); + OBJ_DESTRUCT(&ompi_instance_basic_domain); + + opal_finalize_util (); + + opal_mutex_unlock (&instance_lock); +} + +int ompi_mpi_instance_retain (void) +{ + int ret; + + opal_mutex_lock (&instance_lock); + + if (0 < ompi_mpi_instance_init_basic_count++) { + opal_mutex_unlock (&instance_lock); + return OMPI_SUCCESS; + } + + /* Setup enough to check get/set MCA params */ + if (OPAL_SUCCESS != (ret = opal_init_util (NULL, NULL))) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_mpi_instance_init: opal_init_util failed", ret); + } + + ompi_instance_basic_init = true; + + OBJ_CONSTRUCT(&ompi_instance_basic_domain, opal_finalize_domain_t); + opal_finalize_domain_init (&ompi_instance_basic_domain, "ompi_mpi_instance_retain"); + opal_finalize_set_domain (&ompi_instance_basic_domain); + + /* Setup f to c table */ + OBJ_CONSTRUCT(&ompi_instance_f_to_c_table, opal_pointer_array_t); + if (OPAL_SUCCESS != opal_pointer_array_init (&ompi_instance_f_to_c_table, 8, + OMPI_FORTRAN_HANDLE_MAX, 32)) { + return OMPI_ERROR; + } + + /* setup the default error handler on instance_null */ + OBJ_CONSTRUCT(&ompi_mpi_instance_null, ompi_instance_t); + ompi_mpi_instance_null.instance.error_handler = &ompi_mpi_errors_return.eh; + + /* Convince OPAL to use our naming scheme */ + opal_process_name_print = _process_name_print_for_opal; + opal_compare_proc = _process_name_compare; + opal_convert_string_to_process_name = _convert_string_to_process_name; + opal_convert_process_name_to_string = _convert_process_name_to_string; + opal_proc_for_name = ompi_proc_for_name; + + /* Register MCA variables */ + if (OPAL_SUCCESS != (ret = ompi_mpi_register_params ())) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_mpi_init: ompi_register_mca_variables failed", ret); + } + + /* initialize error handlers */ + if (OMPI_SUCCESS != (ret = ompi_errhandler_init ())) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_errhandler_init() failed", ret); + } + + /* initialize error codes */ + if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init ())) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_mpi_errcode_init() failed", ret); + } + + /* initialize internal error codes */ + if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init ())) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_errcode_intern_init() failed", ret); + } + + /* initialize info */ + if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init ())) { + return ompi_instance_print_error ("ompi_info_init() failed", ret); + } + + ompi_instance_basic_init = false; + + opal_mutex_unlock (&instance_lock); + + return OMPI_SUCCESS; +} + +static void fence_release(pmix_status_t status, void *cbdata) +{ + volatile bool *active = (volatile bool*)cbdata; + OPAL_ACQUIRE_OBJECT(active); + *active = false; + OPAL_POST_OBJECT(active); +} + +static void evhandler_reg_callbk(pmix_status_t status, + size_t evhandler_ref, + void *cbdata) +{ + opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; + + lock->status = status; + OPAL_PMIX_WAKEUP_THREAD(lock); +} + +/** + * @brief Function that starts up the common components needed by all instances + */ +static int ompi_mpi_instance_init_common (void) +{ + int ret; + ompi_proc_t **procs; + size_t nprocs; + volatile bool active; + bool background_fence = false; + pmix_info_t info[2]; + pmix_status_t rc; + opal_pmix_lock_t mylock; + OMPI_TIMING_INIT(64); + + ret = ompi_mpi_instance_retain (); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; + } + + OBJ_CONSTRUCT(&ompi_instance_common_domain, opal_finalize_domain_t); + opal_finalize_domain_init (&ompi_instance_common_domain, "ompi_mpi_instance_init_common"); + opal_finalize_set_domain (&ompi_instance_common_domain); + + if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) { + return ompi_instance_print_error ("ompi_mpi_init: opal_arch_set_fortran_logical_size failed", ret); + } + + /* _After_ opal_init_util() but _before_ orte_init(), we need to + set an MCA param that tells libevent that it's ok to use any + mechanism in libevent that is available on this platform (e.g., + epoll and friends). Per opal/event/event.s, we default to + select/poll -- but we know that MPI processes won't be using + pty's with the event engine, so it's ok to relax this + constraint and let any fd-monitoring mechanism be used. */ + + ret = mca_base_var_find("opal", "event", "*", "event_include"); + if (ret >= 0) { + char *allvalue = "all"; + /* We have to explicitly "set" the MCA param value here + because libevent initialization will re-register the MCA + param and therefore override the default. Setting the value + here puts the desired value ("all") in different storage + that is not overwritten if/when the MCA param is + re-registered. This is unless the user has specified a different + value for this MCA parameter. Make sure we check to see if the + default is specified before forcing "all" in case that is not what + the user desires. Note that we do *NOT* set this value as an + environment variable, just so that it won't be inherited by + any spawned processes and potentially cause unintented + side-effects with launching RTE tools... */ + mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); + } + + OMPI_TIMING_NEXT("initialization"); + + /* Setup RTE */ + if (OMPI_SUCCESS != (ret = ompi_rte_init (NULL, NULL))) { + return ompi_instance_print_error ("ompi_mpi_init: ompi_rte_init failed", ret); + } + + /* open the ompi hook framework */ + for (int i = 0 ; ompi_framework_dependencies[i] ; ++i) { + ret = mca_base_framework_open (ompi_framework_dependencies[i], 0); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + char error_msg[256]; + snprintf (error_msg, sizeof(error_msg), "mca_base_framework_open on %s_%s failed", + ompi_framework_dependencies[i]->framework_project, + ompi_framework_dependencies[i]->framework_name); + return ompi_instance_print_error (error_msg, ret); + } + } + + OMPI_TIMING_NEXT("rte_init"); + OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup"); + OMPI_TIMING_IMPORT_OPAL("rte_init"); + + ompi_rte_initialized = true; + + /* Register the default errhandler callback */ + /* we want to go first */ + PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_PREPEND, NULL, PMIX_BOOL); + /* give it a name so we can distinguish it */ + PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING); + OPAL_PMIX_CONSTRUCT_LOCK(&mylock); + PMIx_Register_event_handler(NULL, 0, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock); + OPAL_PMIX_WAIT_THREAD(&mylock); + rc = mylock.status; + OPAL_PMIX_DESTRUCT_LOCK(&mylock); + PMIX_INFO_DESTRUCT(&info[0]); + PMIX_INFO_DESTRUCT(&info[1]); + if (PMIX_SUCCESS != rc) { + ret = opal_pmix_convert_status(rc); + return ret; + } + + /* initialize info */ + if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init_mpi3())) { + return ompi_instance_print_error ("ompi_info_init_mpi3() failed", ret); + } + + /* declare our presence for interlib coordination, and + * register for callbacks when other libs declare. XXXXXX -- TODO -- figure out how + * to specify the thread level when different instances may request different levels. */ + if (OMPI_SUCCESS != (ret = ompi_interlib_declare(MPI_THREAD_MULTIPLE, OMPI_IDENT_STRING))) { + return ompi_instance_print_error ("ompi_interlib_declare", ret); + } + + /* initialize datatypes. This step should be done early as it will + * create the local convertor and local arch used in the proc + * init. + */ + if (OMPI_SUCCESS != (ret = ompi_datatype_init())) { + return ompi_instance_print_error ("ompi_datatype_init() failed", ret); + } + + /* Initialize OMPI procs */ + if (OMPI_SUCCESS != (ret = ompi_proc_init())) { + return ompi_instance_print_error ("mca_proc_init() failed", ret); + } + + /* Initialize the op framework. This has to be done *after* + ddt_init, but befor mca_coll_base_open, since some collective + modules (e.g., the hierarchical coll component) may need ops in + their query function. */ + if (OMPI_SUCCESS != (ret = ompi_op_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { + return ompi_instance_print_error ("ompi_op_base_find_available() failed", ret); + } + + if (OMPI_SUCCESS != (ret = ompi_op_init())) { + return ompi_instance_print_error ("ompi_op_init() failed", ret); + } + + /* In order to reduce the common case for MPI apps (where they + don't use MPI-2 IO or MPI-1/3 topology functions), the io and + topo frameworks are initialized lazily, at the first use of + relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*), + so they are not opened here. */ + + /* Select which MPI components to use */ + + if (OMPI_SUCCESS != (ret = mca_pml_base_select (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { + return ompi_instance_print_error ("mca_pml_base_select() failed", ret); + } + + OMPI_TIMING_IMPORT_OPAL("orte_init"); + OMPI_TIMING_NEXT("rte_init-commit"); + + /* exchange connection info - this function may also act as a barrier + * if data exchange is required. The modex occurs solely across procs + * in our job. If a barrier is required, the "modex" function will + * perform it internally */ + rc = PMIx_Commit(); + if (PMIX_SUCCESS != rc) { + ret = opal_pmix_convert_status(rc); + return ret; /* TODO: need to fix this */ + } + + OMPI_TIMING_NEXT("commit"); +#if (OPAL_ENABLE_TIMING) + if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && + opal_pmix_collect_all_data && !ompi_singleton) { + if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("timing: pmix-barrier-1 failed", ret); + } + OMPI_TIMING_NEXT("pmix-barrier-1"); + if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { + return ompi_instance_print_error ("timing: pmix-barrier-2 failed", ret); + } + OMPI_TIMING_NEXT("pmix-barrier-2"); + } +#endif + + if (!ompi_singleton) { + if (opal_pmix_base_async_modex) { + /* if we are doing an async modex, but we are collecting all + * data, then execute the non-blocking modex in the background. + * All calls to modex_recv will be cached until the background + * modex completes. If collect_all_data is false, then we skip + * the fence completely and retrieve data on-demand from the + * source node. + */ + if (opal_pmix_collect_all_data) { + /* execute the fence_nb in the background to collect + * the data */ + background_fence = true; + active = true; + OPAL_POST_OBJECT(&active); + PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); + if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, + fence_release, + (void*)&active))) { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + } + } + } else { + /* we want to do the modex - we block at this point, but we must + * do so in a manner that allows us to call opal_progress so our + * event library can be cycled as we have tied PMIx to that + * event base */ + active = true; + OPAL_POST_OBJECT(&active); + PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if( PMIX_SUCCESS != rc) { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence() failed", ret); + } + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } + } + + OMPI_TIMING_NEXT("modex"); + + /* select buffered send allocator component to be used */ + if (OMPI_SUCCESS != (ret = mca_pml_base_bsend_init ())) { + return ompi_instance_print_error ("mca_pml_base_bsend_init() failed", ret); + } + + if (OMPI_SUCCESS != (ret = mca_coll_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { + return ompi_instance_print_error ("mca_coll_base_find_available() failed", ret); + } + + if (OMPI_SUCCESS != (ret = ompi_osc_base_find_available (OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { + return ompi_instance_print_error ("ompi_osc_base_find_available() failed", ret); + } + + /* io and topo components are not selected here -- see comment + above about the io and topo frameworks being loaded lazily */ + + /* Initialize each MPI handle subsystem */ + /* initialize requests */ + if (OMPI_SUCCESS != (ret = ompi_request_init ())) { + return ompi_instance_print_error ("ompi_request_init() failed", ret); + } + + if (OMPI_SUCCESS != (ret = ompi_message_init ())) { + return ompi_instance_print_error ("ompi_message_init() failed", ret); + } + + /* initialize groups */ + if (OMPI_SUCCESS != (ret = ompi_group_init ())) { + return ompi_instance_print_error ("ompi_group_init() failed", ret); + } + + ompi_mpi_instance_append_finalize (ompi_mpi_instance_cleanup_pml); + + /* initialize communicator subsystem */ + if (OMPI_SUCCESS != (ret = ompi_comm_init ())) { + opal_mutex_unlock (&instance_lock); + return ompi_instance_print_error ("ompi_comm_init() failed", ret); + } + + if (mca_pml_base_requires_world ()) { + /* need to set up comm world for this instance -- XXX -- FIXME -- probably won't always + * be the case. */ + if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) { + return ompi_instance_print_error ("ompi_comm_init_mpi3 () failed", ret); + } + } + + /* initialize file handles */ + if (OMPI_SUCCESS != (ret = ompi_file_init ())) { + return ompi_instance_print_error ("ompi_file_init() failed", ret); + } + + /* initialize windows */ + if (OMPI_SUCCESS != (ret = ompi_win_init ())) { + return ompi_instance_print_error ("ompi_win_init() failed", ret); + } + + /* Setup the dynamic process management (DPM) subsystem */ + if (OMPI_SUCCESS != (ret = ompi_dpm_init ())) { + return ompi_instance_print_error ("ompi_dpm_init() failed", ret); + } + + + /* identify the architectures of remote procs and setup + * their datatype convertors, if required + */ + if (OMPI_SUCCESS != (ret = ompi_proc_complete_init())) { + return ompi_instance_print_error ("ompi_proc_complete_init failed", ret); + } + + /* start PML/BTL's */ + ret = MCA_PML_CALL(enable(true)); + if( OMPI_SUCCESS != ret ) { + return ompi_instance_print_error ("PML control failed", ret); + } + + /* some btls/mtls require we call add_procs with all procs in the job. + * since the btls/mtls have no visibility here it is up to the pml to + * convey this requirement */ + if (mca_pml_base_requires_world ()) { + if (NULL == (procs = ompi_proc_world (&nprocs))) { + return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret); + } + } else { + /* add all allocated ompi_proc_t's to PML (below the add_procs limit this + * behaves identically to ompi_proc_world ()) */ + if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) { + return ompi_instance_print_error ("ompi_proc_get_allocated () failed", ret); + } + } + + ret = MCA_PML_CALL(add_procs(procs, nprocs)); + free(procs); + /* If we got "unreachable", then print a specific error message. + Otherwise, if we got some other failure, fall through to print + a generic message. */ + if (OMPI_ERR_UNREACH == ret) { + opal_show_help("help-mpi-runtime.txt", + "mpi_init:startup:pml-add-procs-fail", true); + return ret; + } else if (OMPI_SUCCESS != ret) { + return ompi_instance_print_error ("PML add procs failed", ret); + } + + /* Determine the overall threadlevel support of all processes + in MPI_COMM_WORLD. This has to be done before calling + coll_base_comm_select, since some of the collective components + e.g. hierarch, might create subcommunicators. The threadlevel + requested by all processes is required in order to know + which cid allocation algorithm can be used. */ + if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) { + return ompi_instance_print_error ("ompi_mpi_init: ompi_comm_cid_init failed", ret); + } + + /* Do we need to wait for a debugger? */ + ompi_rte_wait_for_debugger(); + + /* Next timing measurement */ + OMPI_TIMING_NEXT("modex-barrier"); + + if (!ompi_singleton) { + /* if we executed the above fence in the background, then + * we have to wait here for it to complete. However, there + * is no reason to do two barriers! */ + if (background_fence) { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else if (!ompi_async_mpi_init) { + /* wait for everyone to reach this point - this is a hard + * barrier requirement at this time, though we hope to relax + * it at a later point */ + bool flag = false; + active = true; + OPAL_POST_OBJECT(&active); + PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); + if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, + fence_release, (void*)&active))) { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + } + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } + } + + /* check for timing request - get stop time and report elapsed + time if so, then start the clock again */ + OMPI_TIMING_NEXT("barrier"); + +#if OPAL_ENABLE_PROGRESS_THREADS == 0 + /* Start setting up the event engine for MPI operations. Don't + block in the event library, so that communications don't take + forever between procs in the dynamic code. This will increase + CPU utilization for the remainder of MPI_INIT when we are + blocking on RTE-level events, but may greatly reduce non-TCP + latency. */ + opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK); +#endif + + /* Undo OPAL calling opal_progress_event_users_increment() during + opal_init, to get better latency when not using TCP. Do + this *after* dyn_init, as dyn init uses lots of RTE + communication and we don't want to hinder the performance of + that code. */ + opal_progress_event_users_decrement(); + + /* see if yield_when_idle was specified - if so, use it */ + opal_progress_set_yield_when_idle (ompi_mpi_yield_when_idle); + + /* negative value means use default - just don't do anything */ + if (ompi_mpi_event_tick_rate >= 0) { + opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate); + } + + /* At this point, we are fully configured and in MPI mode. Any + communication calls here will work exactly like they would in + the user's code. Setup the connections between procs and warm + them up with simple sends, if requested */ + + if (OMPI_SUCCESS != (ret = ompi_mpiext_init())) { + return ompi_instance_print_error ("ompi_mpiext_init", ret); + } + + /* Initialize the registered datarep list to be empty */ + OBJ_CONSTRUCT(&ompi_registered_datareps, opal_list_t); + + /* Initialize the arrays used to store the F90 types returned by the + * MPI_Type_create_f90_XXX functions. + */ + OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t); + opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */); + + OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t); + opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP); + + OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t); + opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP); + + return OMPI_SUCCESS; +} + +int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance) +{ + ompi_instance_t *new_instance; + int ret; + + *instance = &ompi_mpi_instance_null.instance; + + /* If thread support was enabled, then setup OPAL to allow for them by deault. This must be done + * early to prevent a race condition that can occur with orte_init(). */ + if (ts_level == MPI_THREAD_MULTIPLE) { + opal_set_using_threads(true); + } + + opal_mutex_lock (&instance_lock); + if (0 == opal_atomic_fetch_add_32 (&ompi_instance_count, 1)) { + ret = ompi_mpi_instance_init_common (); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + opal_mutex_unlock (&instance_lock); + return ret; + } + } + + new_instance = OBJ_NEW(ompi_instance_t); + if (OPAL_UNLIKELY(NULL == new_instance)) { + if (0 == opal_atomic_add_fetch_32 (&ompi_instance_count, -1)) { + ret = ompi_mpi_instance_finalize_common (); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + opal_mutex_unlock (&instance_lock); + } + } + opal_mutex_unlock (&instance_lock); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + new_instance->error_handler = errhandler; + OBJ_RETAIN(new_instance->error_handler); + + /* Copy info if there is one. */ + if (OPAL_UNLIKELY(NULL != info)) { + new_instance->super.s_info = OBJ_NEW(opal_info_t); + if (info) { + opal_info_dup(info, &new_instance->super.s_info); + } + } + + *instance = new_instance; + opal_mutex_unlock (&instance_lock); + + return OMPI_SUCCESS; +} + +static int ompi_mpi_instance_finalize_common (void) +{ + uint32_t key; + ompi_datatype_t *datatype; + int ret; + + /* As finalize is the last legal MPI call, we are allowed to force the release + * of the user buffer used for bsend, before going anywhere further. + */ + (void) mca_pml_base_bsend_detach (NULL, NULL); + + /* Shut down any bindings-specific issues: C++, F77, F90 */ + + /* Remove all memory associated by MPI_REGISTER_DATAREP (per + MPI-2:9.5.3, there is no way for an MPI application to + *un*register datareps, but we don't want the OMPI layer causing + memory leaks). */ + OPAL_LIST_DESTRUCT(&ompi_registered_datareps); + + /* Remove all F90 types from the hash tables */ + OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable) + OBJ_RELEASE(datatype); + OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable); + OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable) + OBJ_RELEASE(datatype); + OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable); + OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable) + OBJ_RELEASE(datatype); + OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable); + + /* If requested, print out a list of memory allocated by ALLOC_MEM + but not freed by FREE_MEM */ + if (0 != ompi_debug_show_mpi_alloc_mem_leaks) { + mca_mpool_base_tree_print (ompi_debug_show_mpi_alloc_mem_leaks); + } + + opal_finalize_cleanup_domain (&ompi_instance_common_domain); + + if (NULL != ompi_mpi_main_thread) { + OBJ_RELEASE(ompi_mpi_main_thread); + ompi_mpi_main_thread = NULL; + } + + /* Leave the RTE */ + if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { + return ret; + } + + ompi_rte_initialized = false; + + for (int i = 0 ; ompi_lazy_frameworks[i] ; ++i) { + if (0 < ompi_lazy_frameworks[i]->framework_refcnt) { + /* May have been "opened" multiple times. We want it closed now! */ + ompi_lazy_frameworks[i]->framework_refcnt = 1; + + ret = mca_base_framework_close (ompi_lazy_frameworks[i]); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + } + } + + int last_framework = 0; + for (int i = 0 ; ompi_framework_dependencies[i] ; ++i) { + last_framework = i; + } + + for (int j = last_framework ; j >= 0; --j) { + ret = mca_base_framework_close (ompi_framework_dependencies[j]); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; + } + } + + ompi_proc_finalize(); + + OBJ_DESTRUCT(&ompi_mpi_instance_null); + + ompi_mpi_instance_release (); + + if (0 == opal_initialized) { + /* if there is no MPI_T_init_thread that has been MPI_T_finalize'd, + * then be gentle to the app and release all the memory now (instead + * of the opal library destructor */ + opal_class_finalize (); + } + + return OMPI_SUCCESS; +} + +int ompi_mpi_instance_finalize (ompi_instance_t **instance) +{ + int ret = OMPI_SUCCESS; + + OBJ_RELEASE(*instance); + + opal_mutex_lock (&instance_lock); + if (0 == opal_atomic_add_fetch_32 (&ompi_instance_count, -1)) { + ret = ompi_mpi_instance_finalize_common (); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + opal_mutex_unlock (&instance_lock); + } + } + opal_mutex_unlock (&instance_lock); + + *instance = &ompi_mpi_instance_null.instance; + + return ret; +} + +static void ompi_instance_get_num_psets_complete (pmix_status_t status, + pmix_info_t *info, + size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + size_t n; + pmix_status_t rc; + size_t sz; + size_t num_pmix_psets = 0; + char *pset_names = NULL; + + opal_pmix_lock_t *lock = (opal_pmix_lock_t *) cbdata; + + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key,PMIX_QUERY_NUM_PSETS)) { + PMIX_VALUE_UNLOAD(rc, + &info[n].value, + (void **)&num_pmix_psets, + &sz); + if (num_pmix_psets != ompi_mpi_instance_num_pmix_psets) { + opal_argv_free (ompi_mpi_instance_pmix_psets); + ompi_mpi_instance_pmix_psets = NULL; + } + ompi_mpi_instance_num_pmix_psets = num_pmix_psets; + } else if (0 == strcmp (info[n].key, PMIX_QUERY_PSET_NAMES)) { + if (ompi_mpi_instance_pmix_psets) { + opal_argv_free (ompi_mpi_instance_pmix_psets); + } + PMIX_VALUE_UNLOAD(rc, + &info[n].value, + (void **)&pset_names, + &sz); + ompi_mpi_instance_pmix_psets = opal_argv_split (pset_names, ','); + ompi_mpi_instance_num_pmix_psets = opal_argv_count (ompi_mpi_instance_pmix_psets); + free(pset_names); + } + } + + if (NULL != release_fn) { + release_fn(release_cbdata); + } + OPAL_PMIX_WAKEUP_THREAD(lock); +} + +static void ompi_instance_refresh_pmix_psets (const char *key) +{ + pmix_status_t rc; + pmix_query_t query; + opal_pmix_lock_t lock; + bool refresh = true; + + opal_mutex_lock (&instance_lock); + + PMIX_QUERY_CONSTRUCT(&query); + PMIX_ARGV_APPEND(rc, query.keys, key); + PMIX_INFO_CREATE(query.qualifiers, 1); + query.nqual = 1; + PMIX_INFO_LOAD(&query.qualifiers[0], PMIX_QUERY_REFRESH_CACHE, &refresh, PMIX_BOOL); + + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + + /* + * TODO: need to handle this better + */ + if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(&query, 1, + ompi_instance_get_num_psets_complete, + (void*)&lock))) { + opal_mutex_unlock (&instance_lock); + } + + OPAL_PMIX_WAIT_THREAD(&lock); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + + opal_mutex_unlock (&instance_lock); +} + + +int ompi_instance_get_num_psets (ompi_instance_t *instance, int *npset_names) +{ + ompi_instance_refresh_pmix_psets (PMIX_QUERY_NUM_PSETS); + *npset_names = ompi_instance_builtin_count + ompi_mpi_instance_num_pmix_psets; + + return OMPI_SUCCESS; +} + +int ompi_instance_get_nth_pset (ompi_instance_t *instance, int n, int *len, char *pset_name) +{ + if (NULL == ompi_mpi_instance_pmix_psets && n >= ompi_instance_builtin_count) { + ompi_instance_refresh_pmix_psets (PMIX_QUERY_PSET_NAMES); + } + + if ((size_t) n >= (ompi_instance_builtin_count + ompi_mpi_instance_num_pmix_psets) || n < 0) { + return OMPI_ERR_BAD_PARAM; + } + + if (0 == *len) { + if (n < ompi_instance_builtin_count) { + *len = strlen(ompi_instance_builtin_psets[n]) + 1; + } else { + *len = strlen (ompi_mpi_instance_pmix_psets[n - ompi_instance_builtin_count]) + 1; + } + return OMPI_SUCCESS; + } + + if (n < ompi_instance_builtin_count) { + strncpy (pset_name, ompi_instance_builtin_psets[n], *len); + } else { + strncpy (pset_name, ompi_mpi_instance_pmix_psets[n - ompi_instance_builtin_count], *len); + } + + return OMPI_SUCCESS; +} + +static int ompi_instance_group_world (ompi_instance_t *instance, ompi_group_t **group_out) +{ + ompi_group_t *group; + size_t size; + + size = ompi_process_info.num_procs; + + group = ompi_group_allocate (size); + if (OPAL_UNLIKELY(NULL == group)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (size_t i = 0 ; i < size ; ++i) { + opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; + /* look for existing ompi_proc_t that matches this name */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name); + if (NULL == group->grp_proc_pointers[i]) { + /* set sentinel value */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); + } else { + OBJ_RETAIN (group->grp_proc_pointers[i]); + } + } + + ompi_set_group_rank (group, ompi_proc_local()); + + group->grp_instance = instance; + + *group_out = group; + return OMPI_SUCCESS; +} + +static int ompi_instance_group_shared (ompi_instance_t *instance, ompi_group_t **group_out) +{ + ompi_group_t *group; + opal_process_name_t wildcard_rank; + int ret; + size_t size; + char **peers; + char *val; + + /* Find out which processes are local */ + wildcard_rank.jobid = OMPI_PROC_MY_NAME->jobid; + wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; + + OPAL_MODEX_RECV_VALUE(ret, PMIX_LOCAL_PEERS, &wildcard_rank, &val, PMIX_STRING); + if (OPAL_SUCCESS != ret || NULL == val) { + return OMPI_ERROR; + } + + peers = opal_argv_split(val, ','); + free (val); + if (OPAL_UNLIKELY(NULL == peers)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + size = opal_argv_count (peers); + + group = ompi_group_allocate (size); + if (OPAL_UNLIKELY(NULL == group)) { + opal_argv_free (peers); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for (size_t i = 0 ; NULL != peers[i] ; ++i) { + opal_process_name_t name = {.vpid = strtoul(peers[i], NULL, 10), .jobid = OMPI_PROC_MY_NAME->jobid}; + /* look for existing ompi_proc_t that matches this name */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name); + if (NULL == group->grp_proc_pointers[i]) { + /* set sentinel value */ + group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); + } else { + OBJ_RETAIN (group->grp_proc_pointers[i]); + } + } + + opal_argv_free (peers); + + /* group is dense */ + ompi_set_group_rank (group, ompi_proc_local()); + + group->grp_instance = instance; + + *group_out = group; + return OMPI_SUCCESS; +} + +static int ompi_instance_group_self (ompi_instance_t *instance, ompi_group_t **group_out) +{ + ompi_group_t *group; + size_t size; + + group = OBJ_NEW(ompi_group_t); + if (OPAL_UNLIKELY(NULL == group)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + group->grp_proc_pointers = ompi_proc_self(&size); + group->grp_my_rank = 0; + group->grp_proc_count = size; + + /* group is dense */ + OMPI_GROUP_SET_DENSE (group); + + group->grp_instance = instance; + + *group_out = group; + return OMPI_SUCCESS; +} + +static int ompi_instance_group_pmix_pset (ompi_instance_t *instance, const char *pset_name, ompi_group_t **group_out) +{ + pmix_status_t rc; + pmix_proc_t p; + ompi_group_t *group; + pmix_value_t *pval = NULL; + char *stmp = NULL; + size_t size = 0; + + /* make the group large enough to hold world */ + group = ompi_group_allocate (ompi_process_info.num_procs); + if (OPAL_UNLIKELY(NULL == group)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + + for (size_t i = 0 ; i < ompi_process_info.num_procs ; ++i) { + opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; + + OPAL_PMIX_CONVERT_NAME(&p, &name); + rc = PMIx_Get(&p, PMIX_PSET_NAME, NULL, 0, &pval); + if (OPAL_UNLIKELY(PMIX_SUCCESS != rc)) { + OBJ_RELEASE(group); + return opal_pmix_convert_status(rc); + } + + PMIX_VALUE_UNLOAD(rc, + pval, + (void **)&stmp, + &size); + if (0 != strcmp (pset_name, stmp)) { + PMIX_VALUE_RELEASE(pval); + free(stmp); + continue; + } + PMIX_VALUE_RELEASE(pval); + free(stmp); + + /* look for existing ompi_proc_t that matches this name */ + group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_lookup (name); + if (NULL == group->grp_proc_pointers[size]) { + /* set sentinel value */ + group->grp_proc_pointers[size] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name); + } else { + OBJ_RETAIN (group->grp_proc_pointers[size]); + } + ++size; + } + + /* shrink the proc array if needed */ + if (size < (size_t) group->grp_proc_count) { + void *tmp = realloc (group->grp_proc_pointers, size * sizeof (group->grp_proc_pointers[0])); + if (OPAL_UNLIKELY(NULL == tmp)) { + OBJ_RELEASE(group); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + group->grp_proc_pointers = (ompi_proc_t **) tmp; + group->grp_proc_count = (int) size; + } + + ompi_set_group_rank (group, ompi_proc_local()); + + group->grp_instance = instance; + + *group_out = group; + return OMPI_SUCCESS; +} + +static int ompi_instance_get_pmix_pset_size (ompi_instance_t *instance, const char *pset_name, size_t *size_out) +{ + pmix_status_t rc; + pmix_proc_t p; + pmix_value_t *pval = NULL; + size_t size = 0; + char *stmp = NULL; + + for (size_t i = 0 ; i < ompi_process_info.num_procs ; ++i) { + opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid}; + + OPAL_PMIX_CONVERT_NAME(&p, &name); + rc = PMIx_Get(&p, PMIX_PSET_NAME, NULL, 0, &pval); + if (OPAL_UNLIKELY(PMIX_SUCCESS != rc)) { + return rc; + } + + PMIX_VALUE_UNLOAD(rc, + pval, + (void **)&stmp, + &size); + + size += (0 == strcmp (pset_name, stmp)); + PMIX_VALUE_RELEASE(pval); + free(stmp); + + ++size; + } + + *size_out = size; + + return OMPI_SUCCESS; +} + +int ompi_group_from_pset (ompi_instance_t *instance, const char *pset_name, ompi_group_t **group_out) +{ + if (group_out == MPI_GROUP_NULL) { + return OMPI_ERR_BAD_PARAM; + } + + if (0 == strncmp (pset_name, "mpi://", 6)) { + pset_name += 6; + if (0 == strcmp (pset_name, "WORLD")) { + return ompi_instance_group_world (instance, group_out); + } + if (0 == strcmp (pset_name, "SELF")) { + return ompi_instance_group_self (instance, group_out); + } + } + + if (0 == strncmp (pset_name, "mpix://", 7)) { + pset_name += 7; + if (0 == strcmp (pset_name, "SHARED")) { + return ompi_instance_group_shared (instance, group_out); + } + } + + return ompi_instance_group_pmix_pset (instance, pset_name, group_out); +} + +int ompi_instance_get_pset_info (ompi_instance_t *instance, const char *pset_name, opal_info_t **info_used) +{ + ompi_info_t *info = ompi_info_allocate (); + char tmp[16]; + size_t size = 0UL; + int ret; + + *info_used = (opal_info_t *) MPI_INFO_NULL; + + if (OPAL_UNLIKELY(NULL == info)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + if (0 == strncmp (pset_name, "mpi://", 6)) { + pset_name += 6; + if (0 == strcmp (pset_name, "world")) { + size = ompi_process_info.num_procs; + } else if (0 == strcmp (pset_name, "self")) { + size = 1; + } else if (0 == strcmp (pset_name, "shared")) { + size = ompi_process_info.num_local_peers + 1; + } + } else { + ompi_instance_get_pmix_pset_size (instance, pset_name, &size); + } + + snprintf (tmp, 16, "%" PRIsize_t, size); + ret = opal_info_set (&info->super, MPI_INFO_KEY_SESSION_PSET_SIZE, tmp); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + ompi_info_free (&info); + return ret; + } + + *info_used = &info->super; + + return OMPI_SUCCESS; +} diff --git a/ompi/instance/instance.h b/ompi/instance/instance.h new file mode 100644 index 00000000000..13945a92362 --- /dev/null +++ b/ompi/instance/instance.h @@ -0,0 +1,157 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#if !defined(OMPI_INSTANCE_H) +#define OMPI_INSTANCE_H + +#include "opal/class/opal_object.h" +#include "opal/class/opal_hash_table.h" +#include "opal/util/info_subscriber.h" +#include "ompi/errhandler/errhandler.h" +#include "opal/mca/threads/mutex.h" +#include "ompi/communicator/comm_request.h" + +#include "mpi.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/info/info.h" +#include "ompi/proc/proc.h" + +struct ompi_group_t; + +struct ompi_instance_t { + opal_infosubscriber_t super; + int i_thread_level; + char i_name[MPI_MAX_OBJECT_NAME]; + uint32_t i_flags; + + /* Attributes */ + opal_hash_table_t *i_keyhash; + + /* index in Fortran <-> C translation array (for when I get around + * to implementing fortran support-- UGH) */ + int i_f_to_c_index; + + ompi_errhandler_t *error_handler; + ompi_errhandler_type_t errhandler_type; +}; + +typedef struct ompi_instance_t ompi_instance_t; + +OBJ_CLASS_DECLARATION(ompi_instance_t); + +/* Define for the preallocated size of the predefined handle. + * Note that we are using a pointer type as the base memory chunk + * size so when the bitness changes the size of the handle changes. + * This is done so we don't end up needing a structure that is + * incredibly larger than necessary because of the bitness. + * + * This padding mechanism works as a (likely) compile time check for when the + * size of the ompi_communicator_t exceeds the predetermined size of the + * ompi_predefined_communicator_t. It also allows us to change the size of + * the ompi_communicator_t without impacting the size of the + * ompi_predefined_communicator_t structure for some number of additions. + * + * Note: we used to define the PAD as a multiple of sizeof(void*). + * However, this makes a different size PAD, depending on + * sizeof(void*). In some cases + * (https://github.com/open-mpi/ompi/issues/3610), 32 bit builds can + * run out of space when 64 bit builds are still ok. So we changed to + * use just a naked byte size. As a rule of thumb, however, the size + * should probably still be a multiple of 8 so that it has the + * possibility of being nicely aligned. + * + * As an example: + * If the size of ompi_communicator_t is less than the size of the _PAD then + * the _PAD ensures that the size of the ompi_predefined_communicator_t is + * whatever size is defined below in the _PAD macro. + * However, if the size of the ompi_communicator_t grows larger than the _PAD + * (say by adding a few more function pointers to the structure) then the + * 'padding' variable will be initialized to a large number often triggering + * a 'array is too large' compile time error. This signals two things: + * 1) That the _PAD should be increased. + * 2) That users need to be made aware of the size change for the + * ompi_predefined_communicator_t structure. + * + * Q: So you just made a change to communicator structure, do you need to adjust + * the PREDEFINED_COMMUNICATOR_PAD macro? + * A: Most likely not, but it would be good to check. + */ +#define PREDEFINED_INSTANCE_PAD 512 + +struct ompi_predefined_instance_t { + ompi_instance_t instance; + char padding[PREDEFINED_INSTANCE_PAD - sizeof(ompi_instance_t)]; +}; +typedef struct ompi_predefined_instance_t ompi_predefined_instance_t; + +/** + * @brief NULL instance + */ +OMPI_DECLSPEC extern ompi_predefined_instance_t ompi_mpi_instance_null; + +OMPI_DECLSPEC extern opal_pointer_array_t ompi_instance_f_to_c_table; + +extern ompi_instance_t *ompi_mpi_instance_default; + +/** + * @brief Bring up the bare minimum infrastructure to support pre-session_init functions. + * + * List of subsystems initialized: + * - OPAL (including class system) + * - Error handlers + * - MPI Info + */ +int ompi_mpi_instance_retain (void); + +/** + * @brief Release (and possibly teardown) pre-session_init infrastructure. + */ +void ompi_mpi_instance_release (void); + +/** + * @brief Create a new MPI instance + * + * @param[in] ts_level thread support level (see mpi.h) + * @param[in] info info object + * @param[in] errhander errhandler to set on the instance + */ +OMPI_DECLSPEC int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance); + +/** + * @brief Destroy an MPI instance and set it to MPI_SESSION_NULL + */ +OMPI_DECLSPEC int ompi_mpi_instance_finalize (ompi_instance_t **instance); + + +/** + * @brief Add a function to the finalize chain. Note this function will be called + * when the last instance has been destroyed. + */ +#define ompi_mpi_instance_append_finalize opal_finalize_register_cleanup + +/** + * @brief Get an MPI group object for a named process set. + * + * @param[in] instance MPI instance (session) + * @param[in] pset_name Name of process set (includes mpi://world, mpi://self) + * @param[out group_out New MPI group + */ +OMPI_DECLSPEC int ompi_group_from_pset (ompi_instance_t *instance, const char *pset_name, struct ompi_group_t **group_out); + +OMPI_DECLSPEC int ompi_instance_get_num_psets (ompi_instance_t *instance, int *npset_names); +OMPI_DECLSPEC int ompi_instance_get_nth_pset (ompi_instance_t *instance, int n, int *len, char *pset_name); +OMPI_DECLSPEC int ompi_instance_get_pset_info (ompi_instance_t *instance, const char *pset_name, opal_info_t **info_used); + +/** + * @brief current number of active instances + */ +extern opal_atomic_int32_t ompi_instance_count; + +#endif /* !defined(OMPI_INSTANCE_H) */ diff --git a/ompi/mca/bml/base/base.h b/ompi/mca/bml/base/base.h index b7a226ac6ec..723f905cc8c 100644 --- a/ompi/mca/bml/base/base.h +++ b/ompi/mca/bml/base/base.h @@ -61,6 +61,7 @@ OMPI_DECLSPEC extern mca_bml_base_component_t mca_bml_component; OMPI_DECLSPEC extern mca_bml_base_module_t mca_bml; OMPI_DECLSPEC extern mca_base_framework_t ompi_bml_base_framework; OMPI_DECLSPEC extern opal_mutex_t mca_bml_lock; +OMPI_DECLSPEC extern bool mca_bml_component_init_called; static inline struct mca_bml_base_endpoint_t *mca_bml_base_get_endpoint (struct ompi_proc_t *proc) { if (OPAL_UNLIKELY(NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML])) { diff --git a/ompi/mca/bml/base/bml_base_frame.c b/ompi/mca/bml/base/bml_base_frame.c index b5a63dd9a2e..a76a891e49d 100644 --- a/ompi/mca/bml/base/bml_base_frame.c +++ b/ompi/mca/bml/base/bml_base_frame.c @@ -127,5 +127,11 @@ static int mca_bml_base_close( void ) return ret; } - return mca_base_framework_close(&opal_btl_base_framework); + ret = mca_base_framework_close(&opal_btl_base_framework); + if (OMPI_SUCCESS != ret) { + return ret; + } + + mca_bml_component_init_called = false; + return OMPI_SUCCESS; } diff --git a/ompi/mca/bml/base/bml_base_init.c b/ompi/mca/bml/base/bml_base_init.c index 9a2efec8ccc..6d1060d4690 100644 --- a/ompi/mca/bml/base/bml_base_init.c +++ b/ompi/mca/bml/base/bml_base_init.c @@ -42,12 +42,12 @@ mca_bml_base_module_t mca_bml = { }; mca_bml_base_component_t mca_bml_component = {{0}}; -static bool init_called = false; +bool mca_bml_component_init_called = false; bool mca_bml_base_inited(void) { - return init_called; + return mca_bml_component_init_called; } int mca_bml_base_init( bool enable_progress_threads, @@ -57,11 +57,11 @@ int mca_bml_base_init( bool enable_progress_threads, int priority = 0, best_priority = -1; mca_base_component_list_item_t *cli = NULL; - if (init_called) { + if (true == mca_bml_component_init_called) { return OPAL_SUCCESS; } - init_called = true; + mca_bml_component_init_called = true; OPAL_LIST_FOREACH(cli, &ompi_bml_base_framework.framework_components, mca_base_component_list_item_t) { component = (mca_bml_base_component_t*) cli->cli_component; diff --git a/ompi/mca/coll/base/coll_base_comm_select.c b/ompi/mca/coll/base/coll_base_comm_select.c index fcdb8649eba..d5c3a0dbfe5 100644 --- a/ompi/mca/coll/base/coll_base_comm_select.c +++ b/ompi/mca/coll/base/coll_base_comm_select.c @@ -102,8 +102,8 @@ int mca_coll_base_comm_select(ompi_communicator_t * comm) /* Announce */ opal_output_verbose(9, ompi_coll_base_framework.framework_output, - "coll:base:comm_select: new communicator: %s (cid %d)", - comm->c_name, comm->c_contextid); + "coll:base:comm_select: new communicator: %s (cid %s)", + comm->c_name, ompi_comm_print_cid (comm)); /* Initialize all the relevant pointers, since they're used as * sentinel values */ diff --git a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c index 6537bb9bc7c..fe9ad25917c 100644 --- a/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c +++ b/ompi/mca/coll/ftagree/coll_ftagree_earlyreturning.c @@ -491,7 +491,7 @@ static void era_debug_print_group(int lvl, ompi_group_t *group, ompi_communicato } s = 128 + n * 16; str = (char*)malloc(s); - sprintf(str, "Group of size %d. Ranks in %d.%d: (", n, comm->c_contextid, comm->c_epoch); + sprintf(str, "Group of size %d. Ranks in %d.%d: (", n, comm->c_index, comm->c_epoch); p = strlen(str); for(i = 0; i < n; i++) { snprintf(str + p, s - p, "%d%s", gra[i], i==n-1 ? "" : ", "); @@ -896,7 +896,7 @@ static void era_agreement_info_set_comm(era_agreement_info_t *ci, ompi_communica int *src_ra; int r, grp_size; - assert( comm->c_contextid == ci->agreement_id.ERAID_FIELDS.contextid ); + assert( comm->c_index == ci->agreement_id.ERAID_FIELDS.contextid ); assert( comm->c_epoch == ci->agreement_id.ERAID_FIELDS.epoch ); assert( ci->comm == NULL ); ci->comm = comm; @@ -1622,11 +1622,11 @@ static void era_decide(era_value_t *decided_value, era_agreement_info_t *ci) ompi_communicator_t *comm; era_rank_item_t *rl; int r, s, dead_size; - void *value; assert( 0 != ci->agreement_id.ERAID_FIELDS.agreementid ); #if OPAL_ENABLE_DEBUG + void *value; r = era_parent(ci); if( opal_hash_table_get_value_uint64(&era_passed_agreements, ci->agreement_id.ERAID_KEY, &value) == OMPI_SUCCESS ) { @@ -2166,7 +2166,7 @@ static void send_msg(ompi_communicator_t *comm, } #endif /* OPAL_ENABLE_DEBUG */ - assert( NULL == comm || agreement_id.ERAID_FIELDS.contextid == ompi_comm_get_cid(comm) ); + assert( NULL == comm || agreement_id.ERAID_FIELDS.contextid == ompi_comm_get_local_cid(comm) ); assert( NULL == comm || agreement_id.ERAID_FIELDS.epoch == comm->c_epoch ); if( NULL == comm ) { @@ -2810,7 +2810,7 @@ static void era_on_comm_rank_failure(ompi_communicator_t *comm, int rank, bool r &key64, &next_value, node, &node); - if( cid.ERAID_FIELDS.contextid == comm->c_contextid && + if( cid.ERAID_FIELDS.contextid == comm->c_contextid.cid_sub.u64 && cid.ERAID_FIELDS.epoch == comm->c_epoch ) { ci = (era_agreement_info_t *)value; OPAL_OUTPUT_VERBOSE((6, ompi_ftmpi_output_handle, @@ -3026,7 +3026,7 @@ static int mca_coll_ftagree_era_prepare_agreement(ompi_communicator_t* comm, } /* Let's find the id of the new agreement */ - agreement_id.ERAID_FIELDS.contextid = comm->c_contextid; + agreement_id.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; agreement_id.ERAID_FIELDS.epoch = comm->c_epoch; agreement_id.ERAID_FIELDS.agreementid = (uint16_t)ag_info->agreement_seq_num; @@ -3226,10 +3226,19 @@ int mca_coll_ftagree_era_inter(void *contrib, contriblh[0] = ~0; contriblh[1] = *(int*)contrib; } - ompi_comm_set(&shadowcomm, comm, - ompi_group_size(uniongrp), NULL, 0, NULL, - NULL, comm->error_handler, NULL, - uniongrp, NULL); + + ompi_comm_set(&shadowcomm, /* new comm */ + comm, /* old comm */ + ompi_group_size(uniongrp), /* local_size */ + NULL, /* local_procs */ + 0, /* remote_size */ + NULL, /* remote procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* local group */ + uniongrp, /* remote group */ + 0); /* flags */ + ompi_group_free(&uniongrp); shadowcomm->c_contextid = comm->c_contextid; shadowcomm->c_epoch = comm->c_epoch; @@ -3364,7 +3373,7 @@ int mca_coll_ftagree_era_free_comm(ompi_communicator_t* comm, } while(rc != MPI_SUCCESS); OBJ_RELEASE(acked); - aid.ERAID_FIELDS.contextid = comm->c_contextid; + aid.ERAID_FIELDS.contextid = comm->c_contextid.cid_sub.u64; aid.ERAID_FIELDS.epoch = comm->c_epoch; opal_mutex_lock(&era_mutex); diff --git a/ompi/mca/coll/portals4/coll_portals4_allreduce.c b/ompi/mca/coll/portals4/coll_portals4_allreduce.c index 513457d7296..dff13daf083 100644 --- a/ompi/mca/coll/portals4/coll_portals4_allreduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_allreduce.c @@ -96,13 +96,13 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, */ /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_ALLREDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_ALLREDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_ALLREDUCE, 0, internal_count); if ((ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.allreduce.trig_ct_h)) != 0) { diff --git a/ompi/mca/coll/portals4/coll_portals4_barrier.c b/ompi/mca/coll/portals4/coll_portals4_barrier.c index e510ae88d18..a3bedcd0bd0 100644 --- a/ompi/mca/coll/portals4/coll_portals4_barrier.c +++ b/ompi/mca/coll/portals4/coll_portals4_barrier.c @@ -55,10 +55,10 @@ barrier_hypercube_top(struct ompi_communicator_t *comm, return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_BARRIER, 0, count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BARRIER, 0, count); /* Build "tree" out of hypercube */ diff --git a/ompi/mca/coll/portals4/coll_portals4_bcast.c b/ompi/mca/coll/portals4/coll_portals4_bcast.c index 8432d5823cd..1dcc819aa00 100644 --- a/ompi/mca/coll/portals4/coll_portals4_bcast.c +++ b/ompi/mca/coll/portals4/coll_portals4_bcast.c @@ -200,13 +200,13 @@ bcast_kary_tree_top(void *buff, int count, } /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); /* The data will be cut in segment_nb segments. @@ -531,13 +531,13 @@ bcast_pipeline_top(void *buff, int count, } /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_BCAST, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_BCAST, 0, internal_count); /* The data will be cut in segment_nb segments. * nb_long segments will have a size of (seg_size + 1) diff --git a/ompi/mca/coll/portals4/coll_portals4_gather.c b/ompi/mca/coll/portals4/coll_portals4_gather.c index 274e9d4ee89..57ca706133e 100644 --- a/ompi/mca/coll/portals4/coll_portals4_gather.c +++ b/ompi/mca/coll/portals4/coll_portals4_gather.c @@ -360,7 +360,7 @@ setup_gather_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Gather Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.gather.gather_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.gather.gather_match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, @@ -413,7 +413,7 @@ setup_sync_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Sync Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.gather.sync_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.gather.sync_match_bits, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_GATHER, 0, request->u.gather.coll_count); ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, diff --git a/ompi/mca/coll/portals4/coll_portals4_reduce.c b/ompi/mca/coll/portals4/coll_portals4_reduce.c index a96db5088a9..b5c5b110629 100644 --- a/ompi/mca/coll/portals4/coll_portals4_reduce.c +++ b/ompi/mca/coll/portals4/coll_portals4_reduce.c @@ -97,13 +97,13 @@ reduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count, */ /* Compute match bits */ - COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_cid(comm), 1, 0, + COLL_PORTALS4_SET_BITS(match_bits_ack, ompi_comm_get_local_cid(comm), 1, 0, COLL_PORTALS4_REDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_cid(comm), 0, 1, + COLL_PORTALS4_SET_BITS(match_bits_rtr, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_REDUCE, 0, internal_count); - COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0, + COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_REDUCE, 0, internal_count); if ((ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &request->u.reduce.trig_ct_h)) != 0) { diff --git a/ompi/mca/coll/portals4/coll_portals4_scatter.c b/ompi/mca/coll/portals4/coll_portals4_scatter.c index 0049a61d001..1c7f2d3d78d 100644 --- a/ompi/mca/coll/portals4/coll_portals4_scatter.c +++ b/ompi/mca/coll/portals4/coll_portals4_scatter.c @@ -133,7 +133,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Scatter Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_local_cid(comm), 0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, @@ -194,7 +194,7 @@ setup_sync_handles(struct ompi_communicator_t *comm, /**********************************/ /* Setup Sync Handles */ /**********************************/ - COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm), + COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_local_cid(comm), 0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count); OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index e386ed543c7..e57d94ed500 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -175,8 +175,9 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) are not on this node, then we don't want to run */ if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) || ompi_group_have_remote_peers (comm->c_local_group)) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + "coll:sm:comm_query (%s/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); + return NULL; } /* Get the priority level attached to this module. If priority is less @@ -184,8 +185,9 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) *priority = mca_coll_sm_component.sm_priority; if (mca_coll_sm_component.sm_priority < 0) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name); - return NULL; + "coll:sm:comm_query (%s/%s): priority too low; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); + return NULL; } sm_module = OBJ_NEW(mca_coll_sm_module_t); @@ -213,8 +215,8 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority) sm_module->super.coll_scatterv = NULL; opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:comm_query (%d/%s): pick me! pick me!", - comm->c_contextid, comm->c_name); + "coll:sm:comm_query (%s/%s): pick me! pick me!", + ompi_comm_print_cid (comm), comm->c_name); return &(sm_module->super); } @@ -228,8 +230,8 @@ static int sm_module_enable(mca_coll_base_module_t *module, if (NULL == comm->c_coll->coll_reduce || NULL == comm->c_coll->coll_reduce_module) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): no underlying reduce; disqualifying myself", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): no underlying reduce; disqualifying myself", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERROR; } @@ -265,8 +267,8 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, c->sm_comm_num_segments * 3); if (NULL == maffinity) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): malloc failed (1)", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): malloc failed (1)", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -292,8 +294,8 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, if (NULL == data) { free(maffinity); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): malloc failed (2)", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): malloc failed (2)", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_TEMP_OUT_OF_RESOURCE; } data->mcb_operation_count = 0; @@ -468,24 +470,24 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, /* Wait for everyone in this communicator to attach and setup */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): waiting for peers to attach", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): waiting for peers to attach", + ompi_comm_print_cid (comm), comm->c_name); SPIN_CONDITION(size == data->sm_bootstrap_meta->module_seg->seg_inited, seg_init_exit); /* Once we're all here, remove the mmap file; it's not needed anymore */ if (0 == rank) { unlink(data->sm_bootstrap_meta->shmem_ds.seg_name); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): removed mmap file %s", - comm->c_contextid, comm->c_name, + "coll:sm:enable (%s/%s): removed mmap file %s", + ompi_comm_print_cid (comm), comm->c_name, data->sm_bootstrap_meta->shmem_ds.seg_name); } /* All done */ opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable (%d/%s): success!", - comm->c_contextid, comm->c_name); + "coll:sm:enable (%s/%s): success!", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_SUCCESS; } @@ -518,12 +520,12 @@ static int bootstrap_comm(ompi_communicator_t *comm, lowest_name = OMPI_CAST_RTE_NAME(&proc->super.proc_name); } } - opal_asprintf(&shortpath, "coll-sm-cid-%d-name-%s.mmap", comm->c_contextid, + opal_asprintf(&shortpath, "coll-sm-cid-%s-name-%s.mmap", ompi_comm_print_cid (comm), OMPI_NAME_PRINT(lowest_name)); if (NULL == shortpath) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): asprintf failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): asprintf failed", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } fullpath = opal_os_path(false, ompi_process_info.job_session_dir, @@ -531,8 +533,8 @@ static int bootstrap_comm(ompi_communicator_t *comm, free(shortpath); if (NULL == fullpath) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): opal_os_path failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): opal_os_path failed", + ompi_comm_print_cid (comm), comm->c_name); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -562,14 +564,14 @@ static int bootstrap_comm(ompi_communicator_t *comm, (num_segments * (comm_size * control_size * 2)) + (num_segments * (comm_size * frag_size)); opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): attaching to %" PRIsize_t " byte mmap: %s", - comm->c_contextid, comm->c_name, size, fullpath); + "coll:sm:enable:bootstrap comm (%s/%s): attaching to %" PRIsize_t " byte mmap: %s", + ompi_comm_print_cid (comm), comm->c_name, size, fullpath); if (0 == ompi_comm_rank (comm)) { data->sm_bootstrap_meta = mca_common_sm_module_create_and_attach (size, fullpath, sizeof(mca_common_sm_seg_header_t), 8); if (NULL == data->sm_bootstrap_meta) { opal_output_verbose(10, ompi_coll_base_framework.framework_output, - "coll:sm:enable:bootstrap comm (%d/%s): mca_common_sm_init_group failed", - comm->c_contextid, comm->c_name); + "coll:sm:enable:bootstrap comm (%s/%s): mca_common_sm_init_group failed", + ompi_comm_print_cid (comm), comm->c_name); free(fullpath); return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/mtl/mtl.h b/ompi/mca/mtl/mtl.h index 24b2153064d..219ecc94d0c 100644 --- a/ompi/mca/mtl/mtl.h +++ b/ompi/mca/mtl/mtl.h @@ -65,6 +65,7 @@ typedef struct mca_mtl_request_t mca_mtl_request_t; #if OPAL_CUDA_SUPPORT #define MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE 0x00000002 #endif +#define MCA_MTL_BASE_FLAG_SUPPORTS_EXT_CID 0x00000004 /** * Initialization routine for MTL component diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index 6626c754a97..cbebd602002 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -72,6 +72,16 @@ extern opal_thread_local int per_thread_ctx; extern opal_thread_local struct fi_cq_tagged_entry wc[MTL_OFI_MAX_PROG_EVENT_COUNT]; #endif +struct mca_mtl_ofi_cid_hdr_t { + ompi_comm_extended_cid_t hdr_cid; + int16_t hdr_src_c_index; + int32_t hdr_src; + bool need_response; + bool ofi_cq_data; +}; + +typedef struct mca_mtl_ofi_cid_hdr_t mca_mtl_ofi_cid_hdr_t; + /* Set OFI context for operations which generate completion events */ __opal_attribute_always_inline__ static inline void set_thread_context(int ctxt) @@ -446,6 +456,124 @@ ompi_mtl_ofi_map_comm_to_ctxt(uint32_t comm_id) return ompi_mtl_ofi.comm_to_context[comm_id]; } +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_post_recv_excid_buffer(bool blocking, struct ompi_communicator_t *comm, int src); + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int dest, + bool ofi_cq_data, + bool is_send); + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_recv_excid_error_callback(struct fi_cq_err_entry *error, + ompi_mtl_ofi_request_t *ofi_req) +{ + ompi_status_public_t *status; + assert(ofi_req->super.ompi_req); + status = &ofi_req->super.ompi_req->req_status; + status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits); + status->MPI_SOURCE = mtl_ofi_get_source((struct fi_cq_tagged_entry *) error); + + switch (error->err) { + case FI_ETRUNC: + status->MPI_ERROR = MPI_ERR_TRUNCATE; + break; + case FI_ECANCELED: + status->_cancelled = true; + break; + default: + status->MPI_ERROR = MPI_ERR_INTERN; + } + + ofi_req->super.completion_callback(&ofi_req->super); + return OMPI_SUCCESS; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_post_recv_excid_buffer_callback(struct fi_cq_tagged_entry *wc, + ompi_mtl_ofi_request_t *ofi_req) +{ + ofi_req->completion_count--; + int ret; + mca_mtl_ofi_cid_hdr_t *buffer = (mca_mtl_ofi_cid_hdr_t *)ofi_req->buffer; + size_t length = sizeof(mca_mtl_ofi_cid_hdr_t); + ompi_comm_extended_cid_t excid; + ompi_communicator_t *comm; + int src = buffer->hdr_src; + excid.cid_base = buffer->hdr_cid.cid_base; + excid.cid_sub.u64 = buffer->hdr_cid.cid_sub.u64; + for (int i = 0; i < 8; i++) { + excid.cid_sub.u8[i] = buffer->hdr_cid.cid_sub.u8[i]; + } + + comm = ompi_comm_lookup_cid(excid); + if (comm == NULL) { + comm = ompi_comm_lookup(buffer->hdr_src_c_index); + } + + if (comm == NULL) { + return OMPI_SUCCESS; + } + + if (comm->c_index_vec[src] < -1 && buffer->need_response) { + comm->c_index_vec[src] = buffer->hdr_src_c_index; + ret = ompi_mtl_ofi_send_excid(ofi_req->mtl, comm, src, buffer->ofi_cq_data, false); + } else { + comm->c_index_vec[src] = buffer->hdr_src_c_index; + } + + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + return OMPI_SUCCESS; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_post_recv_excid_buffer(bool blocking, struct ompi_communicator_t *comm, int src) +{ + int ctxt_id = 0; + ssize_t ret; + ompi_mtl_ofi_request_t *ofi_req = malloc(sizeof(ompi_mtl_ofi_request_t)); + mca_mtl_ofi_cid_hdr_t *start = malloc(sizeof(mca_mtl_ofi_cid_hdr_t)); + size_t length = sizeof(mca_mtl_ofi_cid_hdr_t); + set_thread_context(ctxt_id); + + ofi_req->type = OMPI_MTL_OFI_RECV; + ofi_req->event_callback = ompi_mtl_ofi_post_recv_excid_buffer_callback; + ofi_req->error_callback = ompi_mtl_ofi_recv_excid_error_callback; + ofi_req->buffer = start; + ofi_req->length = length; + ofi_req->convertor = NULL; + ofi_req->req_started = false; + ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req->remote_addr = NULL; + ofi_req->match_bits = NULL; + ofi_req->completion_count = 1; + ofi_req->comm = comm; + + MTL_OFI_RETRY_UNTIL_DONE(fi_recv(ompi_mtl_ofi.ofi_ctxt[0].rx_ep, + start, + length, + NULL, + FI_ADDR_UNSPEC, + (void *)&ofi_req->ctx), ret); + if (OPAL_UNLIKELY(0 > ret)) { + if (NULL != ofi_req->buffer) { + free(ofi_req->buffer); + } + MTL_OFI_LOG_FI_ERR(ret, "fi_recv failed"); + return ompi_mtl_ofi_get_error(ret); + } + + if (blocking) { + while (comm->c_index_vec[src] < 0) { + ompi_mtl_ofi_progress(); + } + } + + return OMPI_SUCCESS; +} + __opal_attribute_always_inline__ static inline int ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, struct ompi_communicator_t *comm, @@ -458,7 +586,11 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, ssize_t ret = OMPI_SUCCESS; int ctxt_id = 0; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ack_req = malloc(sizeof(ompi_mtl_ofi_request_t)); @@ -491,18 +623,122 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, return OMPI_SUCCESS; } -__opal_attribute_always_inline__ static inline int -ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, +static int +ompi_mtl_ofi_send_excid(struct mca_mtl_base_module_t *mtl, struct ompi_communicator_t *comm, int dest, - int tag, - struct opal_convertor_t *convertor, - mca_pml_base_send_mode_t mode, - bool ofi_cq_data) + bool ofi_cq_data, + bool is_send) +{ + ssize_t ret = OMPI_SUCCESS; + ompi_mtl_ofi_request_t *ofi_req = malloc(sizeof(ompi_mtl_ofi_request_t)); + int ctxt_id = 0; + mca_mtl_ofi_cid_hdr_t *start = malloc(sizeof(mca_mtl_ofi_cid_hdr_t)); + ompi_proc_t *ompi_proc = NULL; + mca_mtl_ofi_endpoint_t *endpoint = NULL; + fi_addr_t sep_peer_fiaddr = 0; + + ctxt_id = 0; + set_thread_context(ctxt_id); + + /** + * Create a send request, start it and wait until it completes. + */ + ofi_req->event_callback = ompi_mtl_ofi_send_callback; + ofi_req->error_callback = ompi_mtl_ofi_send_error_callback; + + ompi_proc = ompi_comm_peer_lookup(comm, dest); + endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc); + + /* For Scalable Endpoints, gather target receive context */ + sep_peer_fiaddr = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); + + start->hdr_cid = comm->c_contextid; + start->hdr_src = comm->c_my_rank; + start->hdr_src_c_index = comm->c_index; + start->ofi_cq_data = ofi_cq_data; + if (comm->c_index_vec[dest] < 0) { + start->need_response = true; + } else { + start->need_response = false; + } + size_t length = sizeof(mca_mtl_ofi_cid_hdr_t); + + ofi_req->length = length; + ofi_req->status.MPI_ERROR = OMPI_SUCCESS; + ofi_req->completion_count = 0; + if (OPAL_UNLIKELY(length > endpoint->mtl_ofi_module->max_msg_size)) { + opal_show_help("help-mtl-ofi.txt", + "message too big", false, + length, endpoint->mtl_ofi_module->max_msg_size); + return OMPI_ERROR; + } + + if (OPAL_UNLIKELY(ofi_req->status.MPI_ERROR != OMPI_SUCCESS)) + return ofi_req->status.MPI_ERROR; + + if (ompi_mtl_ofi.max_inject_size >= length) { + if (ofi_cq_data) { + MTL_OFI_RETRY_UNTIL_DONE(fi_injectdata(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + comm->c_my_rank, + sep_peer_fiaddr), ret); + } else { + MTL_OFI_RETRY_UNTIL_DONE(fi_inject(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + sep_peer_fiaddr), ret); + } + if (OPAL_UNLIKELY(0 > ret)) { + MTL_OFI_LOG_FI_ERR(ret, + ofi_cq_data ? "fi_injectdata failed" + : "fi_inject failed"); + + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + return ofi_req->status.MPI_ERROR; + } + } else { + ofi_req->completion_count = 1; + if (ofi_cq_data) { + MTL_OFI_RETRY_UNTIL_DONE(fi_senddata(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + NULL, + comm->c_my_rank, + sep_peer_fiaddr, + (void *) &ofi_req->ctx), ret); + } else { + MTL_OFI_RETRY_UNTIL_DONE(fi_send(ompi_mtl_ofi.ofi_ctxt[0].tx_ep, + start, + length, + NULL, + sep_peer_fiaddr, + (void *) &ofi_req->ctx), ret); + } + if (OPAL_UNLIKELY(0 > ret)) { + MTL_OFI_LOG_FI_ERR(ret, + ofi_cq_data ? "fi_tsenddata failed" + : "fi_tsend failed"); + ofi_req->status.MPI_ERROR = ompi_mtl_ofi_get_error(ret); + } + } + + return ofi_req->status.MPI_ERROR; +} + +__opal_attribute_always_inline__ static inline int +ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, + struct ompi_communicator_t *comm, + int dest, + int tag, + struct opal_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool ofi_cq_data) { ssize_t ret = OMPI_SUCCESS; ompi_mtl_ofi_request_t ofi_req; - int ompi_ret, ctxt_id = 0; + int ompi_ret, ctxt_id = 0, c_index_for_tag; void *start; bool free_after; size_t length; @@ -513,9 +749,29 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, fi_addr_t src_addr = 0; fi_addr_t sep_peer_fiaddr = 0; + if (OPAL_LIKELY(OMPI_COMM_IS_GLOBAL_INDEX(comm))) { + c_index_for_tag = comm->c_index; + } else { + if (comm->c_index_vec[dest] < -1) { + comm->c_index_vec[dest] = -1; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, dest, ofi_cq_data, true); + } + + if (comm->c_index_vec[dest] < 0) { + while (comm->c_index_vec[dest] < 0) { + ompi_ret = ompi_mtl_ofi_post_recv_excid_buffer(true, comm, dest); + } + } + c_index_for_tag = comm->c_index_vec[dest]; + } + ompi_mtl_ofi_set_mr_null(&ofi_req); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); set_thread_context(ctxt_id); /** @@ -548,10 +804,10 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl, } if (ofi_cq_data) { - match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); + match_bits = mtl_ofi_create_send_tag_CQD(c_index_for_tag, tag); src_addr = sep_peer_fiaddr; } else { - match_bits = mtl_ofi_create_send_tag(comm->c_contextid, + match_bits = mtl_ofi_create_send_tag(c_index_for_tag, comm->c_my_rank, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not supported */ } @@ -659,7 +915,7 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, { ssize_t ret = OMPI_SUCCESS; ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t *) mtl_request; - int ompi_ret, ctxt_id = 0; + int ompi_ret, ctxt_id = 0, c_index_for_tag; void *start; size_t length; bool free_after; @@ -671,7 +927,26 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + c_index_for_tag = comm->c_index; + } else { + if (comm->c_index_vec[dest] < -1) { + comm->c_index_vec[dest] = -1; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, dest, ofi_cq_data, true); + } + if (comm->c_index_vec[dest] < 0) { + while (comm->c_index_vec[dest] < 0) { + ompi_ret = ompi_mtl_ofi_post_recv_excid_buffer(true, comm, dest); + } + } + c_index_for_tag = comm->c_index_vec[dest]; + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ofi_req->event_callback = ompi_mtl_ofi_isend_callback; @@ -699,9 +974,9 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl, } if (ofi_cq_data) { - match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag); + match_bits = mtl_ofi_create_send_tag_CQD(c_index_for_tag, tag); } else { - match_bits = mtl_ofi_create_send_tag(comm->c_contextid, + match_bits = mtl_ofi_create_send_tag(c_index_for_tag, comm->c_my_rank, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not supported */ } @@ -767,7 +1042,11 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, ompi_status_public_t *status = NULL; struct fi_msg_tagged tagged_msg; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = ofi_req->comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } assert(ofi_req->super.ompi_req); status = &ofi_req->super.ompi_req->req_status; @@ -912,7 +1191,24 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + if ((src == MPI_ANY_SOURCE || comm->c_index_vec[src] < 0) && + !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ompi_ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && comm->c_index_vec[src] < -1) { + comm->c_index_vec[src] = -1; + ompi_ret = ompi_mtl_ofi_send_excid(mtl, comm, src, ofi_cq_data, false); + } + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } + set_thread_context(ctxt_id); if (ofi_cq_data) { @@ -922,10 +1218,10 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl, remote_addr = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not used */ } @@ -1048,7 +1344,12 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl, ompi_mtl_ofi_set_mr_null(ofi_req); - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } + set_thread_context(ctxt_id); ompi_ret = ompi_mtl_datatype_recv_buf(convertor, @@ -1159,7 +1460,23 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl, uint64_t msgflags = FI_PEEK | FI_COMPLETION; int ctxt_id = 0; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + if ((src == MPI_ANY_SOURCE || comm->c_index_vec[src] < 0) && + !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && comm->c_index_vec[src] < -1) { + comm->c_index_vec[src] = -1; + ret = ompi_mtl_ofi_send_excid(mtl, comm, src, ofi_cq_data, false); + } + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); if (ofi_cq_data) { @@ -1170,11 +1487,11 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl, remote_proc = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); /* src_addr is ignored when FI_DIRECTED_RECV is not used */ } @@ -1241,7 +1558,22 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl, uint64_t msgflags = FI_PEEK | FI_CLAIM | FI_COMPLETION; int ctxt_id = 0; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid); + if (!OMPI_COMM_IS_GLOBAL_INDEX(comm)) { + if ((src == MPI_ANY_SOURCE || comm->c_index_vec[src] < 0) && !ompi_mtl_ofi.has_posted_initial_buffer) { + ompi_mtl_ofi.has_posted_initial_buffer = true; + ret = ompi_mtl_ofi_post_recv_excid_buffer(false, comm, -1); + } + if (src >= 0 && comm->c_index_vec[src] < -1) { + comm->c_index_vec[src] = -1; + ret = ompi_mtl_ofi_send_excid(mtl, comm, src, ofi_cq_data, false); + } + } + + if (ompi_mtl_ofi.total_ctxts_used > 0) { + ctxt_id = comm->c_contextid.cid_sub.u64 % ompi_mtl_ofi.total_ctxts_used; + } else { + ctxt_id = 0; + } set_thread_context(ctxt_id); ofi_req = malloc(sizeof *ofi_req); @@ -1260,12 +1592,12 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl, remote_proc = fi_rx_addr(endpoint->peer_fiaddr, ctxt_id, ompi_mtl_ofi.rx_ctx_bits); } - mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid, + mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_index, tag); } else { /* src_addr is ignored when FI_DIRECTED_RECV is not used */ - mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src, + mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_index, src, tag); } @@ -1335,7 +1667,7 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl, int ret, ctxt_id = 0; ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request; - ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid); + ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_index); switch (ofi_req->type) { case OMPI_MTL_OFI_SEND: @@ -1401,10 +1733,10 @@ static int ompi_mtl_ofi_init_contexts(struct mca_mtl_base_module_t *mtl, * will be assigned to contexts in a round-robin fashion. */ if (ompi_mtl_ofi.num_ofi_contexts <= ompi_mtl_ofi.total_ctxts_used) { - ompi_mtl_ofi.comm_to_context[comm->c_contextid] = comm->c_contextid % + ompi_mtl_ofi.comm_to_context[comm->c_index] = comm->c_index % ompi_mtl_ofi.total_ctxts_used; if (!ompi_mtl_ofi.threshold_comm_context_id) { - ompi_mtl_ofi.threshold_comm_context_id = comm->c_contextid; + ompi_mtl_ofi.threshold_comm_context_id = comm->c_index; opal_show_help("help-mtl-ofi.txt", "SEP thread grouping ctxt limit", true, ctxt_id, ompi_process_info.nodename, __FILE__, __LINE__); @@ -1464,7 +1796,7 @@ static int ompi_mtl_ofi_init_contexts(struct mca_mtl_base_module_t *mtl, /* Initialize per-context lock */ OBJ_CONSTRUCT(&ompi_mtl_ofi.ofi_ctxt[ctxt_id].context_lock, opal_mutex_t); - if (MPI_COMM_WORLD == comm) { + if (!ompi_mtl_ofi.is_initialized) { ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, opal_common_ofi.output, @@ -1474,7 +1806,7 @@ static int ompi_mtl_ofi_init_contexts(struct mca_mtl_base_module_t *mtl, } } - ompi_mtl_ofi.comm_to_context[comm->c_contextid] = ompi_mtl_ofi.total_ctxts_used; + ompi_mtl_ofi.comm_to_context[comm->c_index] = ompi_mtl_ofi.total_ctxts_used; ompi_mtl_ofi.total_ctxts_used++; return OMPI_SUCCESS; @@ -1508,12 +1840,12 @@ static int ompi_mtl_ofi_finalize_contexts(struct mca_mtl_base_module_t *mtl, if (ompi_mtl_ofi.thread_grouping && ompi_mtl_ofi.threshold_comm_context_id && - ((uint32_t) ompi_mtl_ofi.threshold_comm_context_id <= comm->c_contextid)) { + ((uint32_t) ompi_mtl_ofi.threshold_comm_context_id <= comm->c_index)) { return OMPI_SUCCESS; } ctxt_id = ompi_mtl_ofi.thread_grouping ? - ompi_mtl_ofi.comm_to_context[comm->c_contextid] : 0; + ompi_mtl_ofi.comm_to_context[comm->c_index] : 0; /* * For regular EPs, TX/RX contexts are aliased to SEP object which is @@ -1562,9 +1894,10 @@ ompi_mtl_ofi_add_comm(struct mca_mtl_base_module_t *mtl, /* If no thread grouping, add new OFI context only * for MPI_COMM_WORLD. */ - (!ompi_mtl_ofi.thread_grouping && (MPI_COMM_WORLD == comm))) { + (!ompi_mtl_ofi.thread_grouping && (!ompi_mtl_ofi.is_initialized))) { ret = ompi_mtl_ofi_init_contexts(mtl, comm, ep_type); + ompi_mtl_ofi.is_initialized = true; if (OMPI_SUCCESS != ret) { goto error; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index a7e1c46a6b5..466d69446a7 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -639,7 +639,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, interface and local communication and remote communication. */ hints->mode = FI_CONTEXT | FI_CONTEXT2; hints->ep_attr->type = FI_EP_RDM; - hints->caps |= FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_DIRECTED_RECV; + hints->caps |= FI_MSG | FI_TAGGED | FI_LOCAL_COMM | FI_REMOTE_COMM | FI_DIRECTED_RECV; hints->tx_attr->msg_order = FI_ORDER_SAS; hints->rx_attr->msg_order = FI_ORDER_SAS; hints->rx_attr->op_flags = FI_COMPLETION; @@ -1060,6 +1060,10 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, * Set the ANY_SRC address. */ ompi_mtl_ofi.any_addr = FI_ADDR_UNSPEC; + ompi_mtl_ofi.is_initialized = false; + ompi_mtl_ofi.has_posted_initial_buffer = false; + + ompi_mtl_ofi.base.mtl_flags |= MCA_MTL_BASE_FLAG_SUPPORTS_EXT_CID; #if OPAL_CUDA_SUPPORT mca_common_cuda_stage_one_init(); diff --git a/ompi/mca/mtl/ofi/mtl_ofi_types.h b/ompi/mca/mtl/ofi/mtl_ofi_types.h index a2c2f3d4308..4d04e8ef6e5 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_types.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_types.h @@ -95,6 +95,9 @@ typedef struct mca_mtl_ofi_module_t { /** Optimized function Symbol Tables **/ struct ompi_mtl_ofi_symtable sym_table; + bool is_initialized; + bool has_posted_initial_buffer; + } mca_mtl_ofi_module_t; extern mca_mtl_ofi_module_t ompi_mtl_ofi; diff --git a/ompi/mca/mtl/psm2/mtl_psm2_probe.c b/ompi/mca/mtl/psm2/mtl_psm2_probe.c index b81317507be..c5b7f7ab93a 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_probe.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_probe.c @@ -39,7 +39,7 @@ int ompi_mtl_psm2_iprobe(struct mca_mtl_base_module_t* mtl, psm2_mq_status2_t mqstat; psm2_error_t err; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_iprobe2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqstat); @@ -88,7 +88,7 @@ ompi_mtl_psm2_improbe(struct mca_mtl_base_module_t *mtl, psm2_mq_req_t mqreq; psm2_error_t err; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_improbe2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, &mqtag, &tagsel, &mqreq, &mqstat); diff --git a/ompi/mca/mtl/psm2/mtl_psm2_recv.c b/ompi/mca/mtl/psm2/mtl_psm2_recv.c index ff5c54067ce..83fdfcfec81 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_recv.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_recv.c @@ -63,7 +63,7 @@ ompi_mtl_psm2_irecv(struct mca_mtl_base_module_t* mtl, mtl_psm2_request->convertor = convertor; mtl_psm2_request->type = OMPI_mtl_psm2_IRECV; - PSM2_MAKE_TAGSEL(src, tag, comm->c_contextid, mqtag, tagsel); + PSM2_MAKE_TAGSEL(src, tag, comm->c_index, mqtag, tagsel); err = psm2_mq_irecv2(ompi_mtl_psm2.mq, PSM2_MQ_ANY_ADDR, diff --git a/ompi/mca/mtl/psm2/mtl_psm2_send.c b/ompi/mca/mtl/psm2/mtl_psm2_send.c index 6acb30cf6d2..59742ace546 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2_send.c +++ b/ompi/mca/mtl/psm2/mtl_psm2_send.c @@ -48,7 +48,7 @@ ompi_mtl_psm2_send(struct mca_mtl_base_module_t* mtl, assert(mtl == &ompi_mtl_psm2.super); - PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_index, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, &mtl_psm2_request.buf, @@ -106,7 +106,7 @@ ompi_mtl_psm2_isend(struct mca_mtl_base_module_t* mtl, assert(mtl == &ompi_mtl_psm2.super); - PSM2_MAKE_MQTAG(comm->c_contextid, comm->c_my_rank, tag, mqtag); + PSM2_MAKE_MQTAG(comm->c_index, comm->c_my_rank, tag, mqtag); ret = ompi_mtl_datatype_pack(convertor, diff --git a/ompi/mca/op/base/base.h b/ompi/mca/op/base/base.h index f26992c23a5..ea284a0e6f2 100644 --- a/ompi/mca/op/base/base.h +++ b/ompi/mca/op/base/base.h @@ -45,7 +45,6 @@ typedef struct ompi_op_base_selected_module_t { ompi_op_base_module_t *op_module; } ompi_op_base_selected_module_t; - /** * Find all available op components. */ diff --git a/ompi/mca/osc/base/base.h b/ompi/mca/osc/base/base.h index f52f64e6ea7..6b050fbf472 100644 --- a/ompi/mca/osc/base/base.h +++ b/ompi/mca/osc/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. @@ -8,6 +9,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2016-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,8 +50,6 @@ int ompi_osc_base_select(ompi_win_t *win, int flavor, int *model); -int ompi_osc_base_finalize(void); - OMPI_DECLSPEC extern mca_base_framework_t ompi_osc_base_framework; diff --git a/ompi/mca/osc/base/osc_base_frame.c b/ompi/mca/osc/base/osc_base_frame.c index 24a6a9dc126..b2f60917cdb 100644 --- a/ompi/mca/osc/base/osc_base_frame.c +++ b/ompi/mca/osc/base/osc_base_frame.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. @@ -9,6 +10,8 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,6 +39,22 @@ #include "ompi/mca/osc/base/static-components.h" +static int ompi_osc_base_finalize(void) +{ + opal_list_item_t* item; + + /* Finalize all available modules */ + while (NULL != + (item = opal_list_remove_first(&ompi_osc_base_framework.framework_components))) { + ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) + ((mca_base_component_list_item_t*) item)->cli_component; + component->osc_finalize(); + OBJ_RELEASE(item); + } + return OMPI_SUCCESS; +} + + int ompi_osc_base_find_available(bool enable_progress_threads, bool enable_mpi_threads) @@ -56,22 +75,9 @@ ompi_osc_base_find_available(bool enable_progress_threads, OBJ_RELEASE(cli); } } - return OMPI_SUCCESS; -} -int -ompi_osc_base_finalize(void) -{ - opal_list_item_t* item; + ompi_mpi_instance_append_finalize (ompi_osc_base_finalize); - /* Finalize all available modules */ - while (NULL != - (item = opal_list_remove_first(&ompi_osc_base_framework.framework_components))) { - ompi_osc_base_component_t *component = (ompi_osc_base_component_t*) - ((mca_base_component_list_item_t*) item)->cli_component; - component->osc_finalize(); - OBJ_RELEASE(item); - } return OMPI_SUCCESS; } diff --git a/ompi/mca/osc/base/osc_base_obj_convert.h b/ompi/mca/osc/base/osc_base_obj_convert.h index 86b2bc63452..c04d448f969 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.h +++ b/ompi/mca/osc/base/osc_base_obj_convert.h @@ -30,6 +30,8 @@ #include "ompi/proc/proc.h" #include "ompi/op/op.h" +#include "ompi/runtime/mpiruntime.h" + BEGIN_C_DECLS /** diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index 293885eb88e..cca7ad6703e 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -413,9 +413,9 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit opal_output_verbose(1, ompi_osc_base_framework.framework_output, "portals4 component creating window with id %d", - ompi_comm_get_cid(module->comm)); + ompi_comm_get_local_cid(module->comm)); - opal_asprintf(&name, "portals4 window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "portals4 window %d", ompi_comm_get_local_cid(module->comm)); ompi_win_set_name(win, name); free(name); diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 74398060081..4cf5b947861 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -654,9 +654,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s if (0 == local_rank) { /* allocate the shared memory segment */ - ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d", + ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%s", mca_osc_rdma_component.backing_directory, ompi_process_info.nodename, - OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm)); + OMPI_PROC_MY_NAME->jobid, ompi_comm_print_cid(module->comm)); if (0 > ret) { ret = OMPI_ERR_OUT_OF_RESOURCE; } else { @@ -1369,8 +1369,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, return ret; } - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %d", - flavor, ompi_comm_get_cid(module->comm)); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "creating osc/rdma window of flavor %d with id %s", + flavor, ompi_comm_print_cid (module->comm)); /* peer data */ if (world_size > init_limit) { @@ -1477,7 +1477,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* update component data */ OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock); ret = opal_hash_table_set_value_uint32(&mca_osc_rdma_component.modules, - ompi_comm_get_cid(module->comm), + ompi_comm_get_local_cid(module->comm), module); OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock); if (OMPI_SUCCESS != ret) { @@ -1488,7 +1488,7 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* fill in window information */ *model = MPI_WIN_UNIFIED; win->w_osc_module = (ompi_osc_base_module_t*) module; - opal_asprintf(&name, "rdma window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "rdma window %s", ompi_comm_print_cid(module->comm)); ompi_win_set_name(win, name); free(name); @@ -1503,8 +1503,8 @@ static int ompi_osc_rdma_component_select (struct ompi_win_t *win, void **base, /* for now the leader is always rank 0 in the communicator */ module->leader = ompi_osc_rdma_module_peer (module, 0); - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %d", - ompi_comm_get_cid(module->comm)); + OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "finished creating osc/rdma window with id %s", + ompi_comm_print_cid(module->comm)); } return ret; diff --git a/ompi/mca/osc/rdma/osc_rdma_module.c b/ompi/mca/osc/rdma/osc_rdma_module.c index cda38baaa24..7d2c20ab55a 100644 --- a/ompi/mca/osc/rdma/osc_rdma_module.c +++ b/ompi/mca/osc/rdma/osc_rdma_module.c @@ -58,8 +58,8 @@ int ompi_osc_rdma_free(ompi_win_t *win) if (NULL != module->comm) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "rdma component destroying window with id %d", - ompi_comm_get_cid(module->comm)); + "rdma component destroying window with id %s", + ompi_comm_print_cid(module->comm)); /* finish with a barrier */ if (ompi_group_size(win->w_group) > 1) { @@ -70,7 +70,7 @@ int ompi_osc_rdma_free(ompi_win_t *win) /* remove from component information */ OPAL_THREAD_LOCK(&mca_osc_rdma_component.lock); opal_hash_table_remove_value_uint32(&mca_osc_rdma_component.modules, - ompi_comm_get_cid(module->comm)); + ompi_comm_get_local_cid(module->comm)); OPAL_THREAD_UNLOCK(&mca_osc_rdma_component.lock); } diff --git a/ompi/mca/osc/sm/osc_sm_component.c b/ompi/mca/osc/sm/osc_sm_component.c index 99486725ba6..84e7a65033f 100644 --- a/ompi/mca/osc/sm/osc_sm_component.c +++ b/ompi/mca/osc/sm/osc_sm_component.c @@ -285,9 +285,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64); if (0 == ompi_comm_rank (module->comm)) { char *data_file; - ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d.%d", - mca_osc_sm_component.backing_directory, ompi_process_info.nodename, - OMPI_PROC_MY_NAME->jobid, (int) OMPI_PROC_MY_NAME->vpid, ompi_comm_get_cid(module->comm)); + ret = opal_asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d.%s", + mca_osc_sm_component.backing_directory, ompi_process_info.nodename, + OMPI_PROC_MY_NAME->jobid, (int) OMPI_PROC_MY_NAME->vpid, + ompi_comm_print_cid(module->comm)); if (ret < 0) { free(rbuf); return OMPI_ERR_OUT_OF_RESOURCE; diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index a9db0952776..eb006e06a6a 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -1,6 +1,9 @@ /* * Copyright (C) Mellanox Technologies Ltd. 2001-2017. ALL RIGHTS RESERVED. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -392,7 +395,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in } *model = MPI_WIN_UNIFIED; - opal_asprintf(&name, "ucx window %d", ompi_comm_get_cid(module->comm)); + opal_asprintf(&name, "ucx window %s", ompi_comm_print_cid(module->comm)); ompi_win_set_name(win, name); free(name); diff --git a/ompi/mca/pml/base/base.h b/ompi/mca/pml/base/base.h index 8eb37e48448..433c7a60833 100644 --- a/ompi/mca/pml/base/base.h +++ b/ompi/mca/pml/base/base.h @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +11,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,8 +63,6 @@ OMPI_DECLSPEC int mca_pml_base_pml_check_selected(const char *my_pml, struct ompi_proc_t **procs, size_t nprocs); -OMPI_DECLSPEC int mca_pml_base_finalize(void); - /* not #if conditional on OPAL_ENABLE_FT_MPI for ABI */ OMPI_DECLSPEC int mca_pml_base_revoke_comm(struct ompi_communicator_t *comm, bool coll_only); diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index 3826253e2ae..b17d03a2387 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -16,6 +16,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +36,7 @@ #include "ompi/mca/pml/base/pml_base_sendreq.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "opal/mca/mpool/mpool.h" +#include "ompi/runtime/mpiruntime.h" #ifdef HAVE_UNISTD_H #include @@ -56,6 +59,8 @@ static opal_atomic_int32_t mca_pml_bsend_init = 0; /* defined in pml_base_open.c */ extern char *ompi_pml_base_bsend_allocator_name; +static int mca_pml_base_bsend_fini (void); + /* * Routine to return pages to sub-allocator as needed */ @@ -77,7 +82,7 @@ static void* mca_pml_bsend_alloc_segment(void *ctx, size_t *size_inout) /* * One time initialization at startup */ -int mca_pml_base_bsend_init(bool thread_safe) +int mca_pml_base_bsend_init (void) { size_t tmp; @@ -100,6 +105,9 @@ int mca_pml_base_bsend_init(bool thread_safe) tmp >>= 1; mca_pml_bsend_pagebits++; } + + ompi_mpi_instance_append_finalize (mca_pml_base_bsend_fini); + return OMPI_SUCCESS; } @@ -107,7 +115,7 @@ int mca_pml_base_bsend_init(bool thread_safe) /* * One-time cleanup at shutdown - release any resources. */ -int mca_pml_base_bsend_fini(void) +static int mca_pml_base_bsend_fini (void) { if(OPAL_THREAD_ADD_FETCH32(&mca_pml_bsend_init,-1) > 0) return OMPI_SUCCESS; diff --git a/ompi/mca/pml/base/pml_base_bsend.h b/ompi/mca/pml/base/pml_base_bsend.h index e50bdc7b5e8..725427e27f1 100644 --- a/ompi/mca/pml/base/pml_base_bsend.h +++ b/ompi/mca/pml/base/pml_base_bsend.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -10,6 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,8 +28,7 @@ BEGIN_C_DECLS -OMPI_DECLSPEC int mca_pml_base_bsend_init(bool enable_mpi_threads); -OMPI_DECLSPEC int mca_pml_base_bsend_fini(void); +OMPI_DECLSPEC int mca_pml_base_bsend_init (void); int mca_pml_base_bsend_attach(void* addr, int size); int mca_pml_base_bsend_detach(void* addr, int* size); diff --git a/ompi/mca/pml/base/pml_base_frame.c b/ompi/mca/pml/base/pml_base_frame.c index 5481095d486..d33b4f2cab0 100644 --- a/ompi/mca/pml/base/pml_base_frame.c +++ b/ompi/mca/pml/base/pml_base_frame.c @@ -126,14 +126,6 @@ static int mca_pml_base_register(mca_base_register_flag_t flags) return OMPI_SUCCESS; } -int mca_pml_base_finalize(void) { - if (NULL != mca_pml_base_selected_component.pmlm_finalize) { - return mca_pml_base_selected_component.pmlm_finalize(); - } - return OMPI_SUCCESS; -} - - static int mca_pml_base_close(void) { int i, j; diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 1b9c1de13d4..301b51d8889 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology @@ -15,6 +15,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights + * Copyright (c) 2018-2020 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -37,6 +38,7 @@ #include "opal/mca/pmix/pmix-internal.h" #include "ompi/constants.h" +#include "ompi/instance/instance.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "ompi/proc/proc.h" @@ -46,6 +48,15 @@ typedef struct opened_component_t { mca_pml_base_component_t *om_component; } opened_component_t; + +static int mca_pml_base_finalize (void) { + if (NULL != mca_pml_base_selected_component.pmlm_finalize) { + return mca_pml_base_selected_component.pmlm_finalize(); + } + + return OMPI_SUCCESS; +} + /** * Function for selecting one component from all those that are * available. @@ -229,6 +240,7 @@ int mca_pml_base_select(bool enable_progress_threads, ret = mca_pml_base_pml_selected(best_component->pmlm_version.mca_component_name); /* All done */ + ompi_mpi_instance_append_finalize (mca_pml_base_finalize); return ret; } diff --git a/ompi/mca/pml/cm/pml_cm.c b/ompi/mca/pml/cm/pml_cm.c index 567b00bc331..51a63fdba4c 100644 --- a/ompi/mca/pml/cm/pml_cm.c +++ b/ompi/mca/pml/cm/pml_cm.c @@ -93,7 +93,7 @@ int mca_pml_cm_add_comm(ompi_communicator_t* comm) { /* should never happen, but it was, so check */ - if (comm->c_contextid > ompi_pml_cm.super.pml_max_contextid) { + if (comm->c_index > ompi_pml_cm.super.pml_max_contextid) { return OMPI_ERR_OUT_OF_RESOURCE; } diff --git a/ompi/mca/pml/cm/pml_cm_component.c b/ompi/mca/pml/cm/pml_cm_component.c index 40def94feb2..4f0934c8d9e 100644 --- a/ompi/mca/pml/cm/pml_cm_component.c +++ b/ompi/mca/pml/cm/pml_cm_component.c @@ -151,8 +151,10 @@ mca_pml_cm_component_init(int* priority, ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_REQUIRE_WORLD; } - /* update our tag / context id max values based on MTL - information */ + if (ompi_mtl->mtl_flags & MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID) { + ompi_pml_cm.super.pml_flags |= MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID; + } + ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid; ompi_pml_cm.super.pml_max_tag = ompi_mtl->mtl_max_tag; diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index a25caf46d35..bc2eeb520bd 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -22,6 +22,8 @@ * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reseved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -193,6 +195,7 @@ int mca_pml_ob1_enable(bool enable) NULL, 0, NULL, NULL, NULL); mca_pml_ob1.enabled = true; + return OMPI_SUCCESS; } @@ -209,7 +212,7 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) } /* should never happen, but it was, so check */ - if (comm->c_contextid > mca_pml_ob1.super.pml_max_contextid) { + if (comm->c_index > mca_pml_ob1.super.pml_max_contextid) { OBJ_RELEASE(pml_comm); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -224,9 +227,25 @@ int mca_pml_ob1_add_comm(ompi_communicator_t* comm) OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) { hdr = &frag->hdr.hdr_match; + if (MCA_PML_OB1_HDR_TYPE_CID == frag->hdr.hdr_common.hdr_type) { + if (!ompi_communicator_cid_compare (comm, frag->hdr.hdr_cid.hdr_cid)) { + continue; + } + + /* handle this CID*/ + mca_pml_ob1_handle_cid (comm, frag->hdr.hdr_ext_match.hdr_match.hdr_src, &frag->hdr.hdr_cid); + + hdr = &frag->hdr.hdr_ext_match.hdr_match; + hdr->hdr_ctx = comm->c_index; + + /* NTH: this is ok because the pointer that will be freed is stored in frag->addr[] */ + frag->segments[0].seg_addr.pval = (void *)((uintptr_t) frag->segments[0].seg_addr.pval + sizeof (frag->hdr.hdr_cid)); + } + /* Is this fragment for the current communicator ? */ - if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid ) + if (hdr->hdr_ctx != comm->c_index) { continue; + } /* As we now know we work on a fragment for this communicator * we should remove it from the @@ -330,11 +349,17 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) return rc; } - rc = mca_bml.bml_add_procs( nprocs, - procs, - &reachable ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + OBJ_CONSTRUCT(&reachable, opal_bitmap_t); + rc = opal_bitmap_init(&reachable, (int)nprocs); + if (OMPI_SUCCESS != rc) { + return rc; + } + + rc = mca_bml.bml_add_procs (nprocs, procs, &reachable); + OBJ_DESTRUCT(&reachable); + if (OMPI_SUCCESS != rc) { + return rc; + } /* Check that values supplied by all initialized btls will work for us. Note that this is the list of all initialized BTLs, @@ -358,8 +383,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sizeof(mca_pml_ob1_hdr_t), sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } #if OPAL_CUDA_GDR_SUPPORT /* If size is SIZE_MAX, then we know we want to set this to the minimum possible @@ -380,8 +404,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sizeof(mca_pml_ob1_hdr_t), sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } } if (0 == sm->btl_module->btl_cuda_rdma_limit) { @@ -398,8 +421,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) sm->btl_component->btl_version.mca_component_name, sm->btl_module->btl_cuda_eager_limit, sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; + return OMPI_ERR_BAD_PARAM; } } #endif /* OPAL_CUDA_GDR_SUPPORT */ @@ -410,54 +432,61 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_MATCH, mca_pml_ob1_recv_frag_callback_match, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RNDV, mca_pml_ob1_recv_frag_callback_rndv, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RGET, mca_pml_ob1_recv_frag_callback_rget, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_ACK, mca_pml_ob1_recv_frag_callback_ack, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FRAG, mca_pml_ob1_recv_frag_callback_frag, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_PUT, mca_pml_ob1_recv_frag_callback_put, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FIN, mca_pml_ob1_recv_frag_callback_fin, NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* register error handlers */ - rc = mca_bml.bml_register_error(mca_pml_ob1_error_handler); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; + if (OMPI_SUCCESS != rc) { + return rc; + } - cleanup_and_return: - OBJ_DESTRUCT(&reachable); + rc = mca_bml.bml_register (MCA_PML_OB1_HDR_TYPE_CID, + mca_pml_ob1_recv_frag_callback_cid, + NULL); + if (OMPI_SUCCESS != rc) { + return rc; + } - return rc; + /* register error handlers */ + return mca_bml.bml_register_error(mca_pml_ob1_error_handler); } /* @@ -594,8 +623,8 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) /* TODO: don't forget to dump mca_pml_ob1.non_existing_communicator_pending */ - opal_output(0, "Communicator %s [%p](%d) rank %d recv_seq %d num_procs %lu last_probed %lu\n", - comm->c_name, (void*) comm, comm->c_contextid, comm->c_my_rank, + opal_output(0, "Communicator %s [%p](%s) rank %d recv_seq %d num_procs %lu last_probed %lu\n", + comm->c_name, (void*) comm, ompi_comm_print_cid (comm), comm->c_my_rank, pml_comm->recv_sequence, pml_comm->num_procs, pml_comm->last_probed); #if !MCA_PML_OB1_CUSTOM_MATCH @@ -653,10 +682,8 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) return OMPI_SUCCESS; } -static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) +static void mca_pml_ob1_control_completion (mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t *endpoint, + mca_btl_base_descriptor_t *des, int status) { mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; @@ -665,40 +692,36 @@ static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl, MCA_PML_OB1_PROGRESS_PENDING(bml_btl); } -/** - * Send an FIN to the peer. If we fail to send this ack (no more available - * fragments or the send failed) this function automatically add the FIN - * to the list of pending FIN, Which guarantee that the FIN will be sent - * later. - */ -int mca_pml_ob1_send_fin( ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, - opal_ptr_t hdr_frag, - uint64_t rdma_size, - uint8_t order, - int status ) + +int mca_pml_ob1_send_control_btl (mca_bml_base_btl_t *bml_btl, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending) { - mca_btl_base_descriptor_t* fin; + int des_flags = MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL; + mca_btl_base_descriptor_t *des; int rc; - mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_ob1_fin_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); + if (NULL != bml_btl->btl->btl_sendi) { + rc = mca_bml_base_sendi (bml_btl, NULL, hdr, hdr_size, 0, order, des_flags, hdr->hdr_common.hdr_type, &des); + if (OPAL_LIKELY(OPAL_SUCCESS == rc)) { + return rc; + } + } else { + (void) mca_bml_base_alloc (bml_btl, &des, order, hdr_size, des_flags); + } - if(NULL == fin) { - MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status); + if (OPAL_UNLIKELY(NULL == des)) { + if (add_to_pending) { + mca_pml_ob1_add_to_pending (NULL, bml_btl, order, hdr, hdr_size); + } return OMPI_ERR_OUT_OF_RESOURCE; } - fin->des_cbfunc = mca_pml_ob1_fin_completion; - fin->des_cbdata = NULL; - /* fill in header */ - mca_pml_ob1_fin_hdr_prepare ((mca_pml_ob1_fin_hdr_t *) fin->des_segments->seg_addr.pval, - 0, hdr_frag.lval, status ? status : (int64_t) rdma_size); + des->des_cbfunc = mca_pml_ob1_control_completion; - ob1_hdr_hton((mca_pml_ob1_hdr_t *) fin->des_segments->seg_addr.pval, MCA_PML_OB1_HDR_TYPE_FIN, proc); + memcpy (des->des_segments->seg_addr.pval, hdr, hdr_size); /* queue request */ - rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN ); + rc = mca_bml_base_send (bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { MCA_PML_OB1_PROGRESS_PENDING(bml_btl); @@ -706,76 +729,98 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc, SPC_RECORD(OMPI_SPC_BYTES_SENT_MPI, (ompi_spc_value_t)sizeof(mca_pml_ob1_fin_hdr_t)); return OMPI_SUCCESS; } - mca_bml_base_free(bml_btl, fin); - MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status); + + mca_bml_base_free(bml_btl, des); + if (add_to_pending) { + mca_pml_ob1_add_to_pending (NULL, bml_btl, order, hdr, hdr_size); + } + return OMPI_ERR_OUT_OF_RESOURCE; } +int mca_pml_ob1_send_control_any (ompi_proc_t *proc, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending) +{ + mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc); + int rc; + + assert (NULL != endpoint); + + for (size_t i = 0 ; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager) ; ++i) { + mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next (&endpoint->btl_eager); + + rc = mca_pml_ob1_send_control_btl (bml_btl, order, hdr, hdr_size, false); + if (OMPI_SUCCESS == rc) { + return OMPI_SUCCESS; + } + } + + if (add_to_pending) { + mca_pml_ob1_add_to_pending (proc, NULL, order, hdr, hdr_size); + } + + return OMPI_ERR_OUT_OF_RESOURCE; +} + +/** + * Send an FIN to the peer. If we fail to send this ack (no more available + * fragments or the send failed) this function automatically add the FIN + * to the list of pending FIN, Which guarantee that the FIN will be sent + * later. + */ +int mca_pml_ob1_send_fin (ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, opal_ptr_t hdr_frag, uint64_t rdma_size, + uint8_t order, int status) +{ + mca_pml_ob1_fin_hdr_t fin; + + /* fill in header */ + mca_pml_ob1_fin_hdr_prepare (&fin, 0, hdr_frag.lval, status ? status : (int64_t) rdma_size); + + ob1_hdr_hton((mca_pml_ob1_hdr_t *) &fin, MCA_PML_OB1_HDR_TYPE_FIN, proc); + + return mca_pml_ob1_send_control_btl (bml_btl, order, (mca_pml_ob1_hdr_t *) &fin, sizeof (fin), true); +} + +int mca_pml_ob1_send_cid (ompi_proc_t *proc, ompi_communicator_t *comm) +{ + mca_pml_ob1_cid_hdr_t cid; + + mca_pml_ob1_cid_hdr_prepare (&cid, comm); + ob1_hdr_hton ((mca_pml_ob1_hdr_t *) &cid, cid->hdr_common.hdr_type, proc); + + return mca_pml_ob1_send_control_any (proc, MCA_BTL_NO_ORDER, (mca_pml_ob1_hdr_t *) &cid, sizeof (cid), true); +} + void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl) { mca_pml_ob1_pckt_pending_t *pckt; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_ob1.pckt_pending); - - for(i = 0; i < s; i++) { - mca_bml_base_btl_t *send_dst = NULL; - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); - pckt = (mca_pml_ob1_pckt_pending_t*) - opal_list_remove_first(&mca_pml_ob1.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - if(NULL == pckt) + int32_t rc, max = (int32_t) opal_list_get_size (&mca_pml_ob1.pckt_pending); + + for (int32_t i = 0; i < max ; ++i) { + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { + pckt = (mca_pml_ob1_pckt_pending_t*) + opal_list_remove_first(&mca_pml_ob1.pckt_pending); + }); + if (NULL == pckt) { break; - if(pckt->bml_btl != NULL && - pckt->bml_btl->btl == bml_btl->btl) { - send_dst = pckt->bml_btl; - } else { - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; - send_dst = mca_bml_base_btl_array_find( - &endpoint->btl_eager, bml_btl->btl); } - if(NULL == send_dst) { - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); - opal_list_append(&mca_pml_ob1.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - continue; + + if (pckt->bml_btl) { + rc = mca_pml_ob1_send_control_btl (pckt->bml_btl, pckt->order, &pckt->hdr, pckt->hdr_size, false); + } else { + rc = mca_pml_ob1_send_control_any (pckt->proc, pckt->order, &pckt->hdr, pckt->hdr_size, false); } - switch(pckt->hdr.hdr_common.hdr_type) { - case MCA_PML_OB1_HDR_TYPE_ACK: - rc = mca_pml_ob1_recv_request_ack_send_btl(pckt->proc, - send_dst, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset, - pckt->hdr.hdr_ack.hdr_send_size, - pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); + if (OPAL_SUCCESS != rc) { + /* could not send the packet. readd it to the pending list */ + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { opal_list_append(&mca_pml_ob1.pckt_pending, (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); - return; - } - break; - case MCA_PML_OB1_HDR_TYPE_FIN: - rc = mca_pml_ob1_send_fin(pckt->proc, send_dst, - pckt->hdr.hdr_fin.hdr_frag, - pckt->hdr.hdr_fin.hdr_size, - pckt->order, - pckt->status); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); - return; - } - break; - default: - opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); - break; + }); + } else { + /* We're done with this packet, return it back to the free list */ + MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); } - /* We're done with this packet, return it back to the free list */ - MCA_PML_OB1_PCKT_PENDING_RETURN(pckt); } } diff --git a/ompi/mca/pml/ob1/pml_ob1.h b/ompi/mca/pml/ob1/pml_ob1.h index f425e98d6ff..726791bfeba 100644 --- a/ompi/mca/pml/ob1/pml_ob1.h +++ b/ompi/mca/pml/ob1/pml_ob1.h @@ -12,10 +12,12 @@ * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +43,7 @@ #include "ompi/mca/bml/base/base.h" #include "ompi/proc/proc.h" #include "opal/mca/allocator/base/base.h" +#include "ompi/runtime/mpiruntime.h" BEGIN_C_DECLS @@ -226,11 +229,11 @@ END_C_DECLS struct mca_pml_ob1_pckt_pending_t { opal_free_list_item_t super; - ompi_proc_t* proc; + ompi_proc_t *proc; mca_pml_ob1_hdr_t hdr; + size_t hdr_size; struct mca_bml_base_btl_t *bml_btl; uint8_t order; - int status; }; typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t); @@ -248,22 +251,22 @@ do { \ (opal_free_list_item_t*)pckt); \ } while(0) -#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S) \ - do { \ - mca_pml_ob1_pckt_pending_t *_pckt; \ - \ - MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt); \ - mca_pml_ob1_fin_hdr_prepare (&_pckt->hdr.hdr_fin, 0, \ - (D).lval, (Sz)); \ - _pckt->proc = (P); \ - _pckt->bml_btl = (B); \ - _pckt->order = (O); \ - _pckt->status = (S); \ - OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \ - opal_list_append(&mca_pml_ob1.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); \ - } while(0) +static inline void mca_pml_ob1_add_to_pending (ompi_proc_t *proc, mca_bml_base_btl_t *bml_btl, + int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size) +{ + mca_pml_ob1_pckt_pending_t *pckt; + + MCA_PML_OB1_PCKT_PENDING_ALLOC(pckt); + assert (sizeof (pckt->hdr) >= hdr_size); + pckt->proc = proc; + pckt->order = order; + pckt->hdr_size = hdr_size; + pckt->bml_btl = bml_btl; + memcpy (&pckt->hdr, hdr, hdr_size); + OPAL_THREAD_SCOPED_LOCK(&mca_pml_ob1.lock, { + opal_list_append(&mca_pml_ob1.pckt_pending, &pckt->super.super); + }); +} #define OB1_MATCHING_LOCK(lock) \ do { \ @@ -287,6 +290,8 @@ do { \ int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status); +int mca_pml_ob1_send_cid (ompi_proc_t *proc, ompi_communicator_t *comm); + /* This function tries to resend FIN/ACK packets from pckt_pending queue. * Packets are added to the queue when sending of FIN or ACK is failed due to * resource unavailability. bml_btl passed to the function doesn't represents @@ -408,4 +413,9 @@ mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, siz */ int mca_pml_ob1_enable_progress(int32_t count); +int mca_pml_ob1_send_control_any (ompi_proc_t *proc, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending); +int mca_pml_ob1_send_control_btl (mca_bml_base_btl_t *bml_btl, int order, mca_pml_ob1_hdr_t *hdr, size_t hdr_size, + bool add_to_pending); + #endif diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.c b/ompi/mca/pml/ob1/pml_ob1_comm.c index 9eeedd6b05d..aa0f2046638 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.c +++ b/ompi/mca/pml/ob1/pml_ob1_comm.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,9 @@ * * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +37,8 @@ static void mca_pml_ob1_comm_proc_construct(mca_pml_ob1_comm_proc_t* proc) proc->expected_sequence = 1; proc->send_sequence = 0; proc->frags_cant_match = NULL; + /* don't know the index of this communicator yet */ + proc->comm_index = -1; #if !MCA_PML_OB1_CUSTOM_MATCH OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t); OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t); @@ -84,7 +90,7 @@ static void mca_pml_ob1_comm_destruct(mca_pml_ob1_comm_t* comm) } } - free(comm->procs); + free ((void *) comm->procs); } #if !MCA_PML_OB1_CUSTOM_MATCH @@ -116,4 +122,26 @@ int mca_pml_ob1_comm_init_size (mca_pml_ob1_comm_t* comm, size_t size) return OMPI_SUCCESS; } +mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_create (ompi_communicator_t *comm, mca_pml_ob1_comm_t *pml_comm, int rank) +{ + mca_pml_ob1_comm_proc_t *proc = OBJ_NEW(mca_pml_ob1_comm_proc_t); + uintptr_t old_proc = 0; + proc->ompi_proc = ompi_comm_peer_lookup (comm, rank); + if (OMPI_COMM_IS_GLOBAL_INDEX (comm)) { + /* the index is global so we can save it on the proc now */ + proc->comm_index = comm->c_index; + } + OBJ_RETAIN(proc->ompi_proc); + /* make sure proc structure is filled in before adding it to the array */ + opal_atomic_wmb (); + + if (!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR((opal_atomic_intptr_t *) pml_comm->procs + rank, &old_proc, + (uintptr_t) proc)) { + /* proc was created by a competing thread. go ahead and throw this one away. */ + OBJ_RELEASE(proc); + return (mca_pml_ob1_comm_proc_t *) old_proc; + } + + return proc; +} diff --git a/ompi/mca/pml/ob1/pml_ob1_comm.h b/ompi/mca/pml/ob1/pml_ob1_comm.h index 25313b4d204..7b7e978ec96 100644 --- a/ompi/mca/pml/ob1/pml_ob1_comm.h +++ b/ompi/mca/pml/ob1/pml_ob1_comm.h @@ -44,6 +44,7 @@ struct mca_pml_ob1_comm_proc_t { opal_object_t super; struct ompi_proc_t* ompi_proc; uint16_t expected_sequence; /**< send message sequence number - receiver side */ + int16_t comm_index; /**< index of this communicator on the receiver size (-1 - not set) */ opal_atomic_int32_t send_sequence; /**< send side sequence number */ struct mca_pml_ob1_recv_frag_t* frags_cant_match; /**< out-of-order fragment queues */ #if !MCA_PML_OB1_CUSTOM_MATCH @@ -54,6 +55,8 @@ struct mca_pml_ob1_comm_proc_t { OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_proc_t); +#define MCA_PML_OB1_PROC_REQUIRES_EXT_MATCH(proc) (-1 == (proc)->comm_index) + /** * Cached on ompi_communicator_t to hold queues/state * used by the PML<->PTL interface for matching logic. @@ -66,7 +69,7 @@ struct mca_pml_comm_t { opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ #endif opal_mutex_t proc_lock; - mca_pml_ob1_comm_proc_t **procs; + mca_pml_ob1_comm_proc_t * volatile * procs; size_t num_procs; size_t last_probed; #if MCA_PML_OB1_CUSTOM_MATCH @@ -78,6 +81,11 @@ typedef struct mca_pml_comm_t mca_pml_ob1_comm_t; OBJ_CLASS_DECLARATION(mca_pml_ob1_comm_t); +/** + * @brief Helper function to allocate/fill in ob1 proc for a comm/rank + */ +mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_create (ompi_communicator_t *comm, mca_pml_ob1_comm_t *pml_comm, int rank); + static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_communicator_t *comm, int rank) { mca_pml_ob1_comm_t *pml_comm = (mca_pml_ob1_comm_t *)comm->c_pml_comm; @@ -93,15 +101,7 @@ static inline mca_pml_ob1_comm_proc_t *mca_pml_ob1_peer_lookup (struct ompi_comm " valid range of the communicator. Please submit a bug request!"); } if (OPAL_UNLIKELY(NULL == pml_comm->procs[rank])) { - OPAL_THREAD_LOCK(&pml_comm->proc_lock); - if (NULL == pml_comm->procs[rank]) { - mca_pml_ob1_comm_proc_t* proc = OBJ_NEW(mca_pml_ob1_comm_proc_t); - proc->ompi_proc = ompi_comm_peer_lookup (comm, rank); - OBJ_RETAIN(proc->ompi_proc); - opal_atomic_wmb (); - pml_comm->procs[rank] = proc; - } - OPAL_THREAD_UNLOCK(&pml_comm->proc_lock); + mca_pml_ob1_peer_create (comm, pml_comm, rank); } return pml_comm->procs[rank]; diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c index 0feb982ae60..57fccd643ac 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.c +++ b/ompi/mca/pml/ob1/pml_ob1_component.c @@ -314,6 +314,9 @@ mca_pml_ob1_component_init( int* priority, } + /** this pml supports the extended CID space */ + mca_pml_ob1.super.pml_flags |= MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID; + return &mca_pml_ob1.super; } diff --git a/ompi/mca/pml/ob1/pml_ob1_hdr.h b/ompi/mca/pml/ob1/pml_ob1_hdr.h index 716dd841511..4ce0d84e150 100644 --- a/ompi/mca/pml/ob1/pml_ob1_hdr.h +++ b/ompi/mca/pml/ob1/pml_ob1_hdr.h @@ -49,13 +49,14 @@ #define MCA_PML_OB1_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7) #define MCA_PML_OB1_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8) #define MCA_PML_OB1_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9) +#define MCA_PML_OB1_HDR_TYPE_CID (MCA_BTL_TAG_PML + 10) -#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ -#define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */ -#define MCA_PML_OB1_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ -#define MCA_PML_OB1_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */ -#define MCA_PML_OB1_HDR_FLAGS_SIGNAL 32 /* message can be optionally signalling */ +#define MCA_PML_OB1_HDR_FLAGS_ACK 0x01 /* is an ack required */ +#define MCA_PML_OB1_HDR_FLAGS_NBO 0x02 /* is the hdr in network byte order */ +#define MCA_PML_OB1_HDR_FLAGS_PIN 0x04 /* is user buffer pinned */ +#define MCA_PML_OB1_HDR_FLAGS_CONTIG 0x08 /* is user buffer contiguous */ +#define MCA_PML_OB1_HDR_FLAGS_NORDMA 0x10 /* rest will be send by copy-in-out */ +#define MCA_PML_OB1_HDR_FLAGS_SIGNAL 0x20 /* message can be optionally signalling */ /** * Common hdr attributes - must be first element in each hdr type @@ -76,6 +77,41 @@ static inline void mca_pml_ob1_common_hdr_prepare (mca_pml_ob1_common_hdr_t *hdr #define MCA_PML_OB1_COMMON_HDR_NTOH(h) #define MCA_PML_OB1_COMMON_HDR_HTON(h) +/** + * Header definition for sending a CID/local comm index combo + */ +struct mca_pml_ob1_cid_hdr_t { + mca_pml_ob1_common_hdr_t hdr_common; + ompi_comm_extended_cid_t hdr_cid; + int16_t hdr_src_comm_index; + int32_t hdr_src; +}; + +typedef struct mca_pml_ob1_cid_hdr_t mca_pml_ob1_cid_hdr_t; + +static inline void mca_pml_ob1_cid_hdr_prepare (mca_pml_ob1_cid_hdr_t *hdr, ompi_communicator_t *comm) +{ + mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_CID, 0); + hdr->hdr_cid = ompi_comm_get_extended_cid (comm); + hdr->hdr_src_comm_index = comm->c_index; + hdr->hdr_src = ompi_comm_rank (comm); +} + +#define MCA_PML_OB1_EXT_CID_HDR_HTON(h) \ + do { \ + MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ + (h).hdr_src_comm_index = htons((h).hdr_src_comm_index); \ + ompi_comm_cid_hton(&(h).hdr_cid); \ + } while (0) + +#define MCA_PML_OB1_EXT_CID_HDR_NTOH(h) \ + do { \ + MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \ + (h).hdr_src_comm_index = ntonh((h).hdr_src_comm_index); \ + ompi_comm_cid_ntoh(&(h).hdr_cid); \ + } while (0) + + /** * Header definition for the first fragment, contains the * attributes required to match the corresponding posted receive. @@ -130,7 +166,17 @@ do { \ (h).hdr_seq = htons((h).hdr_seq); \ } while (0) -/** +struct mca_pml_ob1_ext_match_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_match_hdr_t hdr_match; +}; + +typedef struct mca_pml_ob1_ext_match_hdr_t mca_pml_ob1_ext_match_hdr_t; + +/* +* * Header definition for the first fragment when an acknowledgment * is required. This could be the first fragment of a large message * or a short message that requires an ack (synchronous). @@ -142,6 +188,14 @@ struct mca_pml_ob1_rendezvous_hdr_t { }; typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t; +struct mca_pml_ob1_ext_rendezvous_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_rendezvous_hdr_t hdr_rndv; +}; +typedef struct mca_pml_ob1_ext_rendezvous_hdr_t mca_pml_ob1_ext_rendezvous_hdr_t; + static inline void mca_pml_ob1_rendezvous_hdr_prepare (mca_pml_ob1_rendezvous_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags, uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq, uint64_t hdr_msg_length, void *hdr_src_req) @@ -180,6 +234,15 @@ struct mca_pml_ob1_rget_hdr_t { }; typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t; +struct mca_pml_ob1_ext_rget_hdr_t { + mca_pml_ob1_cid_hdr_t hdr_ext_cid; + + /* actual match */ + mca_pml_ob1_rget_hdr_t hdr_rget; +}; + +typedef struct mca_pml_ob1_ext_rget_hdr_t mca_pml_ob1_ext_rget_hdr_t; + static inline void mca_pml_ob1_rget_hdr_prepare (mca_pml_ob1_rget_hdr_t *hdr, uint8_t hdr_flags, uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq, uint64_t hdr_msg_length, void *hdr_src_req, void *hdr_frag, @@ -425,6 +488,11 @@ union mca_pml_ob1_hdr_t { mca_pml_ob1_ack_hdr_t hdr_ack; mca_pml_ob1_rdma_hdr_t hdr_rdma; mca_pml_ob1_fin_hdr_t hdr_fin; + /* extended CID support */ + mca_pml_ob1_cid_hdr_t hdr_cid; + mca_pml_ob1_ext_match_hdr_t hdr_ext_match; + mca_pml_ob1_ext_rendezvous_hdr_t hdr_ext_rndv; + mca_pml_ob1_ext_rget_hdr_t hdr_ext_rget; }; typedef union mca_pml_ob1_hdr_t mca_pml_ob1_hdr_t; @@ -457,6 +525,15 @@ ob1_hdr_ntoh(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type) case MCA_PML_OB1_HDR_TYPE_FIN: MCA_PML_OB1_FIN_HDR_NTOH(hdr->hdr_fin); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_hdr = (mca_pml_ob1_hdr_t *) ((uintptr_t) hdr + sizeof (hdr->hdr_cid)); + + MCA_PML_OB1_EXT_MATCH_HDR_NTOH(hdr->hdr_cid); + /* now swap the real header */ + ob1_hdr_ntoh (next_hdr, hext_hdr->hdr_common.hdr_type); + break; + } default: assert(0); break; @@ -503,6 +580,15 @@ ob1_hdr_hton_intr(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type, case MCA_PML_OB1_HDR_TYPE_FIN: MCA_PML_OB1_FIN_HDR_HTON(hdr->hdr_fin); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_hdr = (mca_pml_ob1_hdr_t *) ((uintptr_t) hdr + sizeof (hdr->hdr_cid)); + + MCA_PML_OB1_EXT_MATCH_HDR_HTON(hdr->hdr_cid); + /* now swap the real header */ + ob1_hdr_hton (next_hdr, hext_hdr->hdr_common.hdr_type, proc); + break; + } default: assert(0); break; @@ -516,7 +602,8 @@ ob1_hdr_hton_intr(mca_pml_ob1_hdr_t *hdr, const uint8_t hdr_type, static inline __opal_attribute_always_inline__ void ob1_hdr_copy(mca_pml_ob1_hdr_t *src, mca_pml_ob1_hdr_t *dst) { - switch(src->hdr_common.hdr_type) { + do { + switch(src->hdr_common.hdr_type) { case MCA_PML_OB1_HDR_TYPE_MATCH: memcpy( &(dst->hdr_match), &(src->hdr_match), sizeof(mca_pml_ob1_match_hdr_t) ); break; @@ -538,10 +625,24 @@ ob1_hdr_copy(mca_pml_ob1_hdr_t *src, mca_pml_ob1_hdr_t *dst) case MCA_PML_OB1_HDR_TYPE_FIN: memcpy( &(dst->hdr_fin), &(src->hdr_fin), sizeof(mca_pml_ob1_fin_hdr_t) ); break; + case MCA_PML_OB1_HDR_TYPE_CID: + { + mca_pml_ob1_hdr_t *next_src = (mca_pml_ob1_hdr_t *) ((uintptr_t) src + sizeof (src->hdr_cid)); + mca_pml_ob1_hdr_t *next_dst = (mca_pml_ob1_hdr_t *) ((uintptr_t) dst + sizeof (dst->hdr_cid)); + + memcpy (&dst->hdr_cid, &src->hdr_cid, sizeof (src->hdr_cid)); + /* can't call recusively and expect inlining */ + src = next_src; + dst = next_dst; + continue; + } default: memcpy( &(dst->hdr_common), &(src->hdr_common), sizeof(mca_pml_ob1_common_hdr_t) ); break; - } + } + + break; + } while (1); } #endif /* MCA_PML_OB1_HEADER_H */ diff --git a/ompi/mca/pml/ob1/pml_ob1_isend.c b/ompi/mca/pml/ob1/pml_ob1_isend.c index 2b7e7047708..79d50ab2348 100644 --- a/ompi/mca/pml/ob1/pml_ob1_isend.c +++ b/ompi/mca/pml/ob1/pml_ob1_isend.c @@ -48,17 +48,14 @@ int mca_pml_ob1_isend_init(const void *buf, ompi_communicator_t * comm, ompi_request_t ** request) { + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, dst); mca_pml_ob1_send_request_t *sendreq = NULL; MCA_PML_OB1_SEND_REQUEST_ALLOC(comm, dst, sendreq); if (NULL == sendreq) return OMPI_ERR_OUT_OF_RESOURCE; - MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, buf, count, datatype, dst, tag, + comm, sendmode, true, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -78,7 +75,8 @@ int mca_pml_ob1_isend_init(const void *buf, static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, ompi_datatype_t * datatype, int dst, int tag, int16_t seqn, - ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint, + ompi_proc_t *dst_proc, mca_pml_ob1_comm_proc_t *ob1_proc, + mca_bml_base_endpoint_t* endpoint, ompi_communicator_t * comm) { mca_pml_ob1_match_hdr_t match; @@ -92,7 +90,10 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, return OMPI_ERR_NOT_AVAILABLE; ompi_datatype_type_size (datatype, &size); - if ((size * count) > 256) { /* some random number */ + + /* the size used here was picked based on performance on a Cray XE-6. it should probably + * be provided by the btl module */ + if ((size * count) > 256 || -1 == ob1_proc->comm_index) { return OMPI_ERR_NOT_AVAILABLE; } @@ -111,7 +112,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count, } mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - comm->c_contextid, comm->c_my_rank, + ob1_proc->comm_index, comm->c_my_rank, tag, seqn); ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc); @@ -174,7 +175,7 @@ int mca_pml_ob1_isend(const void *buf, } if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { - rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, + rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, ob1_proc, endpoint, comm); if (OPAL_LIKELY(0 <= rc)) { /* NTH: it is legal to return ompi_request_empty since the only valid @@ -194,7 +195,7 @@ int mca_pml_ob1_isend(const void *buf, count, datatype, dst, tag, - comm, sendmode, false); + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -216,7 +217,7 @@ int mca_pml_ob1_isend(const void *buf, count, datatype, dst, tag, - comm, sendmode, false); + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &(sendreq)->req_send.req_base, @@ -279,7 +280,7 @@ int mca_pml_ob1_send(const void *buf, */ if (MCA_PML_BASE_SEND_SYNCHRONOUS != sendmode) { rc = mca_pml_ob1_send_inline (buf, count, datatype, dst, tag, seqn, dst_proc, - endpoint, comm); + ob1_proc, endpoint, comm); if (OPAL_LIKELY(0 <= rc)) { return OMPI_SUCCESS; } @@ -299,12 +300,8 @@ int mca_pml_ob1_send(const void *buf, sendreq->req_send.req_base.req_proc = dst_proc; sendreq->rdma_frag = NULL; - MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); + MCA_PML_OB1_SEND_REQUEST_INIT(sendreq, buf, count, datatype, dst, tag, + comm, sendmode, false, ob1_proc); PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, &sendreq->req_send.req_base, diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c index 3c1e2762e85..3295d9fad77 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.c @@ -20,6 +20,9 @@ * Copyright (c) 2018 Sandia National Laboratories * All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -476,8 +479,8 @@ void mca_pml_ob1_recv_frag_callback_match (mca_btl_base_module_t *btl, * this pending queue will be searched and all matching fragments * moved to the right communicator. */ - append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); + append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, btl, + hdr, segments, num_segments, NULL ); return; } comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; @@ -1037,8 +1040,8 @@ static int mca_pml_ob1_recv_frag_match (mca_btl_base_module_t *btl, * this pending queue will be searched and all matching fragments * moved to the right communicator. */ - append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); + append_frag_to_list( &mca_pml_ob1.non_existing_communicator_pending, btl, + hdr, segments, num_segments, NULL ); return OMPI_SUCCESS; } comm = (mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; @@ -1201,3 +1204,71 @@ mca_pml_ob1_recv_frag_match_proc (mca_btl_base_module_t *btl, return OMPI_SUCCESS; } +void mca_pml_ob1_handle_cid (ompi_communicator_t *comm, int src, mca_pml_ob1_cid_hdr_t *hdr_cid) +{ + mca_pml_ob1_comm_proc_t *ob1_proc = mca_pml_ob1_peer_lookup (comm, src); + bool had_comm_index = (-1 != ob1_proc->comm_index); + + if (!had_comm_index) { + /* avoid sending too many extra packets. if this doesn't work well then a flag can be added to + * the proc to indicate that this packet has been sent */ + ob1_proc->comm_index = hdr_cid->hdr_src_comm_index; + + /* + * if the proc to send to is myself, no need to do the send + */ + if(ob1_proc->ompi_proc != ompi_proc_local()) { + (void) mca_pml_ob1_send_cid (ob1_proc->ompi_proc, comm); + } + } +} + +void mca_pml_ob1_recv_frag_callback_cid (mca_btl_base_module_t* btl, + const mca_btl_base_receive_descriptor_t* des) +{ + mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; + mca_pml_ob1_hdr_t *hdr = (mca_pml_ob1_hdr_t *) des->des_segments[0].seg_addr.pval; + mca_pml_ob1_match_hdr_t *hdr_match = &hdr->hdr_ext_match.hdr_match; + size_t num_segments = des->des_segment_count; + ompi_communicator_t *comm; + + memcpy (segments, des->des_segments, num_segments * sizeof (segments[0])); + assert (segments->seg_len >= sizeof (hdr->hdr_cid)); + + ob1_hdr_ntoh (hdr, hdr->hdr_common.hdr_type); + + /* NTH: this should be ok as as all BTLs create a dummy segment */ + segments->seg_len -= offsetof (mca_pml_ob1_ext_match_hdr_t, hdr_match); + segments->seg_addr.pval = (void *) hdr_match; + + /* find the communicator with this extended CID */ + comm = ompi_comm_lookup_cid (hdr->hdr_cid.hdr_cid); + if (OPAL_UNLIKELY(NULL == comm)) { + if (segments->seg_len > 0) { + /* This is a special case. A message for a not yet existing + * communicator can happens. Instead of doing a matching we + * will temporarily add it the a pending queue in the PML. + * Later on, when the communicator is completely instantiated, + * this pending queue will be searched and all matching fragments + * moved to the right communicator. + */ + append_frag_to_list (&mca_pml_ob1.non_existing_communicator_pending, + btl, (const mca_pml_ob1_match_hdr_t *)hdr, des->des_segments, + num_segments, NULL); + } + + /* nothing more to do */ + return; + } + + mca_pml_ob1_handle_cid (comm, hdr->hdr_cid.hdr_src, &hdr->hdr_cid); + hdr_match->hdr_ctx = comm->c_index; + + if (segments->seg_len == 0) { + /* just a response */ + return; + } + + mca_pml_ob1_recv_frag_match (btl, hdr_match, segments, des->des_segment_count, + hdr_match->hdr_common.hdr_type); +} diff --git a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h index d058a113612..95f57a66b6e 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvfrag.h +++ b/ompi/mca/pml/ob1/pml_ob1_recvfrag.h @@ -159,6 +159,12 @@ extern void mca_pml_ob1_recv_frag_callback_put (mca_btl_base_module_t *btl, extern void mca_pml_ob1_recv_frag_callback_fin (mca_btl_base_module_t *btl, const mca_btl_base_receive_descriptor_t *descriptor); +/** + * Callback from BTL on receipt of an extended CID header + */ +extern void mca_pml_ob1_recv_frag_callback_cid( mca_btl_base_module_t *btl, + const mca_btl_base_receive_descriptor_t* descriptor); + /** * Extract the next fragment from the cant_match ordered list. This fragment * will be the next in sequence. @@ -170,6 +176,8 @@ void append_frag_to_ordered_list(mca_pml_ob1_recv_frag_t** queue, mca_pml_ob1_recv_frag_t* frag, uint16_t seq); +void mca_pml_ob1_handle_cid (ompi_communicator_t *comm, int src, mca_pml_ob1_cid_hdr_t *hdr_cid); + extern void mca_pml_ob1_dump_cant_match(mca_pml_ob1_recv_frag_t* queue); END_C_DECLS diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index cd089c01db3..c8d597d3b36 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -282,6 +282,10 @@ int mca_pml_ob1_recv_request_ack_send_btl( return OMPI_ERR_OUT_OF_RESOURCE; } +/* + * + */ + static int mca_pml_ob1_recv_request_ack( mca_pml_ob1_recv_request_t* recvreq, mca_btl_base_module_t* btl, @@ -1198,8 +1202,8 @@ recv_req_match_wild( mca_pml_ob1_recv_request_t* req, mca_pml_ob1_comm_proc_t **p) #endif { - mca_pml_ob1_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_ob1_comm_proc_t **procp = comm->procs; + mca_pml_ob1_comm_t *comm = (mca_pml_ob1_comm_t *) req->req_recv.req_base.req_comm->c_pml_comm; + mca_pml_ob1_comm_proc_t **procp = (mca_pml_ob1_comm_proc_t **) comm->procs; #if MCA_PML_OB1_CUSTOM_MATCH mca_pml_ob1_recv_frag_t* frag; diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.c b/ompi/mca/pml/ob1/pml_ob1_sendreq.c index bae8fc10bc9..d1f37a73b20 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.c @@ -19,6 +19,8 @@ * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -487,18 +489,23 @@ int mca_pml_ob1_send_request_start_buffered( mca_bml_base_btl_t* bml_btl, size_t size) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = sizeof (mca_pml_ob1_rendezvous_hdr_t); mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_rendezvous_hdr_t *hdr_rndv; struct iovec iov; unsigned int iov_count; size_t max_data, req_bytes_delivered; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rndv); + } + /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t) + size, + mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size + size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { @@ -507,8 +514,7 @@ int mca_pml_ob1_send_request_start_buffered( segment = des->des_segments; /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - sizeof(mca_pml_ob1_rendezvous_hdr_t)); + iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + hdr_size); iov.iov_len = size; iov_count = 1; max_data = size; @@ -523,17 +529,24 @@ int mca_pml_ob1_send_request_start_buffered( /* build rendezvous header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_rndv = &hdr->hdr_ext_rndv.hdr_rndv; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rndv = &hdr->hdr_rndv; + } + + mca_pml_ob1_rendezvous_hdr_prepare (hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* update lengths */ - segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data; + segment->seg_len = hdr_size + max_data; des->des_cbfunc = mca_pml_ob1_rndv_completion; des->des_cbdata = sendreq; @@ -571,7 +584,7 @@ int mca_pml_ob1_send_request_start_buffered( MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq, true); /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + rc = mca_bml_base_send (bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); @@ -593,18 +606,22 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = OMPI_PML_OB1_MATCH_HDR_LEN; mca_btl_base_descriptor_t* des = NULL; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_match_hdr_t *hdr_match; struct iovec iov; unsigned int iov_count; size_t max_data = size; int rc; - if(NULL != bml_btl->btl->btl_sendi) { + if(NULL != bml_btl->btl->btl_sendi && !need_ext_match) { mca_pml_ob1_match_hdr_t match; + mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); @@ -632,9 +649,11 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, } } else { /* allocate descriptor */ - mca_bml_base_alloc( bml_btl, &des, - MCA_BTL_NO_ORDER, - OMPI_PML_OB1_MATCH_HDR_LEN + size, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size += sizeof (hdr->hdr_cid); + } + + mca_bml_base_alloc (bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size + size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); } if( OPAL_UNLIKELY(NULL == des) ) { @@ -645,8 +664,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, if(size > 0) { /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - OMPI_PML_OB1_MATCH_HDR_LEN); + iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + hdr_size); iov.iov_len = size; iov_count = 1; /* @@ -672,26 +690,32 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq, ); } - /* build match header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_match = &hdr->hdr_ext_match.hdr_match; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_match = &hdr->hdr_match; + } + + mca_pml_ob1_match_hdr_prepare (hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* update lengths */ - segment->seg_len = OMPI_PML_OB1_MATCH_HDR_LEN + max_data; + segment->seg_len = hdr_size + max_data; /* short message */ des->des_cbdata = sendreq; des->des_cbfunc = mca_pml_ob1_match_completion_free; /* send */ - rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + rc = mca_bml_base_send_status(bml_btl, des, hdr->hdr_common.hdr_type); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)size, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { @@ -720,19 +744,23 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, mca_bml_base_btl_t* bml_btl, size_t size ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = OMPI_PML_OB1_MATCH_HDR_LEN; mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_match_hdr_t *hdr_match; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size += sizeof (hdr->hdr_cid); + } + /* prepare descriptor */ - mca_bml_base_prepare_src( bml_btl, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - OMPI_PML_OB1_MATCH_HDR_LEN, - &size, + mca_bml_base_prepare_src (bml_btl, &sendreq->req_send.req_base.req_convertor, + MCA_BTL_NO_ORDER, hdr_size, &size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); + &des); if( OPAL_UNLIKELY(NULL == des) ) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -740,20 +768,27 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq, /* build match header */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, - sendreq->req_send.req_base.req_comm->c_contextid, + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_match = &hdr->hdr_ext_match.hdr_match; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_match = &hdr->hdr_match; + } + + mca_pml_ob1_match_hdr_prepare (hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* short message */ des->des_cbfunc = mca_pml_ob1_match_completion_free; des->des_cbdata = sendreq; /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_MATCH); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); SPC_USER_OR_MPI(sendreq->req_send.req_base.req_ompi.req_status.MPI_TAG, (ompi_spc_value_t)size, OMPI_SPC_BYTES_SENT_USER, OMPI_SPC_BYTES_SENT_MPI); if( OPAL_LIKELY( rc >= OPAL_SUCCESS ) ) { @@ -782,11 +817,13 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, * one RDMA capable BTLs). This way round robin distribution of RDMA * operation is achieved. */ + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t reg_size, hdr_size = sizeof (mca_pml_ob1_rget_hdr_t); mca_btl_base_registration_handle_t *local_handle; mca_btl_base_descriptor_t *des; mca_pml_ob1_rdma_frag_t *frag; - mca_pml_ob1_rget_hdr_t *hdr; - size_t reg_size; + mca_pml_ob1_hdr_t *hdr; + mca_pml_ob1_rget_hdr_t *hdr_rget; void *data_ptr; int rc; @@ -818,10 +855,15 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, frag->cbfunc = mca_pml_ob1_rget_completion; /* do not store the local handle in the fragment. it will be released by mca_pml_ob1_free_rdma_resources */ + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rget); + } + reg_size = bml_btl->btl->btl_registration_handle_size; + hdr_size += reg_size; /* allocate space for get hdr + segment list */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + reg_size, + mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL); if( OPAL_UNLIKELY(NULL == des) ) { @@ -834,17 +876,24 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, sendreq->rdma_frag = frag; /* build match header */ - hdr = (mca_pml_ob1_rget_hdr_t *) des->des_segments->seg_addr.pval; + hdr = (mca_pml_ob1_hdr_t *) des->des_segments->seg_addr.pval; + if (need_ext_match) { + hdr_rget = &hdr->hdr_ext_rget.hdr_rget; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rget = &hdr->hdr_rget; + } + /* TODO -- Add support for multiple segments for get */ - mca_pml_ob1_rget_hdr_prepare (hdr, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN, - sendreq->req_send.req_base.req_comm->c_contextid, + mca_pml_ob1_rget_hdr_prepare (hdr_rget, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq, frag, data_ptr, local_handle, reg_size); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); des->des_cbfunc = mca_pml_ob1_send_ctl_completion; des->des_cbdata = sendreq; @@ -860,7 +909,7 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq, } /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); if (OPAL_UNLIKELY(rc < 0)) { MCA_PML_OB1_RDMA_FRAG_RETURN(frag); sendreq->rdma_frag = NULL; @@ -882,18 +931,22 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, size_t size, int flags ) { + const bool need_ext_match = MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq); + size_t hdr_size = sizeof (mca_pml_ob1_rendezvous_hdr_t); mca_btl_base_descriptor_t* des; mca_btl_base_segment_t* segment; mca_pml_ob1_hdr_t* hdr; + mca_pml_ob1_rendezvous_hdr_t *hdr_rndv; int rc; + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_size = sizeof (hdr->hdr_ext_rndv); + } + /* prepare descriptor */ if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); + mca_bml_base_alloc (bml_btl, &des, MCA_BTL_NO_ORDER, hdr_size, MCA_BTL_DES_FLAGS_PRIORITY | + MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); } else { MEMCHECKER( memchecker_call(&opal_memchecker_base_mem_defined, @@ -901,14 +954,10 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, sendreq->req_send.req_base.req_count, sendreq->req_send.req_base.req_datatype); ); - mca_bml_base_prepare_src( bml_btl, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_ob1_rendezvous_hdr_t), - &size, + mca_bml_base_prepare_src (bml_btl, &sendreq->req_send.req_base.req_convertor, + MCA_BTL_NO_ORDER, hdr_size, &size, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_SIGNAL, - &des ); + MCA_BTL_DES_FLAGS_SIGNAL, &des); MEMCHECKER( memchecker_call(&opal_memchecker_base_mem_noaccess, sendreq->req_send.req_base.req_addr, @@ -924,15 +973,23 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, /* build hdr */ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; - mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, flags | + + if (OPAL_UNLIKELY(need_ext_match)) { + hdr_rndv = &hdr->hdr_ext_rndv.hdr_rndv; + mca_pml_ob1_cid_hdr_prepare (&hdr->hdr_cid, sendreq->req_send.req_base.req_comm); + } else { + hdr_rndv = &hdr->hdr_rndv; + } + + mca_pml_ob1_rendezvous_hdr_prepare (hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, flags | MCA_PML_OB1_HDR_FLAGS_SIGNAL, - sendreq->req_send.req_base.req_comm->c_contextid, + sendreq->ob1_proc->comm_index, sendreq->req_send.req_base.req_comm->c_my_rank, sendreq->req_send.req_base.req_tag, (uint16_t)sendreq->req_send.req_base.req_sequence, sendreq->req_send.req_bytes_packed, sendreq); - ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc); + ob1_hdr_hton(hdr, hdr->hdr_common.hdr_type, sendreq->req_send.req_base.req_proc); /* first fragment of a long message */ des->des_cbdata = sendreq; @@ -942,7 +999,7 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq, sendreq->req_state = 2; /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RNDV); + rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); if( OPAL_LIKELY( rc >= 0 ) ) { if( OPAL_LIKELY( 1 == rc ) ) { mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, size ); diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 80a4ae2f6b4..07e9899fd30 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -46,6 +46,7 @@ typedef enum { struct mca_pml_ob1_send_request_t { mca_pml_base_send_request_t req_send; mca_bml_base_endpoint_t* req_endpoint; + mca_pml_ob1_comm_proc_t *ob1_proc; opal_ptr_t req_recv; opal_atomic_int32_t req_state; opal_atomic_int32_t req_lock; @@ -143,7 +144,8 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type) tag, \ comm, \ sendmode, \ - persistent) \ + persistent, \ + ob1_proc) \ { \ MCA_PML_BASE_SEND_REQUEST_INIT(&(sendreq)->req_send, \ buf, \ @@ -156,11 +158,14 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type) persistent, \ 0); /* convertor_flags */ \ (sendreq)->req_recv.pval = NULL; \ + (sendreq)->ob1_proc = ob1_proc; \ } #define MCA_PML_OB1_SEND_REQUEST_RESET(sendreq) \ MCA_PML_BASE_SEND_REQUEST_RESET(&(sendreq)->req_send) +#define MCA_PML_OB1_SEND_REQUEST_REQUIRES_EXT_MATCH(sendreq) (-1 == sendreq->ob1_proc->comm_index) + static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq) { size_t r; diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index b356d224a22..6614cf4d5f6 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -489,7 +489,12 @@ typedef int (*mca_pml_base_module_dump_fn_t)( */ /** PML requires requires all procs in the job on the first call to * add_procs */ -#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 +#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 + +/** + * PML supports the extended CID space (doesn't need a global communicator index) + */ +#define MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID 0x00000002 /** * PML instance. @@ -560,5 +565,10 @@ static inline bool mca_pml_base_requires_world (void) return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD); } +static inline bool mca_pml_base_supports_extended_cid (void) +{ + return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_SUPPORTS_EXT_CID); +} + END_C_DECLS #endif /* MCA_PML_H */ diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index 6bee4dea29f..6f53489c35a 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -35,18 +35,18 @@ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' dst %d tag %d mode %s comm %d '%s'", \ __VA_ARGS__, \ (_buf), (_count), (_datatype)->name, (_dst), (_tag), \ - mca_pml_ucx_send_mode_name(_mode), (_comm)->c_contextid, \ + mca_pml_ucx_send_mode_name(_mode), (_comm)->c_index, \ (_comm)->c_name); #define PML_UCX_TRACE_RECV(_msg, _buf, _count, _datatype, _src, _tag, _comm, ...) \ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' src %d tag %d comm %d '%s'", \ __VA_ARGS__, \ (_buf), (_count), (_datatype)->name, (_src), (_tag), \ - (_comm)->c_contextid, (_comm)->c_name); + (_comm)->c_index, (_comm)->c_name); #define PML_UCX_TRACE_PROBE(_msg, _src, _tag, _comm) \ PML_UCX_VERBOSE(8, _msg " src %d tag %d comm %d '%s'", \ - _src, (_tag), (_comm)->c_contextid, (_comm)->c_name); + _src, (_tag), (_comm)->c_index, (_comm)->c_name); #define PML_UCX_TRACE_MRECV(_msg, _buf, _count, _datatype, _message) \ PML_UCX_VERBOSE(8, _msg " buf %p count %zu type '%s' msg *%p=%p (%p)", \ diff --git a/ompi/mca/pml/ucx/pml_ucx_request.h b/ompi/mca/pml/ucx/pml_ucx_request.h index bfa3d6c858c..c4a13d96e57 100644 --- a/ompi/mca/pml/ucx/pml_ucx_request.h +++ b/ompi/mca/pml/ucx/pml_ucx_request.h @@ -42,7 +42,7 @@ enum { #define PML_UCX_MAKE_SEND_TAG(_tag, _comm) \ ((((uint64_t) (_tag) ) << (PML_UCX_RANK_BITS + PML_UCX_CONTEXT_BITS)) | \ (((uint64_t)(_comm)->c_my_rank ) << PML_UCX_CONTEXT_BITS) | \ - ((uint64_t)(_comm)->c_contextid)) + ((uint64_t)(_comm)->c_index)) #define PML_UCX_MAKE_RECV_TAG(_ucp_tag, _ucp_tag_mask, _tag, _src, _comm) \ @@ -54,7 +54,7 @@ enum { } \ \ _ucp_tag = (((uint64_t)(_src) & UCS_MASK(PML_UCX_RANK_BITS)) << PML_UCX_CONTEXT_BITS) | \ - (_comm)->c_contextid; \ + (_comm)->c_index; \ \ if ((_tag) != MPI_ANY_TAG) { \ _ucp_tag_mask |= PML_UCX_TAG_MASK; \ diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm.c b/ompi/mca/sharedfp/sm/sharedfp_sm.c index 498c02d716a..bb481c9b2fe 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm.c @@ -88,9 +88,9 @@ struct mca_sharedfp_base_module_1_0_0_t * mca_sharedfp_sm_component_file_query(o proc = ompi_group_peer_lookup(group,i); if (!OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)){ opal_output(ompi_sharedfp_base_framework.framework_output, - "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%d/%s) " + "mca_sharedfp_sm_component_file_query: Disqualifying myself: (%s/%s) " "not all processes are on the same node.", - comm->c_contextid, comm->c_name); + ompi_comm_print_cid (comm), comm->c_name); return NULL; } } diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 42cc532b4e7..c3a61ce6d3d 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -61,7 +61,6 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, struct mca_sharedfp_sm_offset * sm_offset_ptr; struct mca_sharedfp_sm_offset sm_offset; int sm_fd; - uint32_t comm_cid; int int_pid; pid_t my_pid; @@ -102,8 +101,8 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ filename_basename = basename((char*)filename); - /* format is "%s/%s_cid-%d-%d.sm", see below */ - sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; + /* format is "%s/%s_cid-%s-%d.sm", see below */ + sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + strlen(ompi_comm_print_cid(comm)) + 5 + (3*sizeof(uint32_t)+1) + 4; sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); if (NULL == sm_filename) { opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n"); @@ -112,7 +111,6 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, return OMPI_ERR_OUT_OF_RESOURCE; } - comm_cid = ompi_comm_get_cid(comm); if ( 0 == fh->f_rank ) { my_pid = getpid(); int_pid = (int) my_pid; @@ -126,8 +124,8 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, return err; } - snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%d-%d.sm", ompi_process_info.job_session_dir, - filename_basename, comm_cid, int_pid); + snprintf(sm_filename, sm_filename_length, "%s/%s_cid-%s-%d.sm", ompi_process_info.job_session_dir, + filename_basename, ompi_comm_print_cid (comm), int_pid); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); diff --git a/ompi/mca/topo/base/base.h b/ompi/mca/topo/base/base.h index 9ab1a4b927a..f04a1cbff9f 100644 --- a/ompi/mca/topo/base/base.h +++ b/ompi/mca/topo/base/base.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -16,6 +17,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -56,6 +59,13 @@ mca_topo_base_comm_select(const ompi_communicator_t* comm, mca_topo_base_module_t** selected_module, uint32_t type); +/* Select a topo module for a particular type of topology */ +OMPI_DECLSPEC int +mca_topo_base_group_select (const ompi_group_t *group, + mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, + uint32_t type); + /* Find all components that want to be considered in this job */ OMPI_DECLSPEC int mca_topo_base_find_available(bool enable_progress_threads, diff --git a/ompi/mca/topo/base/topo_base_cart_create.c b/ompi/mca/topo/base/topo_base_cart_create.c index e751a909f3f..9da23056c70 100644 --- a/ompi/mca/topo/base/topo_base_cart_create.c +++ b/ompi/mca/topo/base/topo_base_cart_create.c @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,78 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/topo/topo.h" +static int mca_topo_base_cart_allocate (ompi_group_t *group, int ndims, const int *dims, const int *periods, + int *my_rank, int *num_procs, mca_topo_base_comm_cart_2_2_0_t **cart_out) +{ + mca_topo_base_comm_cart_2_2_0_t *cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t); + int nprocs = 1; + + *num_procs = group->grp_proc_count; + *my_rank = group->grp_my_rank; + + /* Calculate the number of processes in this grid */ + for (int i = 0 ; i < ndims ; ++i) { + if (dims[i] <= 0) { + return OMPI_ERROR; + } + nprocs *= dims[i]; + } + + /* check for the error condition */ + if (OPAL_UNLIKELY(*num_procs < nprocs)) { + return MPI_ERR_DIMS; + } + + /* check if we have to trim the list of processes */ + if (nprocs < *num_procs) { + *num_procs = nprocs; + } + + if (*my_rank > (nprocs - 1)) { + *my_rank = MPI_UNDEFINED; + } + + if (MPI_UNDEFINED == *my_rank) { + /* nothing more to do */ + *cart_out = NULL; + return OMPI_SUCCESS; + } + + if (OPAL_UNLIKELY(NULL == cart)) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + cart->ndims = ndims; + + /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent + a 0-byte malloc -- leave dims as NULL */ + if (0 == ndims) { + *cart_out = cart; + return OMPI_SUCCESS; + } + + cart->dims = (int *) malloc (sizeof (int) * ndims); + cart->periods = (int *) malloc (sizeof (int) * ndims); + cart->coords = (int *) malloc (sizeof (int) * ndims); + if (OPAL_UNLIKELY(NULL == cart->dims || NULL == cart->periods || NULL == cart->coords)) { + OBJ_RELEASE(cart); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + /* Cartesian communicator; copy the right data to the common information */ + memcpy(cart->dims, dims, ndims * sizeof(int)); + memcpy(cart->periods, periods, ndims * sizeof(int)); + + nprocs = *num_procs; + for (int i = 0, rank = *my_rank ; i < ndims ; ++i) { + nprocs /= cart->dims[i]; + cart->coords[i] = rank / nprocs; + rank %= nprocs; + } + + *cart_out = cart; + return OMPI_SUCCESS; +} + /* * function - makes a new communicator to which topology information * has been attached @@ -55,135 +129,50 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, bool reorder, ompi_communicator_t** comm_topo) { - int nprocs = 1, i, new_rank, num_procs, ret; + int new_rank, num_procs, ret; ompi_communicator_t *new_comm; - ompi_proc_t **topo_procs = NULL; mca_topo_base_comm_cart_2_2_0_t* cart; + ompi_group_t *c_local_group; - num_procs = old_comm->c_local_group->grp_proc_count; - new_rank = old_comm->c_local_group->grp_my_rank; assert(topo->type == OMPI_COMM_CART); - /* Calculate the number of processes in this grid */ - for (i = 0; i < ndims; ++i) { - if(dims[i] <= 0) { - return OMPI_ERROR; - } - nprocs *= dims[i]; - } - - /* check for the error condition */ - if (num_procs < nprocs) { - return MPI_ERR_DIMS; - } - - /* check if we have to trim the list of processes */ - if (nprocs < num_procs) { - num_procs = nprocs; - } - - if (new_rank > (nprocs-1)) { - ndims = 0; - new_rank = MPI_UNDEFINED; - num_procs = 0; + ret = mca_topo_base_cart_allocate (old_comm->c_local_group, ndims, dims, periods, + &new_rank, &num_procs, &cart); + if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) { + return ret; } - cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t); - if( NULL == cart ) { + /* Copy the proc structure from the previous communicator over to + the new one. The topology module is then able to work on this + copy and rearrange it as it deems fit. NTH: seems odd that this + function has always clipped the group size here. It might be + worthwhile to clip the group in the module (if reordering) */ + c_local_group = ompi_group_flatten (old_comm->c_local_group, num_procs); + if (OPAL_UNLIKELY(NULL == c_local_group)) { + OBJ_RELEASE(cart); return OMPI_ERR_OUT_OF_RESOURCE; } - cart->ndims = ndims; - - /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent - a 0-byte malloc -- leave dims as NULL */ - if( ndims > 0 ) { - cart->dims = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->dims) { - OBJ_RELEASE(cart); - return OMPI_ERROR; - } - memcpy(cart->dims, dims, ndims * sizeof(int)); - /* Cartesian communicator; copy the right data to the common information */ - cart->periods = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->periods) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(cart->periods, periods, ndims * sizeof(int)); - - cart->coords = (int*)malloc(sizeof(int) * ndims); - if (NULL == cart->coords) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - { /* setup the cartesian topology */ - int n_procs = num_procs, rank = new_rank; - - for (i = 0; i < ndims; ++i) { - n_procs /= cart->dims[i]; - cart->coords[i] = rank / n_procs; - rank %= n_procs; - } - } - } + ret = ompi_comm_create (old_comm, c_local_group, &new_comm); - /* JMS: This should really be refactored to use - comm_create_group(), because ompi_comm_allocate() still - complains about 0-byte mallocs in debug builds for 0-member - groups. */ - if (num_procs > 0) { - /* Copy the proc structure from the previous communicator over to - the new one. The topology module is then able to work on this - copy and rearrange it as it deems fit. */ - topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if (NULL == topo_procs) { - OBJ_RELEASE(cart); - return OMPI_ERR_OUT_OF_RESOURCE; - } - if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) { - memcpy(topo_procs, - old_comm->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i); - } - } - } + ompi_group_free (&c_local_group); - /* allocate a new communicator */ - new_comm = ompi_comm_allocate(num_procs, 0); - if (NULL == new_comm) { - free(topo_procs); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(cart); - return MPI_ERR_INTERN; + return ret; } - ret = ompi_comm_enable(old_comm, new_comm, - new_rank, num_procs, topo_procs); - if (OMPI_SUCCESS != ret) { - /* something wrong happened during setting the communicator */ - free(topo_procs); - OBJ_RELEASE(cart); - if (MPI_COMM_NULL != new_comm) { - new_comm->c_topo = NULL; - new_comm->c_flags &= ~OMPI_COMM_CART; - ompi_comm_free (&new_comm); - } - return ret; + *comm_topo = new_comm; + + if (MPI_COMM_NULL == new_comm) { + /* not part of this new communicator */ + return OMPI_SUCCESS; } new_comm->c_topo = topo; new_comm->c_topo->mtc.cart = cart; new_comm->c_topo->reorder = reorder; new_comm->c_flags |= OMPI_COMM_CART; - *comm_topo = new_comm; - - if( MPI_UNDEFINED == new_rank ) { - ompi_comm_free(&new_comm); - *comm_topo = MPI_COMM_NULL; - } /* end here */ return OMPI_SUCCESS; @@ -197,15 +186,9 @@ static void mca_topo_base_comm_cart_2_2_0_construct(mca_topo_base_comm_cart_2_2_ } static void mca_topo_base_comm_cart_2_2_0_destruct(mca_topo_base_comm_cart_2_2_0_t * cart) { - if (NULL != cart->dims) { - free(cart->dims); - } - if (NULL != cart->periods) { - free(cart->periods); - } - if (NULL != cart->coords) { - free(cart->coords); - } + free(cart->dims); + free(cart->periods); + free(cart->coords); } OBJ_CLASS_INSTANCE(mca_topo_base_comm_cart_2_2_0_t, opal_object_t, diff --git a/ompi/mca/topo/base/topo_base_comm_select.c b/ompi/mca/topo/base/topo_base_comm_select.c index 165727fd393..9dc6fc3d45b 100644 --- a/ompi/mca/topo/base/topo_base_comm_select.c +++ b/ompi/mca/topo/base/topo_base_comm_select.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -68,10 +71,10 @@ static OBJ_CLASS_INSTANCE(queried_module_t, opal_list_item_t, NULL, NULL); * 4. Select the module with the highest priority. * 5. OBJ_RELEASE all the "losing" modules. */ -int mca_topo_base_comm_select(const ompi_communicator_t* comm, - mca_topo_base_module_t* preferred_module, - mca_topo_base_module_t** selected_module, - uint32_t type) +static int _mca_topo_base_select (const ompi_communicator_t *comm, const ompi_group_t *group, + mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, + uint32_t type) { int priority; int best_priority; @@ -88,9 +91,15 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, if (OMPI_SUCCESS != (err = mca_topo_base_lazy_init())) { return err; } - opal_output_verbose(10, ompi_topo_base_framework.framework_output, - "topo:base:comm_select: new communicator: %s (cid %d)", - comm->c_name, comm->c_contextid); + + if (comm) { + opal_output_verbose(10, ompi_topo_base_framework.framework_output, + "topo:base:comm_select: new communicator: %s (cid %s)", + comm->c_name, ompi_comm_print_cid (comm)); + } else { + opal_output_verbose(10, ompi_topo_base_framework.framework_output, + "topo:base:group_select: new communicator"); + } /* Check and see if a preferred component was provided. If it was provided then it should be used (if possible) */ @@ -106,7 +115,7 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, /* query the component for its priority and get its module structure. This is necessary to proceed */ component = (mca_topo_base_component_t *)preferred_module->topo_component; - module = component->topoc_comm_query(comm, &priority, type); + module = component->topoc_query(comm, group, &priority, type); if (NULL != module) { /* this query seems to have returned something legitimate @@ -149,14 +158,14 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, /* * we can call the query function only if there is a function :-) */ - if (NULL == component->topoc_comm_query) { + if (NULL == component->topoc_query) { opal_output_verbose(10, ompi_topo_base_framework.framework_output, "select: no query, ignoring the component"); } else { /* * call the query function and see what it returns */ - module = component->topoc_comm_query(comm, &priority, type); + module = component->topoc_query(comm, group, &priority, type); if (NULL == module) { /* @@ -251,6 +260,17 @@ int mca_topo_base_comm_select(const ompi_communicator_t* comm, return OMPI_SUCCESS; } +int mca_topo_base_comm_select (const ompi_communicator_t *comm, mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, uint32_t type) +{ + return _mca_topo_base_select (comm, NULL, preferred_module, selected_module, type); +} + +int mca_topo_base_group_select(const ompi_group_t *group, mca_topo_base_module_t *preferred_module, + mca_topo_base_module_t **selected_module, uint32_t type) +{ + return _mca_topo_base_select (NULL, group, preferred_module, selected_module, type); +} /* * This function fills in the null function pointers, in other words, diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index fdc202f879a..66e2976deb5 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. */ #include "ompi_config.h" @@ -289,20 +292,10 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, { int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { + if (OMPI_SUCCESS != (err = ompi_comm_dup_with_info (comm_old, info, newcomm))) { OBJ_RELEASE(module); return err; } - // But if there is an info object, the above call didn't make use - // of it, so we'll do a dup-with-info to get the final comm and - // free the above intermediate newcomm: - if (info && info != &(MPI_INFO_NULL->super)) { - ompi_communicator_t *intermediate_comm = *newcomm; - ompi_comm_dup_with_info (intermediate_comm, info, newcomm); - ompi_comm_free(&intermediate_comm); - } assert(NULL == (*newcomm)->c_topo); (*newcomm)->c_topo = module; @@ -332,18 +325,10 @@ static void mca_topo_base_comm_dist_graph_2_2_0_construct(mca_topo_base_comm_dis } static void mca_topo_base_comm_dist_graph_2_2_0_destruct(mca_topo_base_comm_dist_graph_2_2_0_t * dist_graph) { - if (NULL != dist_graph->in) { - free(dist_graph->in); - } - if (NULL != dist_graph->inw) { - free(dist_graph->inw); - } - if (NULL != dist_graph->out) { - free(dist_graph->out); - } - if (NULL != dist_graph->outw) { - free(dist_graph->outw); - } + free(dist_graph->in); + free(dist_graph->inw); + free(dist_graph->out); + free(dist_graph->outw); } OBJ_CLASS_INSTANCE(mca_topo_base_comm_dist_graph_2_2_0_t, opal_object_t, diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c index 5b12042708b..336aa05c733 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create_adjacent.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2008 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corp. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. */ #include "ompi_config.h" @@ -20,40 +23,23 @@ #include "ompi/mca/topo/base/base.h" -int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, - ompi_communicator_t *comm_old, - int indegree, const int sources[], - const int sourceweights[], - int outdegree, - const int destinations[], - const int destweights[], - opal_info_t *info, int reorder, - ompi_communicator_t **newcomm) +static int _mca_topo_base_dist_graph_create_adjacent (mca_topo_base_module_t* module, int indegree, + const int sources[], const int sourceweights[], + int outdegree, const int destinations[], + const int destweights[], int reorder, + ompi_communicator_t **newcomm) { mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL; int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { - return err; - } - // But if there is an info object, the above call didn't make use - // of it, so we'll do a dup-with-info to get the final comm and - // free the above intermediate newcomm: - if (info && info != &(MPI_INFO_NULL->super)) { - ompi_communicator_t *intermediate_comm = *newcomm; - ompi_comm_dup_with_info (intermediate_comm, info, newcomm); - ompi_comm_free(&intermediate_comm); - } - err = OMPI_ERR_OUT_OF_RESOURCE; /* suppose by default something bad will happens */ assert( NULL == (*newcomm)->c_topo ); topo = OBJ_NEW(mca_topo_base_comm_dist_graph_2_2_0_t); - if( NULL == topo ) { - goto bail_out; + if (NULL == topo) { + ompi_comm_free (newcomm); + return OMPI_ERR_OUT_OF_RESOURCE; } topo->in = topo->inw = NULL; topo->out = topo->outw = NULL; @@ -103,16 +89,29 @@ int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, bail_out: if (NULL != topo) { - if( NULL != topo->in ) free(topo->in); - if( MPI_UNWEIGHTED != sourceweights ) { - if( NULL != topo->inw ) free(topo->inw); - } - if( NULL != topo->out ) free(topo->out); - if( MPI_UNWEIGHTED != destweights ) { - if( NULL != topo->outw ) free(topo->outw); - } OBJ_RELEASE(topo); } + ompi_comm_free(newcomm); return err; } + +int mca_topo_base_dist_graph_create_adjacent(mca_topo_base_module_t* module, + ompi_communicator_t *comm_old, + int indegree, const int sources[], + const int sourceweights[], + int outdegree, + const int destinations[], + const int destweights[], + opal_info_t *info, int reorder, + ompi_communicator_t **newcomm) +{ + int err; + + if (OMPI_SUCCESS != (err = ompi_comm_dup_with_info (comm_old, info, newcomm))) { + return err; + } + + return _mca_topo_base_dist_graph_create_adjacent (module, indegree, sources, sourceweights, outdegree, + destinations, destweights, reorder, newcomm); +} diff --git a/ompi/mca/topo/base/topo_base_find_available.c b/ompi/mca/topo/base/topo_base_find_available.c index 64a831c4cef..fda053f2ac5 100644 --- a/ompi/mca/topo/base/topo_base_find_available.c +++ b/ompi/mca/topo/base/topo_base_find_available.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2012-2013 Inria. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -101,7 +104,7 @@ static int init_query(const mca_base_component_t *m, happened in the topo framework before v2.2.0, so don't bother supporting anything before then. */ if (2 == m->mca_type_major_version && - 2 == m->mca_type_minor_version && + (2 == m->mca_type_minor_version || 3 == m->mca_type_minor_version) && 0 == m->mca_type_release_version) { ret = init_query_2_2_0(m, entry, enable_progress_threads, enable_mpi_threads); @@ -140,8 +143,8 @@ static int init_query_2_2_0(const mca_base_component_t *component, bool enable_progress_threads, bool enable_mpi_threads) { - mca_topo_base_component_2_2_0_t *topo = - (mca_topo_base_component_2_2_0_t *) component; + mca_topo_base_component_2_3_0_t *topo = + (mca_topo_base_component_2_3_0_t *) component; return topo->topoc_init_query(enable_progress_threads, enable_mpi_threads); diff --git a/ompi/mca/topo/base/topo_base_graph_create.c b/ompi/mca/topo/base/topo_base_graph_create.c index f41cd033d9d..dfd2708bd53 100644 --- a/ompi/mca/topo/base/topo_base_graph_create.c +++ b/ompi/mca/topo/base/topo_base_graph_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology @@ -24,6 +25,46 @@ #include "ompi/mca/topo/base/base.h" #include "ompi/mca/topo/topo.h" +static int mca_topo_base_graph_allocate (ompi_group_t *group, int nnodes, const int *index, const int *edges, + int *num_procs, mca_topo_base_comm_graph_2_2_0_t **graph_out) +{ + mca_topo_base_comm_graph_2_2_0_t *graph; + + *num_procs = group->grp_proc_count; + + if (*num_procs < nnodes) { + return MPI_ERR_DIMS; + } + + if (*num_procs > nnodes) { + *num_procs = nnodes; + } + + if (group->grp_my_rank > (nnodes - 1) || MPI_UNDEFINED == group->grp_my_rank) { + *graph_out = NULL; + return OMPI_SUCCESS; + } + + graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t); + if( NULL == graph ) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + graph->nnodes = nnodes; + graph->index = (int *) malloc (sizeof (int) * nnodes); + graph->edges = (int *) malloc (sizeof (int) * index[nnodes-1]); + if (OPAL_UNLIKELY(NULL == graph->index || NULL == graph->edges)) { + OBJ_RELEASE(graph); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + memcpy(graph->index, index, nnodes * sizeof(int)); + memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int)); + + *graph_out = graph; + + return OMPI_SUCCESS; +} + /* * * function - makes a new communicator to which topology information @@ -40,111 +81,41 @@ * @retval MPI_ERR_OUT_OF_RESOURCE */ -int mca_topo_base_graph_create(mca_topo_base_module_t *topo, - ompi_communicator_t* old_comm, - int nnodes, - const int *index, - const int *edges, - bool reorder, - ompi_communicator_t** comm_topo) +int mca_topo_base_graph_create (mca_topo_base_module_t *topo, ompi_communicator_t *old_comm, + int nnodes, const int *index, const int *edges, bool reorder, + ompi_communicator_t **comm_topo) { - ompi_communicator_t *new_comm; - int new_rank, num_procs, ret, i; - ompi_proc_t **topo_procs = NULL; - mca_topo_base_comm_graph_2_2_0_t* graph; + mca_topo_base_comm_graph_2_2_0_t *graph; + ompi_group_t *c_local_group; + int num_procs, ret; - num_procs = old_comm->c_local_group->grp_proc_count; - new_rank = old_comm->c_local_group->grp_my_rank; assert(topo->type == OMPI_COMM_GRAPH); - if( num_procs < nnodes ) { - return MPI_ERR_DIMS; - } - if( num_procs > nnodes ) { - num_procs = nnodes; - } - if( new_rank > (nnodes - 1) ) { - new_rank = MPI_UNDEFINED; - num_procs = 0; - nnodes = 0; - } - - graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t); - if( NULL == graph ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - graph->nnodes = nnodes; + *comm_topo = MPI_COMM_NULL; - /* Don't do any of the other initialization if we're not supposed - to be part of the new communicator (because nnodes has been - reset to 0, making things like index[nnodes-1] be junk). - - JMS: This should really be refactored to use - comm_create_group(), because ompi_comm_allocate() still - complains about 0-byte mallocs in debug builds for 0-member - groups. */ - if (MPI_UNDEFINED != new_rank) { - graph->index = (int*)malloc(sizeof(int) * nnodes); - if (NULL == graph->index) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(graph->index, index, nnodes * sizeof(int)); - - /* Graph communicator; copy the right data to the common information */ - graph->edges = (int*)malloc(sizeof(int) * index[nnodes-1]); - if (NULL == graph->edges) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int)); - - topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if (NULL == topo_procs) { - OBJ_RELEASE(graph); - return OMPI_ERR_OUT_OF_RESOURCE; - } - if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) { - memcpy(topo_procs, - old_comm->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i); - } - } + ret = mca_topo_base_graph_allocate (old_comm->c_local_group, nnodes, index, edges, &num_procs, + &graph); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + return ret; } - /* allocate a new communicator */ - new_comm = ompi_comm_allocate(nnodes, 0); - if (NULL == new_comm) { - free(topo_procs); + c_local_group = ompi_group_flatten (old_comm->c_local_group, nnodes); + if (OPAL_UNLIKELY(NULL == c_local_group)) { OBJ_RELEASE(graph); return OMPI_ERR_OUT_OF_RESOURCE; } - ret = ompi_comm_enable(old_comm, new_comm, - new_rank, num_procs, topo_procs); - if (OMPI_SUCCESS != ret) { - free(topo_procs); + ret = ompi_comm_create (old_comm, c_local_group, comm_topo); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OBJ_RELEASE(graph); - if (MPI_COMM_NULL != new_comm) { - new_comm->c_topo = NULL; - new_comm->c_flags &= ~OMPI_COMM_GRAPH; - ompi_comm_free (&new_comm); - } return ret; } - - new_comm->c_topo = topo; - new_comm->c_topo->mtc.graph = graph; - new_comm->c_flags |= OMPI_COMM_GRAPH; - new_comm->c_topo->reorder = reorder; - *comm_topo = new_comm; - - if( MPI_UNDEFINED == new_rank ) { - ompi_comm_free(&new_comm); - *comm_topo = MPI_COMM_NULL; + + if (MPI_COMM_NULL != *comm_topo) { + (*comm_topo)->c_topo = topo; + (*comm_topo)->c_topo->mtc.graph = graph; + (*comm_topo)->c_flags |= OMPI_COMM_GRAPH; + (*comm_topo)->c_topo->reorder = reorder; } return OMPI_SUCCESS; diff --git a/ompi/mca/topo/basic/topo_basic.h b/ompi/mca/topo/basic/topo_basic.h index 006005bffcd..c03fb242f26 100644 --- a/ompi/mca/topo/basic/topo_basic.h +++ b/ompi/mca/topo/basic/topo_basic.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2013 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -6,6 +7,8 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +24,7 @@ BEGIN_C_DECLS -typedef mca_topo_base_component_2_2_0_t mca_topo_basic_component_t; +typedef mca_topo_base_component_2_3_0_t mca_topo_basic_component_t; /* Public component instance */ OMPI_MODULE_DECLSPEC extern mca_topo_basic_component_t mca_topo_basic_component; diff --git a/ompi/mca/topo/basic/topo_basic_component.c b/ompi/mca/topo/basic/topo_basic_component.c index 83f26519273..a8a09c03532 100644 --- a/ompi/mca/topo/basic/topo_basic_component.c +++ b/ompi/mca/topo/basic/topo_basic_component.c @@ -9,6 +9,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,7 +32,7 @@ const char *mca_topo_basic_component_version_string = */ static int init_query(bool enable_progress_threads, bool enable_mpi_threads); static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); +mca_topo_basic_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); /* * Public component structure @@ -38,11 +40,10 @@ comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); mca_topo_basic_component_t mca_topo_basic_component = { .topoc_version = { - MCA_TOPO_BASE_VERSION_2_2_0, + MCA_TOPO_BASE_VERSION_2_3_0, .mca_component_name = "basic", - .mca_component_major_version = OMPI_MAJOR_VERSION, - .mca_component_minor_version = OMPI_MINOR_VERSION, - .mca_component_release_version = OMPI_RELEASE_VERSION, + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), /* NULLs for the rest of the function pointers */ }, @@ -52,7 +53,7 @@ mca_topo_basic_component_t mca_topo_basic_component = }, .topoc_init_query = init_query, - .topoc_comm_query = comm_query, + .topoc_query = mca_topo_basic_query, }; @@ -64,7 +65,7 @@ static int init_query(bool enable_progress_threads, bool enable_mpi_threads) static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) +mca_topo_basic_query (const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type) { /* Don't use OBJ_NEW, we need to zero the memory or the functions pointers * will not be correctly copied over from the base. @@ -81,5 +82,3 @@ comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) basic->type = type; return basic; } - - diff --git a/ompi/mca/topo/topo.h b/ompi/mca/topo/topo.h index 7735250f290..d19fbd7d9a6 100644 --- a/ompi/mca/topo/topo.h +++ b/ompi/mca/topo/topo.h @@ -17,6 +17,8 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,29 +42,28 @@ typedef struct mca_topo_base_module_t mca_topo_base_module_t; /* * Initial component query, called during mca_topo_base_open. */ -typedef int (*mca_topo_base_component_init_query_2_2_0_fn_t) +typedef int (*mca_topo_base_component_init_query_2_3_0_fn_t) (bool enable_progress_threads, bool enable_mpi_threads); /* - * Communicator query, called during cart and graph communicator - * creation. + * Communicator/group query, called during cart and graph communicator creation. */ typedef struct mca_topo_base_module_t* -(*mca_topo_base_component_comm_query_2_2_0_fn_t) - (const ompi_communicator_t *comm, int *priority, uint32_t type); +(*mca_topo_base_component_query_2_3_0_fn_t) + (const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); /* * Structure for topo v2.1.0 components.This is chained to MCA v2.0.0 */ -typedef struct mca_topo_base_component_2_2_0_t { +typedef struct mca_topo_base_component_2_3_0_t { mca_base_component_t topoc_version; mca_base_component_data_t topoc_data; - mca_topo_base_component_init_query_2_2_0_fn_t topoc_init_query; - mca_topo_base_component_comm_query_2_2_0_fn_t topoc_comm_query; -} mca_topo_base_component_2_2_0_t; -typedef mca_topo_base_component_2_2_0_t mca_topo_base_component_t; + mca_topo_base_component_init_query_2_3_0_fn_t topoc_init_query; + mca_topo_base_component_query_2_3_0_fn_t topoc_query; +} mca_topo_base_component_2_3_0_t; +typedef mca_topo_base_component_2_3_0_t mca_topo_base_component_t; /* * Struct for holding graph communicator information @@ -291,7 +292,7 @@ typedef int (*mca_topo_base_module_dist_graph_neighbors_count_fn_t) * automatically replaced by their default version. They will return the answers * based on the base information stored in the associated module extra data. */ -typedef struct mca_topo_base_cart_module_2_2_0_t { +typedef struct mca_topo_base_cart_module_2_3_0_t { mca_topo_base_module_cart_coords_fn_t cart_coords; mca_topo_base_module_cart_create_fn_t cart_create; mca_topo_base_module_cart_get_fn_t cart_get; @@ -300,23 +301,23 @@ typedef struct mca_topo_base_cart_module_2_2_0_t { mca_topo_base_module_cart_rank_fn_t cart_rank; mca_topo_base_module_cart_shift_fn_t cart_shift; mca_topo_base_module_cart_sub_fn_t cart_sub; -} mca_topo_base_cart_module_2_2_0_t; +} mca_topo_base_cart_module_2_3_0_t; -typedef struct mca_topo_base_graph_module_2_2_0_t { +typedef struct mca_topo_base_graph_module_2_3_0_t { mca_topo_base_module_graph_create_fn_t graph_create; mca_topo_base_module_graph_get_fn_t graph_get; mca_topo_base_module_graph_map_fn_t graph_map; mca_topo_base_module_graphdims_get_fn_t graphdims_get; mca_topo_base_module_graph_neighbors_fn_t graph_neighbors; mca_topo_base_module_graph_neighbors_count_fn_t graph_neighbors_count; -} mca_topo_base_graph_module_2_2_0_t; +} mca_topo_base_graph_module_2_3_0_t; -typedef struct mca_topo_base_dist_graph_module_2_2_0_t { +typedef struct mca_topo_base_dist_graph_module_2_3_0_t { mca_topo_base_module_dist_graph_create_fn_t dist_graph_create; mca_topo_base_module_dist_graph_create_adjacent_fn_t dist_graph_create_adjacent; mca_topo_base_module_dist_graph_neighbors_fn_t dist_graph_neighbors; mca_topo_base_module_dist_graph_neighbors_count_fn_t dist_graph_neighbors_count; -} mca_topo_base_dist_graph_module_2_2_0_t; +} mca_topo_base_dist_graph_module_2_3_0_t; struct mca_topo_base_module_t { /* Make this structure be an object so that it has a constructor @@ -329,9 +330,9 @@ struct mca_topo_base_module_t { /* Cart, graph or dist graph related functions */ union { - mca_topo_base_cart_module_2_2_0_t cart; - mca_topo_base_graph_module_2_2_0_t graph; - mca_topo_base_dist_graph_module_2_2_0_t dist_graph; + mca_topo_base_cart_module_2_3_0_t cart; + mca_topo_base_graph_module_2_3_0_t graph; + mca_topo_base_dist_graph_module_2_3_0_t dist_graph; } topo; /* This union caches the parameters passed when the communicator @@ -344,10 +345,10 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_topo_base_module_t); /* * ****************************************************************** - * ********** Use in components that are of type topo v2.2.0 ******** + * ********** Use in components that are of type topo v2.3.0 ******** * ****************************************************************** */ -#define MCA_TOPO_BASE_VERSION_2_2_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("topo", 2, 2, 0) +#define MCA_TOPO_BASE_VERSION_2_3_0 \ + OMPI_MCA_BASE_VERSION_2_1_0("topo", 2, 3, 0) #endif /* MCA_TOPO_H */ diff --git a/ompi/mca/topo/treematch/topo_treematch.h b/ompi/mca/topo/treematch/topo_treematch.h index bcc4d748bfd..e56a0abc0fa 100644 --- a/ompi/mca/topo/treematch/topo_treematch.h +++ b/ompi/mca/topo/treematch/topo_treematch.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -7,6 +8,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,13 +41,13 @@ BEGIN_C_DECLS /* * Public component instance */ -typedef struct mca_topo_treematch_component_2_2_0_t { - mca_topo_base_component_2_2_0_t super; +typedef struct mca_topo_treematch_component_2_3_0_t { + mca_topo_base_component_2_3_0_t super; int reorder_mode; -} mca_topo_treematch_component_2_2_0_t; +} mca_topo_treematch_component_2_3_0_t; -OMPI_MODULE_DECLSPEC extern mca_topo_treematch_component_2_2_0_t +OMPI_MODULE_DECLSPEC extern mca_topo_treematch_component_2_3_0_t mca_topo_treematch_component; /* diff --git a/ompi/mca/topo/treematch/topo_treematch_component.c b/ompi/mca/topo/treematch/topo_treematch_component.c index fca7e5b71b0..8d41a3e9568 100644 --- a/ompi/mca/topo/treematch/topo_treematch_component.c +++ b/ompi/mca/topo/treematch/topo_treematch_component.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights @@ -5,6 +6,8 @@ * Copyright (c) 2011-2015 INRIA. All rights reserved. * Copyright (c) 2011-2015 Université Bordeaux 1 * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -26,37 +29,32 @@ const char *mca_topo_treematch_component_version_string = */ static int init_query(bool enable_progress_threads, bool enable_mpi_threads); static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type); +mca_topo_treematch_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type); static int mca_topo_treematch_component_register(void); /* * Public component structure */ -mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component = +mca_topo_treematch_component_2_3_0_t mca_topo_treematch_component = { - { - { - MCA_TOPO_BASE_VERSION_2_2_0, - - "treematch", - OMPI_MAJOR_VERSION, - OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION, - NULL, /* component open */ - NULL, /* component close */ - NULL, /* component query */ - mca_topo_treematch_component_register, /* component register */ + .super = { + .topoc_version = { + MCA_TOPO_BASE_VERSION_2_3_0, + .mca_component_name = "treematch", + MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, + OMPI_RELEASE_VERSION), + .mca_register_component_params = mca_topo_treematch_component_register, }, - { + .topoc_data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, - init_query, - comm_query + .topoc_init_query = init_query, + .topoc_query = mca_topo_treematch_query, }, - 0 /* reorder: by default centralized */ + .reorder_mode = 0 /* reorder: by default centralized */ }; @@ -72,7 +70,7 @@ static int init_query(bool enable_progress_threads, bool enable_mpi_threads) static struct mca_topo_base_module_t * -comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type) +mca_topo_treematch_query(const ompi_communicator_t *comm, const ompi_group_t *group, int *priority, uint32_t type) { mca_topo_treematch_module_t *treematch; diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c index b3ba707c746..4ca64d7c830 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist.c @@ -50,6 +50,6 @@ mca_vprotocol_pessimist_module_t mca_vprotocol_pessimist = int mca_vprotocol_pessimist_dump(struct ompi_communicator_t* comm, int verbose) { - V_OUTPUT_VERBOSE(verbose, "vprotocol_pessimist: dump for comm %d", comm->c_contextid); + V_OUTPUT_VERBOSE(verbose, "vprotocol_pessimist: dump for comm %s", ompi_comm_print_cid (comm)); return mca_pml_v.host_pml.pml_dump(comm, verbose); } diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c index 50ed0a16c85..994790b9fae 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.c @@ -24,6 +24,7 @@ #include "opal/datatype/opal_datatype_memcpy.h" #include "opal/util/printf.h" #include +#include "ompi/runtime/mpiruntime.h" #define sb mca_vprotocol_pessimist.sender_based diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h index b639b47b394..07a9944a25c 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based.h @@ -195,7 +195,7 @@ static inline void vprotocol_pessimist_sender_based_copy_start(ompi_request_t *r sbhdr->size = pmlreq->req_bytes_packed; sbhdr->dst = pmlreq->req_base.req_peer; sbhdr->tag = pmlreq->req_base.req_tag; - sbhdr->contextid = pmlreq->req_base.req_comm->c_contextid; + sbhdr->contextid = ompi_comm_get_extended_cid (pmlreq->req_base.req_comm); sbhdr->sequence = pmlreq->req_base.req_sequence; ftreq->sb.cursor += sizeof(vprotocol_pessimist_sender_based_header_t); V_OUTPUT_VERBOSE(70, "pessimist:\tsb\tsend\t%"PRIpclock"\tsize %lu (+%lu header)", VPESSIMIST_FTREQ(req)->reqid, (long unsigned)pmlreq->req_bytes_packed, (long unsigned)sizeof(vprotocol_pessimist_sender_based_header_t)); diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h index c00dfff70e3..c19ade18e76 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h +++ b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_sender_based_types.h @@ -46,7 +46,7 @@ typedef struct vprotocol_pessimist_sender_based_header_t size_t size; int dst; int tag; - uint32_t contextid; + ompi_comm_extended_cid_t contextid; vprotocol_pessimist_clock_t sequence; } vprotocol_pessimist_sender_based_header_t; diff --git a/ompi/message/message.c b/ompi/message/message.c index deb0a4697f9..9bbe5f3d1f5 100644 --- a/ompi/message/message.c +++ b/ompi/message/message.c @@ -6,6 +6,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +22,7 @@ #include "opal/class/opal_object.h" #include "ompi/message/message.h" #include "ompi/constants.h" +#include "ompi/instance/instance.h" static void ompi_message_constructor(ompi_message_t *msg); @@ -27,6 +30,8 @@ OBJ_CLASS_INSTANCE(ompi_message_t, opal_free_list_item_t, ompi_message_constructor, NULL); +static int ompi_message_finalize (void); + opal_free_list_t ompi_message_free_list = {{{0}}}; opal_pointer_array_t ompi_message_f_to_c_table = {{0}}; @@ -67,11 +72,12 @@ ompi_message_init(void) return OMPI_ERR_NOT_FOUND; } + ompi_mpi_instance_append_finalize (ompi_message_finalize); + return rc; } -int -ompi_message_finalize(void) +static int ompi_message_finalize (void) { OBJ_DESTRUCT(&ompi_message_no_proc); OBJ_DESTRUCT(&ompi_message_free_list); diff --git a/ompi/message/message.h b/ompi/message/message.h index 0f0f1eacfac..0706a7490fb 100644 --- a/ompi/message/message.h +++ b/ompi/message/message.h @@ -4,6 +4,8 @@ * Copyright (c) 2012-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,8 +51,6 @@ typedef struct ompi_predefined_message_t ompi_predefined_message_t; int ompi_message_init(void); -int ompi_message_finalize(void); - OMPI_DECLSPEC extern opal_free_list_t ompi_message_free_list; OMPI_DECLSPEC extern opal_pointer_array_t ompi_message_f_to_c_table; OMPI_DECLSPEC extern ompi_predefined_message_t ompi_message_no_proc; diff --git a/ompi/mpi/c/Makefile.am b/ompi/mpi/c/Makefile.am index d9cb60bf19a..c50ff9acc04 100644 --- a/ompi/mpi/c/Makefile.am +++ b/ompi/mpi/c/Makefile.am @@ -106,6 +106,7 @@ libmpi_c_mpi_la_SOURCES = \ comm_connect.c \ comm_create.c \ comm_create_errhandler.c \ + comm_create_from_group.c \ comm_create_group.c \ comm_create_keyval.c \ comm_delete_attr.c \ @@ -240,6 +241,7 @@ libmpi_c_mpi_la_SOURCES = \ group_excl.c \ group_f2c.c \ group_free.c \ + group_from_session_pset.c \ group_incl.c \ group_intersection.c \ group_range_excl.c \ @@ -266,6 +268,7 @@ libmpi_c_mpi_la_SOURCES = \ init_thread.c \ initialized.c \ intercomm_create.c \ + intercomm_create_from_groups.c \ intercomm_merge.c \ iprobe.c \ irecv.c \ @@ -340,6 +343,16 @@ libmpi_c_mpi_la_SOURCES = \ send_init.c \ sendrecv.c \ sendrecv_replace.c \ + session_c2f.c \ + session_create_errhandler.c \ + session_get_info.c \ + session_get_num_psets.c \ + session_get_nth_pset.c \ + session_get_pset_info.c \ + session_init.c \ + session_f2c.c \ + session_finalize.c \ + session_set_info.c \ ssend_init.c \ ssend.c \ start.c \ diff --git a/ompi/mpi/c/comm_create_errhandler.c b/ompi/mpi/c/comm_create_errhandler.c index e342037cc92..9caf0510300 100644 --- a/ompi/mpi/c/comm_create_errhandler.c +++ b/ompi/mpi/c/comm_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,29 +43,29 @@ static const char FUNC_NAME[] = "MPI_Comm_create_errhandler"; int MPI_Comm_create_errhandler(MPI_Comm_errhandler_function *function, MPI_Errhandler *errhandler) { - int err = MPI_SUCCESS; + int err = MPI_SUCCESS; - /* Error checking */ + /* Error checking */ - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || - NULL == errhandler) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, - FUNC_NAME); + if (NULL == function || + NULL == errhandler) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, + FUNC_NAME); + } } - } - /* Create and cache the errhandler. Sets a refcount of 1. */ + /* Create and cache the errhandler. Sets a refcount of 1. */ - *errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_C); - if (NULL == *errhandler) { - err = MPI_ERR_INTERN; - } + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, + (ompi_errhandler_generic_handler_fn_t*) function, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } - OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, FUNC_NAME); + OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_create_from_group.c b/ompi/mpi/c/comm_create_from_group.c new file mode 100644 index 00000000000..80b17481582 --- /dev/null +++ b/ompi/mpi/c/comm_create_from_group.c @@ -0,0 +1,87 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2008 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/communicator/communicator.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Comm_create_from_group = PMPI_Comm_create_from_group +#endif +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group +#endif + +static const char FUNC_NAME[] = "MPI_Comm_create_from_group"; + + +int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newcomm) { + int rc; + + MEMCHECKER( + memchecker_comm(comm); + ); + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == tag) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_TAG, FUNC_NAME); + } + + if (NULL == group) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_GROUP, FUNC_NAME); + } + + if (NULL == info || ompi_info_is_freed(info)) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_INFO, FUNC_NAME); + } + + if (NULL == newcomm) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_ARG, FUNC_NAME); + } + } + + if (MPI_GROUP_NULL == group || MPI_UNDEFINED == ompi_group_rank (group)) { + *newcomm = MPI_COMM_NULL; + return MPI_SUCCESS; + } + + + rc = ompi_comm_create_from_group ((ompi_group_t *) group, tag, &info->super, errhandler, + (ompi_communicator_t **) newcomm); + OMPI_ERRHANDLER_RETURN (rc, *newcomm, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/comm_get_errhandler.c b/ompi/mpi/c/comm_get_errhandler.c index 301cdd91862..288476e1e9b 100644 --- a/ompi/mpi/c/comm_get_errhandler.c +++ b/ompi/mpi/c/comm_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/memchecker.h" +#include "ompi/instance/instance.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -43,6 +46,8 @@ static const char FUNC_NAME[] = "MPI_Comm_get_errhandler"; int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + /* Error checking */ MEMCHECKER( memchecker_comm(comm); @@ -68,7 +73,10 @@ int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *errhandler) *errhandler = comm->error_handler; OPAL_THREAD_UNLOCK(&(comm->c_lock)); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/comm_get_info.c b/ompi/mpi/c/comm_get_info.c index cb9ff2146cf..4fe9e746ce4 100644 --- a/ompi/mpi/c/comm_get_info.c +++ b/ompi/mpi/c/comm_get_info.c @@ -53,11 +53,12 @@ int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used) } - (*info_used) = OBJ_NEW(ompi_info_t); + *info_used = ompi_info_allocate (); if (NULL == (*info_used)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); } + opal_info_t *opal_info_used = &(*info_used)->super; opal_info_dup_mpistandard(comm->super.s_info, &opal_info_used); diff --git a/ompi/mpi/c/errhandler_f2c.c b/ompi/mpi/c/errhandler_f2c.c index bf4dce1994f..8ec5dd2527e 100644 --- a/ompi/mpi/c/errhandler_f2c.c +++ b/ompi/mpi/c/errhandler_f2c.c @@ -13,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2020-2021 Triad National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,23 +42,41 @@ static const char FUNC_NAME[] = "MPI_Errhandler_f2c"; MPI_Errhandler MPI_Errhandler_f2c(MPI_Fint errhandler_f) { int eh_index = OMPI_FINT_2_INT(errhandler_f); - - /* Error checking */ - - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - } + MPI_Errhandler c_err_handler; /* Per MPI-2:4.12.4, do not invoke an error handler if we get an invalid fortran handle. If we get an invalid fortran handle, return an invalid C handle. */ - if (eh_index < 0 || - eh_index >= - opal_pointer_array_get_size(&ompi_errhandler_f_to_c_table)) { - return NULL; + /* + * special cases for MPI_ERRORS_ARE_FATAL and MPI_ERRORS_RETURN - + * needed for MPI 4.0 + */ + + switch(eh_index) { + case OMPI_ERRHANDLER_NULL_FORTRAN: + c_err_handler = MPI_ERRHANDLER_NULL; + break; + case OMPI_ERRORS_ARE_FATAL_FORTRAN: + c_err_handler = MPI_ERRORS_ARE_FATAL; + break; + case OMPI_ERRORS_RETURN_FORTRAN: + c_err_handler = MPI_ERRORS_RETURN; + break; + default: + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + } + if (eh_index < 0 || + eh_index >= + opal_pointer_array_get_size(&ompi_errhandler_f_to_c_table)) { + c_err_handler = NULL; + } else { + c_err_handler = (MPI_Errhandler)opal_pointer_array_get_item(&ompi_errhandler_f_to_c_table, + eh_index); + } + break; } - return (MPI_Errhandler)opal_pointer_array_get_item(&ompi_errhandler_f_to_c_table, - eh_index); + return c_err_handler; } diff --git a/ompi/mpi/c/errhandler_free.c b/ompi/mpi/c/errhandler_free.c index a87038f4707..77e7da2e919 100644 --- a/ompi/mpi/c/errhandler_free.c +++ b/ompi/mpi/c/errhandler_free.c @@ -41,7 +41,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - /* Raise an MPI error if we got NULL or if we got an intrinsic + /* Raise an MPI exception if we got NULL or if we got an intrinsic *and* the reference count is 1 (meaning that this FREE would actually free the underlying intrinsic object). This is ugly but necessary -- see below. */ @@ -69,7 +69,7 @@ int MPI_Errhandler_free(MPI_Errhandler *errhandler) So decrease the refcount here. */ - OBJ_RELEASE(*errhandler); + ompi_errhandler_free (*errhandler); *errhandler = MPI_ERRHANDLER_NULL; /* All done */ diff --git a/ompi/mpi/c/file_create_errhandler.c b/ompi/mpi/c/file_create_errhandler.c index a839ec3a9fa..4041d00b658 100644 --- a/ompi/mpi/c/file_create_errhandler.c +++ b/ompi/mpi/c/file_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,31 +40,31 @@ static const char FUNC_NAME[] = "MPI_File_create_errhandler"; -int MPI_File_create_errhandler(MPI_File_errhandler_function *function, - MPI_Errhandler *errhandler) { - int err = MPI_SUCCESS; +int MPI_File_create_errhandler (MPI_File_errhandler_function *function, + MPI_Errhandler *errhandler) { + int err = MPI_SUCCESS; - /* Error checking */ + /* Error checking */ - if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == function || - NULL == errhandler) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == function || + NULL == errhandler) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_ARG, "MPI_File_create_errhandler"); + } } - } - /* Create and cache the errhandler. Sets a refcount of 1. */ + /* Create and cache the errhandler. Sets a refcount of 1. */ - *errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, - (ompi_errhandler_generic_handler_fn_t*) function, - OMPI_ERRHANDLER_LANG_C); - if (NULL == *errhandler) { - err = MPI_ERR_INTERN; - } + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, + (ompi_errhandler_generic_handler_fn_t*) function, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } - OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, - "MPI_File_create_errhandler"); + OMPI_ERRHANDLER_NOHANDLE_RETURN(err, MPI_ERR_INTERN, + "MPI_File_create_errhandler"); } diff --git a/ompi/mpi/c/file_get_errhandler.c b/ompi/mpi/c/file_get_errhandler.c index b78de9ce292..8836ec16b62 100644 --- a/ompi/mpi/c/file_get_errhandler.c +++ b/ompi/mpi/c/file_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,6 +44,8 @@ static const char FUNC_NAME[] = "MPI_File_get_errhandler"; int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + /* Error checking */ if (MPI_PARAM_CHECK) { @@ -67,7 +71,10 @@ int MPI_File_get_errhandler( MPI_File file, MPI_Errhandler *errhandler) OBJ_RETAIN(file->error_handler); OPAL_THREAD_UNLOCK(&file->f_lock); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/file_get_info.c b/ompi/mpi/c/file_get_info.c index 8d1411dedc8..9ccf43c05a9 100644 --- a/ompi/mpi/c/file_get_info.c +++ b/ompi/mpi/c/file_get_info.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +14,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2019 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,8 +80,8 @@ int MPI_File_get_info(MPI_File fh, MPI_Info *info_used) } - (*info_used) = OBJ_NEW(ompi_info_t); - if (NULL == (*info_used)) { + *info_used = ompi_info_allocate (); + if (NULL == *info_used) { return OMPI_ERRHANDLER_INVOKE(fh, MPI_ERR_NO_MEM, FUNC_NAME); } opal_info_t *opal_info_used = &(*info_used)->super; diff --git a/ompi/mpi/c/group_from_session_pset.c b/ompi/mpi/c/group_from_session_pset.c new file mode 100644 index 00000000000..4ee2dfe0451 --- /dev/null +++ b/ompi/mpi/c/group_from_session_pset.c @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Group_from_session_pset = PMPI_Group_from_session_pset +#endif +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset +#endif + +static const char FUNC_NAME[] = "MPI_Group_from_session_pset"; + + +int MPI_Group_from_session_pset (MPI_Session session, const char *pset_name, MPI_Group *newgroup) +{ + int rc; + + if ( MPI_PARAM_CHECK ) { + if (NULL == session || NULL == pset_name || NULL == newgroup) { + return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_group_from_pset (session, pset_name, newgroup); + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, session, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/info_c2f.c b/ompi/mpi/c/info_c2f.c index 55b795016b8..372c8abe766 100644 --- a/ompi/mpi/c/info_c2f.c +++ b/ompi/mpi/c/info_c2f.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,14 +37,12 @@ #define MPI_Info_c2f PMPI_Info_c2f #endif -static const char FUNC_NAME[] = "MPI_Info_c2f"; +/* static const char FUNC_NAME[] = "MPI_Info_c2f"; */ MPI_Fint MPI_Info_c2f(MPI_Info info) { if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if (NULL == info || ompi_info_is_freed(info)) { return OMPI_INT_2_FINT(-1); } diff --git a/ompi/mpi/c/info_create.c b/ompi/mpi/c/info_create.c index a41433e116d..c6f7ee18f26 100644 --- a/ompi/mpi/c/info_create.c +++ b/ompi/mpi/c/info_create.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,20 +53,13 @@ static const char FUNC_NAME[] = "MPI_Info_create"; int MPI_Info_create(MPI_Info *info) { if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME); } } - /* - * Call the object create function. This function not only - * allocates the space for MPI_Info, but also calls all the - * relevant init functions. Should I check if the fortran - * handle is valid - */ - (*info) = OBJ_NEW(ompi_info_t); + *info = ompi_info_allocate (); if (NULL == (*info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_NO_MEM, FUNC_NAME); diff --git a/ompi/mpi/c/info_delete.c b/ompi/mpi/c/info_delete.c index e1f53539e0f..3fcf5256782 100644 --- a/ompi/mpi/c/info_delete.c +++ b/ompi/mpi/c/info_delete.c @@ -15,6 +15,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -61,7 +63,6 @@ int MPI_Info_delete(MPI_Info info, const char *key) { * This function merely deletes the (key,val) pair in info */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_dup.c b/ompi/mpi/c/info_dup.c index 7c738d7b01c..bbe320d3955 100644 --- a/ompi/mpi/c/info_dup.c +++ b/ompi/mpi/c/info_dup.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -65,7 +68,6 @@ int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo) { */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || NULL == newinfo || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_f2c.c b/ompi/mpi/c/info_f2c.c index 8c6383a20e6..4e7095ce26d 100644 --- a/ompi/mpi/c/info_f2c.c +++ b/ompi/mpi/c/info_f2c.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +13,8 @@ * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,16 +50,25 @@ MPI_Info MPI_Info_f2c(MPI_Fint info) { int info_index = OMPI_FINT_2_INT(info); - /* check the arguments */ + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an + invalid fortran handle. If we get an invalid fortran handle, + return an invalid C handle. */ + /* + * Deal with special pre-defined cases for MPI 4.0 + */ + + if (info_index == 0) { + return MPI_INFO_NULL; + } + + if (info_index == 1) { + return MPI_INFO_ENV; + } if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } - /* Per MPI-2:4.12.4, do not invoke an error handler if we get an - invalid fortran handle. If we get an invalid fortran handle, - return an invalid C handle. */ - if (info_index < 0 || info_index >= opal_pointer_array_get_size(&ompi_info_f_to_c_table)) { diff --git a/ompi/mpi/c/info_free.c b/ompi/mpi/c/info_free.c index 32220c84c04..155c2105c24 100644 --- a/ompi/mpi/c/info_free.c +++ b/ompi/mpi/c/info_free.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/c/info_get.c b/ompi/mpi/c/info_get.c index 4b4da55f60a..563c6cd1f83 100644 --- a/ompi/mpi/c/info_get.c +++ b/ompi/mpi/c/info_get.c @@ -15,6 +15,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -74,7 +76,6 @@ int MPI_Info_get(MPI_Info info, const char *key, int valuelen, * necessary structures. */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_get_nkeys.c b/ompi/mpi/c/info_get_nkeys.c index fe79178af95..850dadeb5ca 100644 --- a/ompi/mpi/c/info_get_nkeys.c +++ b/ompi/mpi/c/info_get_nkeys.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -11,6 +12,8 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +58,6 @@ int MPI_Info_get_nkeys(MPI_Info info, int *nkeys) int err; if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_get_valuelen.c b/ompi/mpi/c/info_get_valuelen.c index 575de6d407d..e40d3c110f8 100644 --- a/ompi/mpi/c/info_get_valuelen.c +++ b/ompi/mpi/c/info_get_valuelen.c @@ -14,6 +14,8 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,7 +72,6 @@ int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, * having the "key" associated with it and return the length */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, diff --git a/ompi/mpi/c/info_set.c b/ompi/mpi/c/info_set.c index bb3c5c6198c..ea6524604d2 100644 --- a/ompi/mpi/c/info_set.c +++ b/ompi/mpi/c/info_set.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -13,6 +14,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,7 +80,6 @@ int MPI_Info_set(MPI_Info info, const char *key, const char *value) */ if (MPI_PARAM_CHECK) { - OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_INFO, diff --git a/ompi/mpi/c/intercomm_create.c b/ompi/mpi/c/intercomm_create.c index 5c582c7c0e5..0e8a903032a 100644 --- a/ompi/mpi/c/intercomm_create.c +++ b/ompi/mpi/c/intercomm_create.c @@ -17,6 +17,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,14 +50,7 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm) { - int local_size=0, local_rank=0; - int lleader=0, rleader=0; - ompi_communicator_t *newcomp=NULL; - struct ompi_proc_t **rprocs=NULL; - int rc=0, rsize=0; - ompi_proc_t **proc_list=NULL; - int j; - ompi_group_t *new_group_pointer; + int rc; MEMCHECKER( memchecker_comm(local_comm); @@ -89,169 +84,9 @@ int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, */ #endif - local_size = ompi_comm_size ( local_comm ); - local_rank = ompi_comm_rank ( local_comm ); - lleader = local_leader; - rleader = remote_leader; - - if ( MPI_PARAM_CHECK ) { - if ( (0 > local_leader) || (local_leader >= local_size) ) - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_ARG, - FUNC_NAME); - - /* remember that the remote_leader and bridge_comm arguments - just have to be valid at the local_leader */ - if ( local_rank == local_leader ) { - if ( ompi_comm_invalid ( bridge_comm ) || - (bridge_comm->c_flags & OMPI_COMM_INTER) ) { - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_COMM, - FUNC_NAME); - } - if ( (remote_leader < 0) || (remote_leader >= ompi_comm_size(bridge_comm))) { - return OMPI_ERRHANDLER_INVOKE ( local_comm, MPI_ERR_ARG, - FUNC_NAME); - } - } /* if ( local_rank == local_leader ) */ - } - - if ( local_rank == local_leader ) { - MPI_Request req; - - /* local leader exchange group sizes lists */ - rc = MCA_PML_CALL(irecv(&rsize, 1, MPI_INT, rleader, tag, bridge_comm, - &req)); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if( MPI_ERR_PROC_FAILED == rc ) { - rsize = 0; - goto skip_handshake; - } -#endif /* OPAL_ENABLE_FT_MPI */ - goto err_exit; - } - rc = MCA_PML_CALL(send (&local_size, 1, MPI_INT, rleader, tag, - MCA_PML_BASE_SEND_STANDARD, bridge_comm)); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if( MPI_ERR_PROC_FAILED == rc ) { - rsize = 0; - goto skip_handshake; - } -#endif /* OPAL_ENABLE_FT_MPI */ - goto err_exit; - } -#if OPAL_ENABLE_FT_MPI - skip_handshake: /* nothing special */; -#endif /* OPAL_ENABLE_FT_MPI */ - rc = ompi_request_wait( &req, MPI_STATUS_IGNORE); - if ( rc != MPI_SUCCESS ) { - rsize = 0; /* participate in the collective and then done */ - } - } - - /* bcast size and list of remote processes to all processes in local_comm */ - rc = local_comm->c_coll->coll_bcast ( &rsize, 1, MPI_INT, lleader, - local_comm, - local_comm->c_coll->coll_bcast_module); - if ( rc != MPI_SUCCESS ) { -#if OPAL_ENABLE_FT_MPI - if ( local_rank != local_leader ) { - goto err_exit; - } - /* the leaders must go in the ger_rprocs in order to avoid deadlocks */ -#else - goto err_exit; -#endif /* OPAL_ENABLE_FT_MPI */ - } - - rc = ompi_comm_get_rprocs( local_comm, bridge_comm, lleader, - remote_leader, tag, rsize, &rprocs ); - if ( OMPI_SUCCESS != rc ) { - goto err_exit; - } - - if ( MPI_PARAM_CHECK ) { - if(OMPI_GROUP_IS_DENSE(local_comm->c_local_group)) { - rc = ompi_comm_overlapping_groups(local_comm->c_local_group->grp_proc_count, - local_comm->c_local_group->grp_proc_pointers, - rsize, - rprocs); - } - else { - proc_list = (ompi_proc_t **) calloc (local_comm->c_local_group->grp_proc_count, - sizeof (ompi_proc_t *)); - for(j=0 ; jc_local_group->grp_proc_count ; j++) { - proc_list[j] = ompi_group_peer_lookup(local_comm->c_local_group,j); - } - rc = ompi_comm_overlapping_groups(local_comm->c_local_group->grp_proc_count, - proc_list, - rsize, - rprocs); - } - if ( OMPI_SUCCESS != rc ) { - goto err_exit; - } - } - new_group_pointer = ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - rc = MPI_ERR_GROUP; - goto err_exit; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] = rprocs[j]; - OBJ_RETAIN(rprocs[j]); - } - - rc = ompi_comm_set ( &newcomp, /* new comm */ - local_comm, /* old comm */ - local_comm->c_local_group->grp_proc_count, /* local_size */ - NULL, /* local_procs*/ - rsize, /* remote_size */ - NULL, /* remote_procs */ - NULL, /* attrs */ - local_comm->error_handler, /* error handler*/ - false, /* dont copy the topo */ - local_comm->c_local_group, /* local group */ - new_group_pointer /* remote group */ - ); - - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* Determine context id. It is identical to f_2_c_handle */ - rc = ompi_comm_nextcid (newcomp, local_comm, bridge_comm, &lleader, - &rleader, false, OMPI_COMM_CID_INTRA_BRIDGE); - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - /* activate comm and init coll-module */ - rc = ompi_comm_activate (&newcomp, local_comm, bridge_comm, &lleader, &rleader, - false, OMPI_COMM_CID_INTRA_BRIDGE); - if ( MPI_SUCCESS != rc ) { - goto err_exit; - } - - err_exit: - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - *newintercomm = MPI_COMM_NULL; - return OMPI_ERRHANDLER_INVOKE(local_comm, rc, - FUNC_NAME); - } + rc = ompi_intercomm_create (local_comm, local_leader, bridge_comm, remote_leader, tag, + newintercomm); - *newintercomm = newcomp; - return MPI_SUCCESS; + OMPI_ERRHANDLER_RETURN (rc, local_comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/intercomm_create_from_groups.c b/ompi/mpi/c/intercomm_create_from_groups.c new file mode 100644 index 00000000000..ab4d7938243 --- /dev/null +++ b/ompi/mpi/c/intercomm_create_from_groups.c @@ -0,0 +1,90 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2017 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 University of Houston. All rights reserved. + * Copyright (c) 2012-2013 Inria. All rights reserved. + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/communicator/communicator.h" +#include "ompi/request/request.h" +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Intercomm_create_from_groups = PMPI_Intercomm_create_from_groups +#endif +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups +#endif + +static const char FUNC_NAME[] = "MPI_Intercomm_create_from_groups"; + + +int MPI_Intercomm_create_from_groups (MPI_Group local_group, int local_leader, MPI_Group remote_group, + int remote_leader, const char *tag, MPI_Info info, MPI_Errhandler errhandler, + MPI_Comm *newintercomm) +{ + int rc; + + MEMCHECKER( + memchecker_comm(local_comm); + memchecker_comm(bridge_comm); + ); + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == errhandler) { + return MPI_ERR_ARG; + } + + if (NULL == local_group || NULL == remote_group) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_GROUP, FUNC_NAME); + } + if (NULL == info || ompi_info_is_freed(info)) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_INFO, FUNC_NAME); + } + if (NULL == tag) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_TAG, FUNC_NAME); + } + if (NULL == newintercomm) { + return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type, + MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_intercomm_create_from_groups (local_group, local_leader, remote_group, remote_leader, tag, + &info->super, errhandler, newintercomm); + + OMPI_ERRHANDLER_RETURN (rc, MPI_COMM_SELF, rc, FUNC_NAME); +} + diff --git a/ompi/mpi/c/intercomm_merge.c b/ompi/mpi/c/intercomm_merge.c index 18c458e5ca9..9cb3f31193d 100644 --- a/ompi/mpi/c/intercomm_merge.c +++ b/ompi/mpi/c/intercomm_merge.c @@ -18,6 +18,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,13 +50,12 @@ static const char FUNC_NAME[] = "MPI_Intercomm_merge"; int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newcomm) { - ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_communicator_t *newcomp = MPI_COMM_NULL; ompi_proc_t **procs=NULL; + int first, thigh = high; int local_size, remote_size; - int first; int total_size; int rc=MPI_SUCCESS; - int thigh = high; ompi_group_t *new_group_pointer; MEMCHECKER( @@ -96,8 +97,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, first = ompi_comm_determine_first ( intercomm, thigh ); if ( MPI_UNDEFINED == first ) { - rc = MPI_ERR_INTERN; - goto exit; + return OMPI_ERRHANDLER_INVOKE(intercomm, MPI_ERR_INTERN, FUNC_NAME); } if ( first ) { @@ -115,10 +115,9 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, NULL, /* remote_procs */ NULL, /* attrs */ intercomm->error_handler, /* error handler*/ - false, /* don't copy the topo */ new_group_pointer, /* local group */ - NULL /* remote group */ - ); + NULL, /* remote group */ + 0); if ( MPI_SUCCESS != rc ) { goto exit; } @@ -141,6 +140,7 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, } exit: + if ( NULL != procs ) { free ( procs ); } @@ -155,4 +155,3 @@ int MPI_Intercomm_merge(MPI_Comm intercomm, int high, *newcomm = newcomp; return MPI_SUCCESS; } - diff --git a/ompi/mpi/c/profile/Makefile.am b/ompi/mpi/c/profile/Makefile.am index 1d93634a190..f5c37ee5585 100644 --- a/ompi/mpi/c/profile/Makefile.am +++ b/ompi/mpi/c/profile/Makefile.am @@ -86,6 +86,7 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pcomm_connect.c \ pcomm_create.c \ pcomm_create_errhandler.c \ + pcomm_create_from_group.c \ pcomm_create_group.c \ pcomm_create_keyval.c \ pcomm_delete_attr.c \ @@ -220,6 +221,7 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pgroup_excl.c \ pgroup_f2c.c \ pgroup_free.c \ + pgroup_from_session_pset.c \ pgroup_incl.c \ pgroup_intersection.c \ pgroup_range_excl.c \ @@ -246,6 +248,7 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ pinit_thread.c \ pinitialized.c \ pintercomm_create.c \ + pintercomm_create_from_groups.c \ pintercomm_merge.c \ piprobe.c \ pirecv.c \ @@ -320,6 +323,16 @@ nodist_libmpi_c_pmpi_la_SOURCES = \ psend_init.c \ psendrecv.c \ psendrecv_replace.c \ + psession_c2f.c \ + psession_create_errhandler.c \ + psession_get_info.c \ + psession_get_num_psets.c \ + psession_get_nth_pset.c \ + psession_get_pset_info.c \ + psession_init.c \ + psession_f2c.c \ + psession_finalize.c \ + psession_set_info.c \ pssend_init.c \ pssend.c \ pstart.c \ diff --git a/ompi/mpi/c/session_c2f.c b/ompi/mpi/c/session_c2f.c new file mode 100644 index 00000000000..93b5d7da7f5 --- /dev/null +++ b/ompi/mpi/c/session_c2f.c @@ -0,0 +1,56 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include + +#include "ompi/instance/instance.h" +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_c2f = PMPI_Session_c2f +#endif +#define MPI_Session_c2f PMPI_Session_c2f +#endif + +static const char FUNC_NAME[] = "MPI_Session_c2f"; + + +MPI_Fint MPI_Session_c2f (MPI_Session session) +{ + + if ( MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == session) { + return OMPI_INT_2_FINT(-1); + } + } + + return OMPI_INT_2_FINT(session->i_f_to_c_index); +} diff --git a/ompi/mpi/c/session_create_errhandler.c b/ompi/mpi/c/session_create_errhandler.c new file mode 100644 index 00000000000..b1634131c88 --- /dev/null +++ b/ompi/mpi/c/session_create_errhandler.c @@ -0,0 +1,51 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_create_errhandler = PMPI_Session_create_errhandler +#endif +#define MPI_Session_create_errhandler PMPI_Session_create_errhandler +#endif + +static const char FUNC_NAME[] = "MPI_Session_create_errhandler"; + + +int MPI_Session_create_errhandler (MPI_Session_errhandler_function *session_errhandler_fn, MPI_Errhandler *errhandler) +{ + int err = MPI_SUCCESS; + + if ( MPI_PARAM_CHECK ) { + if (NULL == errhandler || NULL == session_errhandler_fn) { + return MPI_ERR_ARG; + } + } + + /* Create and cache the errhandler. Sets a refcount of 1. */ + *errhandler = + ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_INSTANCE, + (ompi_errhandler_generic_handler_fn_t *) session_errhandler_fn, + OMPI_ERRHANDLER_LANG_C); + if (NULL == *errhandler) { + err = MPI_ERR_INTERN; + } + + return err; +} diff --git a/ompi/mpi/c/session_f2c.c b/ompi/mpi/c/session_f2c.c new file mode 100644 index 00000000000..cb7c0dbd914 --- /dev/null +++ b/ompi/mpi/c/session_f2c.c @@ -0,0 +1,59 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" + +#include "ompi/instance/instance.h" +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/mpi/fortran/base/fint_2_int.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_f2c = PMPI_Session_f2c +#endif +#define MPI_Session_f2c PMPI_Session_f2c +#endif + +static const char FUNC_NAME[] = "MPI_Session_f2c"; + + +MPI_Session MPI_Session_f2c(MPI_Fint session) +{ + int o_index= OMPI_FINT_2_INT(session); + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + } + + /* Per MPI-2:4.12.4, do not invoke an error handler if we get an + invalid fortran handle. If we get an invalid fortran handle, + return an invalid C handle. */ + + if (0 > o_index || o_index >= opal_pointer_array_get_size(&ompi_instance_f_to_c_table)) { + return NULL; + } + + return (MPI_Session) opal_pointer_array_get_item (&ompi_instance_f_to_c_table, o_index); +} diff --git a/ompi/mpi/c/session_finalize.c b/ompi/mpi/c/session_finalize.c new file mode 100644 index 00000000000..4ecf052d974 --- /dev/null +++ b/ompi/mpi/c/session_finalize.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" + +#include "ompi/memchecker.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_finalize = PMPI_Session_finalize +#endif +#define MPI_Session_finalize PMPI_Session_finalize +#endif + +static const char FUNC_NAME[] = "MPI_Session_finalize"; + + +int MPI_Session_finalize (MPI_Session *session) +{ + int rc; + + if ( MPI_PARAM_CHECK ) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + + if (NULL == session || NULL == *session || MPI_SESSION_NULL == *session) { + return MPI_ERR_ARG; + } + } + + rc = ompi_mpi_instance_finalize (session); + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_info.c b/ompi/mpi/c/session_get_info.c new file mode 100644 index 00000000000..534dddda974 --- /dev/null +++ b/ompi/mpi/c/session_get_info.c @@ -0,0 +1,66 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/info/info.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_info = PMPI_Session_get_info +#endif +#define MPI_Session_get_info PMPI_Session_get_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_info"; + + +int MPI_Session_get_info (MPI_Session session, MPI_Info *info_used) +{ + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + if (NULL == info_used) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + + if (NULL == session->super.s_info) { + /* + * Setup any defaults if MPI_Win_set_info was never called + */ + opal_infosubscribe_change_info (&session->super, &MPI_INFO_NULL->super); + } + + + *info_used = ompi_info_allocate (); + if (OPAL_UNLIKELY(NULL == *info_used)) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_NO_MEM, FUNC_NAME); + } + + opal_info_t *opal_info_used = &(*info_used)->super; + + opal_info_dup_mpistandard (session->super.s_info, &opal_info_used); + + return MPI_SUCCESS; +} diff --git a/ompi/mpi/c/session_get_nth_pset.c b/ompi/mpi/c/session_get_nth_pset.c new file mode 100644 index 00000000000..4318979254f --- /dev/null +++ b/ompi/mpi/c/session_get_nth_pset.c @@ -0,0 +1,43 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2020 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_nth_pset = PMPI_Session_get_nth_pset +#endif +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_nth_pset"; + + +int MPI_Session_get_nth_pset (MPI_Session session, MPI_Info info, int n, int *len, char *pset_name) +{ + int rc = MPI_SUCCESS; + + if ( MPI_PARAM_CHECK ) { + if (NULL == session || (NULL == pset_name && *len > 0) || n < 0) { + return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_instance_get_nth_pset (session, n, len, pset_name); + + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_num_psets.c b/ompi/mpi/c/session_get_num_psets.c new file mode 100644 index 00000000000..231a01db47a --- /dev/null +++ b/ompi/mpi/c/session_get_num_psets.c @@ -0,0 +1,42 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_num_psets = PMPI_Session_get_num_psets +#endif +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_num_psets"; + + +int MPI_Session_get_num_psets (MPI_Session session, MPI_Info info, int *npset_names) +{ + int rc; + + if ( MPI_PARAM_CHECK ) { + if (NULL == session || NULL == npset_names) { + return OMPI_ERRHANDLER_INVOKE(session, MPI_ERR_ARG, FUNC_NAME); + } + } + + rc = ompi_instance_get_num_psets (session, npset_names); + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_get_pset_info.c b/ompi/mpi/c/session_get_pset_info.c new file mode 100644 index 00000000000..441ffa058d5 --- /dev/null +++ b/ompi/mpi/c/session_get_pset_info.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/info/info.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_get_pset_info = PMPI_Session_get_pset_info +#endif +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_get_pset_info"; + + +int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used) +{ + int ret; + + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + if (NULL == info_used) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + + ret = ompi_instance_get_pset_info (session, pset_name, (opal_info_t **) info_used); + return OMPI_ERRHANDLER_INVOKE(session, ret, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_init.c b/ompi/mpi/c/session_init.c new file mode 100644 index 00000000000..fc755b27ff0 --- /dev/null +++ b/ompi/mpi/c/session_init.c @@ -0,0 +1,60 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "ompi_config.h" +#include + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/info/info.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/instance/instance.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_init = PMPI_Session_init +#endif +#define MPI_Session_init PMPI_Session_init +#endif + +static const char FUNC_NAME[] = "MPI_Session_init"; + + +int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, MPI_Session *session) +{ + int rc, flag; + int ts_level = MPI_THREAD_SINGLE; /* for now we default to thread single for OMPI sessions */ + opal_cstring_t *info_value; + const char ts_level_multi[] = "MPI_THREAD_MULTIPLE"; + + if ( MPI_PARAM_CHECK ) { + if (NULL == errhandler || NULL == session) { + return MPI_ERR_ARG; + } + + if (NULL == info || ompi_info_is_freed (info)) { + return MPI_ERR_INFO; + } + } + + if (MPI_INFO_NULL != info) { + (void) ompi_info_get (info, "thread_level", &info_value, &flag); + if (flag) { + if(strncmp(info_value->string, ts_level_multi, strlen(ts_level_multi)) == 0) { + ts_level = MPI_THREAD_MULTIPLE; + } + OBJ_RELEASE(info_value); + } + } + + rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session); + /* if an error occured raise it on the null session */ + OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME); +} diff --git a/ompi/mpi/c/session_set_info.c b/ompi/mpi/c/session_set_info.c new file mode 100644 index 00000000000..776bc8f0e63 --- /dev/null +++ b/ompi/mpi/c/session_set_info.c @@ -0,0 +1,52 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/params.h" +#include "ompi/instance/instance.h" +#include "ompi/errhandler/errhandler.h" +#include "opal/util/info_subscriber.h" +#include +#include + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_Session_set_info = PMPI_Session_set_info +#endif +#define MPI_Session_set_info PMPI_Session_set_info +#endif + +static const char FUNC_NAME[] = "MPI_Session_set_info"; + + +int MPI_Session_set_info (MPI_Session session, MPI_Info info) +{ + if (MPI_PARAM_CHECK) { + OMPI_ERR_INIT_FINALIZE(FUNC_NAME); + if (NULL == session || MPI_SESSION_NULL == session) { + return MPI_ERR_ARG; + } + + if (NULL == info || MPI_INFO_NULL == info || ompi_info_is_freed(info)) { + return OMPI_ERRHANDLER_INVOKE (session, MPI_ERR_INFO, FUNC_NAME); + } + } + + opal_infosubscribe_change_info (&session->super, &info->super); + + return MPI_SUCCESS; +} diff --git a/ompi/mpi/c/testall.c b/ompi/mpi/c/testall.c index 1f59fbe3377..a9791636417 100644 --- a/ompi/mpi/c/testall.c +++ b/ompi/mpi/c/testall.c @@ -56,6 +56,7 @@ int MPI_Testall(int count, MPI_Request requests[], int *flag, if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( (NULL == requests) && (0 != count) ) { rc = MPI_ERR_REQUEST; @@ -65,6 +66,16 @@ int MPI_Testall(int count, MPI_Request requests[], int *flag, rc = MPI_ERR_REQUEST; break; } + if (requests[i] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[i]; + } + else { + if (requests[i]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if ((NULL == flag) || (count < 0)) { diff --git a/ompi/mpi/c/testany.c b/ompi/mpi/c/testany.c index 9740ce7b97e..8a54453f327 100644 --- a/ompi/mpi/c/testany.c +++ b/ompi/mpi/c/testany.c @@ -55,6 +55,7 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != count)) { rc = MPI_ERR_REQUEST; @@ -64,6 +65,16 @@ int MPI_Testany(int count, MPI_Request requests[], int *indx, int *completed, MP rc = MPI_ERR_REQUEST; break; } + if (requests[i] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[i]; + } + else { + if (requests[i]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == indx || NULL == completed) && count > 0) || diff --git a/ompi/mpi/c/testsome.c b/ompi/mpi/c/testsome.c index 9f9782d505b..857d1600659 100644 --- a/ompi/mpi/c/testsome.c +++ b/ompi/mpi/c/testsome.c @@ -57,6 +57,7 @@ int MPI_Testsome(int incount, MPI_Request requests[], if ( MPI_PARAM_CHECK ) { int indx, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != incount)) { rc = MPI_ERR_REQUEST; @@ -66,6 +67,16 @@ int MPI_Testsome(int incount, MPI_Request requests[], rc = MPI_ERR_REQUEST; break; } + if (requests[indx] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[indx]; + } + else { + if (requests[indx]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == outcount || NULL == indices) && incount > 0) || diff --git a/ompi/mpi/c/waitall.c b/ompi/mpi/c/waitall.c index 7b12e20cb89..88610df99c2 100644 --- a/ompi/mpi/c/waitall.c +++ b/ompi/mpi/c/waitall.c @@ -54,6 +54,7 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if( (NULL == requests) && (0 != count) ) { rc = MPI_ERR_REQUEST; @@ -63,6 +64,16 @@ int MPI_Waitall(int count, MPI_Request requests[], MPI_Status statuses[]) rc = MPI_ERR_REQUEST; break; } + if (requests[i] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[i]; + } + else { + if (requests[i]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (count < 0) { diff --git a/ompi/mpi/c/waitany.c b/ompi/mpi/c/waitany.c index 5878d7d5c35..95ec36104e0 100644 --- a/ompi/mpi/c/waitany.c +++ b/ompi/mpi/c/waitany.c @@ -55,6 +55,7 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status if ( MPI_PARAM_CHECK ) { int i, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != count)) { rc = MPI_ERR_REQUEST; @@ -64,6 +65,16 @@ int MPI_Waitany(int count, MPI_Request requests[], int *indx, MPI_Status *status rc = MPI_ERR_REQUEST; break; } + if (requests[i] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[i]; + } + else { + if (requests[i]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if ((NULL == indx && count > 0) || diff --git a/ompi/mpi/c/waitsome.c b/ompi/mpi/c/waitsome.c index b6beb5da752..768a5af409f 100644 --- a/ompi/mpi/c/waitsome.c +++ b/ompi/mpi/c/waitsome.c @@ -57,6 +57,7 @@ int MPI_Waitsome(int incount, MPI_Request requests[], if ( MPI_PARAM_CHECK ) { int indx, rc = MPI_SUCCESS; + MPI_Request check_req = NULL; OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if ((NULL == requests) && (0 != incount)) { rc = MPI_ERR_REQUEST; @@ -66,6 +67,16 @@ int MPI_Waitsome(int incount, MPI_Request requests[], rc = MPI_ERR_REQUEST; break; } + if (requests[indx] == &ompi_request_empty) continue; + else if (check_req == NULL) { + check_req = requests[indx]; + } + else { + if (requests[indx]->req_mpi_object.comm->instance != check_req->req_mpi_object.comm->instance) { + rc = MPI_ERR_REQUEST; + break; + } + } } } if (((NULL == outcount || NULL == indices) && incount > 0) || diff --git a/ompi/mpi/c/win_create_errhandler.c b/ompi/mpi/c/win_create_errhandler.c index c18a736857d..16c9262ff4e 100644 --- a/ompi/mpi/c/win_create_errhandler.c +++ b/ompi/mpi/c/win_create_errhandler.c @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology @@ -12,6 +13,8 @@ * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/ompi/mpi/c/win_get_errhandler.c b/ompi/mpi/c/win_get_errhandler.c index 9196e607b83..292f3c706af 100644 --- a/ompi/mpi/c/win_get_errhandler.c +++ b/ompi/mpi/c/win_get_errhandler.c @@ -15,6 +15,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/win/win.h" +#include "ompi/instance/instance.h" #if OMPI_BUILD_MPI_PROFILING #if OPAL_HAVE_WEAK_SYMBOLS @@ -42,6 +45,8 @@ static const char FUNC_NAME[] = "MPI_Win_get_errhandler"; int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) { + int ret = MPI_SUCCESS; + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); if (ompi_win_invalid(win)) { @@ -60,6 +65,10 @@ int MPI_Win_get_errhandler(MPI_Win win, MPI_Errhandler *errhandler) *errhandler = win->error_handler; OPAL_THREAD_UNLOCK(&win->w_lock); + /* make sure the infrastructure is initialized */ + ret = ompi_mpi_instance_retain (); + + /* All done */ - return MPI_SUCCESS; + return ret; } diff --git a/ompi/mpi/c/win_get_info.c b/ompi/mpi/c/win_get_info.c index 83e4fbaf0ee..6d0140c3e44 100644 --- a/ompi/mpi/c/win_get_info.c +++ b/ompi/mpi/c/win_get_info.c @@ -54,8 +54,8 @@ int MPI_Win_get_info(MPI_Win win, MPI_Info *info_used) opal_infosubscribe_change_info(&win->super, &MPI_INFO_NULL->super); } - (*info_used) = OBJ_NEW(ompi_info_t); - if (NULL == (*info_used)) { + *info_used = ompi_info_allocate (); + if (NULL == *info_used) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_NO_MEM, FUNC_NAME); } opal_info_t *opal_info_used = &(*info_used)->super; diff --git a/ompi/mpi/fortran/mpif-h/Makefile.am b/ompi/mpi/fortran/mpif-h/Makefile.am index a23d3d42366..4ff50f56778 100644 --- a/ompi/mpi/fortran/mpif-h/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/Makefile.am @@ -18,6 +18,8 @@ # and Technology (RIST). All rights reserved. # Copyright (c) 2016 IBM Corporation. All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2019 Triad National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -167,6 +169,7 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ comm_connect_f.c \ comm_create_errhandler_f.c \ comm_create_f.c \ + comm_create_from_group_f.c \ comm_create_group_f.c \ comm_create_keyval_f.c \ comm_delete_attr_f.c \ @@ -288,6 +291,7 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ group_compare_f.c \ group_difference_f.c \ group_excl_f.c \ + group_from_session_pset_f.c \ group_free_f.c \ group_incl_f.c \ group_intersection_f.c \ @@ -329,6 +333,7 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ initialized_f.c \ init_thread_f.c \ intercomm_create_f.c \ + intercomm_from_groups_f.c \ intercomm_merge_f.c \ iprobe_f.c \ irecv_f.c \ @@ -388,6 +393,12 @@ lib@OMPI_LIBMPI_NAME@_mpifh_la_SOURCES += \ send_init_f.c \ sendrecv_f.c \ sendrecv_replace_f.c \ + session_get_info_f.c \ + session_get_nth_pset_f.c \ + session_get_num_psets_f.c \ + session_get_pset_info_f.c \ + session_init_f.c \ + session_finalize_f.c \ ssend_f.c \ ssend_init_f.c \ startall_f.c \ diff --git a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c index 480e832242f..9881590dd11 100644 --- a/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/comm_create_errhandler_f.c @@ -74,8 +74,9 @@ void ompi_comm_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t *functio MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, + MPI_Errhandler c_errhandler; + + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_COMM, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c b/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c new file mode 100644 index 00000000000..58e75c9af12 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/comm_create_from_group_f.c @@ -0,0 +1,112 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" +#include "ompi/group/group.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_COMM_CREATE_FROM_GROUP = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group_ = ompi_comm_create_from_group_f +#pragma weak pmpi_comm_create_from_group__ = ompi_comm_create_from_group_f + +#pragma weak PMPI_Comm_create_from_group_f = ompi_comm_create_from_group_f +#pragma weak PMPI_Comm_create_from_group_f08 = ompi_comm_create_from_group_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_COMM_CREATE_FROM_GROUP, + pmpi_comm_create_from_group, + pmpi_comm_create_from_group_, + pmpi_comm_create_from_group__, + pmpi_comm_create_from_group_f, + (MPI_Fint *goup, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len), + (group, stringtag, info, errhandler, newcomm, ierr, name_len) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_COMM_CREATE_FROM_GROUP = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group_ = ompi_comm_create_from_group_f +#pragma weak mpi_comm_create_from_group__ = ompi_comm_create_from_group_f + +#pragma weak MPI_Comm_create_from_group_f = ompi_comm_create_from_group_f +#pragma weak MPI_Comm_create_from_group_f08 = ompi_comm_create_from_group_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_COMM_CREATE_FROM_GROUP, + mpi_comm_create_from_group, + mpi_comm_create_from_group_, + mpi_comm_create_from_group__, + ompi_comm_create_from_group_f, + (MPI_Fint *goup, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len), + (group, stringtag, info, errhandler, newcomm, ierr, name_len) ) +#else +#define ompi_comm_create_from_group_f pompi_comm_create_from_group_f +#endif +#endif + +void ompi_comm_create_from_group_f(MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Group c_group; + char *c_tag; + MPI_Comm c_comm; + MPI_Info c_info; + MPI_Errhandler c_err; + + c_group = PMPI_Group_f2c(*group); + c_info = PMPI_Info_f2c(*info); + c_err = PMPI_Errhandler_f2c(*errhandler); + + /* Convert the fortran string */ + + /* Convert the fortran string */ + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(stringtag, name_len, + &c_tag))) { + c_ierr = OMPI_ERRHANDLER_INVOKE(ompi_group_get_instance(c_group), ret, "MPI_COMM_CREATE_FROM_GROUP"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Comm_create_from_group(c_group, c_tag, c_info, c_err, &c_comm); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *newcomm = PMPI_Comm_c2f (c_comm); + } + + /* Free the C tag */ + + free(c_tag); +} + diff --git a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c index 29ac3ffe365..a20af467621 100644 --- a/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/file_create_errhandler_f.c @@ -74,8 +74,8 @@ void ompi_file_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* functio MPI_Fint *errhandler, MPI_Fint *ierr) { int c_ierr; - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, + MPI_Errhandler c_errhandler; + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_FILE, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c b/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c new file mode 100644 index 00000000000..3bbb0d65691 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/group_from_session_pset_f.c @@ -0,0 +1,108 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_GROUP_FROM_SESSION_PSET = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset_ = ompi_group_from_session_pset_f +#pragma weak pmpi_group_from_session_pset__ = ompi_group_from_session_pset_f + +#pragma weak PMPI_Group_from_session_pset_f = ompi_group_from_session_pset_f +#pragma weak PMPI_Group_from_session_pset_f08 = ompi_group_from_session_pset_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_GROUP_FROM_SESSION_PSET, + pmpi_group_from_session_pset, + pmpi_group_from_session_pset_, + pmpi_group_from_session_pset__, + pmpi_group_from_session_pset_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr), + (session, pset_name, newgroup, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_GROUP_FROM_SESSION_PSET = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset_ = ompi_group_from_session_pset_f +#pragma weak mpi_group_from_session_pset__ = ompi_group_from_session_pset_f + +#pragma weak MPI_Group_from_session_pset_f = ompi_group_from_session_pset_f +#pragma weak MPI_Group_from_session_pset_f08 = ompi_group_from_session_pset_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_GROUP_FROM_SESSION_PSET, + mpi_group_from_session_pset, + mpi_group_from_session_pset_, + mpi_group_from_session_pset__, + ompi_group_from_session_pset_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr), + (session, pset_name, newgroup, ierr) ) +#else +#define ompi_group_from_session_pset_f pompi_group_from_session_pset_f +#endif +#endif + +void ompi_group_from_session_pset_f(MPI_Fint *session,char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Session c_session; + char *c_name; + MPI_Group c_newgroup; + + c_session = PMPI_Session_f2c(*session); + + /* Convert the fortran string */ + + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(pset_name, name_len, + &c_name))) { + c_ierr = OMPI_ERRHANDLER_INVOKE((ompi_instance_t *)c_session, ret, + "MPI_GROUP_FROM_SESSION_PSET"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Group_from_session_pset(c_session, c_name, &c_newgroup); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *newgroup = PMPI_Group_c2f (c_newgroup); + } + + /* Free the C name */ + + free(c_name); +} + + diff --git a/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c b/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c new file mode 100644 index 00000000000..61e129ff25e --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/intercomm_create_from_groups_f.c @@ -0,0 +1,123 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" +#include "ompi/group/group.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_INTERCOMM_CREATE_FROM_GROUPS = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups_ = ompi_intercomm_create_from_groups_f +#pragma weak pmpi_intercomm_create_from_groups__ = ompi_intercomm_create_from_groups_f + +#pragma weak PMPI_Intercomm_create_from_groups_f = ompi_intercomm_create_from_groups_f +#pragma weak PMPI_Intercomm_create_from_groups_f08 = ompi_intercomm_create_from_groups_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_INTERCOMM_CREATE_FROM_GROUPS, + pmpi_intercomm_create_from_groups, + pmpi_intercomm_create_from_groups_, + pmpi_intercomm_create_from_groups__, + pmpi_intercomm_create_from_groups_f, + (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcom, MPI_Fint *ierr, int name_len), + (local_group, local_leader, remote_group, + remote_leader, stringtag, info, errhandler, internewcomm, ierr, name_len) ) + +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_INTERCOMM_CREATE_FROM_GROUPS = ompi_intercomm_create_from_groups_f +#pragma weak mpi_intercomm_create_from_groups = ompi_intercomm_create_from_groups_f +#pragma weak mpi_intercomm_create_from_groups_ = ompi_intercomm_create_from_groups_f +#pragma weak mpi_intercomm_create_from_groups__ = ompi_intercomm_create_from_groups_f + +#pragma weak MPI_Intercomm_create_from_groups_f = ompi_intercomm_create_from_groups_f +#pragma weak MPI_Intercomm_create_from_groups_f08 = ompi_intercomm_create_from_groups_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_INTERCOMM_CREATE_FROM_GROUPS, + mpi_intercomm_create_from_groups, + mpi_intercomm_create_from_groups_, + mpi_intercomm_create_from_groups__, + ompi_intercomm_create_from_groups_f, + (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcom, MPI_Fint *ierr, int name_len), + (local_group, local_leader, remote_group, + remote_leader, stringtag, info, errhandler, internewcomm, ierr, name_len) ) +#else +#define ompi_intercomm_create_from_groups_f pompi_intercomm_create_from_groups_f +#endif +#endif + +void ompi_intercomm_create_from_groups_f(MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, + MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, + MPI_Fint *internewcomm, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Group c_lgroup, c_rgroup; + char *c_tag; + MPI_Comm c_intercomm; + MPI_Info c_info; + MPI_Errhandler c_err; + + c_lgroup = PMPI_Group_f2c(*local_group); + c_rgroup = PMPI_Group_f2c(*remote_group); + c_info = PMPI_Info_f2c(*info); + c_err = PMPI_Errhandler_f2c(*errhandler); + + /* Convert the fortran string */ + + /* Convert the fortran string */ + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(stringtag, name_len, + &c_tag))) { + c_ierr = OMPI_ERRHANDLER_INVOKE(ompi_group_get_instance(c_lgroup), ret, "MPI_INTERCOMM_CREATE_FROM_GROUPS"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Intercomm_create_from_groups(c_lgroup, OMPI_FINT_2_INT(*local_leader), + c_rgroup, OMPI_FINT_2_INT(*remote_leader), + c_tag, c_info, c_err, &c_intercomm); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *internewcomm = PMPI_Comm_c2f (c_intercomm); + } + + /* Free the C tag */ + + free(c_tag); +} diff --git a/ompi/mpi/fortran/mpif-h/profile/Makefile.am b/ompi/mpi/fortran/mpif-h/profile/Makefile.am index 0dffef8a843..5f5c8606245 100644 --- a/ompi/mpi/fortran/mpif-h/profile/Makefile.am +++ b/ompi/mpi/fortran/mpif-h/profile/Makefile.am @@ -18,6 +18,8 @@ # Copyright (c) 2015-2021 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. +# Copyright (c) 2019 Triad National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -80,6 +82,7 @@ linked_files = \ pcomm_connect_f.c \ pcomm_create_errhandler_f.c \ pcomm_create_f.c \ + pcomm_create_from_group_f.c \ pcomm_create_group_f.c \ pcomm_create_keyval_f.c \ pcomm_delete_attr_f.c \ @@ -202,6 +205,7 @@ linked_files = \ pgroup_difference_f.c \ pgroup_excl_f.c \ pgroup_free_f.c \ + pgroup_from_session_pset_f.c \ pgroup_incl_f.c \ pgroup_intersection_f.c \ pgroup_range_excl_f.c \ @@ -242,6 +246,7 @@ linked_files = \ pinitialized_f.c \ pinit_thread_f.c \ pintercomm_create_f.c \ + pintercomm_create_from_groups_f.c \ pintercomm_merge_f.c \ piprobe_f.c \ pirecv_f.c \ @@ -300,6 +305,12 @@ linked_files = \ psend_init_f.c \ psendrecv_f.c \ psendrecv_replace_f.c \ + psession_get_info_f.c \ + psession_get_nth_pset_f.c \ + psession_get_num_psets_f.c \ + psession_get_pset_info_f.c \ + psession_init_f.c \ + psession_finalize_f.c \ pssend_f.c \ pssend_init_f.c \ pstartall_f.c \ diff --git a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h index 01aa388862c..487135d6d42 100644 --- a/ompi/mpi/fortran/mpif-h/prototypes_mpi.h +++ b/ompi/mpi/fortran/mpif-h/prototypes_mpi.h @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2016-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -124,6 +126,7 @@ PN2(void, MPI_Comm_connect, mpi_comm_connect, MPI_COMM_CONNECT, (char *port_name PN2(void, MPI_Comm_create_errhandler, mpi_comm_create_errhandler, MPI_COMM_CREATE_ERRHANDLER, (ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr)); PN2(void, MPI_Comm_create_keyval, mpi_comm_create_keyval, MPI_COMM_CREATE_KEYVAL, (ompi_aint_copy_attr_function* comm_copy_attr_fn, ompi_aint_delete_attr_function* comm_delete_attr_fn, MPI_Fint *comm_keyval, MPI_Aint *extra_state, MPI_Fint *ierr)); PN2(void, MPI_Comm_create, mpi_comm_create, MPI_COMM_CREATE, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *newcomm, MPI_Fint *ierr)); +PN2(void, MPI_Comm_create_from_group, mpi_comm_create_from_group, MPI_COMM_CREATE_FROM_GROUP, (MPI_Fint *group, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newcomm, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Comm_create_group, mpi_comm_create_group, MPI_COMM_CREATE_GROUP, (MPI_Fint *comm, MPI_Fint *group, MPI_Fint *tag, MPI_Fint *newcomm, MPI_Fint *ierr)); PN2(void, MPI_Comm_delete_attr, mpi_comm_delete_attr, MPI_COMM_DELETE_ATTR, (MPI_Fint *comm, MPI_Fint *comm_keyval, MPI_Fint *ierr)); PN2(void, MPI_Comm_disconnect, mpi_comm_disconnect, MPI_COMM_DISCONNECT, (MPI_Fint *comm, MPI_Fint *ierr)); @@ -252,6 +255,7 @@ PN2(void, MPI_Group_compare, mpi_group_compare, MPI_GROUP_COMPARE, (MPI_Fint *gr PN2(void, MPI_Group_difference, mpi_group_difference, MPI_GROUP_DIFFERENCE, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_excl, mpi_group_excl, MPI_GROUP_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_free, mpi_group_free, MPI_GROUP_FREE, (MPI_Fint *group, MPI_Fint *ierr)); +PN2(void, MPI_Group_from_session_pset, mpi_group_from_session_pset, MPI_GROUP_FROM_SESSION_PSET, (MPI_Fint *group, char *pset_name, MPI_Fint *newgroup, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Group_incl, mpi_group_incl, MPI_GROUP_INCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint *ranks, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_intersection, mpi_group_intersection, MPI_GROUP_INTERSECTION, (MPI_Fint *group1, MPI_Fint *group2, MPI_Fint *newgroup, MPI_Fint *ierr)); PN2(void, MPI_Group_range_excl, mpi_group_range_excl, MPI_GROUP_RANGE_EXCL, (MPI_Fint *group, MPI_Fint *n, MPI_Fint ranges[][3], MPI_Fint *newgroup, MPI_Fint *ierr)); @@ -298,6 +302,7 @@ PN2(void, MPI_Init, mpi_init, MPI_INIT, (MPI_Fint *ierr)); PN2(void, MPI_Initialized, mpi_initialized, MPI_INITIALIZED, (ompi_fortran_logical_t *flag, MPI_Fint *ierr)); PN2(void, MPI_Init_thread, mpi_init_thread, MPI_INIT_THREAD, (MPI_Fint *required, MPI_Fint *provided, MPI_Fint *ierr)); PN2(void, MPI_Intercomm_create, mpi_intercomm_create, MPI_INTERCOMM_CREATE, (MPI_Fint *local_comm, MPI_Fint *local_leader, MPI_Fint *bridge_comm, MPI_Fint *remote_leader, MPI_Fint *tag, MPI_Fint *newintercomm, MPI_Fint *ierr)); +PN2(void, MPI_Intercomm_create_from_groups, mpi_intercomm_create_from_groups, MPI_INTERCOMM_CREATE_FROM_GROUPS, (MPI_Fint *local_group, MPI_Fint *local_leader, MPI_Fint *remote_group, MPI_Fint *remote_leader, char *stringtag, MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *newintercomm, MPI_Fint *ierr, int name_len)); PN2(void, MPI_Intercomm_merge, mpi_intercomm_merge, MPI_INTERCOMM_MERGE, (MPI_Fint *intercomm, ompi_fortran_logical_t *high, MPI_Fint *newintercomm, MPI_Fint *ierr)); PN2(void, MPI_Iprobe, mpi_iprobe, MPI_IPROBE, (MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, ompi_fortran_logical_t *flag, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Irecv, mpi_irecv, MPI_IRECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); @@ -356,6 +361,12 @@ PN2(void, MPI_Send_init, mpi_send_init, MPI_SEND_INIT, (char *buf, MPI_Fint *cou PN2(void, MPI_Send, mpi_send, MPI_SEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Sendrecv, mpi_sendrecv, MPI_SENDRECV, (char *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype, MPI_Fint *dest, MPI_Fint *sendtag, char *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); PN2(void, MPI_Sendrecv_replace, mpi_sendrecv_replace, MPI_SENDRECV_REPLACE, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *sendtag, MPI_Fint *source, MPI_Fint *recvtag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_info, mpi_session_get_info, MPI_SESSION_GET_INFO, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_nth_pset, mpi_session_get_nth_pset, MPI_SESSION_GET_NTH_PSET, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_num_psets, mpi_session_get_num_psets, MPI_SESSION_GET_NUM_PSETS, (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr)); +PN2(void, MPI_Session_get_pset_info, mpi_session_get_pset_info, MPI_SESSION_GET_PSET_INFO, (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len)); +PN2(void, MPI_Session_init, mpi_session_init, MPI_SESSION_INIT, (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr)); +PN2(void, MPI_Session_finalize, mpi_session_finalize, MPI_SESSION_FINALIZE, (MPI_Fint *session, MPI_Fint *ierr)); PN2(void, MPI_Ssend_init, mpi_ssend_init, MPI_SSEND_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr)); PN2(void, MPI_Ssend, mpi_ssend, MPI_SSEND, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *dest, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *ierr)); PN2(void, MPI_Start, mpi_start, MPI_START, (MPI_Fint *request, MPI_Fint *ierr)); diff --git a/ompi/mpi/fortran/mpif-h/session_finalize_f.c b/ompi/mpi/fortran/mpif-h/session_finalize_f.c new file mode 100644 index 00000000000..57c26cf9557 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_finalize_f.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_FINALIZE = ompi_session_finalize_f +#pragma weak pmpi_session_finalize = ompi_session_finalize_f +#pragma weak pmpi_session_finalize_ = ompi_session_finalize_f +#pragma weak pmpi_session_finalize__ = ompi_session_finalize_f + +#pragma weak PMPI_Session_finalize_f = ompi_session_finalize_f +#pragma weak PMPI_Session_finalize_f08 = ompi_session_finalize_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_FINALIZE, + pmpi_session_finalize, + pmpi_session_finalize_, + pmpi_session_finalize__, + pompi_session_finalize_f, + (MPI_Fint *session, MPI_Fint *ierr), + (session, ierr) ) +#endif +#endif + + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_FINALIZE = ompi_session_finalize_f +#pragma weak mpi_session_finalize = ompi_session_finalize_f +#pragma weak mpi_session_finalize_ = ompi_session_finalize_f +#pragma weak mpi_session_finalize__ = ompi_session_finalize_f + +#pragma weak MPI_Session_finalize_f = ompi_session_finalize_f +#pragma weak MPI_Session_finalize_f08 = ompi_session_finalize_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_FINALIZE, + mpi_session_finalize, + mpi_session_finalize_, + mpi_session_finalize__, + ompi_session_finalize_f, + (MPI_Fint *session, MPI_Fint *ierr), + (session, ierr) ) +#else +#define ompi_session_finalize_f pompi_session_finalize_f +#endif +#endif + +void ompi_session_finalize_f(MPI_Fint *session, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + + c_session = PMPI_Session_f2c(*session); + + c_ierr = PMPI_Session_finalize(&c_session); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); +} diff --git a/ompi/mpi/fortran/mpif-h/session_get_info_f.c b/ompi/mpi/fortran/mpif-h/session_get_info_f.c new file mode 100644 index 00000000000..c9da5b16ff0 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_get_info_f.c @@ -0,0 +1,88 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_GET_INFO = ompi_session_get_info_f +#pragma weak pmpi_session_get_info = ompi_session_get_info_f +#pragma weak pmpi_session_get_info_ = ompi_session_get_info_f +#pragma weak pmpi_session_get_info__ = ompi_session_get_info_f + +#pragma weak PMPI_Session_get_info_f = ompi_session_get_info_f +#pragma weak PMPI_Session_get_info_f08 = ompi_session_get_info_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_INFO, + pmpi_session_get_info, + pmpi_session_get_info_, + pmpi_session_get_info__, + pmpi_session_get_info_f, + (MPI_Fint *session, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_GET_INFO = ompi_session_get_info_f +#pragma weak mpi_session_get_info = ompi_session_get_info_f +#pragma weak mpi_session_get_info_ = ompi_session_get_info_f +#pragma weak mpi_session_get_info__ = ompi_session_get_info_f + +#pragma weak MPI_Session_get_info_f = ompi_session_get_info_f +#pragma weak MPI_Session_get_info_f08 = ompi_session_get_info_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_INFO, + mpi_session_get_info, + mpi_session_get_info_, + mpi_session_get_info__, + ompi_session_get_info_f, + (MPI_Fint *session, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#else +#define ompi_session_get_info_f pompi_session_get_info_f +#endif +#endif + +void ompi_session_get_info_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + MPI_Info c_info; + + c_session = PMPI_Session_f2c(*session); + + c_ierr = PMPI_Session_get_info(c_session, &c_info); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *info = PMPI_Info_c2f(c_info); + } +} diff --git a/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c b/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c new file mode 100644 index 00000000000..4b2d0aa180b --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_get_nth_pset_f.c @@ -0,0 +1,103 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_GET_NTH_PSET = ompi_session_get_nth_pset_f +#pragma weak pmpi_session_get_nth_pset = ompi_session_get_nth_pset_f +#pragma weak pmpi_session_get_nth_pset_ = ompi_session_get_nth_pset_f +#pragma weak pmpi_session_get_nth_pset__ = ompi_session_get_nth_pset_f + +#pragma weak PMPI_Session_get_nth_pset_f = ompi_session_get_nth_pset_f +#pragma weak PMPI_Session_get_nth_pset_f08 = ompi_session_get_nth_pset_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_NTH_PSET, + pmpi_session_get_nth_pset, + pmpi_session_get_nth_pset_, + pmpi_session_get_nth_pset__, + pmpi_session_get_nth_pset_f, + (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_GET_NTH_PSET = ompi_session_get_nth_pset_f +#pragma weak mpi_session_get_nth_pset = ompi_session_get_nth_pset_f +#pragma weak mpi_session_get_nth_pset_ = ompi_session_get_nth_pset_f +#pragma weak mpi_session_get_nth_pset__ = ompi_session_get_nth_pset_f + +#pragma weak MPI_Session_get_nth_pset_f = ompi_session_get_nth_pset_f +#pragma weak MPI_Session_get_nth_pset_f08 = ompi_session_get_nth_pset_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_NTH_PSET, + mpi_session_get_nth_pset, + mpi_session_get_nth_pset_, + mpi_session_get_nth_pset__, + ompi_session_get_nth_pset_f, + (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#else +#define ompi_session_get_nth_pset_f pompi_session_get_nth_pset_f +#endif +#endif + +void ompi_session_get_nth_pset_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *n, MPI_Fint *pset_len, char *pset_name, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + char c_name[MPI_MAX_OBJECT_NAME]; + + c_session = PMPI_Session_f2c(*session); + + if (0 == *pset_len) { + c_ierr = PMPI_Session_get_nth_pset(c_session, MPI_INFO_NULL, *n, + OMPI_SINGLE_NAME_CONVERT(pset_len), + c_name); + if (MPI_SUCCESS == c_ierr) { + OMPI_SINGLE_INT_2_FINT(pset_len); + } + + } else { + c_ierr = PMPI_Session_get_nth_pset(c_session, MPI_INFO_NULL, *n, + OMPI_SINGLE_NAME_CONVERT(pset_len), + c_name); + if (MPI_SUCCESS == c_ierr) { + ompi_fortran_string_c2f(c_name, pset_name, *pset_len); + } + } + + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + +} diff --git a/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c b/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c new file mode 100644 index 00000000000..039b86b8686 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_get_num_psets_f.c @@ -0,0 +1,87 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_GET_NUM_PSETS = ompi_session_get_num_psets_f +#pragma weak pmpi_session_get_num_psets = ompi_session_get_num_psets_f +#pragma weak pmpi_session_get_num_psets_ = ompi_session_get_num_psets_f +#pragma weak pmpi_session_get_num_psets__ = ompi_session_get_num_psets_f + +#pragma weak PMPI_Session_get_num_psets_f = ompi_session_get_num_psets_f +#pragma weak PMPI_Session_get_num_psets_f08 = ompi_session_get_num_psets_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_NUM_PSETS, + pmpi_session_get_num_psets, + pmpi_session_get_num_psets_, + pmpi_session_get_num_psets__, + pmpi_session_get_num_psets_f, + (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_GET_NUM_PSETS = ompi_session_get_num_psets_f +#pragma weak mpi_session_get_num_psets = ompi_session_get_num_psets_f +#pragma weak mpi_session_get_num_psets_ = ompi_session_get_num_psets_f +#pragma weak mpi_session_get_num_psets__ = ompi_session_get_num_psets_f + +#pragma weak MPI_Session_get_num_psets_f = ompi_session_get_num_psets_f +#pragma weak MPI_Session_get_num_psets_f08 = ompi_session_get_num_psets_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_NUM_PSETS, + mpi_session_get_num_psets, + mpi_session_get_num_psets_, + mpi_session_get_num_psets__, + ompi_session_get_num_psets_f, + (MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr), + (session, npset_names, ierr) ) +#else +#define ompi_session_get_num_psets_f pompi_session_get_num_psets_f +#endif +#endif + +void ompi_session_get_num_psets_f(MPI_Fint *session, MPI_Fint *info, MPI_Fint *npset_names, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + OMPI_SINGLE_NAME_DECL(npset_names); + + c_session = PMPI_Session_f2c(*session); + + c_ierr = PMPI_Session_get_num_psets(c_session, MPI_INFO_NULL, OMPI_SINGLE_NAME_CONVERT(npset_names)); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + OMPI_SINGLE_INT_2_FINT(npset_names); + } +} diff --git a/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c b/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c new file mode 100644 index 00000000000..a8b7b7e3052 --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_get_pset_info_f.c @@ -0,0 +1,104 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" +#include "ompi/mpi/fortran/base/fortran_base_strings.h" +#include "ompi/constants.h" +#include "ompi/instance/instance.h" + + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_GET_PSET_INFO = ompi_session_get_pset_info_f +#pragma weak pmpi_session_get_pset_info = ompi_session_get_pset_info_f +#pragma weak pmpi_session_get_pset_info_ = ompi_session_get_pset_info_f +#pragma weak pmpi_session_get_pset_info__ = ompi_session_get_pset_info_f + +#pragma weak PMPI_Session_get_pset_info_f = ompi_session_get_pset_info_f +#pragma weak PMPI_Session_get_pset_info_f08 = ompi_session_get_pset_info_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_GET_PSET_INFO, + pmpi_session_get_pset_info, + pmpi_session_get_pset_info_, + pmpi_session_get_pset_info__, + pmpi_session_get_pset_info_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len), + (session, pset_name, info, ierr, name_len) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_GET_PSET_INFO = ompi_session_get_pset_info_f +#pragma weak mpi_session_get_pset_info = ompi_session_get_pset_info_f +#pragma weak mpi_session_get_pset_info_ = ompi_session_get_pset_info_f +#pragma weak mpi_session_get_pset_info__ = ompi_session_get_pset_info_f + +#pragma weak MPI_Session_get_pset_info_f = ompi_session_get_pset_info_f +#pragma weak MPI_Session_get_pset_info_f08 = ompi_session_get_pset_info_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_GET_PSET_INFO, + mpi_session_get_pset_info, + mpi_session_get_pset_info_, + mpi_session_get_pset_info__, + ompi_session_get_pset_info_f, + (MPI_Fint *session, char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len), + (session, pset_name, info, ierr, name_len) ) +#else +#define ompi_session_get_pset_info_f pompi_session_get_pset_info_f +#endif +#endif + +void ompi_session_get_pset_info_f(MPI_Fint *session,char *pset_name, MPI_Fint *info, MPI_Fint *ierr, int name_len) +{ + int c_ierr, ret; + MPI_Session c_session; + char *c_name; + MPI_Info c_info; + + c_session = PMPI_Session_f2c(*session); + + /* Convert the fortran string */ + + if (OMPI_SUCCESS != (ret = ompi_fortran_string_f2c(pset_name, name_len, + &c_name))) { + c_ierr = OMPI_ERRHANDLER_INVOKE((ompi_instance_t *)c_session, ret, + "MPI_SESSION_GET_PSET_INFO"); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + return; + } + + c_ierr = PMPI_Session_get_pset_info(c_session, c_name, &c_info); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *info = PMPI_Info_c2f(c_info); + } +} + + diff --git a/ompi/mpi/fortran/mpif-h/session_init_f.c b/ompi/mpi/fortran/mpif-h/session_init_f.c new file mode 100644 index 00000000000..b36a324f09d --- /dev/null +++ b/ompi/mpi/fortran/mpif-h/session_init_f.c @@ -0,0 +1,89 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Triad National Security, LLC. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/fortran/mpif-h/bindings.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak PMPI_SESSION_INIT = ompi_session_init_f +#pragma weak pmpi_session_init = ompi_session_init_f +#pragma weak pmpi_session_init_ = ompi_session_init_f +#pragma weak pmpi_session_init__ = ompi_session_init_f + +#pragma weak PMPI_Session_init_f = ompi_session_init_f +#pragma weak PMPI_Session_init_f08 = ompi_session_init_f +#else +OMPI_GENERATE_F77_BINDINGS (PMPI_SESSION_INIT, + pmpi_session_init, + pmpi_session_init_, + pmpi_session_init__, + pompi_session_init_f, + (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr), + (info, errhandler, session, ierr) ) +#endif +#endif + +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_SESSION_INIT = ompi_session_init_f +#pragma weak mpi_session_init = ompi_session_init_f +#pragma weak mpi_session_init_ = ompi_session_init_f +#pragma weak mpi_session_init__ = ompi_session_init_f + +#pragma weak MPI_Session_init_f = ompi_session_init_f +#pragma weak MPI_Session_init_f08 = ompi_session_init_f +#else +#if ! OMPI_BUILD_MPI_PROFILING +OMPI_GENERATE_F77_BINDINGS (MPI_SESSION_INIT, + mpi_session_init, + mpi_session_init_, + mpi_session_init__, + ompi_session_init_f, + (MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr), + (info, errhandler, session, ierr) ) +#else +#define ompi_session_init_f pompi_session_init_f +#endif +#endif + +void ompi_session_init_f(MPI_Fint *info, MPI_Fint *errhandler, MPI_Fint *session, MPI_Fint *ierr) +{ + int c_ierr; + MPI_Session c_session; + MPI_Info c_info; + MPI_Errhandler c_errhandler; + + c_info = PMPI_Info_f2c(*info); + c_errhandler = PMPI_Errhandler_f2c(*errhandler); + + c_ierr = PMPI_Session_init(c_info, c_errhandler, &c_session); + if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr); + + if (MPI_SUCCESS == c_ierr) { + *session = PMPI_Session_c2f(c_session); + } +} diff --git a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c index aae4adc3bd7..c46bdcb9bc3 100644 --- a/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c +++ b/ompi/mpi/fortran/mpif-h/win_create_errhandler_f.c @@ -73,8 +73,9 @@ static const char FUNC_NAME[] = "MPI_WIN_CREATE_ERRHANDLER"; void ompi_win_create_errhandler_f(ompi_errhandler_fortran_handler_fn_t* function, MPI_Fint *errhandler, MPI_Fint *ierr) { - MPI_Errhandler c_errhandler = - ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, + MPI_Errhandler c_errhandler; + + c_errhandler = ompi_errhandler_create(OMPI_ERRHANDLER_TYPE_WIN, (ompi_errhandler_generic_handler_fn_t*) function, OMPI_ERRHANDLER_LANG_FORTRAN); if (MPI_ERRHANDLER_NULL != c_errhandler) { diff --git a/ompi/mpi/fortran/use-mpi-f08/Makefile.am b/ompi/mpi/fortran/use-mpi-f08/Makefile.am index 08e316c86b9..40d457ec062 100644 --- a/ompi/mpi/fortran/use-mpi-f08/Makefile.am +++ b/ompi/mpi/fortran/use-mpi-f08/Makefile.am @@ -146,6 +146,7 @@ mpi_api_files = \ comm_connect_f08.F90 \ comm_create_errhandler_f08.F90 \ comm_create_f08.F90 \ + comm_create_from_group_f08.F90 \ comm_create_group_f08.F90 \ comm_create_keyval_f08.F90 \ comm_delete_attr_f08.F90 \ @@ -272,6 +273,7 @@ mpi_api_files = \ group_difference_f08.F90 \ group_excl_f08.F90 \ group_free_f08.F90 \ + group_from_session_pset_f08.F90 \ group_incl_f08.F90 \ group_intersection_f08.F90 \ group_range_excl_f08.F90 \ @@ -312,6 +314,7 @@ mpi_api_files = \ initialized_f08.F90 \ init_thread_f08.F90 \ intercomm_create_f08.F90 \ + intercomm_create_from_groups_f08.F90 \ intercomm_merge_f08.F90 \ iprobe_f08.F90 \ irecv_f08.F90 \ @@ -374,6 +377,12 @@ mpi_api_files = \ send_init_f08.F90 \ sendrecv_f08.F90 \ sendrecv_replace_f08.F90 \ + session_get_info_f08.F90 \ + session_get_nth_pset_f08.F90 \ + session_get_num_psets_f08.F90 \ + session_get_pset_info_f08.F90 \ + session_init_f08.F90 \ + session_finalize_f08.F90 \ ssend_f08.F90 \ ssend_init_f08.F90 \ startall_f08.F90 \ diff --git a/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h b/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h index 248fda71852..fef5d91ae22 100644 --- a/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h +++ b/ompi/mpi/fortran/use-mpi-f08/bindings/mpi-f-interfaces-bind.h @@ -9,6 +9,8 @@ ! Copyright (c) 2012 Inria. All rights reserved. ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran @@ -1320,6 +1322,19 @@ subroutine ompi_comm_create_f(comm,group,newcomm,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_comm_create_f +subroutine ompi_comm_create_from_group_f(group, stringtag, info, errhandler, newcomm, ierror, name_len) & + BIND(C, name="ompi_comm_create_from_group_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: group + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newcomm + integer, intent(out) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_comm_create_from_group_f + subroutine ompi_comm_create_group_f(comm, group, tag, newcomm, ierror) & BIND(C, name="ompi_comm_create_group_f") implicit none @@ -1401,6 +1416,19 @@ subroutine ompi_comm_get_name_f(comm,comm_name,resultlen,ierror,comm_name_len) & INTEGER, VALUE, INTENT(IN) :: comm_name_len end subroutine ompi_comm_get_name_f +subroutine ompi_comm_from_group_f(group, stringtag, info, errhandler, newcomm, ierror, name_len) & + BIND(C, name="ompi_comm_from_group_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + INTEGER, INTENT(IN) :: group + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + INTEGER, INTENT(IN) :: info + INTEGER, INTENT(IN) :: errhandler + INTEGER, INTENT(OUT) :: newcomm + INTEGER, INTENT(OUT) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_comm_from_group_f + subroutine ompi_comm_group_f(comm,group,ierror) & BIND(C, name="ompi_comm_group_f") implicit none @@ -1522,6 +1550,17 @@ subroutine ompi_group_free_f(group,ierror) & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_group_free_f +subroutine ompi_group_from_session_pset_f(session, pset_name, newgroup, ierror, name_len) & + BIND(C, name="ompi_group_from_session_pset_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + INTEGER, INTENT(IN) :: session + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: pset_name + INTEGER, INTENT(OUT) :: newgroup + integer, intent(out) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_group_from_session_pset_f + subroutine ompi_group_incl_f(group,n,ranks,newgroup,ierror) & BIND(C, name="ompi_group_incl_f") implicit none @@ -1605,6 +1644,21 @@ subroutine ompi_intercomm_create_f(local_comm,local_leader,peer_comm, & INTEGER, INTENT(OUT) :: ierror end subroutine ompi_intercomm_create_f +subroutine ompi_intercomm_create_from_groups_f(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror, name_len) & + BIND(C, name="ompi_intercomm_create_from_groups_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + INTEGER, INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN) :: local_leader, remote_leader + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: stringtag + INTEGER, INTENT(IN) :: info, errhandler + INTEGER, INTENT(OUT) :: newintercomm + INTEGER, INTENT(OUT) :: ierror + INTEGER, VALUE, INTENT(IN) :: name_len +end subroutine ompi_intercomm_create_from_groups_f + subroutine ompi_type_create_keyval_f(type_copy_attr_fn,type_delete_attr_fn, & type_keyval,extra_state,ierror) & BIND(C, name="ompi_type_create_keyval_f") @@ -3565,4 +3619,60 @@ subroutine ompi_ineighbor_alltoallw_f(sendbuf,sendcounts,sdispls,sendtypes,recvb INTEGER, INTENT(OUT) :: ierror end subroutine ompi_ineighbor_alltoallw_f +subroutine ompi_session_get_info_f(session, info, ierror) & + BIND(C, name="ompi_session_get_info_f") + implicit none + integer, intent(in) :: session + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine ompi_session_get_info_f + +subroutine ompi_session_get_nth_pset_f(session, info, n, pset_len, pset_name, ierror) & + BIND(C, name="ompi_session_get_nth_pset_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(in) :: n + integer, intent(inout) :: pset_len + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(OUT) :: pset_name + integer, intent(out) :: ierror +end subroutine ompi_session_get_nth_pset_f + +subroutine ompi_session_get_num_psets_f(session, info, npset_names, ierror) & + BIND(C, name="ompi_session_get_num_psets_f") + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(out) :: npset_names + integer, intent(out) :: ierror +end subroutine ompi_session_get_num_psets_f + +subroutine ompi_session_get_pset_info_f(session, pset_name, info, ierror, name_len) & + BIND(C, name="ompi_session_get_pset_info_f") + use, intrinsic :: ISO_C_BINDING, only : C_CHAR + implicit none + integer, intent(in) :: session + CHARACTER(KIND=C_CHAR), DIMENSION(*), INTENT(IN) :: pset_name + INTEGER, VALUE, INTENT(IN) :: name_len + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine ompi_session_get_pset_info_f + +subroutine ompi_session_init_f(info, errhandler, session, ierror) & + BIND(C, name="ompi_session_init_f") + implicit none + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine ompi_session_init_f + +subroutine ompi_session_finalize_f(session, ierror) & + BIND(C, name="ompi_session_finalize_f") + implicit none + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine ompi_session_finalize_f + end interface diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 new file mode 100644 index 00000000000..8f1befe0d8c --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/comm_create_from_group_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_comm_create_from_group_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_comm_create_from_group_f(group%MPI_VAL, stringtag, info%MPI_VAL, errhandler%MPI_VAL, & + newcomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Comm_create_from_group_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 index 3b7dad61b75..397e1c9aed9 100644 --- a/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/comm_get_name_f08.F90 @@ -19,7 +19,8 @@ subroutine MPI_Comm_get_name_f08(comm,comm_name,resultlen,ierror) INTEGER, OPTIONAL, INTENT(OUT) :: ierror integer :: c_ierror - call ompi_comm_get_name_f(comm%MPI_VAL,comm_name,resultlen,c_ierror,len(comm_name)) + call ompi_comm_get_name_f(comm%MPI_VAL,comm_name,resultlen,c_ierror, & + len(comm_name)) if (present(ierror)) ierror = c_ierror end subroutine MPI_Comm_get_name_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 new file mode 100644 index 00000000000..77cfed44a23 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/group_from_session_pset_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" +#include "mpi-f08-rename.h" + +subroutine MPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + use :: ompi_mpifh_bindings, only : ompi_group_from_session_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_group_from_session_pset_f(session%MPI_VAL, pset_name, newgroup%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Group_from_session_pset_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 new file mode 100644 index 00000000000..9b92a9db9ab --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/intercomm_create_from_groups_f08.F90 @@ -0,0 +1,35 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Intercomm_create_from_groups_f08(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_intercomm_create_from_groups_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newintercomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_intercomm_create_from_groups_f(local_group%MPI_VAL, local_leader, & + remote_group%MPI_VAL, & + remote_leader, stringtag, info%MPI_VAL, & + errhandler%MPI_VAL, & + newintercomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Intercomm_create_from_groups_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 index 2bbd07eb5f9..71cefb1f128 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.F90 @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in index db1e96f22bd..7fb7e869fed 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-interfaces.h.in @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2021 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran @@ -388,6 +390,71 @@ subroutine MPI_Send_init_f08(buf,count,datatype,dest,tag,comm,request,ierror) end subroutine MPI_Send_init_f08 end interface MPI_Send_init +interface MPI_Session_get_info +subroutine MPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_info_f08 +end interface MPI_Session_get_info + +interface MPI_Session_get_nth_pset +subroutine MPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_pset_f08 +end interface MPI_Session_get_nth_pset + +interface MPI_Session_get_num_psets +subroutine MPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_num_psets_f08 +end interface MPI_Session_get_num_psets + +interface MPI_Session_get_pset_info +subroutine MPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_pset_info_f08 +end interface MPI_Session_get_pset_info + +interface MPI_Session_init +subroutine MPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_init_f08 +end interface MPI_Session_init + +interface MPI_Session_finalize +subroutine MPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + implicit none + TYPE(MPI_Session), INTENT(INOUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Session_finalize_f08 +end interface MPI_Session_finalize + interface MPI_Ssend subroutine MPI_Ssend_f08(buf,count,datatype,dest,tag,comm,ierror) use :: mpi_f08_types, only : MPI_Datatype, MPI_Comm @@ -1506,6 +1573,20 @@ subroutine MPI_Comm_create_f08(comm,group,newcomm,ierror) end subroutine MPI_Comm_create_f08 end interface MPI_Comm_create +interface MPI_Comm_create_from_group +subroutine MPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Info, MPI_Errhandler + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + +end subroutine MPI_Comm_create_from_group_f08 +end interface MPI_Comm_create_from_group + interface MPI_Comm_create_group subroutine MPI_Comm_create_group_f08(comm,group,tag,newcomm,ierror) use :: mpi_f08_types, only : MPI_Comm, MPI_Group @@ -1767,6 +1848,17 @@ subroutine MPI_Group_free_f08(group,ierror) end subroutine MPI_Group_free_f08 end interface MPI_Group_free +interface MPI_Group_from_session_pset +subroutine MPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror +end subroutine MPI_Group_from_session_pset_f08 +end interface MPI_Group_from_session_pset + interface MPI_Group_incl subroutine MPI_Group_incl_f08(group,n,ranks,newgroup,ierror) use :: mpi_f08_types, only : MPI_Group diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h index 29f853af3ba..4ab097b22b9 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-rename.h @@ -63,6 +63,20 @@ #define MPI_Sendrecv_replace_f08 PMPI_Sendrecv_replace_f08 #define MPI_Send_init PMPI_Send_init #define MPI_Send_init_f08 PMPI_Send_init_f08 +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_info_f08 PMPI_Session_get_info_f08 +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_nth_pset_f08 PMPI_Session_get_nth_pset_f08 +#define MPI_Session_get_nth_psetlen PMPI_Session_get_nth_psetlen +#define MPI_Session_get_nth_psetlen_f08 PMPI_Session_get_nth_psetlen_f08 +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#define MPI_Session_get_num_psets_f08 PMPI_Session_get_num_psets_f08 +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_get_pset_info_f08 PMPI_Session_get_pset_info_f08 +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_init_f08 PMPI_Session_init_f08 +#define MPI_Session_finalize PMPI_Session_finalize +#define MPI_Session_finalize_f08 PMPI_Session_finalize_f08 #define MPI_Ssend PMPI_Ssend #define MPI_Ssend_f08 PMPI_Ssend_f08 #define MPI_Ssend_init PMPI_Ssend_init @@ -233,6 +247,8 @@ #define MPI_Comm_create_f08 PMPI_Comm_create_f08 #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_group_f08 PMPI_Comm_create_group_f08 +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group +#define MPI_Comm_create_from_group_f08 PMPI_Comm_create_from_group_f08 #define MPI_Comm_create_keyval PMPI_Comm_create_keyval #define MPI_Comm_create_keyval_f08 PMPI_Comm_create_keyval_f08 #define MPI_Comm_delete_attr PMPI_Comm_delete_attr @@ -279,6 +295,8 @@ #define MPI_Group_difference_f08 PMPI_Group_difference_f08 #define MPI_Group_excl PMPI_Group_excl #define MPI_Group_excl_f08 PMPI_Group_excl_f08 +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset +#define MPI_Group_from_session_pset_f08 PMPI_Group_from_session_pset_f08 #define MPI_Group_free PMPI_Group_free #define MPI_Group_free_f08 PMPI_Group_free_f08 #define MPI_Group_incl PMPI_Group_incl @@ -299,6 +317,8 @@ #define MPI_Group_union_f08 PMPI_Group_union_f08 #define MPI_Intercomm_create PMPI_Intercomm_create #define MPI_Intercomm_create_f08 PMPI_Intercomm_create_f08 +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups +#define MPI_Intercomm_create_from_groups_f08 PMPI_Intercomm_create_from_groups_f08 #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Intercomm_merge_f08 PMPI_Intercomm_merge_f08 #define MPI_Type_create_keyval PMPI_Type_create_keyval diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 index c1abace16b3..db81f5b5ba4 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/mpi-f08-types.F90 @@ -8,6 +8,7 @@ ! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. ! Copyright (c) 2020 The University of Tennessee and The University ! of Tennessee Research Foundation. All rights +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights ! reserved. ! $COPYRIGHT$ ! @@ -71,6 +72,11 @@ module mpi_f08_types integer :: MPI_VAL end type MPI_Win + type, BIND(C) :: MPI_Session + integer :: MPI_VAL + end type MPI_Session + + ! ! Pre-defined handles ! diff --git a/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 b/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 index ddd81d17f74..1089248c42b 100644 --- a/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 +++ b/ompi/mpi/fortran/use-mpi-f08/mod/pmpi-f08-interfaces.F90 @@ -10,6 +10,8 @@ ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. ! Copyright (c) 2017-2018 FUJITSU LIMITED. All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! This file provides the interface specifications for the MPI Fortran diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 new file mode 100644 index 00000000000..84098a44dc2 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pcomm_create_from_group_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Comm_create_from_group_f08(group, stringtag, info, errhandler, newcomm, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group, MPI_Errhandler, MPI_Info, MPI_Comm + use :: ompi_mpifh_bindings, only : ompi_comm_create_from_group_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newcomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_comm_create_from_group_f(group%MPI_VAL, stringtag, info%MPI_VAL, errhandler%MPI_VAL, & + newcomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Comm_create_from_group_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 new file mode 100644 index 00000000000..a719b361302 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pgroup_from_session_pset_f08.F90 @@ -0,0 +1,29 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" +#include "mpi-f08-rename.h" + +subroutine PMPI_Group_from_session_pset_f08(session, pset_name, newgroup, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Group + use :: ompi_mpifh_bindings, only : ompi_group_from_session_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Group), INTENT(OUT) :: newgroup + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_group_from_session_pset_f(session%MPI_VAL, pset_name, newgroup%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Group_from_session_pset_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 new file mode 100644 index 00000000000..668188d1adb --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/pintercomm_create_from_groups_f08.F90 @@ -0,0 +1,35 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Intercomm_create_from_groups_f08(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + use :: mpi_f08_types, only : MPI_Comm, MPI_Group, MPI_Errhandler, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_intercomm_create_from_groups_f + implicit none + TYPE(MPI_Group), INTENT(IN) :: local_group, remote_group + INTEGER, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(IN) :: errhandler + TYPE(MPI_Comm), INTENT(OUT) :: newintercomm + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_intercomm_create_from_groups_f(local_group%MPI_VAL, local_leader, & + remote_group%MPI_VAL, & + remote_leader, stringtag, info%MPI_VAL, & + errhandler%MPI_VAL, & + newintercomm%MPI_VAL, c_ierror, len(stringtag)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Intercomm_create_from_groups_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 new file mode 100644 index 00000000000..01316dd79ca --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_finalize_f08.F90 @@ -0,0 +1,24 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + use :: ompi_mpifh_bindings, only : ompi_session_finalize_f + implicit none + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_finalize_f(session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_finalize_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 new file mode 100644 index 00000000000..bfe72d516e6 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_info_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_info_f(session%MPI_VAL, info%MPI_VAL, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 new file mode 100644 index 00000000000..249a25ddc1b --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_nth_pset_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2020 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_INFO_NULL + use :: ompi_mpifh_bindings, only : ompi_session_get_nth_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(IN) :: n + INTEGER, OPTIONAL, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_nth_pset_f(session%MPI_VAL, MPI_INFO_NULL%MPI_VAL, n, pset_len, pset_name, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_nth_pset_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 new file mode 100644 index 00000000000..01fd0dc9c1b --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_num_psets_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_INFO_NULL + use :: ompi_mpifh_bindings, only : ompi_session_get_num_psets_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_num_psets_f(session%MPI_VAL, MPI_INFO_NULL%MPI_VAL, npset_names, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_num_psets_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 new file mode 100644 index 00000000000..0271b976f3a --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_get_pset_info_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_pset_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_pset_info_f(session%MPI_VAL, pset_name, info%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_get_pset_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 new file mode 100644 index 00000000000..555aa10e9dd --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/profile/psession_init_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine PMPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + use :: ompi_mpifh_bindings, only : ompi_session_init_f + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(OUT) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_init_f(info%MPI_VAL,errhandler%MPI_VAL,session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine PMPI_Session_init_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 new file mode 100644 index 00000000000..55bf9e4e479 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_finalize_f08.F90 @@ -0,0 +1,24 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_finalize_f08(session,ierror) + use :: mpi_f08_types, only : MPI_Session + use :: ompi_mpifh_bindings, only : ompi_session_finalize_f + implicit none + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_finalize_f(session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_finalize_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 new file mode 100644 index 00000000000..c0e1eb16577 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_info_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_info_f08(session, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_info_f(session%MPI_VAL, info%MPI_VAL, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 new file mode 100644 index 00000000000..fa41b9f2ac3 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_nth_pset_f08.F90 @@ -0,0 +1,27 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2020 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_nth_pset_f08(session, info, n, pset_len, pset_name, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_INFO_NULL + use :: ompi_mpifh_bindings, only : ompi_session_get_nth_pset_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(IN) :: n + INTEGER, OPTIONAL, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_nth_pset_f(session%MPI_VAL, MPI_INFO_NULL%MPI_VAL, n, pset_len, pset_name, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_nth_pset_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 new file mode 100644 index 00000000000..b5d114efea8 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_num_psets_f08.F90 @@ -0,0 +1,25 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_num_psets_f08(session, info, npset_names, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_INFO_NULL + use :: ompi_mpifh_bindings, only : ompi_session_get_num_psets_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + TYPE(MPI_Info), INTENT(IN) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: npset_names + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_num_psets_f(session%MPI_VAL, MPI_INFO_NULL%MPI_VAL, npset_names, c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_num_psets_f08 diff --git a/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 new file mode 100644 index 00000000000..51383469b1c --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_get_pset_info_f08.F90 @@ -0,0 +1,26 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +subroutine MPI_Session_get_pset_info_f08(session, pset_name, info, ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info + use :: ompi_mpifh_bindings, only : ompi_session_get_pset_info_f + implicit none + TYPE(MPI_Session), INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + TYPE(MPI_Info), INTENT(OUT) :: info + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_get_pset_info_f(session%MPI_VAL, pset_name, info%MPI_VAL, c_ierror, len(pset_name)) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_get_pset_info_f08 + diff --git a/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 b/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 new file mode 100644 index 00000000000..b9eee1338b1 --- /dev/null +++ b/ompi/mpi/fortran/use-mpi-f08/session_init_f08.F90 @@ -0,0 +1,30 @@ +! -*- f90 -*- +! +! Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. +! Copyright (c) 2009-2013 Los Alamos National Security, LLC. +! All rights reserved. +! Copyright (c) 2018 Research Organization for Information Science +! and Technology (RIST). All rights reserved. +! Copyright (c) 2019-2021 Triad National Security, LLC. All rights +! reserved. +! $COPYRIGHT$ + +#include "ompi/mpi/fortran/configure-fortran-output.h" + +#include "mpi-f08-rename.h" + +subroutine MPI_Session_init_f08(info,errhandler,session,ierror) + use :: mpi_f08_types, only : MPI_Session, MPI_Info, MPI_Errhandler + use :: ompi_mpifh_bindings, only : ompi_session_init_f + implicit none + TYPE(MPI_Info), INTENT(IN) :: info + TYPE(MPI_Errhandler), INTENT(OUT) :: errhandler + TYPE(MPI_Session), INTENT(OUT) :: session + INTEGER, OPTIONAL, INTENT(OUT) :: ierror + integer :: c_ierror + + call ompi_session_init_f(info%MPI_VAL,errhandler%MPI_VAL,session%MPI_VAL,c_ierror) + if (present(ierror)) ierror = c_ierror + +end subroutine MPI_Session_init_f08 + diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in index 1e7b4861f38..3dcb34d88fe 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/mpi-ignore-tkr-interfaces.h.in @@ -11,6 +11,8 @@ ! reserved. ! Copyright (c) 2015-2020 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -535,6 +537,18 @@ end subroutine MPI_Comm_create_errhandler end interface +interface MPI_Comm_create_from_group + +subroutine MPI_Comm_create_from_group(group, stringtag, info, errhandler, newcomm, ierror) + integer, INTENT(IN) :: group + CHARACTER(LEN=*), INTENT(IN) :: stringtag + integer, INTENT(IN) :: info + integer, INTENT(IN) :: errhandler + integer, INTENT(OUT) :: newcomm + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Comm_create_from_group + +end interface interface MPI_Comm_create_group @@ -1417,6 +1431,18 @@ end subroutine MPI_Group_free end interface +interface MPI_Group_from_session_pset + +subroutine MPI_Group_from_session_pset(session, pset_name, newgroup, ierror) + implicit none + integer, INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + integer, INTENT(OUT) :: newgroup + INTEGER, INTENT(OUT) :: ierror + integer :: c_ierror +end subroutine MPI_Group_from_session_pset + +end interface interface MPI_Group_incl @@ -2037,6 +2063,22 @@ end subroutine MPI_Intercomm_create end interface +interface MPI_Intercomm_create_from_groups + +subroutine MPI_Intercomm_create_from_groups(local_group, local_leader, remote_group, & + remote_leader, stringtag, info, errhandler, & + newintercomm, ierror) + implicit none + integer, INTENT(IN) :: local_group, remote_group + integer, INTENT(IN):: local_leader, remote_leader + CHARACTER(LEN=*), INTENT(IN) :: stringtag + integer, INTENT(IN) :: info + integer, INTENT(IN) :: errhandler + integer, INTENT(OUT) :: newintercomm + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Intercomm_create_from_groups + +end interface interface MPI_Intercomm_merge @@ -3090,6 +3132,61 @@ end subroutine MPI_Sendrecv_replace end interface +interface MPI_Session_get_info +subroutine MPI_Session_get_info(session, info, ierror) + integer, INTENT(IN) :: session + integer, INTENT(OUT) :: info + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_info +end interface + +interface MPI_Session_get_nth_pset +subroutine MPI_Session_get_nth_pset(session, info, n, pset_len, pset_name, ierror) + integer, INTENT(IN) :: session + integer, INTENT(IN) :: info + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(INOUT) :: pset_len + CHARACTER(LEN=*), INTENT(OUT) :: pset_name + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_pset +end interface + +interface MPI_Session_get_nth_psetlen +subroutine MPI_Session_get_nth_psetlen(session, n, pset_len, ierror) + implicit none + integer, INTENT(IN) :: session + INTEGER, INTENT(IN) :: n + INTEGER, INTENT(OUT) :: pset_len + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_nth_psetlen +end interface + +interface MPI_Session_get_pset_info +subroutine MPI_Session_get_pset_info(session, pset_name, info, ierror) + integer, INTENT(IN) :: session + CHARACTER(LEN=*), INTENT(IN) :: pset_name + integer, INTENT(OUT) :: info + INTEGER, INTENT(OUT) :: ierror +end subroutine MPI_Session_get_pset_info +end interface + +interface MPI_Session_init +subroutine MPI_Session_init(info,errhandler,session,ierror) + integer, intent(IN) :: info + integer, intent(IN) :: errhandler + integer, intent(OUT) :: session + integer, intent(OUT) :: ierror +end subroutine MPI_Session_init +end interface + +interface + +subroutine MPI_Session_finalize(session,ierror) + integer, intent(inout) :: session + integer, intent(OUT) :: ierror +end subroutine MPI_Session_finalize + +end interface interface MPI_Ssend diff --git a/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h b/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h index 157d0ad706e..b48c0c6f86d 100644 --- a/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-ignore-tkr/pmpi-ignore-tkr-interfaces.h @@ -45,6 +45,7 @@ #define MPI_Comm_connect PMPI_Comm_connect #define MPI_Comm_create PMPI_Comm_create #define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_keyval PMPI_Comm_create_keyval #define MPI_Comm_delete_attr PMPI_Comm_delete_attr @@ -111,6 +112,7 @@ #define MPI_Group_compare PMPI_Group_compare #define MPI_Group_difference PMPI_Group_difference #define MPI_Group_excl PMPI_Group_excl +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset #define MPI_Group_free PMPI_Group_free #define MPI_Group_incl PMPI_Group_incl #define MPI_Group_intersection PMPI_Group_intersection @@ -152,6 +154,7 @@ #define MPI_Init_thread PMPI_Init_thread #define MPI_Initialized PMPI_Initialized #define MPI_Intercomm_create PMPI_Intercomm_create +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Iprobe PMPI_Iprobe #define MPI_Irecv PMPI_Irecv @@ -214,6 +217,12 @@ #define MPI_Send_init PMPI_Send_init #define MPI_Sendrecv PMPI_Sendrecv #define MPI_Sendrecv_replace PMPI_Sendrecv_replace +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_nth_psetlen PMPI_Session_get_nth_psetlen +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_finalize PMPI_Session_finalize #define MPI_Ssend PMPI_Ssend #define MPI_Ssend_init PMPI_Ssend_init #define MPI_Start PMPI_Start diff --git a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h index b6a5c08e71d..a9662509222 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/mpi-f90-interfaces.h @@ -13,6 +13,8 @@ ! Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. ! Copyright (c) 2016-2018 Research Organization for Information Science ! and Technology (RIST). All rights reserved. +! Copyright (c) 2019 Triad National Security, LLC. All rights +! reserved. ! $COPYRIGHT$ ! ! Additional copyrights may follow @@ -277,6 +279,19 @@ end subroutine MPI_Comm_create end interface +interface MPI_Comm_create_from_group + +subroutine MPI_Comm_create_from_group(group, stringtag, info, errhandler, newcomm, ierror) + implicit none + integer, intent(in) :: group + character(len=*), intent(in) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newcomm + integer, intent(out) :: ierror +end subroutine MPI_Comm_create_from_group + +end interface interface MPI_Comm_create_group @@ -837,6 +852,16 @@ end subroutine MPI_Group_free end interface +interface MPI_Group_from_session_pset +subroutine MPI_Group_from_session_pset(session, pset_name, newgroup, ierror) + implicit none + integer, intent(in) :: session + character(len=*), intent(in) :: pset_name + integer, intent(out) :: newgroup + integer, intent(out) :: ierror +end subroutine MPI_Group_from_session_pset +end interface + interface MPI_Group_incl @@ -1088,6 +1113,22 @@ end subroutine MPI_Intercomm_create end interface +interface MPI_Intercomm_create_from_groups + +subroutine MPI_Intercomm_create_from_groups(local_group, local_leader, remote_group, remote_leader, & + stringtag, info, errhandler, newintercomm, ierror) + implicit none + integer, intent(in) :: local_group, remote_group + integer, intent(in):: local_leader, remote_leader + character(len=*), intent(in) :: stringtag + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: newintercomm + integer, intent(out) :: ierror +end subroutine MPI_Intercomm_create_from_groups + +end interface + interface MPI_Intercomm_merge @@ -1259,6 +1300,70 @@ end subroutine MPI_Request_get_status end interface +interface MPI_Session_get_info +subroutine MPI_Session_get_info(session, info, ierror) + implicit none + integer, intent(in) :: session + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine MPI_Session_get_info +end interface + +interface +subroutine MPI_Session_get_nth_pset(session, info, n, pset_len, pset_name, ierror) + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(in) :: n + integer, intent(inout) :: pset_len + character(len=*), intent(out) :: pset_name + integer, intent(out) :: ierror +end subroutine MPI_Session_get_nth_pset +end interface + + +interface +subroutine MPI_Session_get_num_psets(session, info, npset_names, ierror) + implicit none + integer, intent(in) :: session + integer, intent(in) :: info + integer, intent(out) :: npset_names + integer, intent(out) :: ierror +end subroutine MPI_Session_get_num_psets +end interface + +interface +subroutine MPI_Session_get_pset_info(session, pset_name, info, ierror) + implicit none + integer, intent(in) :: session + character(len=*), intent(in) :: pset_name + integer, intent(out) :: info + integer, intent(out) :: ierror +end subroutine MPI_Session_get_pset_info +end interface + + +interface MPI_Session_init + +subroutine MPI_Session_init(info,errhandler,session,ierror) + implicit none + integer, intent(in) :: info + integer, intent(in) :: errhandler + integer, intent(out) :: session + integer, intent(out) :: ierror +end subroutine MPI_Session_init + +end interface MPI_Session_init + +interface MPI_Session_finalize + +subroutine MPI_Session_finalize(session,ierror) + implicit none + integer, intent(inout) :: session + integer, intent(out) :: ierror +end subroutine MPI_Session_finalize + +end interface MPI_Session_finalize interface MPI_Start diff --git a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h index e8e5d6d9582..642b6341648 100644 --- a/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h +++ b/ompi/mpi/fortran/use-mpi-tkr/pmpi-f90-interfaces.h @@ -7,6 +7,8 @@ ! Additional copyrights may follow ! ! $HEADER$ +! + #define MPI_Wtick PMPI_Wtick #define MPI_Wtime PMPI_Wtime @@ -30,6 +32,7 @@ #define MPI_Comm_call_errhandler PMPI_Comm_call_errhandler #define MPI_Comm_compare PMPI_Comm_compare #define MPI_Comm_create PMPI_Comm_create +#define MPI_Comm_create_from_group PMPI_Comm_create_from_group #define MPI_Comm_create_group PMPI_Comm_create_group #define MPI_Comm_create_errhandler PMPI_Comm_create_errhandler #define MPI_Comm_create_keyval PMPI_Comm_create_keyval @@ -77,6 +80,7 @@ #define MPI_Group_difference PMPI_Group_difference #define MPI_Group_excl PMPI_Group_excl #define MPI_Group_free PMPI_Group_free +#define MPI_Group_from_session_pset PMPI_Group_from_session_pset #define MPI_Group_incl PMPI_Group_incl #define MPI_Group_intersection PMPI_Group_intersection #define MPI_Group_range_excl PMPI_Group_range_excl @@ -98,6 +102,7 @@ #define MPI_Init_thread PMPI_Init_thread #define MPI_Initialized PMPI_Initialized #define MPI_Intercomm_create PMPI_Intercomm_create +#define MPI_Intercomm_create_from_groups PMPI_Intercomm_create_from_groups #define MPI_Intercomm_merge PMPI_Intercomm_merge #define MPI_Iprobe PMPI_Iprobe #define MPI_Is_thread_main PMPI_Is_thread_main @@ -112,6 +117,12 @@ #define MPI_Register_datarep PMPI_Register_datarep #define MPI_Request_free PMPI_Request_free #define MPI_Request_get_status PMPI_Request_get_status +#define MPI_Session_get_info PMPI_Session_get_info +#define MPI_Session_get_nth_pset PMPI_Session_get_nth_pset +#define MPI_Session_get_num_psets PMPI_Session_get_num_psets +#define MPI_Session_get_pset_info PMPI_Session_get_pset_info +#define MPI_Session_init PMPI_Session_init +#define MPI_Session_finalize PMPI_Session_finalize #define MPI_Start PMPI_Start #define MPI_Startall PMPI_Startall #define MPI_Status_f2f08 PMPI_Status_f2f08 diff --git a/ompi/mpiext/mpiext.c b/ompi/mpiext/mpiext.c index 3a0012f125a..1c59b520c8f 100644 --- a/ompi/mpiext/mpiext.c +++ b/ompi/mpiext/mpiext.c @@ -1,3 +1,14 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + #include "ompi_config.h" #include @@ -5,17 +16,16 @@ #include "ompi/constants.h" #include "ompi/mpiext/mpiext.h" #include "ompi/mpiext/static-components.h" +#include "ompi/instance/instance.h" - -int -ompi_mpiext_init(void) +static int ompi_mpiext_fini (void) { const ompi_mpiext_component_t **tmp = ompi_mpiext_components; int ret; while (NULL != (*tmp)) { - if (NULL != (*tmp)->init) { - ret = (*tmp)->init(); + if (NULL != (*tmp)->fini) { + ret = (*tmp)->fini(); if (OMPI_SUCCESS != ret) return ret; } tmp++; @@ -24,20 +34,21 @@ ompi_mpiext_init(void) return OMPI_SUCCESS; } - int -ompi_mpiext_fini(void) +ompi_mpiext_init(void) { const ompi_mpiext_component_t **tmp = ompi_mpiext_components; int ret; while (NULL != (*tmp)) { - if (NULL != (*tmp)->fini) { - ret = (*tmp)->fini(); + if (NULL != (*tmp)->init) { + ret = (*tmp)->init(); if (OMPI_SUCCESS != ret) return ret; } tmp++; } + ompi_mpi_instance_append_finalize (ompi_mpiext_fini); + return OMPI_SUCCESS; } diff --git a/ompi/mpiext/mpiext.h b/ompi/mpiext/mpiext.h index 6a93563c791..e5488cdc357 100644 --- a/ompi/mpiext/mpiext.h +++ b/ompi/mpiext/mpiext.h @@ -1,4 +1,11 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * * $HEADER$ */ #if defined(c_plusplus) || defined(__cplusplus) @@ -8,7 +15,6 @@ extern "C" { #include "ompi_config.h" OMPI_DECLSPEC int ompi_mpiext_init(void); -OMPI_DECLSPEC int ompi_mpiext_fini(void); typedef int (*ompi_mpiext_init_fn_t)(void); typedef int (*ompi_mpiext_fini_fn_t)(void); diff --git a/ompi/op/op.c b/ompi/op/op.c index 1b547764c16..87634f42f72 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -17,6 +17,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -33,6 +35,7 @@ #include "ompi/op/op.h" #include "ompi/mca/op/base/base.h" #include "ompi/datatype/ompi_datatype_internal.h" +#include "ompi/instance/instance.h" /* @@ -47,6 +50,7 @@ opal_pointer_array_t *ompi_op_f_to_c_table = {0}; static int add_intrinsic(ompi_op_t *op, int fort_handle, int flags, const char *name); +static int ompi_op_finalize (void); /* * Class information @@ -300,15 +304,23 @@ int ompi_op_init(void) ompi_mpi_op_replace.op.op_type = OMPI_OP_REPLACE; } + ompi_mpi_instance_append_finalize (ompi_op_finalize); + /* All done */ return OMPI_SUCCESS; } -/* - * Clean up the op resources + +/** + * Finalize the op interface. + * + * @returns OMPI_SUCCESS Always + * + * Invokes on instance teardown if ompi_op_init() was called; tears down the op interface, and + * destroys the F2C translation table. */ -int ompi_op_finalize(void) +static int ompi_op_finalize (void) { /* clean up the intrinsic ops */ OBJ_DESTRUCT(&ompi_mpi_op_no_op); diff --git a/ompi/op/op.h b/ompi/op/op.h index 2cddcc97bb2..81643c87baa 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -326,16 +326,6 @@ extern struct opal_pointer_array_t *ompi_op_f_to_c_table; */ int ompi_op_init(void); -/** - * Finalize the op interface. - * - * @returns OMPI_SUCCESS Always - * - * Invokes from ompi_mpi_finalize(); tears down the op interface, and - * destroys the F2C translation table. - */ -int ompi_op_finalize(void); - /** * Create a ompi_op_t with a user-defined callback (vs. creating an * intrinsic ompi_op_t). diff --git a/ompi/request/request.c b/ompi/request/request.c index abf33449d89..1ff70f9d45f 100644 --- a/ompi/request/request.c +++ b/ompi/request/request.c @@ -18,6 +18,8 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -104,6 +106,16 @@ OBJ_CLASS_INSTANCE( ompi_request_destruct); +static int ompi_request_finalize (void) +{ + OMPI_REQUEST_FINI( &ompi_request_null.request ); + OBJ_DESTRUCT( &ompi_request_null.request ); + OMPI_REQUEST_FINI( &ompi_request_empty ); + OBJ_DESTRUCT( &ompi_request_empty ); + OBJ_DESTRUCT( &ompi_request_f_to_c_table ); + return OMPI_SUCCESS; +} + int ompi_request_init(void) { @@ -173,21 +185,11 @@ int ompi_request_init(void) ompi_status_empty._ucount = 0; ompi_status_empty._cancelled = 0; - return OMPI_SUCCESS; -} - + ompi_mpi_instance_append_finalize (ompi_request_finalize); -int ompi_request_finalize(void) -{ - OMPI_REQUEST_FINI( &ompi_request_null.request ); - OBJ_DESTRUCT( &ompi_request_null.request ); - OMPI_REQUEST_FINI( &ompi_request_empty ); - OBJ_DESTRUCT( &ompi_request_empty ); - OBJ_DESTRUCT( &ompi_request_f_to_c_table ); return OMPI_SUCCESS; } - int ompi_request_persistent_noop_create(ompi_request_t** request) { ompi_request_t *req; diff --git a/ompi/request/request.h b/ompi/request/request.h index eb3f829b45b..7b342736894 100644 --- a/ompi/request/request.h +++ b/ompi/request/request.h @@ -16,6 +16,8 @@ * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -365,11 +367,6 @@ OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions; */ int ompi_request_init(void); -/** - * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE. - */ -int ompi_request_finalize(void); - /** * Create a persistent request that does nothing (e.g., to MPI_PROC_NULL). */ diff --git a/ompi/runtime/mpiruntime.h b/ompi/runtime/mpiruntime.h index 81c9741c2e2..3e5b5885718 100644 --- a/ompi/runtime/mpiruntime.h +++ b/ompi/runtime/mpiruntime.h @@ -242,12 +242,6 @@ void ompi_mpi_dynamics_disable(const char *msg); */ bool ompi_mpi_dynamics_is_enabled(const char *function); -/** - * Clean up memory / resources by the MPI dynamics process - * functionality checker - */ -void ompi_mpi_dynamics_finalize(void); - END_C_DECLS #endif /* OMPI_MPI_MPIRUNTIME_H */ diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 1c9961215c0..90bbf321758 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -19,7 +19,7 @@ * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights + * Copyright (c) 2019-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -198,5 +198,6 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, kill the entire job. Wah wah. */ ompi_rte_abort(errcode, NULL); - /* Does not return */ + /* Does not return - but we add a return to keep compiler warnings at bay*/ + return 0; } diff --git a/ompi/runtime/ompi_mpi_dynamics.c b/ompi/runtime/ompi_mpi_dynamics.c index 9e9f92f84f8..25403c91892 100644 --- a/ompi/runtime/ompi_mpi_dynamics.c +++ b/ompi/runtime/ompi_mpi_dynamics.c @@ -28,16 +28,30 @@ #include "ompi/runtime/params.h" #include "ompi/runtime/mpiruntime.h" +#include "ompi/instance/instance.h" static char *ompi_mpi_dynamics_disabled_msg = "Enabled"; +static int ompi_mpi_dynamics_finalize (void) +{ + // If dynamics were disabled, then we have a message to free + if (!ompi_mpi_dynamics_enabled) { + free(ompi_mpi_dynamics_disabled_msg); + ompi_mpi_dynamics_disabled_msg = NULL; + } + + return OMPI_SUCCESS; +} + void ompi_mpi_dynamics_disable(const char *msg) { assert(msg); ompi_mpi_dynamics_enabled = false; ompi_mpi_dynamics_disabled_msg = strdup(msg); + + ompi_mpi_instance_append_finalize (ompi_mpi_dynamics_finalize); } bool ompi_mpi_dynamics_is_enabled(const char *function) @@ -53,12 +67,3 @@ bool ompi_mpi_dynamics_is_enabled(const char *function) ompi_mpi_dynamics_disabled_msg); return false; } - -void ompi_mpi_dynamics_finalize(void) -{ - // If dynamics were disabled, then we have a message to free - if (!ompi_mpi_dynamics_enabled) { - free(ompi_mpi_dynamics_disabled_msg); - ompi_mpi_dynamics_disabled_msg = NULL; - } -} diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 49ece5f9e94..a5b73795858 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -19,7 +19,6 @@ * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2019 Triad National Security, LLC. All rights * reserved. @@ -61,7 +60,6 @@ #include "opal/mca/allocator/base/base.h" #include "opal/mca/pmix/pmix-internal.h" #include "opal/util/timings.h" - #include "mpi.h" #include "ompi/constants.h" #include "ompi/errhandler/errcode.h" @@ -87,9 +85,15 @@ #include "ompi/mca/io/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" -#include "ompi/dpm/dpm.h" -#include "ompi/mpiext/mpiext.h" +#include "ompi/datatype/ompi_datatype.h" +#include "ompi/mca/hook/hook.h" #include "ompi/mca/hook/base/base.h" +#include "ompi/communicator/communicator.h" +#include "ompi/attribute/attribute.h" +#include "ompi/instance/instance.h" + +#include "mpi.h" +#include "ompi/constants.h" extern bool ompi_enable_timing; @@ -104,13 +108,8 @@ static void fence_cbfunc(pmix_status_t status, void *cbdata) int ompi_mpi_finalize(void) { int ret = MPI_SUCCESS; - opal_list_item_t *item; - ompi_proc_t** procs; - size_t nprocs; - volatile bool active; - uint32_t key; - ompi_datatype_t * datatype; pmix_status_t rc; + volatile bool active; ompi_hook_base_mpi_finalize_top(); @@ -138,8 +137,6 @@ int ompi_mpi_finalize(void) opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_STARTED); - ompi_mpiext_fini(); - /* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing anything else in MPI_FINALIZE (to include setting up such that MPI_FINALIZED will return true). */ @@ -196,11 +193,6 @@ int ompi_mpi_finalize(void) opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT); - /* As finalize is the last legal MPI call, we are allowed to force the release - * of the user buffer used for bsend, before going anywhere further. - */ - (void)mca_pml_base_bsend_detach(NULL, NULL); - #if OPAL_ENABLE_PROGRESS_THREADS == 0 opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK); #endif @@ -303,212 +295,7 @@ int ompi_mpi_finalize(void) OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* Shut down any bindings-specific issues: C++, F77, F90 */ - - /* Remove all memory associated by MPI_REGISTER_DATAREP (per - MPI-2:9.5.3, there is no way for an MPI application to - *un*register datareps, but we don't want the OMPI layer causing - memory leaks). */ - while (NULL != (item = opal_list_remove_first(&ompi_registered_datareps))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&ompi_registered_datareps); - - /* Remove all F90 types from the hash tables */ - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable) - OBJ_RELEASE(datatype); - OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable); - - /* Free communication objects */ - - /* free file resources */ - if (OMPI_SUCCESS != (ret = ompi_file_finalize())) { - goto done; - } - - /* free window resources */ - if (OMPI_SUCCESS != (ret = ompi_win_finalize())) { - goto done; - } - if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_part_base_finalize())) { - goto done; - } - - - /* free communicator resources. this MUST come before finalizing the PML - * as this will call into the pml */ - if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) { - goto done; - } - - /* call del_procs on all allocated procs even though some may not be known - * to the pml layer. the pml layer is expected to be resilient and ignore - * any unknown procs. */ - nprocs = 0; - procs = ompi_proc_get_allocated (&nprocs); - MCA_PML_CALL(del_procs(procs, nprocs)); - free(procs); - - /* free pml resource */ - if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { - goto done; - } - - /* free requests */ - if (OMPI_SUCCESS != (ret = ompi_request_finalize())) { - goto done; - } - - if (OMPI_SUCCESS != (ret = ompi_message_finalize())) { - goto done; - } - - /* If requested, print out a list of memory allocated by ALLOC_MEM - but not freed by FREE_MEM */ - if (0 != ompi_debug_show_mpi_alloc_mem_leaks) { - mca_mpool_base_tree_print(ompi_debug_show_mpi_alloc_mem_leaks); - } - - /* Now that all MPI objects dealing with communications are gone, - shut down MCA types having to do with communications */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) { - OMPI_ERROR_LOG(ret); - goto done; - } - - /* shut down buffered send code */ - mca_pml_base_bsend_fini(); - - /* Free secondary resources */ - - /* free attr resources */ - if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) { - goto done; - } - - /* free group resources */ - if (OMPI_SUCCESS != (ret = ompi_group_finalize())) { - goto done; - } - - /* finalize the DPM subsystem */ - if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { - goto done; - } - - /* free internal error resources */ - if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { - goto done; - } - - /* free error code resources */ - if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) { - goto done; - } - - /* free errhandler resources */ - if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) { - goto done; - } - - /* Free all other resources */ - - /* free op resources */ - if (OMPI_SUCCESS != (ret = ompi_op_finalize())) { - goto done; - } - - /* free ddt resources */ - if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) { - goto done; - } - - /* free info resources */ - if (OMPI_SUCCESS != (ret = ompi_mpiinfo_finalize())) { - goto done; - } - - /* Close down MCA modules */ - - /* io is opened lazily, so it's only necessary to close it if it - was actually opened */ - if (0 < ompi_io_base_framework.framework_refcnt) { - /* May have been "opened" multiple times. We want it closed now */ - ompi_io_base_framework.framework_refcnt = 1; - - if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) { - goto done; - } - } - (void) mca_base_framework_close(&ompi_topo_base_framework); - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_part_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_mpool_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_rcache_base_framework))) { - goto done; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_allocator_base_framework))) { - goto done; - } - - /* free proc resources */ - if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { - goto done; - } - - if (NULL != ompi_mpi_main_thread) { - OBJ_RELEASE(ompi_mpi_main_thread); - ompi_mpi_main_thread = NULL; - } - - /* Clean up memory/resources from the MPI dynamic process - functionality checker */ - ompi_mpi_dynamics_finalize(); - - /* Leave the RTE */ - - if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { - goto done; - } - ompi_rte_initialized = false; - - /* Now close the hook framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework) ) ) { - OMPI_ERROR_LOG(ret); - goto done; - } - - if (OPAL_SUCCESS != (ret = opal_finalize_util())) { - goto done; - } - - if (0 == opal_initialized) { - /* if there is no MPI_T_init_thread that has been MPI_T_finalize'd, - * then be gentle to the app and release all the memory now (instead - * of the opal library destructor */ - opal_class_finalize(); - } + ompi_mpi_instance_finalize (&ompi_mpi_instance_default); /* cleanup environment */ opal_unsetenv("OMPI_COMMAND", &environ); @@ -516,7 +303,7 @@ int ompi_mpi_finalize(void) /* All done */ - done: + done: opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 69c182899d7..cbc81254a3e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -64,7 +64,6 @@ #include "opal/mca/mpool/base/base.h" #include "opal/mca/btl/base/base.h" #include "opal/mca/pmix/base/base.h" -#include "opal/util/timings.h" #include "opal/util/opal_environ.h" #include "ompi/constants.h" @@ -262,57 +261,6 @@ MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; #include "mpif-c-constants.h" -/* - * Hash tables for MPI_Type_create_f90* functions - */ -opal_hash_table_t ompi_mpi_f90_integer_hashtable = {{0}}; -opal_hash_table_t ompi_mpi_f90_real_hashtable = {{0}}; -opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}}; - -/* - * Per MPI-2:9.5.3, MPI_REGISTER_DATAREP is a memory leak. There is - * no way to *de*register datareps once they've been registered. So - * we have to track all registrations here so that they can be - * de-registered during MPI_FINALIZE so that memory-tracking debuggers - * don't show Open MPI as leaking memory. - */ -opal_list_t ompi_registered_datareps = {{0}}; - -bool ompi_enable_timing = false; -extern bool ompi_mpi_yield_when_idle; -extern int ompi_mpi_event_tick_rate; - -/** - * Static functions used to configure the interactions between the OPAL and - * the runtime. - */ -static char* -_process_name_print_for_opal(const opal_process_name_t procname) -{ - ompi_process_name_t* rte_name = (ompi_process_name_t*)&procname; - return OMPI_NAME_PRINT(rte_name); -} - -static int -_process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2) -{ - ompi_process_name_t* o1 = (ompi_process_name_t*)&p1; - ompi_process_name_t* o2 = (ompi_process_name_t*)&p2; - return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); -} - -static int _convert_string_to_process_name(opal_process_name_t *name, - const char* name_string) -{ - return ompi_rte_convert_string_to_process_name(name, name_string); -} - -static int _convert_process_name_to_string(char** name_string, - const opal_process_name_t *name) -{ - return ompi_rte_convert_process_name_to_string(name_string, name); -} - void ompi_mpi_thread_level(int requested, int *provided) { /** @@ -336,41 +284,6 @@ void ompi_mpi_thread_level(int requested, int *provided) MPI_THREAD_MULTIPLE); } -static int ompi_register_mca_variables(void) -{ - int ret; - - /* Register MPI variables */ - if (OMPI_SUCCESS != (ret = ompi_mpi_register_params())) { - return ret; - } - - /* check to see if we want timing information */ - /* TODO: enable OMPI init and OMPI finalize timings if - * this variable was set to 1! - */ - ompi_enable_timing = false; - (void) mca_base_var_register("ompi", "ompi", NULL, "timing", - "Request that critical timing loops be measured", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_enable_timing); - -#if OPAL_ENABLE_FT_MPI - /* Before loading any other part of the MPI library, we need to load - * the ft-mpi tune file to override default component selection when - * FT is desired ON; this does override openmpi-params.conf, but not - * command line or env. - */ - if( ompi_ftmpi_enabled ) { - mca_base_var_load_extra_files("ft-mpi", false); - } -#endif /* OPAL_ENABLE_FT_MPI */ - - return OMPI_SUCCESS; -} - static void fence_release(pmix_status_t status, void *cbdata) { volatile bool *active = (volatile bool*)cbdata; @@ -394,8 +307,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, bool reinit_ok) { int ret; - ompi_proc_t** procs; - size_t nprocs; char *error = NULL; volatile bool active; bool background_fence = false; @@ -403,8 +314,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, pmix_status_t codes[1] = { PMIX_ERR_PROC_ABORTED }; pmix_status_t rc; OMPI_TIMING_INIT(64); - opal_pmix_lock_t mylock; - opal_process_name_t pname; ompi_hook_base_mpi_init_top(argc, argv, requested, provided); @@ -439,257 +348,37 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, } } - /* Figure out the final MPI thread levels. If we were not - compiled for support for MPI threads, then don't allow - MPI_THREAD_MULTIPLE. Set this stuff up here early in the - process so that other components can make decisions based on - this value. */ - ompi_mpi_thread_level(requested, provided); - /* Setup enough to check get/set MCA params */ - memset(&opal_process_info, 0, sizeof(opal_process_info)); - if (OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv))) { - error = "ompi_mpi_init: opal_init_util failed"; - goto error; - } - OMPI_TIMING_IMPORT_OPAL("opal_init_util"); - - /* If thread support was enabled, then setup OPAL to allow for them. This must be done - * early to prevent a race condition that can occur with orte_init(). */ - if (*provided != MPI_THREAD_SINGLE) { - opal_set_using_threads(true); - } - - /* Convince OPAL to use our naming scheme */ - opal_process_name_print = _process_name_print_for_opal; - opal_compare_proc = _process_name_compare; - opal_convert_string_to_process_name = _convert_string_to_process_name; - opal_convert_process_name_to_string = _convert_process_name_to_string; - opal_proc_for_name = ompi_proc_for_name; - - /* Register MCA variables */ - if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) { - error = "ompi_mpi_init: ompi_register_mca_variables failed"; - goto error; - } - - /* setup our internal nspace hack */ - opal_pmix_setup_nspace_tracker(); - /* init PMIx */ - if (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0))) { - /* if we get PMIX_ERR_UNREACH indicating that we cannot reach the - * server, then we assume we are operating as a singleton */ - if (PMIX_ERR_UNREACH == ret) { - ompi_singleton = true; - } else { - /* we cannot run - this could be due to being direct launched - * without the required PMI support being built, so print - * out a help message indicating it */ - opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret)); - return OPAL_ERR_SILENT; - } - } - /* setup the process name fields - also registers the new nspace */ - OPAL_PMIX_CONVERT_PROCT(ret, &pname, &opal_process_info.myprocid); - if (OPAL_SUCCESS != ret) { - error = "ompi_mpi_init: converting process name"; - goto error; - } - OPAL_PROC_MY_NAME.jobid = pname.jobid; - OPAL_PROC_MY_NAME.vpid = pname.vpid; - opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid; - opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid; - - /* get our topology and cache line size */ - ret = opal_hwloc_base_get_topology(); - if (OPAL_SUCCESS != ret) { - error = "ompi_mpi_init: get topology"; - goto error; - } - - if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) { - error = "ompi_mpi_init: opal_arch_set_fortran_logical_size failed"; - goto error; - } - - /* _After_ opal_init_util() but _before_ orte_init(), we need to - set an MCA param that tells libevent that it's ok to use any - mechanism in libevent that is available on this platform (e.g., - epoll and friends). Per opal/event/event.s, we default to - select/poll -- but we know that MPI processes won't be using - pty's with the event engine, so it's ok to relax this - constraint and let any fd-monitoring mechanism be used. */ - - ret = mca_base_var_find("opal", "event", "*", "event_include"); - if (ret >= 0) { - char *allvalue = "all"; - /* We have to explicitly "set" the MCA param value here - because libevent initialization will re-register the MCA - param and therefore override the default. Setting the value - here puts the desired value ("all") in different storage - that is not overwritten if/when the MCA param is - re-registered. This is unless the user has specified a different - value for this MCA parameter. Make sure we check to see if the - default is specified before forcing "all" in case that is not what - the user desires. Note that we do *NOT* set this value as an - environment variable, just so that it won't be inherited by - any spawned processes and potentially cause unintented - side-effects with launching RTE tools... */ - mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); - } - - /* open the ompi hook framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_hook_base_framework, 0))) { - error = "ompi_hook_base_open() failed"; + ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default); + if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { + error = "ompi_mpi_init: ompi_mpi_instance_init failed"; goto error; } ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - - OMPI_TIMING_NEXT("initialization"); - - /* Setup RTE */ - if (OMPI_SUCCESS != (ret = ompi_rte_init(&argc, &argv))) { - error = "ompi_mpi_init: ompi_rte_init failed"; - goto error; - } - OMPI_TIMING_NEXT("rte_init"); - OMPI_TIMING_IMPORT_OPAL("orte_ess_base_app_setup"); - OMPI_TIMING_IMPORT_OPAL("rte_init"); - - ompi_rte_initialized = true; - - /* Register the default errhandler callback */ - /* we want to go first */ - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_PREPEND, NULL, PMIX_BOOL); - /* give it a name so we can distinguish it */ - PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING); - OPAL_PMIX_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock); - OPAL_PMIX_WAIT_THREAD(&mylock); - rc = mylock.status; - OPAL_PMIX_DESTRUCT_LOCK(&mylock); - PMIX_INFO_DESTRUCT(&info[0]); - PMIX_INFO_DESTRUCT(&info[1]); - if (PMIX_SUCCESS != rc) { - error = "Error handler registration"; - ret = opal_pmix_convert_status(rc); + /* initialize communicator subsystem */ + if (OMPI_SUCCESS != (ret = ompi_comm_init_mpi3 ())) { + error = "ompi_mpi_init: ompi_comm_init_mpi3 failed"; goto error; } - /* declare our presence for interlib coordination, and - * register for callbacks when other libs declare */ - if (OMPI_SUCCESS != (ret = ompi_interlib_declare(*provided, OMPI_IDENT_STRING))) { - error = "ompi_interlib_declare"; - goto error; - } - - /* initialize datatypes. This step should be done early as it will - * create the local convertor and local arch used in the proc - * init. + /* if we were not externally started, then we need to setup + * some envars so the MPI_INFO_ENV can get the cmd name + * and argv (but only if the user supplied a non-NULL argv!), and + * the requested thread level */ - if (OMPI_SUCCESS != (ret = ompi_datatype_init())) { - error = "ompi_datatype_init() failed"; - goto error; - } - - /* Initialize OMPI procs */ - if (OMPI_SUCCESS != (ret = ompi_proc_init())) { - error = "mca_proc_init() failed"; - goto error; - } - - /* Initialize the op framework. This has to be done *after* - ddt_init, but befor mca_coll_base_open, since some collective - modules (e.g., the hierarchical coll component) may need ops in - their query function. */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_op_base_framework, 0))) { - error = "ompi_op_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != - (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "ompi_op_base_find_available() failed"; - goto error; + if (NULL == getenv("OMPI_COMMAND") && NULL != argv && NULL != argv[0]) { + opal_setenv("OMPI_COMMAND", argv[0], true, &environ); } - if (OMPI_SUCCESS != (ret = ompi_op_init())) { - error = "ompi_op_init() failed"; - goto error; - } - - /* Open up MPI-related MCA components */ - - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_allocator_base_framework, 0))) { - error = "mca_allocator_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_rcache_base_framework, 0))) { - error = "mca_rcache_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_mpool_base_framework, 0))) { - error = "mca_mpool_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_bml_base_framework, 0))) { - error = "mca_bml_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_bml_base_init (1, ompi_mpi_thread_multiple))) { - error = "mca_bml_base_init() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_pml_base_framework, 0))) { - error = "mca_pml_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_coll_base_framework, 0))) { - error = "mca_coll_base_open() failed"; - goto error; + if (NULL == getenv("OMPI_ARGV") && 1 < argc) { + char *tmp; + tmp = opal_argv_join(&argv[1], ' '); + opal_setenv("OMPI_ARGV", tmp, true, &environ); + free(tmp); } - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_osc_base_framework, 0))) { - error = "ompi_osc_base_open() failed"; - goto error; - } - - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_part_base_framework, 0))) { - error = "ompi_part_base_open() failed"; - goto error; - } - - /* In order to reduce the common case for MPI apps (where they - don't use MPI-2 IO or MPI-1 topology functions), the io and - topo frameworks are initialized lazily, at the first use of - relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*), - so they are not opened here. */ - - /* Select which MPI components to use */ - - if (OMPI_SUCCESS != - (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_pml_base_select() failed"; - goto error; - } - - OMPI_TIMING_IMPORT_OPAL("orte_init"); - OMPI_TIMING_NEXT("rte_init-commit"); - - /* exchange connection info - this function may also act as a barrier - * if data exchange is required. The modex occurs solely across procs - * in our job. If a barrier is required, the "modex" function will - * perform it internally */ - rc = PMIx_Commit(); - if (PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Commit()"; - goto error; - } - OMPI_TIMING_NEXT("commit"); #if (OPAL_ENABLE_TIMING) if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data && !ompi_singleton) { @@ -752,150 +441,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, OMPI_TIMING_NEXT("modex"); - /* select buffered send allocator component to be used */ - if( OMPI_SUCCESS != - (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { - error = "mca_pml_base_bsend_init() failed"; - goto error; - } - - if (OMPI_SUCCESS != - (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_coll_base_find_available() failed"; - goto error; - } - - if (OMPI_SUCCESS != - (ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "ompi_osc_base_find_available() failed"; - goto error; - } - - - if (OMPI_SUCCESS != - (ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { - error = "mca_part_base_select() failed"; - goto error; - } - - /* io and topo components are not selected here -- see comment - above about the io and topo frameworks being loaded lazily */ - - /* Initialize each MPI handle subsystem */ - /* initialize requests */ - if (OMPI_SUCCESS != (ret = ompi_request_init())) { - error = "ompi_request_init() failed"; - goto error; - } - - if (OMPI_SUCCESS != (ret = ompi_message_init())) { - error = "ompi_message_init() failed"; - goto error; - } - - /* initialize error handlers */ - if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) { - error = "ompi_errhandler_init() failed"; - goto error; - } - - /* initialize error codes */ - if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init())) { - error = "ompi_mpi_errcode_init() failed"; - goto error; - } - - /* initialize internal error codes */ - if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) { - error = "ompi_errcode_intern_init() failed"; - goto error; - } - - /* initialize info */ - if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) { - error = "ompi_info_init() failed"; - goto error; - } - - /* initialize groups */ - if (OMPI_SUCCESS != (ret = ompi_group_init())) { - error = "ompi_group_init() failed"; - goto error; - } - - /* initialize communicators */ - if (OMPI_SUCCESS != (ret = ompi_comm_init())) { - error = "ompi_comm_init() failed"; - goto error; - } - - /* initialize file handles */ - if (OMPI_SUCCESS != (ret = ompi_file_init())) { - error = "ompi_file_init() failed"; - goto error; - } - - /* initialize windows */ - if (OMPI_SUCCESS != (ret = ompi_win_init())) { - error = "ompi_win_init() failed"; - goto error; - } - - /* initialize attribute meta-data structure for comm/win/dtype */ - if (OMPI_SUCCESS != (ret = ompi_attr_init())) { - error = "ompi_attr_init() failed"; - goto error; - } - - /* identify the architectures of remote procs and setup - * their datatype convertors, if required - */ - if (OMPI_SUCCESS != (ret = ompi_proc_complete_init())) { - error = "ompi_proc_complete_init failed"; - goto error; - } - - /* start PML/BTL's */ - ret = MCA_PML_CALL(enable(true)); - if( OMPI_SUCCESS != ret ) { - error = "PML control failed"; - goto error; - } - - /* some btls/mtls require we call add_procs with all procs in the job. - * since the btls/mtls have no visibility here it is up to the pml to - * convey this requirement */ - if (mca_pml_base_requires_world ()) { - if (NULL == (procs = ompi_proc_world (&nprocs))) { - error = "ompi_proc_world () failed"; - goto error; - } - } else { - /* add all allocated ompi_proc_t's to PML (below the add_procs limit this - * behaves identically to ompi_proc_world ()) */ - if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) { - error = "ompi_proc_get_allocated () failed"; - goto error; - } - } - ret = MCA_PML_CALL(add_procs(procs, nprocs)); - free(procs); - /* If we got "unreachable", then print a specific error message. - Otherwise, if we got some other failure, fall through to print - a generic message. */ - if (OMPI_ERR_UNREACH == ret) { - opal_show_help("help-mpi-runtime.txt", - "mpi_init:startup:pml-add-procs-fail", true); - error = NULL; - goto error; - } else if (OMPI_SUCCESS != ret) { - error = "PML add procs failed"; - goto error; - } - MCA_PML_CALL(add_comm(&ompi_mpi_comm_world.comm)); MCA_PML_CALL(add_comm(&ompi_mpi_comm_self.comm)); @@ -925,7 +470,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, */ if (ompi_mpi_show_mca_params) { ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, - nprocs, + ompi_process_info.num_procs, ompi_process_info.nodename); } @@ -982,23 +527,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, goto error; } - /* Setup the dynamic process management (DPM) subsystem */ - if (OMPI_SUCCESS != (ret = ompi_dpm_init())) { - error = "ompi_dpm_init() failed"; - goto error; - } - - /* Determine the overall threadlevel support of all processes - in MPI_COMM_WORLD. This has to be done before calling - coll_base_comm_select, since some of the collective components - e.g. hierarch, might create subcommunicators. The threadlevel - requested by all processes is required in order to know - which cid allocation algorithm can be used. */ - if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) { - error = "ompi_mpi_init: ompi_comm_cid_init failed"; - goto error; - } - /* Init coll for the comms. This has to be after dpm_base_select, (since dpm.mark_dyncomm is not set in the communicator creation function else), but before dpm.dyncom_init, since this function @@ -1015,39 +543,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, goto error; } - /* Check whether we have been spawned or not. We introduce that - at the very end, since we need collectives, datatypes, ptls - etc. up and running here.... */ - if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { - error = "ompi_dpm_dyn_init() failed"; - goto error; - } - - /* Undo OPAL calling opal_progress_event_users_increment() during - opal_init, to get better latency when not using TCP. Do - this *after* dyn_init, as dyn init uses lots of RTE - communication and we don't want to hinder the performance of - that code. */ - opal_progress_event_users_decrement(); - - /* see if yield_when_idle was specified - if so, use it */ - opal_progress_set_yield_when_idle(ompi_mpi_yield_when_idle); - - /* negative value means use default - just don't do anything */ - if (ompi_mpi_event_tick_rate >= 0) { - opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate); - } - - /* At this point, we are fully configured and in MPI mode. Any - communication calls here will work exactly like they would in - the user's code. Setup the connections between procs and warm - them up with simple sends, if requested */ - - if (OMPI_SUCCESS != (ret = ompi_mpiext_init())) { - error = "ompi_mpiext_init"; - goto error; - } - #if OPAL_ENABLE_FT_MPI /* start the failure detector */ if( ompi_ftmpi_enabled ) { @@ -1056,6 +551,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, } #endif + /* Check whether we have been spawned or not. We introduce that + at the very end, since we need collectives, datatypes, ptls + etc. up and running here.... */ + if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { + return ret; + } + /* Fall through */ error: if (ret != OMPI_SUCCESS) { @@ -1071,21 +573,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, return ret; } - /* Initialize the registered datarep list to be empty */ - OBJ_CONSTRUCT(&ompi_registered_datareps, opal_list_t); - - /* Initialize the arrays used to store the F90 types returned by the - * MPI_Type_create_f90_XXX functions. - */ - OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */); - - OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP); - - OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t); - opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP); - /* All done. Wasn't that simple? */ opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_COMPLETED); diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 238f74ad6ca..cc2050d976e 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -20,7 +20,7 @@ * All rights reserved. * Copyright (c) 2016-2021 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -87,6 +87,8 @@ char *ompi_mpi_spc_attach_string = NULL; bool ompi_mpi_spc_dump_enabled = false; uint32_t ompi_pmix_connect_timeout = 0; +bool ompi_enable_timing = false; + static bool show_default_mca_params = false; static bool show_file_mca_params = false; static bool show_enviro_mca_params = false; @@ -401,6 +403,30 @@ int ompi_mpi_register_params(void) 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_pmix_connect_timeout); + /* check to see if we want timing information */ + /* TODO: enable OMPI init and OMPI finalize timings if + * this variable was set to 1! + */ + ompi_enable_timing = false; + (void) mca_base_var_register("ompi", "ompi", NULL, "timing", + "Request that critical timing loops be measured", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_enable_timing); + +#if OPAL_ENABLE_FT_MPI + /* Before loading any other part of the MPI library, we need to load + * * the ft-mpi tune file to override default component selection when + * * FT is desired ON; this does override openmpi-params.conf, but not + * * command line or env. + * */ + if( ompi_ftmpi_enabled ) { + mca_base_var_load_extra_files("ft-mpi", false); + } +#endif /* OPAL_ENABLE_FT_MPI */ + + return OMPI_SUCCESS; } diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index f67efb0573d..71f6d3624cf 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -97,6 +97,7 @@ buffer_cleanup(void *value) } free (ptr); } + fns_init = false; } static opal_print_args_buffers_t* @@ -560,6 +561,35 @@ int ompi_rte_init(int *pargc, char ***pargv) goto error; } + /* setup our internal nspace hack */ + opal_pmix_setup_nspace_tracker(); + + /* initialize the selected module */ + if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&opal_process_info.myprocid, NULL, 0)))) { + /* if we get PMIX_ERR_UNREACH indicating that we cannot reach the + * server, then we assume we are operating as a singleton */ + if (PMIX_ERR_UNREACH == ret) { + ompi_singleton = true; + } else { + /* we cannot run - this could be due to being direct launched + * without the required PMI support being built, so print + * out a help message indicating it */ + opal_show_help("help-mpi-runtime.txt", "no-pmi", true, PMIx_Error_string(ret)); + return OPAL_ERR_SILENT; + } + } + + /* setup the process name fields - also registers the new nspace */ + OPAL_PMIX_CONVERT_PROCT(rc, &pname, &opal_process_info.myprocid); + if (OPAL_SUCCESS != rc) { + return rc; + } + OPAL_PROC_MY_NAME.jobid = pname.jobid; + OPAL_PROC_MY_NAME.vpid = pname.vpid; + opal_process_info.my_name.jobid = OPAL_PROC_MY_NAME.jobid; + opal_process_info.my_name.vpid = OPAL_PROC_MY_NAME.vpid; + + /* set our hostname */ ev1 = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, PMIX_HOSTNAME, &OPAL_PROC_MY_NAME, @@ -968,6 +998,8 @@ int ompi_rte_finalize(void) opal_pmix_finalize_nspace_tracker(); + opal_finalize (); + return OMPI_SUCCESS; } diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 9e3e9b6d086..edae5de385a 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -16,7 +16,7 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2021 Triad National Security, LLC. All rights + * Copyright (c) 2018-2021 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -30,6 +30,8 @@ #include "ompi_config.h" +#include "ompi/runtime/mpiruntime.h" + BEGIN_C_DECLS /* @@ -179,6 +181,15 @@ OMPI_DECLSPEC extern bool ompi_mpi_spc_dump_enabled; */ OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout; + /** + * A boolean value that determines whether or not to enable runtime timing of + * init and finalize. + */ +OMPI_DECLSPEC extern bool ompi_enable_timing; + +OMPI_DECLSPEC extern int ompi_mpi_event_tick_rate; +OMPI_DECLSPEC extern bool ompi_mpi_yield_when_idle; + /** * Register MCA parameters used by the MPI layer. * @@ -189,6 +200,7 @@ OMPI_DECLSPEC extern uint32_t ompi_pmix_connect_timeout; */ OMPI_DECLSPEC int ompi_mpi_register_params(void); + /** * Display all MCA parameters used * diff --git a/ompi/win/win.c b/ompi/win/win.c index b4bc150d893..c290fa11ac1 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -17,6 +17,8 @@ * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018-2019 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -74,8 +76,44 @@ static void ompi_win_destruct(ompi_win_t *win); OBJ_CLASS_INSTANCE(ompi_win_t, opal_infosubscriber_t, ompi_win_construct, ompi_win_destruct); -int -ompi_win_init(void) + +static void ompi_win_dump (ompi_win_t *win) +{ + opal_output(0, "Dumping information for window: %s\n", win->w_name); + opal_output(0," Fortran window handle: %d, window size: %d\n", + win->w_f_to_c_index, ompi_group_size (win->w_group)); +} + +static int ompi_win_finalize(void) +{ + int ret = OMPI_SUCCESS; + + size_t size = opal_pointer_array_get_size (&ompi_mpi_windows); + /* start at 1 to skip win null */ + for (size_t i = 1 ; i < size ; ++i) { + ompi_win_t *win = + (ompi_win_t *) opal_pointer_array_get_item (&ompi_mpi_windows, i); + if (NULL != win) { + if (ompi_debug_show_handle_leaks && !ompi_win_invalid(win)){ + opal_output(0,"WARNING: MPI_Win still allocated in MPI_Finalize\n"); + ompi_win_dump (win); + } + ompi_win_free (win); + } + } + + OBJ_DESTRUCT(&ompi_mpi_win_null.win); + OBJ_DESTRUCT(&ompi_mpi_windows); + OBJ_RELEASE(ompi_win_accumulate_ops); + OBJ_RELEASE(ompi_win_accumulate_order); + + /* release a reference to the attributes subsys */ + ret = ompi_attr_put_ref(); + + return ret; +} + +int ompi_win_init (void) { int ret; @@ -106,36 +144,13 @@ ompi_win_init(void) return ret; } - return OMPI_SUCCESS; -} - -static void ompi_win_dump (ompi_win_t *win) -{ - opal_output(0, "Dumping information for window: %s\n", win->w_name); - opal_output(0," Fortran window handle: %d, window size: %d\n", - win->w_f_to_c_index, ompi_group_size (win->w_group)); -} - -int ompi_win_finalize(void) -{ - size_t size = opal_pointer_array_get_size (&ompi_mpi_windows); - /* start at 1 to skip win null */ - for (size_t i = 1 ; i < size ; ++i) { - ompi_win_t *win = - (ompi_win_t *) opal_pointer_array_get_item (&ompi_mpi_windows, i); - if (NULL != win) { - if (ompi_debug_show_handle_leaks && !ompi_win_invalid(win)){ - opal_output(0,"WARNING: MPI_Win still allocated in MPI_Finalize\n"); - ompi_win_dump (win); - } - ompi_win_free (win); - } + /* get a reference to the attributes subsys */ + ret = ompi_attr_get_ref(); + if (OMPI_SUCCESS != ret) { + return ret; } - OBJ_DESTRUCT(&ompi_mpi_win_null.win); - OBJ_DESTRUCT(&ompi_mpi_windows); - OBJ_RELEASE(ompi_win_accumulate_ops); - OBJ_RELEASE(ompi_win_accumulate_order); + ompi_mpi_instance_append_finalize (ompi_win_finalize); return OMPI_SUCCESS; } @@ -260,6 +275,24 @@ ompi_win_create(void *base, size_t size, return OMPI_SUCCESS; } +int ompi_win_create_from_group (void *base, size_t size, int disp_unit, ompi_group_t *group, + const char *tag, opal_info_t *info, ompi_win_t** newwin) +{ + /* NTH: dummy implementation until the osc modules are updated for group communicator + * creation. This implemention will probably continue to exist for modules that do + * not provide group-based window creation. */ + ompi_communicator_t *comm; + int rc; + + rc = ompi_comm_create_from_group (group, tag, info, MPI_ERRHANDLER_NULL, &comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS == rc)) { + return rc; + } + + rc = ompi_win_create (base, size, disp_unit, comm, info, newwin); + ompi_comm_free (&comm); + return rc; +} int ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, @@ -293,7 +326,6 @@ ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, return OMPI_SUCCESS; } - int ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin) @@ -326,7 +358,6 @@ ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, return OMPI_SUCCESS; } - int ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin) { @@ -356,6 +387,25 @@ ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t return OMPI_SUCCESS; } +int ompi_win_create_dynamic_from_group (opal_info_t *info, ompi_group_t *group, const char *tag, + ompi_win_t** newwin) +{ + /* NTH: dummy implementation until the osc modules are updated for group communicator + * creation. This implemention will probably continue to exist for modules that do + * not provide group-based window creation. */ + ompi_communicator_t *comm; + int rc; + + rc = ompi_comm_create_from_group (group, tag, info, MPI_ERRHANDLER_NULL, &comm); + if (OPAL_UNLIKELY(OMPI_SUCCESS == rc)) { + return rc; + } + + rc = ompi_win_create_dynamic (info, comm, newwin); + ompi_comm_free (&comm); + return rc; +} + int ompi_win_free(ompi_win_t *win) diff --git a/ompi/win/win.h b/ompi/win/win.h index 63aec9de14a..9cc142f40ce 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -15,6 +15,8 @@ * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -131,16 +133,20 @@ OMPI_DECLSPEC extern ompi_predefined_win_t ompi_mpi_win_null; OMPI_DECLSPEC extern ompi_predefined_win_t *ompi_mpi_win_null_addr; int ompi_win_init(void); -int ompi_win_finalize(void); int ompi_win_create(void *base, size_t size, int disp_unit, ompi_communicator_t *comm, opal_info_t *info, ompi_win_t **newwin); +int ompi_win_create_from_group (void *base, size_t size, int disp_unit, + ompi_group_t *group, const char *tag, opal_info_t *info, + ompi_win_t **newwin); int ompi_win_allocate(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin); int ompi_win_allocate_shared(size_t size, int disp_unit, opal_info_t *info, ompi_communicator_t *comm, void *baseptr, ompi_win_t **newwin); int ompi_win_create_dynamic(opal_info_t *info, ompi_communicator_t *comm, ompi_win_t **newwin); +int ompi_win_create_dynamic_from_group (opal_info_t *info, ompi_group_t *group, const char *tag, + ompi_win_t **newwin); int ompi_win_free(ompi_win_t *win); diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 6b60726f1c3..f97df45e890 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -15,6 +15,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * * $COPYRIGHT$ * Additional copyrights may follow * $HEADER$ diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e08265b42bc..6a7c947eeb3 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -585,6 +585,9 @@ int32_t opal_convertor_prepare_for_recv(opal_convertor_t *convertor, #endif assert(!(convertor->flags & CONVERTOR_SEND)); + /* + * Note this macro may call return.... + */ OPAL_CONVERTOR_PREPARE(convertor, datatype, count, pUserBuf); #if defined(CHECKSUM) diff --git a/opal/mca/btl/sm/btl_sm_fifo.h b/opal/mca/btl/sm/btl_sm_fifo.h index 36f497a5dd9..7b347c8c1eb 100644 --- a/opal/mca/btl/sm/btl_sm_fifo.h +++ b/opal/mca/btl/sm/btl_sm_fifo.h @@ -157,7 +157,9 @@ static inline bool sm_fifo_write_ep(mca_btl_sm_hdr_t *hdr, struct mca_btl_base_e opal_atomic_wmb(); return mca_btl_sm_fbox_sendi(ep, 0xfe, &rhdr, sizeof(rhdr), NULL, 0); } - mca_btl_sm_try_fbox_setup(ep, hdr); +#if 0 + mca_btl_smr_try_fbox_setup (ep, hdr); +#endif hdr->next = SM_FIFO_FREE; sm_fifo_write(ep->fifo, rhdr); diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index eaeee0900f9..e9b6faee8b2 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -9,6 +9,8 @@ * All rights reserved. * Copyright (c) 2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +30,7 @@ # include #endif +#include "opal/class/opal_list.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/argv.h" #include "opal/util/output.h" diff --git a/opal/mca/pmix/pmix-internal.h b/opal/mca/pmix/pmix-internal.h index 6f42ebf0e5f..e77da9f84bd 100644 --- a/opal/mca/pmix/pmix-internal.h +++ b/opal/mca/pmix/pmix-internal.h @@ -685,6 +685,15 @@ OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void); OPAL_DECLSPEC int opal_pmix_register_cleanup(char *path, bool directory, bool ignore, bool jobscope); +OPAL_DECLSPEC pmix_status_t opal_pmix_group_construct (const char *tag, + const opal_process_name_t *procs, + size_t nprocs, + opal_list_t *info, + opal_list_t *info_out); + +OPAL_DECLSPEC pmix_status_t opal_pmix_group_destruct (const char *tag, + opal_list_t *info); + /* protect against early versions of PMIx */ #if PMIX_VERSION_MAJOR == 3 # if PMIX_VERSION_MINOR == 0 diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 94cd775af5b..f94e037bbcc 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -111,9 +111,9 @@ void opal_finalize_append_cleanup(opal_cleanup_fn_t cleanup_fn, const char *fn_n (void) fn_name; #endif - opal_mutex_lock(&opal_finalize_cleanup_fns_lock); - opal_list_append(¤t_finalize_domain->super, &cleanup_item->super); - opal_mutex_unlock(&opal_finalize_cleanup_fns_lock); + opal_mutex_lock (&opal_finalize_cleanup_fns_lock); + opal_list_append (¤t_finalize_domain->super, &cleanup_item->super); + opal_mutex_unlock (&opal_finalize_cleanup_fns_lock); } void opal_finalize_domain_init(opal_finalize_domain_t *domain, const char *domain_name) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 57f1ea4e062..78181597e06 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -485,6 +485,9 @@ int opal_init_util(int *pargc, char ***pargv) opal_init_called = true; + /* register for */ + opal_finalize_register_cleanup_arg (mca_base_framework_close_list, opal_init_util_frameworks); + /* set the nodename right away so anyone who needs it has it. Note * that we don't bother with fqdn and prefix issues here - we let * the RTE later replace this with a modified name if the user diff --git a/opal/util/proc.h b/opal/util/proc.h index 433735e50f3..ad3282ae654 100644 --- a/opal/util/proc.h +++ b/opal/util/proc.h @@ -115,8 +115,9 @@ typedef struct opal_process_info_t { uint32_t num_local_peers; /**< number of procs from my job that share my node with me */ uint16_t my_local_rank; /**< local rank on this node within my job */ uint16_t my_node_rank; - char *cpuset; /**< String-representation of bitmap where we are bound */ - char *locality; /**< String-representation of process locality */ + uint16_t my_numa_rank; /**< rank on this processes NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */ + char *cpuset; /**< String-representation of bitmap where we are bound */ + char *locality; /**< String-representation of process locality */ pid_t pid; uint32_t num_procs; uint32_t app_num;