From 093cc5f381ec6fe91e73a550b7d3f19f03582a86 Mon Sep 17 00:00:00 2001 From: Ken Raffenetti Date: Wed, 15 Aug 2018 13:37:33 -0500 Subject: [PATCH 1/2] Revert "mem/handle: take 6 bits to allow multiple bucket mem pools" This reverts commit 622864ee3c2f6646045199900f820ab8a4617a6e. --- src/include/mpir_handlemem.h | 18 ++++++------- src/include/mpir_objects.h | 37 ++++----------------------- src/include/mpir_request.h | 38 +++++++--------------------- src/mpi/init/initthread.c | 14 +++------- src/mpi/pt2pt/mpir_request.c | 26 +++---------------- src/mpid/ch4/netmod/ofi/ofi_events.h | 2 +- src/mpid/ch4/netmod/ofi/ofi_impl.h | 13 +++------- src/mpid/ch4/netmod/ofi/ofi_probe.h | 6 ++--- 8 files changed, 37 insertions(+), 117 deletions(-) diff --git a/src/include/mpir_handlemem.h b/src/include/mpir_handlemem.h index ec1fbaf41ae..3d42e6284b7 100644 --- a/src/include/mpir_handlemem.h +++ b/src/include/mpir_handlemem.h @@ -94,7 +94,7 @@ static inline int MPIR_Handle_free(void *((*indirect)[]), int indirect_size) #endif static inline void *MPIR_Handle_direct_init(void *direct, - int direct_size, int obj_size, int handle_type, int bucket) + int direct_size, int obj_size, int handle_type) { int i; MPIR_Handle_common *hptr = 0; @@ -108,7 +108,7 @@ static inline void *MPIR_Handle_direct_init(void *direct, ptr = ptr + obj_size; hptr->next = ptr; hptr->handle = ((unsigned) HANDLE_KIND_DIRECT << HANDLE_KIND_SHIFT) | - (handle_type << HANDLE_MPI_KIND_SHIFT) | (bucket << HANDLE_BUCKET_SHIFT) | i; + (handle_type << HANDLE_MPI_KIND_SHIFT) | i; HANDLE_VG_LABEL(hptr, obj_size, handle_type, 1); } @@ -123,7 +123,7 @@ static inline void *MPIR_Handle_indirect_init(void *(**indirect)[], int *indirect_size, int indirect_num_blocks, int indirect_num_indices, - int obj_size, int handle_type, int bucket) + int obj_size, int handle_type) { void *block_ptr; MPIR_Handle_common *hptr = 0; @@ -160,8 +160,8 @@ static inline void *MPIR_Handle_indirect_init(void *(**indirect)[], ptr = ptr + obj_size; hptr->next = ptr; hptr->handle = ((unsigned) HANDLE_KIND_INDIRECT << HANDLE_KIND_SHIFT) | - (handle_type << HANDLE_MPI_KIND_SHIFT) | (bucket << HANDLE_BUCKET_SHIFT) | (*indirect_size << HANDLE_INDIRECT_SHIFT) | i; - //printf("handle=%#x handle_type=%x bucket=%d *indirect_size=%d i=%d\n", hptr->handle, handle_type, bucket, *indirect_size, i); + (handle_type << HANDLE_MPI_KIND_SHIFT) | (*indirect_size << HANDLE_INDIRECT_SHIFT) | i; + /* printf("handle=%#x handle_type=%x *indirect_size=%d i=%d\n", hptr->handle, handle_type, *indirect_size, i); */ HANDLE_VG_LABEL(hptr, obj_size, handle_type, 0); } @@ -213,7 +213,6 @@ Input Parameters: locking with a single global mutex and with a mutex specific for handles. +*/ - #undef FUNCNAME #define FUNCNAME MPIR_Handle_obj_alloc #undef FCNAME @@ -246,11 +245,10 @@ static inline void *MPIR_Handle_obj_alloc_unsafe(MPIR_Object_alloc_t * objmem) /* ptr points to object to allocate */ } else { - int objsize, objkind, bucket; + int objsize, objkind; objsize = objmem->size; objkind = objmem->kind; - bucket = objmem->bucket; if (!objmem->initialized) { MPL_VG_CREATE_MEMPOOL(objmem, 0 /*rzB */ , 0 /*is_zeroed */); @@ -259,7 +257,7 @@ static inline void *MPIR_Handle_obj_alloc_unsafe(MPIR_Object_alloc_t * objmem) * jobs do not need to include any of the Info code if no * Info-using routines are used */ objmem->initialized = 1; - ptr = MPIR_Handle_direct_init(objmem->direct, objmem->direct_size, objsize, objkind, bucket); + ptr = MPIR_Handle_direct_init(objmem->direct, objmem->direct_size, objsize, objkind); if (ptr) { objmem->avail = ptr->next; } @@ -280,7 +278,7 @@ static inline void *MPIR_Handle_obj_alloc_unsafe(MPIR_Object_alloc_t * objmem) ptr = MPIR_Handle_indirect_init(&objmem->indirect, &objmem->indirect_size, HANDLE_NUM_BLOCKS, - HANDLE_NUM_INDICES, objsize, objkind, bucket); + HANDLE_NUM_INDICES, objsize, objkind); if (ptr) { objmem->avail = ptr->next; } diff --git a/src/include/mpir_objects.h b/src/include/mpir_objects.h index 38b324bd71b..05420be43c2 100644 --- a/src/include/mpir_objects.h +++ b/src/include/mpir_objects.h @@ -178,16 +178,11 @@ const char *MPIR_Handle_get_kind_str(int kind); #define HANDLE_GET_KIND(a) (((unsigned)(a)&HANDLE_KIND_MASK)>>HANDLE_KIND_SHIFT) #define HANDLE_SET_KIND(a,kind) ((a)|((kind)<> HANDLE_BUCKET_SHIFT ) -#define HANDLE_SET_BUCKET(a,bucket) ((a) | ((bucket) << HANDLE_BUCKET_SHIFT)) - - /* For indirect, the remainder of the handle has a block and index within that * block */ -#define HANDLE_INDIRECT_SHIFT 6 -#define HANDLE_BLOCK(a) (((a)& 0x000FFFC0) >> HANDLE_INDIRECT_SHIFT) -#define HANDLE_BLOCK_INDEX(a) ((a) & 0x0000003F) +#define HANDLE_INDIRECT_SHIFT 12 +#define HANDLE_BLOCK(a) (((a)& 0x03FFF000) >> HANDLE_INDIRECT_SHIFT) +#define HANDLE_BLOCK_INDEX(a) ((a) & 0x00000FFF) /* Number of blocks is between 1 and 16384 */ #if defined MPID_HANDLE_NUM_BLOCKS @@ -201,7 +196,7 @@ const char *MPIR_Handle_get_kind_str(int kind); #if defined MPID_HANDLE_NUM_INDICES #define HANDLE_NUM_INDICES MPID_HANDLE_NUM_INDICES #else -#define HANDLE_NUM_INDICES 32 +#define HANDLE_NUM_INDICES 1024 #endif /* MPID_HANDLE_NUM_INDICES */ /* For direct, the remainder of the handle is the index into a predefined @@ -410,7 +405,6 @@ typedef struct MPIR_Object_alloc_t { void *direct; /* Pointer to direct block, used for allocation */ int direct_size; /* Size of direct block */ - int bucket; #if (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__POBJ) || \ (MPICH_THREAD_GRANULARITY == MPICH_THREAD_GRANULARITY__EP) MPID_Thread_mutex_t lock; /**/ @@ -466,27 +460,6 @@ static inline void *MPIR_Handle_get_ptr_indirect( int, MPIR_Object_alloc_t * ); } \ } -#define MPIR_Get_ptr_bucket(kind,a,ptr) \ -{ \ - int bucket = HANDLE_GET_BUCKET(a); \ - switch (HANDLE_GET_KIND(a)) { \ - case HANDLE_KIND_DIRECT: \ - ptr=MPIR_##kind##_direct[bucket]+HANDLE_INDEX(a);\ - /*printf("[D] h=%x, buck=%d, idx=%d, ptr=%p\n", a, bucket, HANDLE_INDEX(a), ptr);*/\ - break; \ - case HANDLE_KIND_INDIRECT: \ - ptr=((MPIR_##kind*) \ - MPIR_Handle_get_ptr_indirect(a,&MPIR_##kind##_mem[bucket]));\ - /*printf("[I] h=%x, buck=%d, ptr=%p\n", a, bucket, ptr);*/\ - break; \ - case HANDLE_KIND_INVALID: \ - case HANDLE_KIND_BUILTIN: \ - default: \ - ptr=0; \ - break; \ - } \ -} - /* FIXME: the masks should be defined with the handle definitions instead of inserted here as literals */ #define MPIR_Comm_get_ptr(a,ptr) MPIR_Getb_ptr(Comm,a,0x03ffffff,ptr) @@ -495,7 +468,7 @@ static inline void *MPIR_Handle_get_ptr_indirect( int, MPIR_Object_alloc_t * ); #define MPIR_Op_get_ptr(a,ptr) MPIR_Getb_ptr(Op,a,0x000000ff,ptr) #define MPIR_Info_get_ptr(a,ptr) MPIR_Getb_ptr(Info,a,0x03ffffff,ptr) #define MPIR_Win_get_ptr(a,ptr) MPIR_Get_ptr(Win,a,ptr) -#define MPIR_Request_get_ptr(a,ptr) MPIR_Get_ptr_bucket(Request,a,ptr) +#define MPIR_Request_get_ptr(a,ptr) MPIR_Get_ptr(Request,a,ptr) #define MPIR_Grequest_class_get_ptr(a,ptr) MPIR_Get_ptr(Grequest_class,a,ptr) /* Keyvals have a special format. This is roughly MPIR_Get_ptrb, but the handle index is in a smaller bit field. In addition, diff --git a/src/include/mpir_request.h b/src/include/mpir_request.h index a2594dbe9ff..a4c12f04d91 100644 --- a/src/include/mpir_request.h +++ b/src/include/mpir_request.h @@ -159,16 +159,16 @@ struct MPIR_Request { }; #define MPIR_REQUEST_PREALLOC 8 -#define MPIR_REQUEST_MEM_NBUCKETS 64 -extern MPIR_Object_alloc_t MPIR_Request_mem[]; +extern MPIR_Object_alloc_t MPIR_Request_mem; /* Preallocated request objects */ -extern MPIR_Request MPIR_Request_direct[MPIR_REQUEST_MEM_NBUCKETS][MPIR_REQUEST_PREALLOC]; +extern MPIR_Request MPIR_Request_direct[]; -void MPIR_Request_mem_init(); - -static inline void MPIR_Request_init(MPIR_Request *req, MPIR_Request_kind_t kind){ +static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind) +{ + MPIR_Request *req; + req = MPIR_Handle_obj_alloc(&MPIR_Request_mem); if (req != NULL) { MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE, "allocated request, handle=0x%08x", req->handle); @@ -222,26 +222,6 @@ static inline void MPIR_Request_init(MPIR_Request *req, MPIR_Request_kind_t kind /* FIXME: This fails to fail if debugging is turned off */ MPL_DBG_MSG(MPIR_DBG_REQUEST,TYPICAL,"unable to allocate a request"); } -} - -static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind) -{ - MPIR_Request *req; - - req = MPIR_Handle_obj_alloc(&MPIR_Request_mem[0]); - MPIR_Request_init(req, kind); - - - return req; -} - -static inline MPIR_Request *MPIR_Request_create_buck(MPIR_Request_kind_t kind, int bucket) -{ - MPIR_Request *req; - - req = MPIR_Handle_obj_alloc(&MPIR_Request_mem[bucket]); - MPIR_Request_init(req, kind); - return req; } @@ -254,7 +234,7 @@ static inline MPIR_Request *MPIR_Request_create_buck(MPIR_Request_kind_t kind, i static inline void MPIR_Request_free(MPIR_Request *req) { - int inuse, bucket; + int inuse; MPIR_Request_release_ref(req, &inuse); @@ -306,8 +286,8 @@ static inline void MPIR_Request_free(MPIR_Request *req) } MPID_Request_destroy_hook(req); - bucket = HANDLE_GET_BUCKET(req->handle); - MPIR_Handle_obj_free(&MPIR_Request_mem[bucket], req); + + MPIR_Handle_obj_free(&MPIR_Request_mem, req); } } diff --git a/src/mpi/init/initthread.c b/src/mpi/init/initthread.c index 314ddfd1f87..963d31c5b78 100644 --- a/src/mpi/init/initthread.c +++ b/src/mpi/init/initthread.c @@ -214,10 +214,8 @@ do { \ MPIR_Assert(err == 0); \ MPID_Thread_mutex_create(&MPIR_Grequest_class_mem.lock, &err); \ MPIR_Assert(err == 0); \ - for(int i = 0; i < MPIR_REQUEST_MEM_NBUCKETS; i++) { \ - MPID_Thread_mutex_create(&MPIR_Request_mem[i].lock, &err); \ - MPIR_Assert(err == 0); \ - } \ + MPID_Thread_mutex_create(&MPIR_Request_mem.lock, &err); \ + MPIR_Assert(err == 0); \ MPID_Thread_mutex_create(&MPIR_Win_mem.lock, &err); \ MPIR_Assert(err == 0); \ } while(0) @@ -242,10 +240,8 @@ do { \ MPIR_Assert(err == 0); \ MPID_Thread_mutex_destroy(&MPIR_Grequest_class_mem.lock, &err); \ MPIR_Assert(err == 0); \ - for(int i = 0; i < MPIR_REQUEST_MEM_NBUCKETS; i++) { \ - MPID_Thread_mutex_destroy(&MPIR_Request_mem[i].lock, &err); \ - MPIR_Assert(err == 0); \ - } \ + MPID_Thread_mutex_destroy(&MPIR_Request_mem.lock, &err); \ + MPIR_Assert(err == 0); \ MPID_Thread_mutex_destroy(&MPIR_Win_mem.lock, &err); \ MPIR_Assert(err == 0); \ } while(0) @@ -409,8 +405,6 @@ int MPIR_Init_thread(int * argc, char ***argv, int required, int * provided) int exit_init_cs_on_failure = 0; MPIR_Info *info_ptr; - - MPIR_Request_mem_init(); /* For any code in the device that wants to check for runtime decisions on the value of isThreaded, set a provisional value here. We could let the MPID_Init routine override this */ diff --git a/src/mpi/pt2pt/mpir_request.c b/src/mpi/pt2pt/mpir_request.c index 12dc7a458e9..9453e58cf36 100644 --- a/src/mpi/pt2pt/mpir_request.c +++ b/src/mpi/pt2pt/mpir_request.c @@ -9,28 +9,10 @@ /* style:PMPIuse:PMPI_Status_f2c:2 sig:0 */ -MPIR_Request MPIR_Request_direct[MPIR_REQUEST_MEM_NBUCKETS][MPIR_REQUEST_PREALLOC]; -MPIR_Object_alloc_t MPIR_Request_mem[MPIR_REQUEST_MEM_NBUCKETS] = {{0}};; - -#undef FUNCNAME -#define FUNCNAME MPIR_Request_mem_init -#undef FCNAME -#define FCNAME MPL_QUOTE(FUNCNAME) - -void MPIR_Request_mem_init() -{ - for(int i = 0; i < MPIR_REQUEST_MEM_NBUCKETS; i++) { - MPIR_Request_mem[i].avail = NULL; - MPIR_Request_mem[i].initialized = 0; - MPIR_Request_mem[i].indirect = NULL; - MPIR_Request_mem[i].indirect_size = 0; - MPIR_Request_mem[i].kind = MPIR_REQUEST; - MPIR_Request_mem[i].size = sizeof(MPIR_Request); - MPIR_Request_mem[i].direct = &MPIR_Request_direct[i][0]; - MPIR_Request_mem[i].direct_size = MPIR_REQUEST_PREALLOC; - MPIR_Request_mem[i].bucket = i; - } -} +MPIR_Request MPIR_Request_direct[MPIR_REQUEST_PREALLOC] = {{0}}; +MPIR_Object_alloc_t MPIR_Request_mem = { + 0, 0, 0, 0, MPIR_REQUEST, sizeof(MPIR_Request), MPIR_Request_direct, + MPIR_REQUEST_PREALLOC }; #undef FUNCNAME #define FUNCNAME MPIR_Progress_wait_request diff --git a/src/mpid/ch4/netmod/ofi/ofi_events.h b/src/mpid/ch4/netmod/ofi/ofi_events.h index f387fac1848..9d26a810370 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_events.h +++ b/src/mpid/ch4/netmod/ofi/ofi_events.h @@ -289,7 +289,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_ssend_ack_event(struct fi_cq_tagged_entry MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_NETMOD_OFI_SSEND_ACK_EVENT); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_NETMOD_OFI_SSEND_ACK_EVENT); mpi_errno = MPIDI_OFI_send_event(NULL, req->signal_req); - MPIDI_OFI_ssendack_request_t_tls_free(sreq); + MPIDI_OFI_ssendack_request_t_tls_free(req); MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_NETMOD_OFI_SSEND_ACK_EVENT); return mpi_errno; } diff --git a/src/mpid/ch4/netmod/ofi/ofi_impl.h b/src/mpid/ch4/netmod/ofi/ofi_impl.h index 42fd01d19f6..17b703a6eb1 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_impl.h +++ b/src/mpid/ch4/netmod/ofi/ofi_impl.h @@ -42,12 +42,8 @@ "Cannot allocate Ssendack Request"); \ } while (0) -#define MPIDI_OFI_ssendack_request_t_tls_free(sreq) \ -do { \ - MPIR_Request *tmp_req = (MPIR_Request *) sreq; \ - int bucket = HANDLE_GET_BUCKET(tmp_req->handle); \ - MPIR_Handle_obj_free(&MPIR_Request_mem[bucket], (sreq)); \ -} while (0) +#define MPIDI_OFI_ssendack_request_t_tls_free(req) \ + MPIR_Handle_obj_free(&MPIR_Request_mem, (req)) #define MPIDI_OFI_ssendack_request_t_alloc_and_init(req) \ do { \ @@ -323,7 +319,7 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_win_datatype_unmap(MPIDI_OFI_win_datatyp MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_win_request_complete(MPIDI_OFI_win_request_t * req) { - int count, bucket; + int count; MPIR_Assert(HANDLE_GET_MPI_KIND(req->handle) == MPIR_REQUEST); MPIR_Object_release_ref(req, &count); MPIR_Assert(count >= 0); @@ -332,8 +328,7 @@ MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_win_request_complete(MPIDI_OFI_win_reque MPIDI_OFI_win_datatype_unmap(&req->noncontig->origin_dt); MPIDI_OFI_win_datatype_unmap(&req->noncontig->result_dt); MPL_free(req->noncontig); - bucket = HANDLE_GET_BUCKET(req->handle); - MPIR_Handle_obj_free(&MPIR_Request_mem[bucket], (req)); + MPIR_Handle_obj_free(&MPIR_Request_mem, (req)); } } diff --git a/src/mpid/ch4/netmod/ofi/ofi_probe.h b/src/mpid/ch4/netmod/ofi/ofi_probe.h index fbf4260f089..433ab64861c 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_probe.h +++ b/src/mpid/ch4/netmod/ofi/ofi_probe.h @@ -68,10 +68,8 @@ static inline int MPIDI_OFI_do_iprobe(int source, case MPIDI_OFI_PEEK_NOT_FOUND: *flag = 0; - if (message) { - int bucket = HANDLE_GET_BUCKET(rreq->handle); - MPIR_Handle_obj_free(&MPIR_Request_mem[bucket], rreq); - } + if (message) + MPIR_Handle_obj_free(&MPIR_Request_mem, rreq); goto fn_exit; break; From 3f97f0d8cc7bbd2a0b507131ff8ca7bf22083904 Mon Sep 17 00:00:00 2001 From: Ken Raffenetti Date: Wed, 15 Aug 2018 13:39:06 -0500 Subject: [PATCH 2/2] request: Implement per-thread request cache This will leak all cached requests currently. --- src/include/mpir_request.h | 23 ++++++++++++++++++++--- src/include/mpir_thread.h | 4 ++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/include/mpir_request.h b/src/include/mpir_request.h index a4c12f04d91..30bc80d1038 100644 --- a/src/include/mpir_request.h +++ b/src/include/mpir_request.h @@ -167,8 +167,16 @@ extern MPIR_Request MPIR_Request_direct[]; static inline MPIR_Request *MPIR_Request_create(MPIR_Request_kind_t kind) { MPIR_Request *req; - - req = MPIR_Handle_obj_alloc(&MPIR_Request_mem); + MPIR_Per_thread_t *per_thread = NULL; + int err; + + MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, + MPIR_Per_thread, per_thread, &err); + if (per_thread->request_cache_cnt > 0) { + req = per_thread->request_cache[--(per_thread->request_cache_cnt)]; + } else { + req = MPIR_Handle_obj_alloc(&MPIR_Request_mem); + } if (req != NULL) { MPL_DBG_MSG_P(MPIR_DBG_REQUEST,VERBOSE, "allocated request, handle=0x%08x", req->handle); @@ -287,7 +295,16 @@ static inline void MPIR_Request_free(MPIR_Request *req) MPID_Request_destroy_hook(req); - MPIR_Handle_obj_free(&MPIR_Request_mem, req); + MPIR_Per_thread_t *per_thread = NULL; + int err; + + MPID_THREADPRIV_KEY_GET_ADDR(MPIR_ThreadInfo.isThreaded, MPIR_Per_thread_key, + MPIR_Per_thread, per_thread, &err); + if (per_thread->request_cache_cnt < MPIR_THREAD_REQUEST_CACHE) { + per_thread->request_cache[(per_thread->request_cache_cnt)++] = req; + } else { + MPIR_Handle_obj_free(&MPIR_Request_mem, req); + } } } diff --git a/src/include/mpir_thread.h b/src/include/mpir_thread.h index 5e2e809f93c..4be42dd3a91 100644 --- a/src/include/mpir_thread.h +++ b/src/include/mpir_thread.h @@ -51,6 +51,7 @@ extern OPA_int_t num_server_thread; /* arbitrary, just needed to avoid cleaning up heap allocated memory at thread * destruction time */ #define MPIR_STRERROR_BUF_SIZE (1024) +#define MPIR_THREAD_REQUEST_CACHE (64) /* This structure contains all thread-local variables and will be zeroed at * allocation time. @@ -71,6 +72,9 @@ typedef struct { #ifdef MPICH_THREAD_USE_MDTA MPIR_Thread_sync_t sync; #endif + + MPIR_Request *request_cache[MPIR_THREAD_REQUEST_CACHE]; + unsigned int request_cache_cnt; #endif } MPIR_Per_thread_t;