diff --git a/Makefile.in b/Makefile.in index 8621a347..b696ae12 100644 --- a/Makefile.in +++ b/Makefile.in @@ -77,6 +77,7 @@ XNI_DIR = $(SRC_DIR)/xni # AR = ar AUTOCONF = autoconf +AUTOHEADER = autoheader CC = $(CC_EXE) CP = cp CPR = cp -r $(CP_PRESERVE_OPTS) @@ -422,7 +423,7 @@ CONFIG_FLAGS = $(shell grep "\-\-prefix" config.log | cut -f 5- -d ' ') xdd-$(XDDVERSION): git archive --format=tar --prefix=$@/ master | $(TAR) xf - $(FIND) $@ -name .gitignore -exec rm {} \; - cd $@ && $(AUTOCONF) + cd $@ && $(AUTOCONF) && $(AUTOHEADER) xdd-$(XDDVERSION).tar.gz: xdd-$(XDDVERSION) $(TAR) cfz $@ $< diff --git a/README b/README index 2d0359ac..99540014 100644 --- a/README +++ b/README @@ -23,6 +23,7 @@ software build process. We also suggest enabling debug mode. On supported OS: > autoconf +> autoheader > ./configure --enable-debug > make > make install diff --git a/configure.ac b/configure.ac index 66869d36..0ff8d4ed 100644 --- a/configure.ac +++ b/configure.ac @@ -24,8 +24,7 @@ dnl Process this file with autoconf to produce a configure script dnl AC_PREREQ(2.59) AC_INIT([xdd], [7.0.0.pre-rc27], bug@xdd.org) -AC_CONFIG_HEADER([src/compat/config.h]) - +AC_CONFIG_HEADERS([src/compat/config.h]) dnl dnl Include useful macros @@ -282,13 +281,15 @@ AC_ARG_ENABLE([numa], [enable_numa="$enableval"], [enable_numa=yes]) if test "$enable_numa" = "yes" ; then - AC_DEFINE(HAVE_ENABLE_NUMA) + AC_DEFINE(HAVE_ENABLE_NUMA,1,[Define to 1 if you have libnuma.]) AC_SEARCH_LIBS([numa_node_to_cpus], [numa], - AC_DEFINE(HAVE_NUMA_NODE_TO_CPUS), + AC_DEFINE(HAVE_NUMA_NODE_TO_CPUS,1, + [Define to 1 if you have the `numa_node_to_cpus' function.]), AC_MSG_ERROR([Function numa_node_to_cpus not found. Use --disable-numa])) AC_SEARCH_LIBS([numa_allocate_cpumask], [numa], - AC_DEFINE(HAVE_NUMA_ALLOCATE_CPUMASK), + AC_DEFINE(HAVE_NUMA_ALLOCATE_CPUMASK,1, + [Define to 1 if you have the `numa_allocate_cpumask' function.]), AC_MSG_ERROR([Function numa_allocate_cpumask not found. 
Use --disable-numa.])) fi @@ -315,10 +316,12 @@ AC_ARG_ENABLE([ib], [enable_ib="$enableval"], [enable_ib=yes]) if test "x$enable_ib" = "xyes" ; then - AC_DEFINE(HAVE_ENABLE_IB) + AC_DEFINE(HAVE_ENABLE_IB,1, + [Define to 1 to enable InfiniBand network support.]) AC_CHECK_HEADERS([infiniband/verbs.h]) AC_SEARCH_LIBS([ibv_get_device_list], [ibverbs], - AC_DEFINE(HAVE_IBV_GET_DEVICE_LIST), + AC_DEFINE(HAVE_IBV_GET_DEVICE_LIST,1, + [Define to 1 if you have the `ibv_get_device_list' function.]), AC_MSG_ERROR([Function ibv_get_device_list not found. Use --disable-ib.])) fi @@ -336,7 +339,8 @@ AC_ARG_ENABLE([xfs], [enable_xfs=yes]) if test "$enable_xfs" = "yes" ; then - AC_DEFINE(HAVE_ENABLE_XFS) + AC_DEFINE(HAVE_ENABLE_XFS,1, + [Define to 1 to enable XFS preallocation support.]) xfs_header="no" AC_CHECK_HEADERS([xfs/xfs.h] [xfs/libxfs.h], xfs_header="yes"; break, []) @@ -344,7 +348,8 @@ if test "$enable_xfs" = "yes" ; then AC_MSG_ERROR([No valid XFS header found (sometimes this is caused by missing libuuid headers). Use --disable-xfs.]) fi AC_CHECK_DECLS([xfsctl], - AC_DEFINE(HAVE_XFSCTL), + AC_DEFINE(HAVE_XFSCTL,1, + [Define to 1 if you have the `xfsctl' function.]), AC_MSG_ERROR([Function xfsctl not found. Use --disable-xfs.]), [#if HAVE_XFS_XFS_H #include @@ -431,14 +436,14 @@ AX_PYTHON() dnl dnl Check for export-controlled packages dnl -AC_CHECK_FILES([contrib/pycrypto-2.6.1.tar.gz], - AC_SUBST(PYCRYPTO_DIST, [contrib/pycrypto-2.6.1.tar.gz]), - [AX_PYTHON_MODULE([Crypto], - [XDD requires PyCrypto. Due to U.S. Export Controls, PyCrypto must be installed separately from XDD. See README.crypto])]) -AC_CHECK_FILES([contrib/ecdsa-0.11.tar.gz], - AC_SUBST(ECDSA_DIST, [contrib/ecdsa-0.11.tar.gz]), - [AX_PYTHON_MODULE([ecdsa], - [XDD requires Python ECDSA. Due to U.S. Export Controls, ECDSA must be installed separately from XDD. 
See README.crypto])]) +AC_CHECK_FILE([contrib/pycrypto-2.6.1.tar.gz], + AC_SUBST(PYCRYPTO_DIST, [contrib/pycrypto-2.6.1.tar.gz]), + [AX_PYTHON_MODULE([Crypto], + [XDD requires PyCrypto. Due to U.S. Export Controls, PyCrypto must be installed separately from XDD. See README.crypto])]) +AC_CHECK_FILE([contrib/ecdsa-0.11.tar.gz], + AC_SUBST(ECDSA_DIST, [contrib/ecdsa-0.11.tar.gz]), + [AX_PYTHON_MODULE([ecdsa], + [XDD requires Python ECDSA. Due to U.S. Export Controls, ECDSA must be installed separately from XDD. See README.crypto])]) dnl dnl Check for packages XDD requires directly diff --git a/contrib/buildbot_gen_test_config.sh b/contrib/buildbot_gen_test_config.sh index e2da2ff6..57b17fdd 100755 --- a/contrib/buildbot_gen_test_config.sh +++ b/contrib/buildbot_gen_test_config.sh @@ -4,7 +4,8 @@ # # Step 1: Generate a local test_config # Step 2: autoconf -# Step 3: ./configure +# Step 3: autoheader +# Step 4: ./configure # # @@ -128,4 +129,5 @@ EOF # Perform the configure # autoconf +autoheader ./configure $configure_flags diff --git a/doc/HOWTO_release.txt b/doc/HOWTO_release.txt index 33f2b315..b16d3ee4 100644 --- a/doc/HOWTO_release.txt +++ b/doc/HOWTO_release.txt @@ -19,6 +19,7 @@ available: > cd xdd > vi configure.ac # and edit the VERSION variable > autoconf + > autoheader > ./configure --prefix=$HOME/sw/xdd > make baseversion > make @@ -55,5 +56,6 @@ The resulting tar file now contains the released code only. 9. Prepare for the next release > vi configure.ac # And set the version in AC_INIT to pre-nextrelease > autoconf + > autoheader > git commit -a > git push diff --git a/src/base/io_buffers.c b/src/base/io_buffers.c index 28748c7e..f018531a 100644 --- a/src/base/io_buffers.c +++ b/src/base/io_buffers.c @@ -22,8 +22,7 @@ /*----------------------------------------------------------------------------*/ /* xdd_init_io_buffers() - set up the I/O buffers * This routine will allocate the memory used as the I/O buffer for a Worker - * Thread. 
The pointer to the buffer (wd_bufp) and the size of the buffer - * (wd_buf_size) are set in the Worker Data Struct. + * Thread. * * This routine will return the pointer to the buffer upon success. If for * some reason the buffer cannot be allocated then NULL is returned. @@ -34,32 +33,17 @@ * * The size of the buffer depends on whether it is being used for network * I/O as in an End-to-end operation. For End-to-End operations, the size - * of the buffer is 1 page larger than for non-End-to-End operations. + * of the buffer is 2 pages larger than for non-End-to-End operations. * * For normal (non-E2E operations) the buffer pointers are as follows: - * |<----------- wd_buf_size = N Pages ----------------->| + * |<------------------- N Pages ----------------------->| * +-----------------------------------------------------+ * | data buffer | * | transfer size (td_xfer_size) rounded up to N pages | - * |<-wd_bufp | - * |<-task_datap | * +-----------------------------------------------------+ - * - * For End-to-End operations, the buffer pointers are as follows: - * |<------------------- wd_buf_size = N+1 Pages ------------------------>| - * +----------------+-----------------------------------------------------+ - * |<----1 page---->| transfer size (td_xfer_size) rounded up to N pages | - * |<-wd_bufp |<-task_datap | - * | | E2E | E2E | - * | |<-Header->| data buffer | - * +-----*----------*-----------------------------------------------------+ - * ^ ^ - * ^ +-e2e_datap - * +-e2e_hdrp */ unsigned char * -xdd_init_io_buffers(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to Target Data +xdd_init_io_buffers(target_data_t *tdp) { unsigned char *bufp; // Generic Buffer Pointer void *shmat_status; // Status of shmat() int buf_shmid; // Shared Memory ID @@ -70,25 +54,14 @@ xdd_init_io_buffers(worker_data_t *wdp) { LPVOID lpMsgBuf; /* Used for the error messages */ #endif - tdp = wdp->wd_tdp; - wdp->wd_bufp = NULL; - wdp->wd_buf_size = 0; - // Calaculate the number of 
pages needed for a buffer page_size = getpagesize(); pages = tdp->td_xfer_size / page_size; if (tdp->td_xfer_size % page_size) pages++; // Round up to page size if ((tdp->td_target_options & TO_ENDTOEND)) { - // Add one page for the e2e header - pages++; - - // If its XNI, add another page for XNI, better would be for XNI to - // pack all of the header data (and do the hton, ntoh calls) - xdd_plan_t *planp = tdp->td_planp; - if (PLAN_ENABLE_XNI & planp->plan_options) { - pages++; - } + // Add one page for the XNI header + pages++; } @@ -171,8 +144,9 @@ xdd_init_io_buffers(worker_data_t *wdp) { /* Lock all pages in memory */ xdd_lock_memory(bufp, buffer_size, "RW BUFFER"); - wdp->wd_bufp = bufp; - wdp->wd_buf_size = buffer_size; + // the size of each buffer must be the same + assert(0 == tdp->io_buffer_size || (size_t)buffer_size == tdp->io_buffer_size); + tdp->io_buffer_size = buffer_size; return(bufp); } /* end of xdd_init_io_buffers() */ diff --git a/src/base/target_cleanup.c b/src/base/target_cleanup.c index 134f52ce..8812571d 100644 --- a/src/base/target_cleanup.c +++ b/src/base/target_cleanup.c @@ -42,11 +42,33 @@ xdd_target_thread_cleanup(target_data_t *tdp) { #endif } - /* On e2e XNI, part of cleanup includes closing the source side */ - if ((TO_ENDTOEND & tdp->td_target_options) && - (PLAN_ENABLE_XNI & tdp->td_planp->plan_options)) { - xni_close_connection(&tdp->td_e2ep->xni_td_conn); - } + // if this is an e2e transfer + if (xint_is_e2e(tdp)) { + // Disconnect + xint_e2e_disconnect(tdp); + struct xint_e2e * const e2ep = tdp->td_e2ep; + + // Free the connections + free(e2ep->xni_td_connections); + e2ep->xni_td_connections = NULL; + + // Destroy the connection mutexes while the connection count is still valid + for (int i = 0; i < e2ep->xni_td_connections_count; i++) { + int error = pthread_mutex_destroy(e2ep->xni_td_connection_mutexes+i); + assert(!error); + } + free(e2ep->xni_td_connection_mutexes); + e2ep->xni_td_connection_mutexes = NULL; + e2ep->xni_td_connections_count = 0; + } + + // Free the
I/O buffers + for (size_t i = 0; i < tdp->io_buffers_count; i++) { + free(tdp->io_buffers[i]); + } + free(tdp->io_buffers); + tdp->io_buffers = NULL; + tdp->io_buffers_count = 0; } // End of xdd_target_thread_cleanup() diff --git a/src/base/target_init.c b/src/base/target_init.c index d450fd03..a0367afd 100644 --- a/src/base/target_init.c +++ b/src/base/target_init.c @@ -15,6 +15,46 @@ */ #include "xint.h" +static size_t +xint_get_buffer_count(const target_data_t *tdp) +{ + size_t count = tdp->td_queue_depth; + + return count; +} + +static int +xint_allocate_io_buffers(target_data_t* tdp) +{ + const size_t buffer_count = xint_get_buffer_count(tdp); + + unsigned char **iobufs = calloc(buffer_count, sizeof(*iobufs)); + if (!iobufs) { + fprintf(xgp->errout,"%s: xint_allocate_io_buffers: Target %d: ERROR: Failed to allocate I/O buffer array.\n", + xgp->progname, + tdp->td_target_number); + return XDD_RC_BAD; + } + + for (size_t i = 0; i < buffer_count; i++) { + // allocate an I/O buffer + unsigned char *bufp = xdd_init_io_buffers(tdp); + if (bufp == NULL) { + fprintf(xgp->errout,"%s: xint_allocate_io_buffers: Target %d: ERROR: Failed to allocate I/O buffer.\n", + xgp->progname, + tdp->td_target_number); + return XDD_RC_BAD; + } + + // save allocated buffer into target's array of buffers + iobufs[i] = bufp; + } + + tdp->io_buffers = iobufs; + tdp->io_buffers_count = buffer_count; + return XDD_RC_GOOD; +} + /*----------------------------------------------------------------------------*/ /* xint_target_init() - Initialize a Target Thread * This subroutine will open the target file and perform some initial sanity @@ -32,10 +72,7 @@ */ int32_t xint_target_init(target_data_t *tdp) { - int32_t status; // Status of function calls -// nclk_t CurrentLocalTime; // Used the init the Global Clock -// nclk_t TimeDelta; // Used the init the Global Clock -// uint32_t sleepseconds; + int status; // Status of function calls #if (AIX) @@ -87,21 +124,6 @@ xint_target_init(target_data_t 
*tdp) { if (xgp->max_errors == 0) xgp->max_errors = tdp->td_target_ops; - /* If we are synchronizing to a Global Clock, let's synchronize - * here so that we all start at *roughly* the same time - */ - // FIXME - TOM review to see if this can go in plan_init -// if (xgp->gts_addr) { -// nclk_now(&CurrentLocalTime); -// while (CurrentLocalTime < xgp->ActualLocalStartTime) { -// TimeDelta = ((xgp->ActualLocalStartTime - CurrentLocalTime)/BILLION); -// if (TimeDelta > 2) { -// sleepseconds = TimeDelta - 2; -// sleep(sleepseconds); -// } -// nclk_now(&CurrentLocalTime); -// } -// } if (xgp->global_options & GO_TIMER_INFO) { fprintf(xgp->errout,"Starting now...\n"); fflush(xgp->errout); @@ -125,32 +147,23 @@ xint_target_init(target_data_t *tdp) { } // Special setup for an End-to-End operation - if (tdp->td_target_options & TO_ENDTOEND) { - status = xdd_e2e_target_init(tdp); + if (xint_is_e2e(tdp)) { + status = xint_e2e_target_init(tdp); if (status) return(-1); } + // Allocate I/O buffers and store pointers into tdp + status = xint_allocate_io_buffers(tdp); + if (status != XDD_RC_GOOD) { + return -1; + } + // Start the WorkerThreads status = xint_target_init_start_worker_threads(tdp); if (status) return(-1); - // If this is XNI, perform the connection here - xdd_plan_t *planp = tdp->td_planp; - if (PLAN_ENABLE_XNI & planp->plan_options) { - /* Perform the XNI accept/connect */ - if (tdp->td_target_options & TO_E2E_DESTINATION) { - status = xint_e2e_dest_connect(tdp); - } else { - status = xint_e2e_src_connect(tdp); - } - if (0 != status) { - fprintf(xgp->errout, "Failure during XNI connection.\n"); - return -1; - } - } - // Display the information for this target xdd_target_info(xgp->output, tdp); if (xgp->csvoutput) @@ -190,22 +203,24 @@ xint_target_init_barriers(target_data_t *tdp) { sprintf(tmpname,"T%04d:target_worker_thread_init_barrier",tdp->td_target_number); status += xdd_init_barrier(tdp->td_planp, &tdp->td_target_worker_thread_init_barrier,2, tmpname); - // The 
Target Pass barrier - sprintf(tmpname,"T%04d>targetpass_worker_thread_passcomplete_barrier",tdp->td_target_number); - status += xdd_init_barrier(tdp->td_planp, &tdp->td_targetpass_worker_thread_passcomplete_barrier,tdp->td_queue_depth+1,tmpname); - - // The Target Pass E2E EOF Complete barrier - only initialized when an End-to-End operation is running - if (tdp->td_target_options & TO_ENDTOEND) { - sprintf(tmpname,"T%04d>targetpass_worker_thread_eofcomplete_barrier",tdp->td_target_number); - status += xdd_init_barrier(tdp->td_planp, &tdp->td_targetpass_worker_thread_eofcomplete_barrier,2,tmpname); - } - // The Target Start Trigger barrier if (tdp->td_target_options & TO_WAITFORSTART) { // If we are expecting a Start Trigger then we need to init the starttrigger barrier sprintf(tmpname,"T%04d>target_target_starttrigger_barrier",tdp->td_target_number); status += xdd_init_barrier(tdp->td_planp, &tdp->td_trigp->target_target_starttrigger_barrier,2,tmpname); } + // Barrier to wait for all worker threads to connect (E2E only) + if (xint_is_e2e(tdp)) { + snprintf(tmpname, + sizeof(tmpname), + "T%04d>target_worker_thread_connected_barrier", + tdp->td_target_number); + status += xdd_init_barrier(tdp->td_planp, + &tdp->td_target_worker_thread_connected_barrier, + (tdp->td_queue_depth + 1), // workers + target + tmpname); + } + // The "td_counters_mutex" is used by the WorkerThreads when updating the counter information in the Target Thread Data status += pthread_mutex_init(&tdp->td_counters_mutex, 0); @@ -261,16 +276,24 @@ xint_target_init_start_worker_threads(target_data_t *tdp) { // Start a WorkerThread and wait for it to initialize wdp->wd_worker_number = q; - if (tdp->td_target_options & TO_ENDTOEND) { + if (xint_is_e2e(tdp)) { // Find an e2e entry that has a valid port count while (0 == tdp->td_e2ep->e2e_address_table[e2e_addr_index].port_count) { e2e_addr_index++; } //assert(e2e_addr_index < p->e2ep->e2e_address_table->number_of_entries); - - 
wdp->wd_e2ep->e2e_dest_hostname = tdp->td_e2ep->e2e_address_table[e2e_addr_index].hostname; - wdp->wd_e2ep->e2e_dest_port = tdp->td_e2ep->e2e_address_table[e2e_addr_index].base_port + e2e_addr_port; + + if (xgp->global_options & GO_REALLYVERBOSE) { + fprintf(xgp->errout, + "Target Init: Target %d: assigning hostname %s to worker_thread %d\n", + tdp->td_target_number, + tdp->td_e2ep->e2e_address_table[e2e_addr_index].hostname, + wdp->wd_worker_number); + } + + // Assign this address to the current worker + wdp->wd_e2ep->address_table_index = e2e_addr_index; // Set the WorkerThread Numa node if possible #if defined(HAVE_CPU_SET_T) && defined(HAVE_PTHREAD_ATTR_SETAFFINITY_NP) @@ -284,13 +307,9 @@ xint_target_init_start_worker_threads(target_data_t *tdp) { e2e_addr_index++; e2e_addr_port = 0; } - if (xgp->global_options & GO_REALLYVERBOSE) - fprintf(stderr,"Target Init: Target %d: assigning hostname %s port %d to worker_thread %d\n", - tdp->td_target_number, wdp->wd_e2ep->e2e_dest_hostname, - wdp->wd_e2ep->e2e_dest_port, wdp->wd_worker_number); - } - + } // end of e2e initialization + status = pthread_create(&wdp->wd_thread, &worker_thread_attr, xdd_worker_thread, wdp); if (status) { fprintf(xgp->errout,"%s: xdd_target_init_start_worker_threads: ERROR: Cannot create worker_thread %d for target number %d name '%s' - Error number %d\n", @@ -328,6 +347,13 @@ xint_target_init_start_worker_threads(target_data_t *tdp) { wdp = wdp->wd_next_wdp; } // End of FOR loop that starts all worker_threads for this target + // Wait for worker threads to finish connecting (E2E only) + if (xint_is_e2e(tdp)) { + xdd_barrier(&tdp->td_target_worker_thread_connected_barrier, + &tdp->td_occupant, + 1); + } + if (xgp->global_options & GO_REALLYVERBOSE) { fprintf(xgp->errout,"\n%s: xdd_target_init_start_worker_threads: Target %d ALL %d WorkerThreads started\n", xgp->progname, diff --git a/src/base/target_offset_table.c b/src/base/target_offset_table.c index 37242ef2..c668280a 100644 --- 
a/src/base/target_offset_table.c +++ b/src/base/target_offset_table.c @@ -49,14 +49,14 @@ int tot_init(tot_t** table, size_t queue_depth, size_t num_reqs) // Initialize the memory in the dumbest way possible #if HAVE_VALLOC *table = valloc(sizeof(**table) + num_entries * sizeof(tot_entry_t)); - rc = (NULL != table); + rc = (NULL == *table); #elif HAVE_POSIX_MEMALIGN rc = posix_memalign((void**)table, sysconf(_SC_PAGESIZE), sizeof(**table) + num_entries * sizeof(tot_entry_t)); #else *table = malloc(sizeof(**table) + num_entries * sizeof(tot_entry_t)); - rc = (NULL != table); + rc = (NULL == *table); #endif if (0 != rc) return -1; diff --git a/src/base/target_ttd_before_pass.c b/src/base/target_ttd_before_pass.c index 24ceb0cd..2e0955a5 100644 --- a/src/base/target_ttd_before_pass.c +++ b/src/base/target_ttd_before_pass.c @@ -142,8 +142,6 @@ xdd_raw_before_pass(target_data_t *tdp) { rawp = tdp->td_rawp; // Initialize the read-after-write variables - rawp->raw_msg_sent = 0; - rawp->raw_msg_recv = 0; rawp->raw_msg_last_sequence = 0; rawp->raw_msg.sequence = 0; rawp->raw_prev_loc = 0; @@ -167,12 +165,7 @@ xdd_e2e_before_pass(target_data_t *tdp) { return; // Initialize the read-after-write variables - tdp->td_e2ep->e2e_msg_sent = 0; - tdp->td_e2ep->e2e_msg_recv = 0; tdp->td_e2ep->e2e_msg_sequence_number = 0; - tdp->td_e2ep->e2e_prev_loc = 0; - tdp->td_e2ep->e2e_prev_len = 0; - tdp->td_e2ep->e2e_data_length = 0; tdp->td_e2ep->e2e_sr_time = 0; } // End of xdd_e2e_before_pass() diff --git a/src/base/worker_thread.c b/src/base/worker_thread.c index 440511da..7e0083f8 100644 --- a/src/base/worker_thread.c +++ b/src/base/worker_thread.c @@ -45,9 +45,34 @@ xdd_worker_thread(void *pin) { // Enter the WorkerThread_Init barrier so that the next WorkerThread can start xdd_barrier(&tdp->td_target_worker_thread_init_barrier,&wdp->wd_occupant,0); - if ( xgp->abort == 1) // Something went wrong during thread initialization so let's just leave + // Only for E2E + if (xint_is_e2e(tdp)) { 
+ // Set up for an e2e operation and establish a connection + status = xint_e2e_worker_init(wdp); + if (-1 == status) { + fprintf(xgp->errout, + "%s: xdd_worker_thread: Target %d WorkerThread %d: E2E %s initialization failed.\n", + xgp->progname, + tdp->td_target_number, + wdp->wd_worker_number, + (tdp->td_target_options & TO_E2E_DESTINATION) ? "DESTINATION":"SOURCE"); + xgp->abort = 1; + } + + // Enter barrier to let the target thread know we have connected + xdd_barrier(&tdp->td_target_worker_thread_connected_barrier, + &wdp->wd_occupant, + 0); + } + + if (1 == xgp->abort) // Something went wrong during thread initialization so let's just leave return(0); + // Set the buffer data pattern for non-E2E operations or E2E sources + if (!xint_is_e2e(tdp) || !(tdp->td_target_options & TO_E2E_DESTINATION)) { + xdd_datapattern_buffer_init(wdp); + } + // If this is a dry run then just exit at this point if (xgp->global_options & GO_DRYRUN) return(0); @@ -76,8 +101,8 @@ xdd_worker_thread(void *pin) { xdd_worker_thread_cleanup(wdp); return(0); case TASK_REQ_EOF: - // E2E Source Side only - send EOF packets to Destination - status = xdd_e2e_eof_source_side(wdp); + // E2E Source Side only + status = xint_e2e_eof_source_side(wdp); if (status) // Only set the status in the Target Data Struct if it is non-zero tdp->td_counters.tc_current_io_status = status; break; diff --git a/src/base/worker_thread_init.c b/src/base/worker_thread_init.c index b8ef60bb..e0a0792f 100644 --- a/src/base/worker_thread_init.c +++ b/src/base/worker_thread_init.c @@ -27,7 +27,6 @@ xdd_worker_thread_init(worker_data_t *wdp) { int32_t status; target_data_t *tdp; // Pointer to this worker_thread's target Data Struct char tmpname[XDD_BARRIER_MAX_NAME_LENGTH]; // Used to create unique names for the barriers - unsigned char *bufp; // Generic Buffer pointer #if defined(HAVE_CPUSET_T) && defined(HAVE_PTHREAD_ATTR_SETAFFINITY_NP) // BWS Print the cpuset @@ -104,70 +103,19 @@ xdd_worker_thread_init(worker_data_t 
*wdp) { wdp->wd_tot_wait.totw_is_released = 0; wdp->wd_tot_wait.totw_nextp = 0; - // Get the I/O buffer - // The xdd_init_io_buffers() routine will set wd_bufp and wd_buf_size to appropriate values. - // The size of the buffer depends on whether it is being used for network - // I/O as in an End-to-end operation. For End-to-End operations, the size - // of the buffer is at least 1 page larger than for non-End-to-End - // operations. - // - // For normal (non-E2E operations) the buffer pointers are as follows: - // |<----------- wd_buf_size = N Pages ----------------->| - // +-----------------------------------------------------+ - // | data buffer | - // | transfer size (td_xfer_size) rounded up to N pages | - // |<-wd_bufp | - // |<-task_datap | - // +-----------------------------------------------------+ - // For ease of reading this code, bufp == wdp->wd_bufp. - // - bufp = xdd_init_io_buffers(wdp); - if (bufp == NULL) { - fprintf(xgp->errout,"%s: xdd_worker_thread_init: Target %d WorkerThread %d: ERROR: Failed to allocate I/O buffer.\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number); - return(-1); - } - // For End-to-End operations, the buffer pointers are as follows: - // |<------------------- wd_buf_size = N+1 Pages ------------------------>| - // +----------------+-----------------------------------------------------+ - // |<----1 page---->| transfer size (td_xfer_size) rounded up to N pages | - // |<-wd_bufp |<-task_datap | - // | | E2E | E2E | - // | |<-Header->| data buffer | - // +-----*----------*-----------------------------------------------------+ - // ^ ^ - // ^ +-e2e_datap - // +-e2e_hdrp - // - if (tdp->td_target_options & TO_ENDTOEND) { - - /* If this e2e transfer is xni, register the buffer */ - xdd_plan_t *planp = wdp->wd_tdp->td_planp; - if (PLAN_ENABLE_XNI & planp->plan_options) { - /* Clear the two sparsely used pages for header data */ - memset(bufp, 0, 2*getpagesize()); - /* Mark everything after the first page as 
reserved */ - size_t reserve = getpagesize(); - xni_register_buffer(tdp->xni_ctx, bufp, wdp->wd_buf_size, reserve, - &wdp->wd_e2ep->xni_wd_buf); - xni_request_target_buffer(tdp->xni_ctx, &wdp->wd_e2ep->xni_wd_buf); - bufp = xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); - } - - /* Use the first page for the E2E header */ - wdp->wd_task.task_datap = bufp + getpagesize(); - wdp->wd_e2ep->e2e_datap = wdp->wd_task.task_datap; - wdp->wd_e2ep->e2e_hdrp = (xdd_e2e_header_t *)(bufp + (getpagesize() - sizeof(xdd_e2e_header_t))); + // Set up I/O buffer pointers + if (xint_is_e2e(tdp)) { + // the buffer for source will be requested after connecting + // the buffer for destination is set after a receive + wdp->wd_e2ep->xni_wd_buf = NULL; + wdp->wd_task.task_datap = NULL; } else { - // For normal (non-E2E) operations the data portion is the entire buffer - wdp->wd_task.task_datap = bufp; + // For non-E2E operations the data portion is the entire buffer + const int32_t workernum = wdp->wd_worker_number; + assert((uint32_t)workernum < tdp->io_buffers_count); + wdp->wd_task.task_datap = tdp->io_buffers[workernum]; } - // Set proper data pattern in Data buffer - xdd_datapattern_buffer_init(wdp); - // Init the WorkerThread-TargetPass WAIT Barrier for this WorkerThread sprintf(tmpname,"T%04d:W%04d>worker_thread_targetpass_wait_barrier",tdp->td_target_number,wdp->wd_worker_number); status = xdd_init_barrier(tdp->td_planp, &wdp->wd_thread_targetpass_wait_for_task_barrier, 2, tmpname); @@ -205,31 +153,6 @@ xdd_worker_thread_init(worker_data_t *wdp) { errno); } - // Set up for an End-to-End operation (if requested) - if (tdp->td_target_options & TO_ENDTOEND) { - if (tdp->td_target_options & (TO_E2E_DESTINATION|TO_E2E_SOURCE)) { - status = xdd_e2e_worker_init(wdp); - } else { // Not sure which side of the E2E this target is supposed to be.... 
- fprintf(xgp->errout,"%s: xdd_worker_thread_init: Target %d WorkerThread %d: Cannot determine which side of the E2E operation this target is supposed to be.\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number); - fprintf(xgp->errout,"%s: xdd_worker_thread_init: Check to be sure that the '-e2e issource' or '-e2e isdestination' was specified for this target.\n", - xgp->progname); - fflush(xgp->errout); - return(-1); - } - - if (status == -1) { - fprintf(xgp->errout,"%s: xdd_worker_thread_init: Target %d WorkerThread %d: E2E %s initialization failed.\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - (tdp->td_target_options & TO_E2E_DESTINATION) ? "DESTINATION":"SOURCE"); - return(-1); - } - } // End of end-to-end setup - // All went well... return(0); diff --git a/src/base/worker_thread_io.c b/src/base/worker_thread_io.c index 8eb24672..947ef6c7 100644 --- a/src/base/worker_thread_io.c +++ b/src/base/worker_thread_io.c @@ -57,7 +57,7 @@ xdd_worker_thread_io(worker_data_t *wdp) { // done receiving data from the Source Side. // If there is Loose or Serial Ordering in effect then we need to release the Next Worker Thread // before returning. 
- if ((tdp->td_target_options & TO_E2E_DESTINATION) && (wdp->wd_e2ep->e2e_hdrp->e2eh_magic == XDD_E2E_EOF)) { + if ((tdp->td_target_options & TO_E2E_DESTINATION) && wdp->wd_e2ep->received_eof) { // Indicate that this Worker Thread has received its EOF Packet pthread_mutex_lock(&wdp->wd_worker_thread_target_sync_mutex); wdp->wd_worker_thread_target_sync |= WTSYNC_EOF_RECEIVED; diff --git a/src/base/worker_thread_ttd_after_io_op.c b/src/base/worker_thread_ttd_after_io_op.c index 72729a97..02e5411a 100644 --- a/src/base/worker_thread_ttd_after_io_op.c +++ b/src/base/worker_thread_ttd_after_io_op.c @@ -212,7 +212,7 @@ xdd_e2e_after_io_op(worker_data_t *wdp) { if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_after_io_op: Target: %d: Worker: %d: ENTER\n", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); if (xgp->global_options & GO_DEBUG_E2E) xdd_show_task(&wdp->wd_task); - if ( (wdp->wd_task.task_io_status > 0) && (tdp->td_target_options & TO_ENDTOEND) ) { + if (wdp->wd_task.task_io_status > 0) { if (tdp->td_target_options & TO_E2E_SOURCE) { // For Serial Ordering, wait for the Previous I/O to complete before the associated Worker Thread releases this Worker Thread. // It is important to note that for Srial Ordering, when we get released by the Previous Worker Thread @@ -222,21 +222,20 @@ if (xgp->global_options & GO_DEBUG_E2E) xdd_show_task(&wdp->wd_task); // operation. 
// Send the data to the Destination machine - wdp->wd_e2ep->e2e_hdrp->e2eh_magic = XDD_E2E_DATA_READY; wdp->wd_current_state |= WORKER_CURRENT_STATE_SRC_SEND; - if (PLAN_ENABLE_XNI & tdp->td_planp->plan_options) { - xint_e2e_xni_send(wdp); - } - else { -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_after_io_op: Target: %d: Worker: %d: Calling xdd_e2e_src_send...\n", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); - xdd_e2e_src_send(wdp); -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_after_io_op: Target: %d: Worker: %d: Returned from xdd_e2e_src_send...\n", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); - } - wdp->wd_current_state &= ~WORKER_CURRENT_STATE_SRC_SEND; + xint_e2e_xni_send(wdp); + wdp->wd_current_state &= ~WORKER_CURRENT_STATE_SRC_SEND; } // End of me being the SOURCE in an End-to-End test - } // End of processing a End-to-End + } + + if (tdp->td_target_options & TO_E2E_DESTINATION) { + // Release the current target buffer to XNI + xni_release_target_buffer(&wdp->wd_e2ep->xni_wd_buf); + wdp->wd_task.task_datap = NULL; + } + if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_after_io_op: Target: %d: Worker: %d: EXIT...\n", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); } // End of xdd_e2e_after_io_op(wdp) @@ -340,8 +339,10 @@ xdd_worker_thread_ttd_after_io_op(worker_data_t *wdp) { // Read-After_Write Processing xdd_raw_after_io_op(wdp); - // End-to-End Processing - xdd_e2e_after_io_op(wdp); + if (xint_is_e2e(tdp)) { + // End-to-End Processing + xdd_e2e_after_io_op(wdp); + } // Extended Statistics xdd_extended_stats(wdp); diff --git a/src/base/worker_thread_ttd_before_io_op.c b/src/base/worker_thread_ttd_before_io_op.c index 527a7a74..971f7646 100644 --- a/src/base/worker_thread_ttd_before_io_op.c +++ b/src/base/worker_thread_ttd_before_io_op.c @@ -201,23 +201,13 @@ 
xdd_e2e_before_io_op(worker_data_t *wdp) { /* ------------------------------------------------------ */ // We are the Destination side of an End-to-End op - wdp->wd_e2ep->e2e_data_recvd = 0; // This will record how much data is recvd in this routine // Lets read a packet of data from the Source side - // The call to xdd_e2e_dest_recv() will block until there is data to read + // The call to xint_e2e_xni_recv() will block until there is data to read wdp->wd_current_state |= WORKER_CURRENT_STATE_DEST_RECEIVE; - if (PLAN_ENABLE_XNI & tdp->td_planp->plan_options) { - status = xint_e2e_xni_recv(wdp); - } - else { + status = xint_e2e_xni_recv(wdp); -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_before_io_op: Target: %d: Worker: %d: Calling xdd_e2e_dest_recv...\n ", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); - - status = xdd_e2e_dest_receive(wdp); - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_before_io_op: Target: %d: Worker: %d: Returning from xdd_e2e_dest_recv: e2e header:\n ", (long long int)pclk_now(),tdp->td_target_number,wdp->wd_worker_number); - } wdp->wd_current_state &= ~WORKER_CURRENT_STATE_DEST_RECEIVE; // If status is "-1" then soemthing happened to the connection - time to leave @@ -225,17 +215,15 @@ if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e return(-1); // Check to see of this is the last message in the transmission - if (wdp->wd_e2ep->e2e_hdrp->e2eh_magic == XDD_E2E_EOF) { // This must be the End of the File + if (wdp->wd_e2ep->received_eof) { // This must be the End of the File return(0); } // Use the hearder.location as the new tdp->td_counters.tc_current_byte_offset and the e2e_header.length as the new my_current_xfer_size for this op // This will allow for the use of "no ordering" on the source side of an e2e operation - wdp->wd_task.task_byte_offset = wdp->wd_e2ep->e2e_hdrp->e2eh_byte_offset; - 
wdp->wd_task.task_xfer_size = wdp->wd_e2ep->e2e_hdrp->e2eh_data_length; - wdp->wd_task.task_op_number = wdp->wd_e2ep->e2e_hdrp->e2eh_sequence_number; - // Record the amount of data received - wdp->wd_e2ep->e2e_data_recvd = wdp->wd_e2ep->e2e_hdrp->e2eh_data_length; + wdp->wd_task.task_byte_offset = xni_target_buffer_target_offset(wdp->wd_e2ep->xni_wd_buf); + wdp->wd_task.task_xfer_size = xni_target_buffer_data_length(wdp->wd_e2ep->xni_wd_buf); + wdp->wd_task.task_op_number = xni_target_buffer_sequence_number(wdp->wd_e2ep->xni_wd_buf); return(0); diff --git a/src/client/info_display.c b/src/client/info_display.c index 3e8d168c..f5f72911 100644 --- a/src/client/info_display.c +++ b/src/client/info_display.c @@ -421,8 +421,13 @@ xdd_target_info(FILE *out, target_data_t *tdp) { return; } // ok - we have a good restart structure - tdp->td_restartp->source_host = tdp->td_e2ep->e2e_src_hostname; // Name of the Source machine - tdp->td_restartp->destination_host = tdp->td_e2ep->e2e_dest_hostname; // Name of the Destination machine + + // Commented out the next line because I removed the + // e2e_dest_hostname field. However, this line couldn't + // have worked anyway because td_e2ep->e2e_dest_hostname + // was never assigned to. Restart has probably been broken + // for some time. 
-nlmills + //tdp->td_restartp->destination_host = tdp->td_e2ep->e2e_dest_hostname; // Name of the Destination machine if (tdp->td_restartp->flags & RESTART_FLAG_ISSOURCE) { // This is the SOURCE sside of the biz tdp->td_restartp->source_filename = tdp->td_target_full_pathname; // The source_filename is the name of the file being copied on the source side tdp->td_restartp->destination_filename = NULL; // The destination_filename is the name of the file being copied on the destination side diff --git a/src/client/interactive_func.c b/src/client/interactive_func.c index 0ba2bc63..a48272ac 100644 --- a/src/client/interactive_func.c +++ b/src/client/interactive_func.c @@ -321,17 +321,16 @@ xdd_interactive_display_state_info(worker_data_t *wdp) { (long long int)wdp->wd_counters.tc_current_op_end_time); } if (wdp->wd_current_state & WORKER_CURRENT_STATE_DEST_RECEIVE) { - fprintf(xgp->output," Destination Side of an E2E - waiting to receive data from Source, target op number %lld, location %lld, length %lld, recvfrom status is %d\n", + fprintf(xgp->output," Destination Side of an E2E - waiting to receive data from Source, target op number %lld, location %lld, length %lld\n", (long long int)wdp->wd_counters.tc_current_op_number, - (long long int)wdp->wd_e2ep->e2e_hdrp->e2eh_byte_offset, - (long long int)wdp->wd_e2ep->e2e_hdrp->e2eh_data_length, - wdp->wd_e2ep->e2e_recv_status); + (long long int)xni_target_buffer_target_offset(wdp->wd_e2ep->xni_wd_buf), + (long long int)xni_target_buffer_data_length(wdp->wd_e2ep->xni_wd_buf)); } if (wdp->wd_current_state & WORKER_CURRENT_STATE_SRC_SEND) { fprintf(xgp->output," Source Side of an E2E - waiting to send data to Destination, target op number %lld, location %lld, length %lld, sendto status is %d\n", (long long int)wdp->wd_counters.tc_current_op_number, - (long long int)wdp->wd_e2ep->e2e_hdrp->e2eh_byte_offset, - (long long int)wdp->wd_e2ep->e2e_hdrp->e2eh_data_length, + (long long 
int)xni_target_buffer_target_offset(wdp->wd_e2ep->xni_wd_buf), + (long long int)xni_target_buffer_data_length(wdp->wd_e2ep->xni_wd_buf), wdp->wd_e2ep->e2e_send_status); } if (wdp->wd_current_state & WORKER_CURRENT_STATE_BARRIER) { diff --git a/src/client/parse.c b/src/client/parse.c index a8b706a3..9031f775 100644 --- a/src/client/parse.c +++ b/src/client/parse.c @@ -388,7 +388,7 @@ xdd_get_target_datap(xdd_plan_t *planp, int32_t target_number, char *op) { if (planp->plan_options & PLAN_ENDTOEND) { if (NULL == tdp->td_e2ep) { // If there is no e2e struct then allocate one. - tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); if (NULL == tdp->td_e2ep) { fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for Target Data Struct END TO END Data Structure for target %d\n", xgp->progname, (int)sizeof(xint_data_pattern_t), target_number); diff --git a/src/client/parse_func.c b/src/client/parse_func.c index cc21e106..f3745ccd 100644 --- a/src/client/parse_func.c +++ b/src/client/parse_func.c @@ -220,65 +220,6 @@ int xddfunc_congestion(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t f } } // End of xddfunc_congestion() /*----------------------------------------------------------------------------*/ -// Set the magic cookie for network transfers -int xddfunc_cookie(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) -{ - int target_number = -1; - int args = xdd_parse_target_number(planp, argc, &argv[0], - flags, &target_number); - if (args < 0) - return(-1); - - if (xdd_parse_arg_count_check(args,argc, argv[0]) == 0) - return(0); - - // Get the hex string representation of the cookie - const char *cookie_str = argv[args + 1]; - const target_data_t tmptdp_; // hack to later find - // sizeof(target_data_t.td_magic_cookie) - - // two hex digits represent one byte - if (strlen(cookie_str) != sizeof(tmptdp_.td_magic_cookie)*2) { - fprintf(xgp->errout,"%s: ERROR: invalid '-cookie' %s\n", xgp->progname, cookie_str); - return(-1); - } 
- - // Convert the hex string to its binary representation - unsigned char magic_cookie[sizeof(tmptdp_.td_magic_cookie)] = { 0 }; - for (int i = 0; i < 32; i += 2) { - unsigned int bite = 0; // just in case `byte' is a typedef - - if (!isxdigit(cookie_str[i]) || !isxdigit(cookie_str[i+1]) || sscanf(cookie_str+i, "%2x", &bite) != 1) { - fprintf(xgp->errout,"%s: ERROR: invalid '-cookie' %s\n", xgp->progname, cookie_str); - return(-1); - } - - magic_cookie[i>>1] = (unsigned char)bite; - } - - // Set the magic cookie for the relevant targets - if (target_number >= 0) { - /* Set this option value for a specific target */ - target_data_t *tdp = xdd_get_target_datap(planp, target_number, argv[0]); - if (tdp == NULL) - return(-1); - memcpy(tdp->td_magic_cookie, magic_cookie, sizeof(tdp->td_magic_cookie)); - return(args+2); - } else { - /* Put this option into all Targets */ - if (flags & XDD_PARSE_PHASE2) { - target_data_t *tdp = planp->target_datap[0]; - int i = 0; - while (tdp) { - memcpy(tdp->td_magic_cookie, magic_cookie, sizeof(tdp->td_magic_cookie)); - i++; - tdp = planp->target_datap[i]; - } - } - return(2); - } -} // End of xddfunc_cookie() -/*----------------------------------------------------------------------------*/ // Create new target files for each pass. int xddfunc_createnewfiles(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) @@ -899,7 +840,7 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) tdp = xdd_get_target_datap(planp, target_number, argv[0]); if (tdp == NULL) return(-1); if (NULL == tdp->td_e2ep) { // If there is no e2e struct then allocate one. 
- tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); if (NULL == tdp->td_e2ep) { fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for Target Data Struct END TO END Data Structure for target %d\n", xgp->progname, (int)sizeof(xint_data_pattern_t), target_number); @@ -911,7 +852,7 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) i = 0; while (tdp) { if (NULL == tdp->td_e2ep) { // If there is no e2e struct then allocate one. - tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); if (NULL == tdp->td_e2ep) { fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for Target Data Struct END TO END Data Structure for target %d\n", xgp->progname, (int)sizeof(xint_data_pattern_t), target_number); @@ -1074,23 +1015,23 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) if (tdp == NULL) return(-1); tdp->td_target_options |= TO_ENDTOEND; - strcpy(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].hostname, hostname); - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].base_port = DEFAULT_E2E_PORT; - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = 0; + strcpy(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].hostname, hostname); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].base_port = DEFAULT_E2E_PORT; + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = 0; // Set a default NUMA node value if possible #if defined(HAVE_CPU_SET_T) - CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); sched_getaffinity(getpid(), - sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set), - 
&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set), + &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); #endif if (base_port) { // Set the requested Port Number and possible Port Count - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].base_port = atoi(base_port); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].base_port = atoi(base_port); if (port_count) { - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = atoi(port_count); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = atoi(port_count); } else { - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = 0; + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = 0; } #if defined(HAVE_CPU_SET_T) && defined(HAVE_NUMA_NODE_TO_CPUS) && defined(HAVE_NUMA_ALLOCATE_CPUMASK) @@ -1098,18 +1039,18 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) int i; struct bitmask* numa_mask = numa_allocate_cpumask(); int numa_node_no = atoi(numa_node); - CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); numa_node_to_cpus(numa_node_no, numa_mask); for (i = 0; i <= CPU_SETSIZE; i++) { if (numa_bitmask_isbitset(numa_mask, i)) - CPU_SET(i, &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_SET(i, &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); } numa_free_cpumask(numa_mask); } else { - CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + 
CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); sched_getaffinity(getpid(), - sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set), - &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set), + &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); } #else if (numa_node) { @@ -1117,8 +1058,7 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) } #endif } - tdp->td_e2ep->e2e_address_table_port_count += tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count; - tdp->td_e2ep->e2e_address_table_next_entry++; + tdp->td_e2ep->e2e_address_table_port_count += tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count; tdp->td_e2ep->e2e_address_table_host_count++; } else { /* set option for all targets */ if (flags & XDD_PARSE_PHASE2) { @@ -1126,41 +1066,41 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) i = 0; while (tdp) { tdp->td_target_options |= TO_ENDTOEND; - strcpy(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].hostname, hostname); - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].base_port = DEFAULT_E2E_PORT; - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = 0; + strcpy(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].hostname, hostname); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].base_port = DEFAULT_E2E_PORT; + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = 0; // Set a default NUMA node value if possible #if defined(HAVE_CPU_SET_T) - 
CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); sched_getaffinity(getpid(), - sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set), - &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set), + &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); #endif if (base_port) { // Set the requested Port Number and possible Port Count - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].base_port = atoi(base_port); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].base_port = atoi(base_port); if (port_count) { - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = atoi(port_count); + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = atoi(port_count); } else { - tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count = 0; + tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count = 0; } #if defined(HAVE_CPU_SET_T) && defined(HAVE_NUMA_NODE_TO_CPUS) && defined(HAVE_NUMA_ALLOCATE_CPUMASK) if (numa_node && -1 != numa_available()) { int i; struct bitmask* numa_mask = numa_allocate_cpumask(); int numa_node_no = atoi(numa_node); - CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); numa_node_to_cpus(numa_node_no, numa_mask); for (i = 0; i <= CPU_SETSIZE; i++) { if (numa_bitmask_isbitset(numa_mask, i)) - CPU_SET(i, &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_SET(i, 
&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); } numa_free_cpumask(numa_mask); } else { - CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + CPU_ZERO(&tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); sched_getaffinity(getpid(), - sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set), - &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].cpu_set); + sizeof(tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set), + &tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].cpu_set); } #else if (numa_node) { @@ -1168,9 +1108,8 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) } #endif } // End of IF stmnt that sets the Port/NPorts - tdp->td_e2ep->e2e_address_table_port_count += tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_next_entry].port_count; + tdp->td_e2ep->e2e_address_table_port_count += tdp->td_e2ep->e2e_address_table[tdp->td_e2ep->e2e_address_table_host_count].port_count; tdp->td_e2ep->e2e_address_table_host_count++; - tdp->td_e2ep->e2e_address_table_next_entry++; i++; tdp = planp->target_datap[i]; } @@ -1302,8 +1241,6 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) tdp->td_target_options |= TO_ENDTOEND; source_path = argv[args_index+1]; source_mtime = atoll(argv[args_index+2]); - tdp->td_e2ep->e2e_src_file_path = source_path; - tdp->td_e2ep->e2e_src_file_mtime = source_mtime; } else { /* set option for all targets */ if (flags & XDD_PARSE_PHASE2) { tdp = planp->target_datap[0]; @@ -1312,8 +1249,6 @@ xddfunc_endtoend(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) source_mtime = atoll(argv[args_index+2]); while (tdp) { tdp->td_target_options |= TO_ENDTOEND; - tdp->td_e2ep->e2e_src_file_path = source_path; - 
tdp->td_e2ep->e2e_src_file_mtime = source_mtime; i++; tdp = planp->target_datap[i]; } @@ -3229,14 +3164,13 @@ xddfunc_restart(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) if (rp == NULL) return(-1); if (NULL == tdp->td_e2ep) // If there is no e2e struct, then allocate one - tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); if (tdp->td_e2ep == NULL) // If there is still no e2e struct then return -1 return(-1); rp->initial_restart_offset = atoll(argv[args_index+1]); rp->byte_offset = rp->initial_restart_offset; rp->flags |= RESTART_FLAG_RESUME_COPY; - tdp->td_e2ep->e2e_total_bytes_written=rp->byte_offset; /* Set the last committed location to avoid restart output of 0 if the target does not complete any I/O during first interval @@ -3251,14 +3185,13 @@ xddfunc_restart(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags) if (rp == NULL) return(-1); if (NULL == tdp->td_e2ep) // If there is no e2e struct, then allocate one - tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); if (tdp->td_e2ep == NULL) // If there is still no e2e struct then return -1 return(-1); rp->initial_restart_offset = atoll(argv[args_index+1]); rp->byte_offset = rp->initial_restart_offset; rp->flags |= RESTART_FLAG_RESUME_COPY; - tdp->td_e2ep->e2e_total_bytes_written=rp->byte_offset; /* Set the last committed location to avoid restart output of 0 if the target does not complete any I/O during diff --git a/src/client/parse_table.c b/src/client/parse_table.c index e9f8b8cd..ab882cd0 100644 --- a/src/client/parse_table.c +++ b/src/client/parse_table.c @@ -57,13 +57,6 @@ xdd_func_t xdd_func[] = { {" Will set the TCP congestion control algorithm\n", 0,0,0,0}, XDD_FUNC_INVISIBLE}, - {"cookie", "cookie", - xddfunc_cookie, - 1, - " -cookie \n", - {" Will set the magic cookie for network connections\n", - 0,0,0,0}, - 0}, {"createnewfiles", "cnf", xddfunc_createnewfiles, 1, diff --git a/src/common/debug.c b/src/common/debug.c index afa8381b..f8380f54 
100644 --- a/src/common/debug.c +++ b/src/common/debug.c @@ -117,8 +117,6 @@ xdd_show_target_data(target_data_t *tdp) { fprintf(stderr,"xdd_show_target_data: char td_occupant_name[XDD_BARRIER_MAX_NAME_LENGTH]='%s'\n",tdp->td_occupant_name); // For a Target thread this is "TARGET####", for a Worker Thread it is "TARGET####WORKER####" fprintf(stderr,"xdd_show_target_data: xdd_barrier_t *td_current_barrier=%p\n",tdp->td_current_barrier); // Pointer to the current barrier this Thread is in at any given time or NULL if not in a barrier fprintf(stderr,"xdd_show_target_data: xdd_barrier_t td_target_worker_thread_init_barrier\n"); // Where the Target Thread waits for the Worker Thread to initialize - fprintf(stderr,"xdd_show_target_data: xdd_barrier_t td_targetpass_worker_thread_passcomplete_barrier\n");// The barrier used to sync targetpass() with all the Worker Threads at the end of a pass - fprintf(stderr,"xdd_show_target_data: xdd_barrier_t td_targetpass_worker_thread_eofcomplete_barrier\n"); // The barrier used to sync targetpass_eof_desintation_side() with a Worker Thread trying to recv an EOF packet fprintf(stderr,"xdd_show_target_data: uint64_t td_current_bytes_issued=%lld\n",(long long int)tdp->td_current_bytes_issued); // The amount of data for all transfer requests that has been issued so far fprintf(stderr,"xdd_show_target_data: uint64_t td_current_bytes_completed=%lld\n",(long long int)tdp->td_current_bytes_completed); // The amount of data for all transfer requests that has been completed so far fprintf(stderr,"xdd_show_target_data: uint64_t td_current_bytes_remaining=%lld\n",(long long int)tdp->td_current_bytes_remaining); // Bytes remaining to be transferred @@ -382,69 +380,18 @@ xdd_show_target_counters(xint_target_counters_t *tcp) { void xdd_show_e2e(xint_e2e_t *e2ep) { fprintf(stderr,"\nxdd_show_e2e:********* Start of E2E Data at 0x%p **********\n",e2ep); - fprintf(stderr,"\txdd_show_e2e: char *e2e_dest_hostname='%s'\n",e2ep->e2e_dest_hostname); // Name 
of the Destination machine - fprintf(stderr,"\txdd_show_e2e: char *e2e_src_hostname='%s'\n",e2ep->e2e_dest_hostname); // Name of the Source machine - fprintf(stderr,"\txdd_show_e2e: char *e2e_src_file_path='%s'\n",e2ep->e2e_dest_hostname); // Full path of source file for destination restart file fprintf(stderr,"\txdd_show_e2e: time_t e2e_src_file_mtime\n"); // stat -c %Y *e2e_src_file_path, i.e., last modification time fprintf(stderr,"\txdd_show_e2e: in_addr_t e2e_dest_addr=%d\n",e2ep->e2e_dest_addr); // Destination Address number of the E2E socket - fprintf(stderr,"\txdd_show_e2e: in_port_t e2e_dest_port=%d\n",e2ep->e2e_dest_port); // Port number to use for the E2E socket - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_sd=%d\n",e2ep->e2e_sd); // Socket descriptor for the E2E message port - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_nd=%d\n",e2ep->e2e_nd); // Number of Socket descriptors in the read set - fprintf(stderr,"\txdd_show_e2e: sd_t e2e_csd[FD_SETSIZE]\n");; // Client socket descriptors - fprintf(stderr,"\txdd_show_e2e: fd_set e2e_active?\n"); // This set contains the sockets currently active - fprintf(stderr,"\txdd_show_e2e: fd_set e2e_readset?\n"); // This set is passed to select() - fprintf(stderr,"\txdd_show_e2e: struct sockaddr_in e2e_sname?\n"); // used by setup_server_socket - fprintf(stderr,"\txdd_show_e2e: uint32_t e2e_snamelen=%d\n",e2ep->e2e_snamelen); // the length of the socket name - fprintf(stderr,"\txdd_show_e2e: struct sockaddr_in e2e_rname?\n"); // used by destination machine to remember the name of the source machine - fprintf(stderr,"\txdd_show_e2e: uint32_t e2e_rnamelen=%d\n",e2ep->e2e_rnamelen); // the length of the source socket name - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_current_csd=%d\n",e2ep->e2e_current_csd); // the current csd used by the select call on the destination side - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_next_csd=%d\n",e2ep->e2e_next_csd); // The next available csd to use - fprintf(stderr,"\txdd_show_e2e: 
xdd_e2e_header_t *e2e_hdrp=%p\n",e2ep->e2e_hdrp); // Pointer to the header portion of a packet - if (e2ep->e2e_hdrp) xdd_show_e2e_header(e2ep->e2e_hdrp); - fprintf(stderr,"\txdd_show_e2e: unsigned char *e2e_datap=%p\n",e2ep->e2e_datap); // Pointer to the data portion of a packet - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_header_size=%d\n",e2ep->e2e_header_size); // Size of the header portion of the buffer - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_data_size=%d\n",e2ep->e2e_data_size); // Size of the data portion of the buffer - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_xfer_size=%d\n",e2ep->e2e_xfer_size); // Number of bytes per End to End request - size of data buffer plus size of E2E Header fprintf(stderr,"\txdd_show_e2e: int32_t e2e_send_status=%d\n",e2ep->e2e_send_status); // Current Send Status - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_recv_status=%d\n",e2ep->e2e_recv_status); // Current Recv status fprintf(stderr,"\txdd_show_e2e: int64_t e2e_msg_sequence_number=%lld\n",(long long int)e2ep->e2e_msg_sequence_number);// The Message Sequence Number of the most recent message sent or to be received - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_msg_sent=%d\n",e2ep->e2e_msg_sent); // The number of messages sent - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_msg_recv=%d\n",e2ep->e2e_msg_recv); // The number of messages received - fprintf(stderr,"\txdd_show_e2e: int64_t e2e_prev_loc=%lld\n",(long long int)e2ep->e2e_prev_loc); // The previous location from a e2e message from the source - fprintf(stderr,"\txdd_show_e2e: int64_t e2e_prev_len=%lld\n",(long long int)e2ep->e2e_prev_len); // The previous length from a e2e message from the source - fprintf(stderr,"\txdd_show_e2e: int64_t e2e_data_recvd=%lld\n",(long long int)e2ep->e2e_data_recvd); // The amount of data that is received each time we call xdd_e2e_dest_recv() - fprintf(stderr,"\txdd_show_e2e: int64_t e2e_data_length=%lld\n",(long long int)e2ep->e2e_data_length); // The amount of data that is ready to 
be read for this operation - fprintf(stderr,"\txdd_show_e2e: int64_t e2e_total_bytes_written=%lld\n",(long long int)e2ep->e2e_total_bytes_written); // The total amount of data written across all restarts for this file fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_wait_1st_msg=%lld\n",(unsigned long long int)e2ep->e2e_wait_1st_msg); // Time in nanosecs destination waited for 1st source data to arrive - fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_first_packet_received_this_pass=%lld\n",(unsigned long long int)e2ep->e2e_first_packet_received_this_pass);// Time that the first packet was received by the destination from the source - fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_last_packet_received_this_pass=%lld\n",(unsigned long long int)e2ep->e2e_last_packet_received_this_pass);// Time that the last packet was received by the destination from the source - fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_first_packet_received_this_run=%lld\n",(unsigned long long int)e2ep->e2e_first_packet_received_this_run);// Time that the first packet was received by the destination from the source - fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_last_packet_received_this_run=%lld\n",(unsigned long long int)e2ep->e2e_last_packet_received_this_run);// Time that the last packet was received by the destination from the source fprintf(stderr,"\txdd_show_e2e: nclk_t e2e_sr_time=%lld\n",(unsigned long long int)e2ep->e2e_sr_time); // Time spent sending or receiving data for End-to-End operation fprintf(stderr,"\txdd_show_e2e: int32_t e2e_address_table_host_count=%d\n",e2ep->e2e_address_table_host_count); // Cumulative number of hosts represented in the e2e address table fprintf(stderr,"\txdd_show_e2e: int32_t e2e_address_table_port_count=%d\n",e2ep->e2e_address_table_port_count); // Cumulative number of ports represented in the e2e address table - fprintf(stderr,"\txdd_show_e2e: int32_t e2e_address_table_next_entry=%d\n",e2ep->e2e_address_table_next_entry); // Next available entry in the e2e_address_table 
fprintf(stderr,"\txdd_show_e2e: xdd_e2e_ate_t e2e_address_table[E2E_ADDRESS_TABLE_ENTRIES]\n"); // Used by E2E to stripe over multiple IP Addresses fprintf(stderr,"xdd_show_e2e:********* End of E2E Data at 0x%p **********\n",e2ep); } // End of xdd_show_e2e() -/*----------------------------------------------------------------------------*/ -/* xdd_show_e2e_header() - Display values in the specified data structure - */ -void -xdd_show_e2e_header(xdd_e2e_header_t *e2ehp) { - fprintf(stderr,"\nxdd_show_e2e_header:********* Start of E2E Header Data at 0x%p **********\n",e2ehp); - fprintf(stderr,"\t\txdd_show_e2e_header: uint32_t e2eh_magic=0x%8x\n",e2ehp->e2eh_magic); // Magic Number - sanity check - fprintf(stderr,"\t\txdd_show_e2e_header: int32_t e2eh_worker_thread_number=%d\n",e2ehp->e2eh_worker_thread_number); // Sender's Worker Thread Number - fprintf(stderr,"\t\txdd_show_e2e_header: int64_t e2eh_sequence_number=%lld\n",(long long int)e2ehp->e2eh_sequence_number); // Sequence number of this operation - fprintf(stderr,"\t\txdd_show_e2e_header: nclk_t e2eh_send_time=%lld\n",(unsigned long long int)e2ehp->e2eh_send_time); // Time this packet was sent in global nano seconds - fprintf(stderr,"\t\txdd_show_e2e_header: nclk_t e2eh_recv_time=%lld\n",(unsigned long long int)e2ehp->e2eh_recv_time); // Time this packet was received in global nano seconds - fprintf(stderr,"\t\txdd_show_e2e_header: int64_t e2eh_byte_offset=%lld\n",(long long int)e2ehp->e2eh_byte_offset); // Offset relative to the beginning of the file of where this data belongs - fprintf(stderr,"\t\txdd_show_e2e_header: int64_t e2eh_data_length=%lld\n",(long long int)e2ehp->e2eh_data_length); // Length of the user data in bytes for this operation - fprintf(stderr,"\txdd_show_e2e_header:********* End of E2E Header Data at 0x%p **********\n",e2ehp); - -} // End of xdd_show_e2e_header() - /*----------------------------------------------------------------------------*/ /* xdd_show_tot_entry() - Display values in 
the specified data structure */ diff --git a/src/common/end_to_end.h b/src/common/end_to_end.h index a3aa7160..7d255b0f 100644 --- a/src/common/end_to_end.h +++ b/src/common/end_to_end.h @@ -10,18 +10,6 @@ * Foundation. See file COPYING. * */ -/* - * +-----+-----------------------------------------------+ - * | hdr | Data | - * +-----+-----------------------------------------------+ - * - * +----//----------------------+----------------------------------------+ - * | | E2E Header | Data ---- N bytes ---- | - * | |<--64 bytes-->|<< Start of data buffer is Page Aligned | - * |<---//----PAGE_SIZE bytes-->| | - * +----//----------------------+----------------------------------------+ - * - */ #ifdef HAVE_SCHED_H #include @@ -31,17 +19,6 @@ #define HOSTNAMELENGTH 1024 #define E2E_ADDRESS_TABLE_ENTRIES 16 -struct xdd_e2e_header { - uint32_t e2eh_magic; // Magic Number - sanity check - unsigned char e2eh_cookie[16]; // Magic Cookie - a safer check - int32_t e2eh_worker_thread_number; // Sender's Worker Thread Number - int64_t e2eh_sequence_number; // Sequence number of this operation - nclk_t e2eh_send_time; // Time this packet was sent in global nano seconds - nclk_t e2eh_recv_time; // Time this packet was received in global nano seconds - int64_t e2eh_byte_offset; // Offset relative to the beginning of the file of where this data belongs - int64_t e2eh_data_length; // Length of the user data in bytes for this operation -}; -typedef struct xdd_e2e_header xdd_e2e_header_t; struct xdd_e2e_address_table_entry { char *address; // Pointer to the ASCII string of the address @@ -59,86 +36,29 @@ struct xdd_e2e_address_table { }; typedef struct xdd_e2e_address_table xdd_e2e_at_t; -// Things used in the various end_to_end subroutines. -#ifdef FD_SETSIZE -#undef FD_SETSIZE -#define FD_SETSIZE 128 -#endif - -#define MAXMIT_TCP (1<<28) - -/* - * The xint_td_e2e structure contains variables that are referenced by the - * target thread. 
- */ -struct xint_td_e2e { - char *e2e_dest_hostname; // Name of the Destination machine - char *e2e_src_hostname; // Name of the Source machine - char *e2e_src_file_path; // Full path of source file for destination restart file - time_t e2e_src_file_mtime; // stat -c %Y *e2e_src_file_path, i.e., last modification time - in_addr_t e2e_dest_addr; // Destination Address number of the E2E socket - in_port_t e2e_dest_port; // Port number to use for the E2E socket - - /* XNI data */ - xni_connection_t xni_conn; -}; -typedef struct xint_td_e2e xint_td_e2e_t; - /* * The xint_e2e structure contains variables that are referenced by the * target thread and worker thread. */ struct xint_e2e { - char *e2e_dest_hostname; // Name of the Destination machine - char *e2e_src_hostname; // Name of the Source machine - char *e2e_src_file_path; // Full path of source file for destination restart file - time_t e2e_src_file_mtime; // stat -c %Y *e2e_src_file_path, i.e., last modification time in_addr_t e2e_dest_addr; // Destination Address number of the E2E socket - in_port_t e2e_dest_port; // Port number to use for the E2E socket - int32_t e2e_sd; // Socket descriptor for the E2E message port - int32_t e2e_nd; // Number of Socket descriptors in the read set - sd_t e2e_csd[FD_SETSIZE]; // Client socket descriptors - fd_set e2e_active; // This set contains the sockets currently active - fd_set e2e_readset; // This set is passed to select() - struct sockaddr_in e2e_sname; // used by setup_server_socket - uint32_t e2e_snamelen; // the length of the socket name - struct sockaddr_in e2e_rname; // used by destination machine to remember the name of the source machine - uint32_t e2e_rnamelen; // the length of the source socket name - int32_t e2e_current_csd; // the current csd used by the select call on the destination side - int32_t e2e_next_csd; // The next available csd to use - xdd_e2e_header_t *e2e_hdrp; // Pointer to the header portion of a packet - unsigned char *e2e_datap; // Pointer to 
the data portion of a packet - int32_t e2e_header_size; // Size of the header portion of the buffer - int32_t e2e_data_size; // Size of the data portion of the buffer - int32_t e2e_xfer_size; // Number of bytes per End to End request - size of data buffer plus size of E2E Header int32_t e2e_send_status; // Current Send Status - int32_t e2e_recv_status; // Current Recv status -#define XDD_E2E_DATA_READY 0xDADADADA // The magic number that should appear at the beginning of each message indicating data is present -#define XDD_E2E_EOF 0xE0F0E0F0 // The magic number that should appear in a message signaling and End of File int64_t e2e_msg_sequence_number;// The Message Sequence Number of the most recent message sent or to be received - int32_t e2e_msg_sent; // The number of messages sent - int32_t e2e_msg_recv; // The number of messages received - int64_t e2e_prev_loc; // The previous location from a e2e message from the source - int64_t e2e_prev_len; // The previous length from a e2e message from the source - int64_t e2e_data_recvd; // The amount of data that is received each time we call xdd_e2e_dest_recv() - int64_t e2e_data_length; // The amount of data that is ready to be read for this operation - int64_t e2e_total_bytes_written; // The total amount of data written across all restarts for this file nclk_t e2e_wait_1st_msg; // Time in nanosecs destination waited for 1st source data to arrive - nclk_t e2e_first_packet_received_this_pass;// Time that the first packet was received by the destination from the source - nclk_t e2e_last_packet_received_this_pass;// Time that the last packet was received by the destination from the source - nclk_t e2e_first_packet_received_this_run;// Time that the first packet was received by the destination from the source - nclk_t e2e_last_packet_received_this_run;// Time that the last packet was received by the destination from the source nclk_t e2e_sr_time; // Time spent sending or receiving data for End-to-End operation int32_t 
e2e_address_table_host_count; // Cumulative number of hosts represented in the e2e address table int32_t e2e_address_table_port_count; // Cumulative number of ports represented in the e2e address table - int32_t e2e_address_table_next_entry; // Next available entry in the e2e_address_table xdd_e2e_ate_t e2e_address_table[E2E_ADDRESS_TABLE_ENTRIES]; // Used by E2E to stripe over multiple IP Addresses /* XNI Target data */ - xni_connection_t xni_td_conn; + xni_connection_t *xni_td_connections; // One connection per host + int xni_td_connections_count; // Number of connection objects + pthread_mutex_t *xni_td_connection_mutexes; // To synchronize connection establishment; one per connection /* XNI Worker data */ xni_target_buffer_t xni_wd_buf; + int received_eof; // TRUE when the source has signaled EOF to this destination worker + int address_table_index; // Index into e2e_address_table for the address assigned to this worker }; // End of struct xint_e2e definition typedef struct xint_e2e xint_e2e_t; diff --git a/src/common/net_utils.h b/src/common/net_utils.h index 5527bbcd..edf0bb70 100644 --- a/src/common/net_utils.h +++ b/src/common/net_utils.h @@ -43,8 +43,8 @@ static inline uint64_t ntohll(uint64_t value) */ static inline uint64_t htonll(uint64_t value) { - // Re-use the htonll implementation to swap the bytes - return htonll(value); + // Re-use the ntohll implementation to swap the bytes + return ntohll(value); } #endif /* NET_UTILS_H */ diff --git a/src/common/parse.h b/src/common/parse.h index 2a834442..f61d5d41 100644 --- a/src/common/parse.h +++ b/src/common/parse.h @@ -42,7 +42,6 @@ int xddfunc_blocksize(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t fl int xddfunc_bytes(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); int xddfunc_combinedout(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); int xddfunc_congestion(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); -int xddfunc_cookie(xdd_plan_t *planp, 
int32_t argc, char *argv[], uint32_t flags); int xddfunc_createnewfiles(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); int xddfunc_csvout(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); int xddfunc_datapattern(xdd_plan_t *planp, int32_t argc, char *argv[], uint32_t flags); diff --git a/src/common/target_data.c b/src/common/target_data.c index 7e181809..ade793a3 100644 --- a/src/common/target_data.c +++ b/src/common/target_data.c @@ -94,17 +94,20 @@ xdd_init_new_target_data(target_data_t *tdp, int32_t n) { } /* Init the end-to-end fields */ if (tdp->td_e2ep) { - tdp->td_e2ep->e2e_sd = 0; /* destination machine socket descriptor */ - tdp->td_e2ep->e2e_src_hostname = NULL; /* E2E source hostname */ - tdp->td_e2ep->e2e_dest_hostname = NULL; /* E2E destination hostname */ - tdp->td_e2ep->e2e_dest_port = DEFAULT_E2E_PORT; tdp->td_e2ep->e2e_address_table_host_count = 0; tdp->td_e2ep->e2e_address_table_port_count = 0; tdp->td_e2ep->e2e_dest_addr = 0; tdp->td_e2ep->e2e_wait_1st_msg = 0; - tdp->td_e2ep->e2e_address_table_next_entry=0; + tdp->td_e2ep->xni_td_connections = NULL; + tdp->td_e2ep->xni_td_connections_count = 0; + tdp->td_e2ep->xni_td_connection_mutexes = NULL; + tdp->td_e2ep->address_table_index = -1; } + tdp->io_buffers = NULL; + tdp->io_buffers_count = 0; + tdp->io_buffer_size = 0; + tdp->xni_ibdevice = DEFAULT_IB_DEVICE; /* can be changed by '-ibdevice' CLO */ tdp->xni_tcp_congestion = XNI_TCP_DEFAULT_CONGESTION; /* can be changed by '-congestion' CLO */ @@ -112,9 +115,6 @@ xdd_init_new_target_data(target_data_t *tdp, int32_t n) { sprintf(tdp->td_occupant_name,"TARGET%04d",tdp->td_target_number); xdd_init_barrier_occupant(&tdp->td_occupant, tdp->td_occupant_name, XDD_OCCUPANT_TYPE_TARGET, (void *)tdp); - // the default magic cookie has all bits set - memset(tdp->td_magic_cookie, 0xFF, sizeof(tdp->td_magic_cookie)); - } /* end of xdd_init_new_target_data() */ 
/*----------------------------------------------------------------------------*/ @@ -213,7 +213,7 @@ xdd_create_worker_data(target_data_t *tdp, int32_t q) { // Allocate and initialize the End-to-End structure if needed if (tdp->td_target_options & TO_ENDTOEND) { - wdp->wd_e2ep = xdd_get_e2ep(); + wdp->wd_e2ep = xint_get_e2ep(); if (NULL == wdp->wd_e2ep) { fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for WORKER_DATA END TO END Data Structure for worker %d\n", xgp->progname, (int)sizeof(xint_data_pattern_t), q); diff --git a/src/common/xint_prototypes.h b/src/common/xint_prototypes.h index 65181cd2..40885e7c 100644 --- a/src/common/xint_prototypes.h +++ b/src/common/xint_prototypes.h @@ -44,35 +44,12 @@ void xdd_show_task(xint_task_t *taskp); void xdd_show_occupant(xdd_occupant_t *op); void xdd_show_target_counters(xint_target_counters_t *tcp); void xdd_show_e2e(xint_e2e_t *e2ep); -void xdd_show_e2e_header(xdd_e2e_header_t *e2ehp); void xdd_show_tot(tot_t *totp); void xdd_show_tot_entry(tot_t *totp, int i); void xdd_show_ts_table(xint_timestamp_t *ts_tablep, int target_number); void xdd_show_ts_header(xdd_ts_header_t *ts_hdrp, int target_number); void xdd_show_results_data(results_t *rp, char *dumptype, xdd_plan_t *planp); -// end_to_end.c -int32_t xdd_e2e_src_send(worker_data_t *wdp); -int32_t xdd_e2e_dest_receive(worker_data_t *wdp); -int32_t xdd_e2e_dest_connection(worker_data_t *wdp); -int32_t xdd_e2e_dest_receive_header(worker_data_t *wdp); -int32_t xdd_e2e_dest_receive_data(worker_data_t *wdp); -int32_t xdd_e2e_dest_receive_error(worker_data_t *wdp); -int32_t xdd_e2e_eof_source_side(worker_data_t *wdp); -int32_t xdd_e2e_eof_destination_side(worker_data_t *wdp); - -// end_to_end_init.c -int32_t xdd_e2e_target_init(target_data_t *tdp); -int32_t xdd_e2e_worker_init(worker_data_t *wdp); -int32_t xdd_e2e_src_init(worker_data_t *wdp); -int32_t xdd_e2e_setup_src_socket(worker_data_t *wdp); -int32_t xdd_e2e_dest_init(worker_data_t *wdp); -int32_t 
xdd_e2e_setup_dest_socket(worker_data_t *wdp); -void xdd_e2e_set_socket_opts(worker_data_t *wdp, int skt); -void xdd_e2e_prt_socket_opts(int skt); -void xdd_e2e_err(worker_data_t *wdp, char const *whence, char const *fmt, ...); -int32_t xdd_sockets_init(void); - // global_clock.c in_addr_t xdd_init_global_clock_network(char *hostname); void xdd_init_global_clock(nclk_t *nclkp); @@ -130,7 +107,7 @@ void xdd_interactive_show_trace(int32_t tokens, char *cmdline, uint32_t flags, x void xdd_interactive_show_barrier(int32_t tokens, char *cmdline, uint32_t flags, xdd_plan_t *planp); // io_buffers.c -unsigned char *xdd_init_io_buffers(worker_data_t *wdp); +unsigned char *xdd_init_io_buffers(target_data_t *wdp); // lockstep.c int32_t xdd_lockstep(target_data_t *p); @@ -156,7 +133,6 @@ int xdd_parse_target_number(xdd_plan_t* planp, int32_t argc, char *argv[], target_data_t *xdd_get_target_datap(xdd_plan_t* planp, int32_t target_number, char *op); xint_restart_t *xdd_get_restartp(target_data_t *tdp); xint_raw_t *xdd_get_rawp(target_data_t *tdp); -xint_e2e_t *xdd_get_e2ep(void); xint_throttle_t *xdd_get_throtp(target_data_t *tdp); xint_triggers_t *xdd_get_trigp(target_data_t *tdp); xint_extended_stats_t *xdd_get_esp(target_data_t *tdp); @@ -401,13 +377,21 @@ void xdd_start_restart_monitor(xdd_plan_t *planp); void xdd_start_interactive(xdd_plan_t *planp); // xnet_end_to_end_init.c -int32_t xint_e2e_xni_init(target_data_t *tdp); +int32_t xint_e2e_target_init(target_data_t *tdp); +int32_t xint_e2e_worker_init(worker_data_t *wdp); +xint_e2e_t *xint_get_e2ep(void); // xnet_end_to_end.c -int32_t xint_e2e_dest_connect(target_data_t *tdp); -int32_t xint_e2e_src_connect(target_data_t *tdp); +int32_t xint_e2e_dest_connect(worker_data_t *wdp); +int32_t xint_e2e_src_connect(worker_data_t *wdp); +int32_t xint_e2e_disconnect(target_data_t *tdp); int32_t xint_e2e_xni_send(worker_data_t *wdp); +int32_t xint_e2e_eof_source_side(worker_data_t *wdp); int32_t xint_e2e_xni_recv(worker_data_t 
*wdp); +int xint_is_e2e(const target_data_t *tdp); +xni_connection_t *xint_e2e_worker_connection(worker_data_t *wdp); +pthread_mutex_t *xint_e2e_worker_connection_mutex(worker_data_t *wdp); +const char *xint_e2e_worker_dest_hostname(worker_data_t *wdp); #endif /* * Local variables: diff --git a/src/common/xint_td.h b/src/common/xint_td.h index 79946c00..9487f9a4 100644 --- a/src/common/xint_td.h +++ b/src/common/xint_td.h @@ -73,9 +73,7 @@ struct xint_target_data { // Target-specific variables xdd_barrier_t td_target_worker_thread_init_barrier; // Where the Target Thread waits for the Worker Thread to initialize - - xdd_barrier_t td_targetpass_worker_thread_passcomplete_barrier;// The barrier used to sync targetpass() with all the Worker Threads at the end of a pass - xdd_barrier_t td_targetpass_worker_thread_eofcomplete_barrier;// The barrier used to sync targetpass_eof_desintation_side() with a Worker Thread trying to recv an EOF packet + xdd_barrier_t td_target_worker_thread_connected_barrier; // Where the Target Thread waits for every Worker Thread to establish a connection (End-to-End only) uint64_t td_current_bytes_issued; // The amount of data for all transfer requests that has been issued so far @@ -166,14 +164,20 @@ struct xint_target_data { #endif int32_t td_op_delay; // Number of seconds to delay between operations + // I/O buffers allocated and freed by the target thread but shared + // with either XNI or the workers + unsigned char **io_buffers; + // number of I/O buffers + size_t io_buffers_count; + // size of each I/O buffer in bytes + size_t io_buffer_size; + /* XNI Networking components */ xni_protocol_t xni_pcl; xni_control_block_t xni_cb; xni_context_t xni_ctx; const char *xni_ibdevice; const char *xni_tcp_congestion; - - unsigned char td_magic_cookie[16]; // Magic cookie for checking network endpoints }; typedef struct xint_target_data target_data_t; diff --git a/src/compat/config.h.in b/src/compat/config.h.in deleted file mode 100644 index 
69302170..00000000 --- a/src/compat/config.h.in +++ /dev/null @@ -1,210 +0,0 @@ -/* - * XDD - a data movement and benchmarking toolkit - * - * Copyright (C) 1992-2013 I/O Performance, Inc. - * Copyright (C) 2009-2013 UT-Battelle, LLC - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#ifndef CONFIG_H -#define CONFIG_H - -/******************************************************************************** - * Autoconf stuff - *******************************************************************************/ - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - - -/******************************************************************************** - * Defines for headers - *******************************************************************************/ - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_LIBGEN_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_LINUX_MAGIC_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the header file. 
*/ -#undef HAVE_SYS_DISK_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_MOUNT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UTMPX_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_XFS_LIBXFS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_XFS_XFS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_NUMA_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SCHED_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SCSI_SG_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_GTEST_GTEST_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_INFINIBAND_VERBS_H - -/******************************************************************************** - * Defines for functions - *******************************************************************************/ - -/* Define to 1 if you have the `clock_gettime' function. */ -#undef HAVE_CLOCK_GETTIME - -/* Define to 1 if you have the `ibv_get_device_list' function. */ -#undef HAVE_IBV_GET_DEVICE_LIST - -/* Define to 1 if you have the `initstate' function. */ -#undef HAVE_INITSTATE - -/* Define to 1 if you have the `memset' function. */ -#undef HAVE_MEMSET - -/* Define to 1 if you have the `nanosleep' function. */ -#undef HAVE_NANOSLEEP - -/* Define to 1 if you have the `posix_memalign' function. */ -#undef HAVE_NUMA_NODE_TO_CPUS - -/* Define to 1 if you have the `posix_memalign' function. */ -#undef HAVE_NUMA_ALLOCATE_CPUMASK - -/* Define to 1 if you have the `posix_memalign' function. */ -#undef HAVE_POSIX_MEMALIGN - -/* Define to 1 if you have the `pread' function. */ -#undef HAVE_PREAD - -/* Define to 1 if you have the `pthread_attr_setaffinity_np' function. 
*/ -#undef HAVE_PTHREAD_ATTR_SETAFFINITY_NP - -/* Define to 1 if you have the `pwrite' function. */ -#undef HAVE_PWRITE - -/* Define to 1 if you have the `random' function. */ -#undef HAVE_RANDOM - -/* Define to 1 if you have the `rand' function. */ -#undef HAVE_RAND - -/* Define to 1 if you have the `sched_getcpu' function. */ -#undef HAVE_SCHED_GETCPU - -/* Define to 1 if you have the `sched_setscheduler' function. */ -#undef HAVE_SCHED_SETSCHEDULER - -/******************************************************************************** - * Defines for features - *******************************************************************************/ - -/* Define if you have BLKGETSIZE64 ioctl */ -#undef HAVE_DECL_BLKGETSIZE64 - -/* Define if you have cpu_set_t type */ -#undef HAVE_CPU_SET_T - -/* Define if you have BLKGETSIZE64 ioctl */ -#undef HAVE_DECL_DKIOCGETBLOCKCOUNT - -/* Define if you have BLKGETSIZE64 ioctl */ -#undef HAVE_DECL_DKIOCGETBLOCKSIZE - -/* Define to 1 if you have the declaration of `XFS_SUPER_MAGIC', and to 0 if - you don't. */ -#undef HAVE_DECL_XFS_SUPER_MAGIC - -/* Define to 1 if you have the declaration of xfsctl, and to 0 if you don't. */ -#undef HAVE_DECL_XFSCTL - -/* Define if you have the MPI library. */ -#undef HAVE_MPI - -/* Define if you have the pthread. */ -#undef HAVE_PTHREAD_BARRIER_T - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Define to `unsigned int' if does not define. 
*/ -#undef size_t - -/* Define to 1 if you wish to enable XFS support */ -#undef HAVE_ENABLE_XFS - -/* Define to 1 if you wish to enable Infiniband support */ -#undef HAVE_ENABLE_IB - -/* Define to 1 if the TCP_CONGESTION setsockopt is available */ -#undef HAVE_DECL_TCP_CONGESTION - - -#endif - -/* - * Local variables: - * indent-tabs-mode: t - * c-indent-level: 4 - * c-basic-offset: 4 - * End: - * - * vim: ts=4 sts=4 sw=4 noexpandtab - */ diff --git a/src/compat/xint_barrier.h b/src/compat/xint_barrier.h deleted file mode 100644 index d5c4a20f..00000000 --- a/src/compat/xint_barrier.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * XDD - a data movement and benchmarking toolkit - * - * Copyright (C) 1992-2013 I/O Performance, Inc. - * Copyright (C) 1999-2006 Brian Paul - * Copyright (C) 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright (C) 2009-2013 UT-Battelle, LLC - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ -/* Pthread-based barrier adapted from Gallium */ -#ifndef XINT_BARRIER_H -#define XINT_BARRIER_H - -#include -#include -#include - -#define HAVE_XINT_BARRIER - -typedef struct xint_barrier { - size_t count; - size_t waiters; - pthread_mutex_t mutex; - pthread_cond_t cond; -} xint_barrier_t; - -inline static int xint_barrier_init(xint_barrier_t *barrier, size_t count) -{ - int rc = 0; - barrier->count = count; - barrier->waiters = 0; - rc = pthread_mutex_init(&barrier->mutex, NULL); - assert(0 == rc); - rc = pthread_cond_init(&barrier->cond, NULL); - assert(0 == rc); - return rc; -} - -inline static int xint_barrier_destroy(xint_barrier_t *barrier) -{ - //int rc = 0; - - // Have to remove the checking because the qthreads are sitting in a mutex - // even during a successful completion - //assert(barrier->waiters == 0); - pthread_cond_destroy(&barrier->cond); - //if (0 != rc) -// fprintf(stderr, -// "Error: Destroying barrier condvar count: %zd reason: %s\n", -// barrier->count, strerror(rc)); - //assert(0 == rc); - pthread_mutex_destroy(&barrier->mutex); -// if (0 != rc) -// fprintf(stderr, -// "Error: Destroying barrier mutex count: %zd reason: %s\n", -// barrier->count, strerror(rc)); - //assert(0 == rc); - return 0; -} - -inline static int xint_barrier_wait(xint_barrier_t *barrier) -{ - assert(barrier->waiters < barrier->count); - pthread_mutex_lock(&barrier->mutex); - barrier->waiters++; - - if (barrier->waiters == barrier->count) { - barrier->waiters = 0; - pthread_cond_broadcast(&barrier->cond); - } else { - pthread_cond_wait(&barrier->cond, &barrier->mutex); - } - - pthread_mutex_unlock(&barrier->mutex); - return 0; -} - -#endif -/* - * Local variables: - * indent-tabs-mode: t - * default-tab-width: 4 - * c-indent-level: 4 - * c-basic-offset: 4 - * End: - * - * vim: ts=4 sts=4 sw=4 noexpandtab - */ diff --git a/src/compat/xint_darwin.h b/src/compat/xint_darwin.h index 1a883790..fffaaa50 100644 --- a/src/compat/xint_darwin.h +++ 
b/src/compat/xint_darwin.h @@ -53,7 +53,6 @@ #endif /* XDD internal compatibility headers for this platform */ -#include "xint_barrier.h" #include "xint_nclk.h" /* nclk_t, prototype compatibility */ #include "xint_misc.h" diff --git a/src/net/end_to_end.c b/src/net/end_to_end.c deleted file mode 100644 index b087019d..00000000 --- a/src/net/end_to_end.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * XDD - a data movement and benchmarking toolkit - * - * Copyright (C) 1992-2013 I/O Performance, Inc. - * Copyright (C) 2009-2013 UT-Battelle, LLC - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2, as published by the Free Software - * Foundation. See file COPYING. - * - */ -/* - * This file contains the subroutines necessary the end-to-end - * Send and Receive functions. - */ -#include "xint.h" - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_src_send() - send the data from source to destination - * This subroutine will take the message header from the Worker Data Struct - * and update it with: - * - senders Worker Thread number - * - time stamp when this packet is being sent normalized to global time - * The message header is placed after the end of the user data in the - * message buffer (thus making it a trailer rather than a header). - * - * Return values: 0 is good, -1 is bad - * - * The size of the buffer depends on whether it is being used for network - * I/O as in an End-to-end operation. For End-to-End operations, the size - * of the buffer is 1 page larger than for non-End-to-End operations. 
- * - * For normal (non-E2E operations) the buffer pointers are as follows: - * |<----------- wd_buf_size = N Pages ----------------->| - * +-----------------------------------------------------+ - * | data buffer | - * | transfer size (td_xfer_size) rounded up to N pages | - * |<-wd_bufp | - * |<-task_datap | - * +-----------------------------------------------------+ - * - * For End-to-End operations, the buffer pointers are as follows: - * |<------------------- wd_buf_size = N+1 Pages ------------------------>| - * +----------------+-----------------------------------------------------+ - * |<----1 page---->| transfer size (td_xfer_size) rounded up to N pages | - * |<-wd_bufp |<-task_datap | - * | | E2E | E2E | - * | |<-Header->| data buffer | - * +-----*----------*-----------------------------------------------------+ - * ^ ^ - * ^ +-e2e_datap - * +-e2e_hdrp - */ -int32_t -xdd_e2e_src_send(worker_data_t *wdp) { - target_data_t *tdp; - xint_e2e_t *e2ep; // Pointer to the E2E data struct - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header - int bytes_sent; // Cumulative number of bytes sent - int send_size; // Number of bytes to send for each call to sendto() - int sento_calls; // Number of times sendto() has been called - int max_xfer; - unsigned char *bufp; - xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: ENTER: e2ep=%p: e2ehp=%p: e2e_datap=%p\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep, e2ehp, e2ep->e2e_datap); - - memcpy(e2ehp->e2eh_cookie, tdp->td_magic_cookie, sizeof(e2ehp->e2eh_cookie)); - - // The "task" data structure contains variables relevant to the file-read operation - e2ehp->e2eh_worker_thread_number = wdp->wd_worker_number; - e2ehp->e2eh_sequence_number = wdp->wd_task.task_op_number; - e2ehp->e2eh_byte_offset 
= wdp->wd_task.task_byte_offset; - e2ehp->e2eh_data_length = wdp->wd_task.task_xfer_size; - - // The message header for this data packet precedes the data portion - if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_processor_start = xdd_get_processor(); - } - - // Note: the e2ep->e2e_xfer_size is the size of the data field plus the size of the header - max_xfer = MAXMIT_TCP; - bytes_sent = 0; - bufp = (unsigned char *)e2ehp; - sento_calls = 0; - // The transfer size is the size of the header buffer (not the header struct) - // plus the amount of data in the data portion of the IO buffer. - // For EOF operations the amount of data in the data portion should be zero. - e2ep->e2e_xfer_size = sizeof(xdd_e2e_header_t) + e2ehp->e2eh_data_length; - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: Preparing to send %d bytes: e2ep=%p: e2ehp=%p: e2e_datap=%p: e2e_xfer_size=%d: e2eh_data_length=%lld\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_xfer_size,e2ep,e2ehp,e2ep->e2e_datap,e2ep->e2e_xfer_size,(long long int)e2ehp->e2eh_data_length); -if (xgp->global_options & GO_DEBUG_E2E) xdd_show_e2e_header((xdd_e2e_header_t *)bufp); - - nclk_now(&wdp->wd_counters.tc_current_net_start_time); - while (bytes_sent < e2ep->e2e_xfer_size) { - send_size = e2ep->e2e_xfer_size - bytes_sent; - if (send_size > max_xfer) - send_size = max_xfer; -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: Actually sending %d bytes: e2ep=%p: e2ehp=%p: e2e_datap=%p: bytes_sent=%d: e2ehp+bytes_sent=%p: first 8 bytes=0x%016llx\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, send_size,e2ep,e2ehp,e2ep->e2e_datap,(int)bytes_sent,bufp, *((unsigned long long int *)bufp)); - e2ep->e2e_send_status = sendto(e2ep->e2e_sd, - bufp, - 
send_size, - 0, - (struct sockaddr *)&e2ep->e2e_sname, - sizeof(struct sockaddr_in)); - if (e2ep->e2e_send_status <= 0) { - xdd_e2e_err(wdp,"xdd_e2e_src_send","ERROR: error sending HEADER+DATA to destination\n"); - return(-1); - } - bytes_sent += e2ep->e2e_send_status; - bufp += e2ep->e2e_send_status; - sento_calls++; -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: Sent %d of %d bytes - %d bytes sent so far: bufp=%p\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_send_status,e2ep->e2e_xfer_size,bytes_sent,bufp); - } - nclk_now(&wdp->wd_counters.tc_current_net_end_time); - // Time stamp if requested - if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_xfer_size = e2ep->e2e_xfer_size; - ttep->tte_net_start = wdp->wd_counters.tc_current_net_start_time; - ttep->tte_net_end = wdp->wd_counters.tc_current_net_end_time; - ttep->tte_net_processor_end = xdd_get_processor(); - ttep->tte_net_xfer_calls = sento_calls; - } - - // Calculate the Send/Receive time by the time it took the last sendto() to run - e2ep->e2e_sr_time = (wdp->wd_counters.tc_current_net_end_time - wdp->wd_counters.tc_current_net_start_time); - - if (bytes_sent != e2ep->e2e_xfer_size) { - xdd_e2e_err(wdp,"xdd_e2e_src_send","ERROR: could not send header+data from e2e source\n"); - return(-1); - } - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: EXIT...\n",(long long int)pclk_now(),tdp->td_target_number, wdp->wd_worker_number); - return(0); - -} /* end of xdd_e2e_src_send() */ - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_connection() - Wait for an incoming connection and - * return when it arrives. 
- * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_dest_connection(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to the Target Data - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_connection: Target %d Worker: %d: ENTER: e2e_nd=%d: e2e_sd=%d: FD_SETSIZE=%d\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number,e2ep->e2e_nd,e2ep->e2e_sd,FD_SETSIZE); - - int rc = select(e2ep->e2e_nd, &e2ep->e2e_readset, NULL, NULL, NULL); - assert(rc != -1); - /* Handle all the descriptors that are ready */ - /* There are two type of sockets: the one sd socket and multiple - * client sockets. We first check to see if the sd is in the readset. - * If so, this means that a client is trying to make a new connection - * in which case we need to issue an accept to establish the connection - * and obtain a new Client Socket Descriptor (csd). 
- */ -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_connection: Target %d Worker: %d: Inside SELECT \n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number); - if (FD_ISSET(e2ep->e2e_sd, &e2ep->e2e_readset)) { /* Process an incoming connection */ - e2ep->e2e_current_csd = e2ep->e2e_next_csd; - - e2ep->e2e_csd[e2ep->e2e_current_csd] = accept(e2ep->e2e_sd, (struct sockaddr *)&e2ep->e2e_rname,&e2ep->e2e_rnamelen); -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_connection: Target %d Worker: %d: connection accepted: sd=%d\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_sd); - - FD_SET(e2ep->e2e_csd[e2ep->e2e_current_csd], &e2ep->e2e_active); /* Mark this fd as active */ - FD_SET(e2ep->e2e_csd[e2ep->e2e_current_csd], &e2ep->e2e_readset); /* Put in readset so that it gets processed */ - - /* Find the next available csd close to the beginning of the CSD array */ - e2ep->e2e_next_csd = 0; - while (e2ep->e2e_csd[e2ep->e2e_next_csd] != 0) { - e2ep->e2e_next_csd++; - if (e2ep->e2e_next_csd == FD_SETSIZE) { - e2ep->e2e_next_csd = 0; - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_connection: Target %d Worker: %d: ERROR: no csd entries left\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number); - return(-1); - } - } /* end of WHILE loop that finds the next csd entry */ - } /* End of processing an incoming connection */ - return(0); -} // End of xdd_e2e_dest_connection() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_receive_header() - Receive E2E Header from the source side - * This subroutine will block until the entire header is received or until - * the connection is broken in which case an error is returned. - * - * Return values: Upon successfully reading the header and validating - * it, the size of the header in bytes is returned to the caller. 
- * Otherwise, in the event of an error, the status of the recvfrom() - * call is returned to the caller. - * - */ -int32_t -xdd_e2e_dest_receive_header(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to the Target Data - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header - int status; // function call status - int bytes_received; // Cumulative number of bytes received if multiple recvfrom() invocations are necessary - int receive_size; // The number of bytes to receive for this invocation of recvfrom() - int max_xfer; // Maximum TCP transmission size - unsigned char *bufp; - xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; - - e2ep->e2e_header_size = sizeof(xdd_e2e_header_t); - e2ep->e2e_xfer_size = e2ep->e2e_header_size; - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_header: Target %d Worker: %d: ENTER: Waiting to receive %d bytes of header: op# %lld: e2ep=%p: e2ehp=%p: e2e_datap=%p\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_header_size, (long long int)wdp->wd_task.task_op_number, e2ep, e2ehp, e2ep->e2e_datap ); - - max_xfer = MAXMIT_TCP; - - /* This section will check to see which of the Client Socket Descriptors - * are in the readset. For those csd's that are ready, a recv is issued to - * receive the incoming data. 
- */ - status = -1; // Default status - bytes_received = 0; - for (e2ep->e2e_current_csd = 0; e2ep->e2e_current_csd < FD_SETSIZE; e2ep->e2e_current_csd++) { // Process all CSDs that are ready - if (FD_ISSET(e2ep->e2e_csd[e2ep->e2e_current_csd], &e2ep->e2e_readset)) { /* Process this csd */ - if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_processor_start = xdd_get_processor(); - } - - nclk_now(&wdp->wd_counters.tc_current_net_start_time); - - // Read in the E2E Header - bytes_received = 0; - bufp = (unsigned char *)e2ehp; - while (bytes_received < e2ep->e2e_header_size) { - // Calculate the max number of bytes we can receive per call to recvfrom() - receive_size = e2ep->e2e_header_size - bytes_received; - if (receive_size > max_xfer) - receive_size = max_xfer; - - // Issue recvfrom() -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_header: Target: %d: Worker: %d: HEADER: Calling recvfrom bytes_received=%d: receive_size=%d: e2ehp=%p: new_bp=%p\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, bytes_received, receive_size, e2ehp, bufp); - status = recvfrom(e2ep->e2e_csd[e2ep->e2e_current_csd], - bufp, - receive_size, - 0, - NULL, - NULL); -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_header: Target: %d: Worker: %d: HEADER: Received %d of %d bytes\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, status, e2ep->e2e_header_size); -if (xgp->global_options & GO_DEBUG_E2E) xdd_show_e2e_header(e2ehp); - - // Check for errors - if (status <= 0) { - e2ep->e2e_recv_status = status; - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_header: Target %d Worker: %d: ERROR RECEIVING HEADER: recv_status=%d, errno=%d\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - e2ep->e2e_recv_status, - errno); - return(status); - } - // 
Otherwise, figure out how much of the header we read - bytes_received += status; - bufp += status; - } // End of WHILE loop that received incoming data from the source machine -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_header: Target: %d: Worker: %d: HEADER: Got the header... now check to see if the status <%d> is > 0 \n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, status); - } // End of IF stmnt that processes a CSD - } // End of FOR loop that processes all CSDs that were ready - - if (bytes_received != e2ep->e2e_header_size) { - // This is an internal error that should not occur... - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_header: Target %d Worker: %d: INTERNAL ERROR: The number of bytes received <%d> is not equal to the size of the E2E Header <%d>\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - bytes_received, - e2ep->e2e_header_size); - return(-1); - } - - // ensure the cookies are equal - char expected_cookie[sizeof(tdp->td_magic_cookie)]; - memcpy(expected_cookie, tdp->td_magic_cookie, sizeof(expected_cookie)); - if (memcmp(e2ehp->e2eh_cookie, expected_cookie, sizeof(e2ehp->e2eh_cookie))) { - // Invalid E2E Header - bad magic cookie - //TODO: it would be helpful to print out the magic cookie - // received vs. 
what was expected, but this will require - // converting the binary cookie to a textual representation - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_header: Target %d Worker: %d: ERROR: Bad magic cookie on recv %d\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - e2ep->e2e_msg_recv); - return(-1); - } - - if ((e2ehp->e2eh_magic != XDD_E2E_DATA_READY) && (e2ehp->e2eh_magic != XDD_E2E_EOF)) { - // Invalid E2E Header - bad magic number - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_header: Target %d Worker: %d: ERROR: Bad magic number 0x%08x on recv %d - should be either 0x%08x or 0x%08x\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - e2ehp->e2eh_magic, - e2ep->e2e_msg_recv, - XDD_E2E_DATA_READY, - XDD_E2E_EOF); - return(-1); - } - - return(bytes_received); - -} // End of xdd_e2e_dest_receive_header() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_receive_data() - Receive E2E Data from the source side - * This subroutine will block until the entire data portion is received or - * until the connection is broken in which case an error is returned. - * - * Return values: Upon successfully reading the data and validating - * it, the number of data bytes is returned to the caller. - * Otherwise, in the event of an error, the status of the recvfrom() - * call is returned to the caller. 
- * - */ -int32_t -xdd_e2e_dest_receive_data(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to the Target Data - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header - int status; // function call status - int bytes_received; // Cumulative number of bytes received if multiple recvfrom() invocations are necessary - int receive_size; // The number of bytes to receive for this invocation of recvfrom() - int max_xfer; // Maximum TCP transmission size - unsigned char *bufp; - xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; - - e2ep->e2e_data_size = e2ehp->e2eh_data_length; - e2ep->e2e_xfer_size = e2ep->e2e_data_size; - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_data: Target %d Worker: %d: ENTER: Waiting to receive %d bytes of DATA: op# %lld: e2ep=%p: e2ehp=%p: e2e_datap=%p\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_data_size, (long long int)wdp->wd_task.task_op_number, e2ep, e2ehp, e2ep->e2e_datap ); - // The following uses strictly TCP - - max_xfer = MAXMIT_TCP; - - /* This section will check to see which of the Client Socket Descriptors - * are in the readset. For those csd's that are ready, a recv is issued to - * receive the incoming data. 
- */ - status = -1; // Default status - bytes_received = 0; - for (e2ep->e2e_current_csd = 0; e2ep->e2e_current_csd < FD_SETSIZE; e2ep->e2e_current_csd++) { // Process all CSDs that are ready - if (FD_ISSET(e2ep->e2e_csd[e2ep->e2e_current_csd], &e2ep->e2e_readset)) { /* Process this csd */ - if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_processor_start = xdd_get_processor(); - } - - // Receive DATA if this was a "DATA" message - e2ep->e2e_data_size = e2ehp->e2eh_data_length; - bytes_received = 0; - bufp = (unsigned char *)e2ep->e2e_datap; -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_data: Target: %d: Worker: %d: OK - IT IS A HEADER SO LETS READ DATA: bytes_received=%d:e2e_data_size=%d \n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, bytes_received,e2ep->e2e_data_size); - while (bytes_received < e2ep->e2e_data_size) { - receive_size = e2ep->e2e_data_size - bytes_received; - if (receive_size > max_xfer) - receive_size = max_xfer; - // Issue recvfrom() -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_data: Target %d Worker: %d: BEFORE RECVFROM DATA: Preparing to receive %d bytes of data, op# %lld, buffer address %p, wd_bufp=%p, wd_task.task_datap=%p, e2e_datap=%p\n", (unsigned long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, receive_size, e2ep->e2e_data_size, (long long int)wdp->wd_task.task_op_number, ((unsigned char *)e2ehp) + bytes_received, wdp->wd_bufp, wdp->wd_task.task_datap, e2ep->e2e_datap); - status = recvfrom(e2ep->e2e_csd[e2ep->e2e_current_csd], - bufp, - receive_size, - 0, - NULL, - NULL); -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive_data: Target: %d: Worker: %d: AFTER RECVFROM DATA: Received %d of %d bytes\n", (long long int)pclk_now(), tdp->td_target_number, 
wdp->wd_worker_number, status, e2ep->e2e_data_size); - - // Check for errors - if (status <= 0) { - e2ep->e2e_recv_status = status; - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_data: Target %d Worker: %d: ERROR RECEIVING HEADER: recv_status=%d, errno=%d\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - e2ep->e2e_recv_status, - errno); - return(status); - } - - // Otherwise, figure out how much data we got and go back for more if necessary - bytes_received += status; - bufp += status; - } // End of WHILE loop that reads the E2E Header - } - - } - - return(bytes_received); - -} // End of xdd_e2e_dest_receive_data() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_receive_error() - Process an error event during recvfrom() - * Return values: None. - */ -int -xdd_e2e_dest_receive_error(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to the Target Data - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - int errno_save; // A copy of the errno - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - - if (e2ep->e2e_recv_status == 0) { // A status of 0 means that the source side shut down unexpectedly - essentially and Enf-Of-File - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_error: Target %d Worker: %d: ERROR: Connection closed prematurely by Source, op number %lld, location %lld\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - (long long int)wdp->wd_task.task_op_number, - (long long int)wdp->wd_task.task_byte_offset); - } else if (e2ep->e2e_recv_status < 0) { // A status less than 0 indicates some kind of error. 
- errno_save = errno; - fprintf(xgp->errout,"\n%s: xdd_e2e_dest_receive_error: Target %d Worker: %d: ERROR: recvfrom returned -1, errno %d, op number %lld, location %lld\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - errno, - (long long int)wdp->wd_task.task_op_number, - (long long int)wdp->wd_task.task_byte_offset); - - // Restore the errno and display the reason for the error - errno = errno_save; - perror("Reason"); - } - // At this point we need to clear out this csd and "Deactivate" the socket. - FD_CLR(e2ep->e2e_csd[e2ep->e2e_current_csd], &e2ep->e2e_active); - (void) closesocket(e2ep->e2e_csd[e2ep->e2e_current_csd]); - e2ep->e2e_csd[e2ep->e2e_current_csd] = 0; - return(errno); - -} // End of xdd_e2e_dest_receive_error() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_receive() - Receive data from the source side - * This subroutine will block until data is received or until the - * connection is broken in which case an error is returned. - * - * Return values: 0 is good, -1 is bad - * - */ -// The receive is done in two parts. -// 1) The first part of the receive reads in the header portion of the packet -// which is normally 1 page. The header contains the number of bytes to -// read in for the data portion of the packet. -// 2) The second part of the receive reads in the data portion of the packet. -// If this is an EOF operation then there is no further data to read. 
-int32_t -xdd_e2e_dest_receive(worker_data_t *wdp) { - target_data_t *tdp; // Pointer to the Target Data - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header - int32_t status; // Status of the call to xdd_e2e_dst_connection() - int header_bytes_received; // Number of header bytes received - int data_bytes_received; // Number of data bytes received - nclk_t e2e_wait_1st_msg_start_time; // This is the time stamp of when the first message arrived - xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; - - nclk_now(&e2e_wait_1st_msg_start_time); - wdp->wd_counters.tc_current_net_start_time = e2e_wait_1st_msg_start_time; - - // Make the connection to the source - status = xdd_e2e_dest_connection(wdp); - if (status) - return(-1); - - // Read in the E2E Header, this will tell us the length of the data portion of this E2E Message - header_bytes_received = xdd_e2e_dest_receive_header(wdp); - if (header_bytes_received <= 0) { - xdd_e2e_dest_receive_error(wdp); - return(-1); - } - - data_bytes_received = 0; - if (e2ehp->e2eh_magic == XDD_E2E_DATA_READY) { - // Read in the data portion of this E2E Message - data_bytes_received = xdd_e2e_dest_receive_data(wdp); - if (data_bytes_received <= 0) { - xdd_e2e_dest_receive_error(wdp); - return(-1); - } - } else { -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_dest_receive: Target %d Worker: %d: Received and EOF\n", (unsigned long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number); - } - - nclk_now(&wdp->wd_counters.tc_current_net_end_time); - - // Keep track of the total number of bytes received so far... 
- e2ep->e2e_recv_status = header_bytes_received + data_bytes_received; - - // This will record the amount of time that we waited from the time we started until we got the first packet - if (!e2ep->e2e_wait_1st_msg) - e2ep->e2e_wait_1st_msg = wdp->wd_counters.tc_current_net_end_time - e2e_wait_1st_msg_start_time; - - // If this is the first packet received by this Worker Thread then record the *end* of this operation as the - // *start* of this pass. The reason is that the initial recvfrom() may have been issued long before the - // Source side started sending data and we need to ignore that startup delay. - if (tdp->td_counters.tc_pass_start_time == NCLK_MAX) { // This is an indication that this is the fist recvfrom() that has completed - tdp->td_counters.tc_pass_start_time = wdp->wd_counters.tc_current_net_end_time; - e2ep->e2e_sr_time = 0; // The first Send/Receive time is zero. - } else { // Calculate the Send/Receive time by the time it took the last recvfrom() to run - e2ep->e2e_sr_time = (wdp->wd_counters.tc_current_net_end_time - wdp->wd_counters.tc_current_net_start_time); - } - - e2ehp->e2eh_recv_time = wdp->wd_counters.tc_current_net_end_time; // This needs to be the net_end_time from this side of the operation - - // If time stamping is on then we need to reset these values - if ((tdp->td_ts_table.ts_options & (TS_ON|TS_TRIGGERED))) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_start = wdp->wd_counters.tc_current_net_start_time; - ttep->tte_net_end = wdp->wd_counters.tc_current_net_end_time; - ttep->tte_net_processor_end = xdd_get_processor(); - ttep->tte_net_xfer_size = e2ep->e2e_recv_status; - ttep->tte_byte_offset = e2ehp->e2eh_byte_offset; - ttep->tte_disk_xfer_size = e2ehp->e2eh_data_length; - ttep->tte_op_number = e2ehp->e2eh_sequence_number; - if (e2ehp->e2eh_magic == XDD_E2E_EOF) - ttep->tte_op_type = SO_OP_EOF; - else ttep->tte_op_type = SO_OP_WRITE; - } - - e2ep->e2e_readset = e2ep->e2e_active; /* Prepare for 
the next select */ - - return(0); - -} /* end of xdd_e2e_dest_receive() */ -/*----------------------------------------------------------------------*/ -/* xdd_e2e_eof_source_side() - End-Of-File processing for Source - * Return values: 0 is good, -1 is bad - */ -int32_t -xdd_e2e_eof_source_side(worker_data_t *wdp) { - target_data_t *tdp; - xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header - int bytes_sent; // Cumulative number of bytes sent if multiple sendto() invocations are necessary - int sendto_calls; // Cumulative number of calls if multiple sendto() invocations are necessary - int send_size; // The number of bytes to send for this invocation of sendto() - int max_xfer; // Maximum TCP transmission size - unsigned char *bufp; - xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; - - e2ep->e2e_header_size = sizeof(xdd_e2e_header_t); - e2ep->e2e_xfer_size = e2ep->e2e_header_size; - -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_eof_source_side: Target %d Worker: %d: ENTER: \n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number); - - /* If this is XNI, just short circuit */ - if (PLAN_ENABLE_XNI & tdp->td_planp->plan_options) { - e2ep->e2e_send_status = 0; - e2ep->e2e_sr_time = 0; - return 0; - } - - // The following uses strictly TCP - max_xfer = MAXMIT_TCP; - - nclk_now(&wdp->wd_counters.tc_current_net_start_time); - e2ehp->e2eh_worker_thread_number = wdp->wd_worker_number; - e2ehp->e2eh_sequence_number = (wdp->wd_task.task_op_number + wdp->wd_worker_number); // This is an EOF packet header - e2ehp->e2eh_byte_offset = -1; // NA - e2ehp->e2eh_data_length = 0; // NA - no data being sent other than the header - e2ehp->e2eh_magic = XDD_E2E_EOF; - memcpy(e2ehp->e2eh_cookie, tdp->td_magic_cookie, sizeof(e2ehp->e2eh_cookie)); - - if (tdp->td_ts_table.ts_options & 
(TS_ON | TS_TRIGGERED)) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_processor_start = xdd_get_processor(); - } - // This will send the E2E Header to the Destination - bytes_sent = 0; - sendto_calls = 0; - bufp = (unsigned char *)e2ehp; - while (bytes_sent < e2ep->e2e_header_size) { - send_size = e2ep->e2e_header_size - bytes_sent; - if (send_size > max_xfer) - send_size = max_xfer; - - bufp += bytes_sent; - e2ep->e2e_send_status = sendto(e2ep->e2e_sd, - bufp, - send_size, - 0, - (struct sockaddr *)&e2ep->e2e_sname, - sizeof(struct sockaddr_in)); - - if (e2ep->e2e_send_status <= 0) { - xdd_e2e_err(wdp,"xdd_e2e_eof_source_side","ERROR: error sending EOF to destination\n"); - return(-1); - } - bytes_sent += e2ep->e2e_send_status; - sendto_calls++; -if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_eof_source_side: Target: %d: Worker: %d: Sent %d of %d bytes of the HEADER - %d bytes sent so far\n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, send_size, e2ep->e2e_header_size,bytes_sent); - } - nclk_now(&wdp->wd_counters.tc_current_net_end_time); - - // Calculate the Send/Receive time by the time it took the last sendto() to run - e2ep->e2e_sr_time = (wdp->wd_counters.tc_current_net_end_time - wdp->wd_counters.tc_current_net_start_time); - // If time stamping is on then we need to reset these values - if ((tdp->td_ts_table.ts_options & (TS_ON|TS_TRIGGERED))) { - ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_start = wdp->wd_counters.tc_current_net_start_time; - ttep->tte_net_end = wdp->wd_counters.tc_current_net_end_time; - ttep->tte_net_processor_end = xdd_get_processor(); - ttep->tte_net_xfer_size = bytes_sent; - ttep->tte_net_xfer_calls = sendto_calls; - ttep->tte_byte_offset = -1; - ttep->tte_disk_xfer_size = 0; - ttep->tte_op_number =e2ehp->e2eh_sequence_number; - ttep->tte_op_type = TASK_OP_TYPE_EOF; - } - - - if (bytes_sent != 
e2ep->e2e_header_size) { - xdd_e2e_err(wdp,"xdd_e2e_eof_source_side","ERROR: could not send EOF to destination\n"); - return(-1); - } - - return(0); -} /* end of xdd_e2e_eof_source_side() */ - -/* - * Local variables: - * indent-tabs-mode: t - * default-tab-width: 4 - * c-indent-level: 4 - * c-basic-offset: 4 - * End: - * - * vim: ts=4 sts=4 sw=4 noexpandtab - */ diff --git a/src/net/end_to_end_init.c b/src/net/end_to_end_init.c deleted file mode 100644 index e0babe0d..00000000 --- a/src/net/end_to_end_init.c +++ /dev/null @@ -1,553 +0,0 @@ -/* - * XDD - a data movement and benchmarking toolkit - * - * Copyright (C) 1992-2013 I/O Performance, Inc. - * Copyright (C) 2009-2013 UT-Battelle, LLC - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2, as published by the Free Software - * Foundation. See file COPYING. - * - */ -/* - * This file contains the subroutines necessary to perform initialization - * for the end-to-end option. - */ -#include "xint.h" - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_target_init() - init socket library - * This routine is called during target initialization to initialize the - * socket library. 
- * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_target_init(target_data_t *tdp) { - xint_restart_t *rp; // pointer to a restart structure - int status; - - // Perform XNI initialization if required - xdd_plan_t *planp = tdp->td_planp; - if (PLAN_ENABLE_XNI & planp->plan_options) { - xint_e2e_xni_init(tdp); - } - else { - - // Init the sockets - This is actually just for Windows that requires some additional initting - status = xdd_sockets_init(); - if (status == -1) { - fprintf(xgp->errout,"%s: xdd_e2e_target_init: could not initialize sockets for e2e target\n",xgp->progname); - return(-1); - } - } - - // Restart processing if necessary - if ((tdp->td_target_options & TO_RESTART_ENABLE) && (tdp->td_restartp)) { // Check to see if restart was requested - // Set the last_committed_byte_offset to 0 - rp = tdp->td_restartp; - rp->last_committed_byte_offset = rp->byte_offset; - rp->last_committed_length = 0; - } - - return(0); -} - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_worker_init() - init source and destination sides - * This routine is called during Worker Thread initialization to initialize - * a source or destination Worker Thread. 
- * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_worker_init(worker_data_t *wdp) { - target_data_t *tdp; - int status; - in_addr_t addr; - - tdp = wdp->wd_tdp; - wdp->wd_e2ep->e2e_sr_time = 0; - - if(wdp->wd_e2ep->e2e_dest_hostname == NULL) { - fprintf(xgp->errout,"%s: xdd_e2e_worker_init: Target %d Worker Thread %d: No DESTINATION host name or IP address specified for this end-to-end operation.\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number); - fprintf(xgp->errout,"%s: xdd_e2e_worker_init: Target %d Worker Thread %d: Use the '-e2e destination' option to specify the DESTINATION host name or IP address.\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number); - return(-1); - } - - // Get the IP address of the destination host - status = xint_lookup_addr(wdp->wd_e2ep->e2e_dest_hostname, 0, &addr); - if (status) { - fprintf(xgp->errout, "%s: xdd_e2e_worker_init: unable to identify host '%s'\n", - xgp->progname, wdp->wd_e2ep->e2e_dest_hostname); - return(-1); - } - - // Convert to host byte order - wdp->wd_e2ep->e2e_dest_addr = ntohl(addr); - - if (tdp->td_target_options & TO_E2E_DESTINATION) { // This is the Destination side of an End-to-End - status = xdd_e2e_dest_init(wdp); - } else if (tdp->td_target_options & TO_E2E_SOURCE) { // This is the Source side of an End-to-End - status = xdd_e2e_src_init(wdp); - } else { // Should never reach this point - status = -1; - } - - return status; -} // xdd_e2e_worker_init() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_src_init() - init the source side - * This routine is called from the xdd io thread before all the action - * starts. When calling this routine, it is because this thread is on the - * "source" side of an End-to-End test. 
Hence, this routine needs - * to set up a socket on the appropriate port - * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_src_init(worker_data_t *wdp) { - target_data_t *tdp; - xint_e2e_t *e2ep; // Pointer to the E2E data struct - int status; // status of various function calls - - - tdp = wdp->wd_tdp; - e2ep = wdp->wd_e2ep; - - // Check to make sure that the source target is actually *reading* the data from a file or device - if (tdp->td_rwratio < 1.0) { // Something is wrong - the source file/device is not 100% read - fprintf(xgp->errout,"%s: xdd_e2e_src_init: Target %d Worker Thread %d: Error - E2E source file '%s' is not being *read*: rwratio=%5.2f is not valid\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - tdp->td_target_full_pathname, - tdp->td_rwratio); - return(-1); - } - - /* Only setup sockets if not using XNI */ - xdd_plan_t *planp = tdp->td_planp; - if (!(PLAN_ENABLE_XNI & planp->plan_options)) { - status = xdd_e2e_setup_src_socket(wdp); - if (status == -1){ - xdd_e2e_err(wdp,"xdd_e2e_src_init","could not setup sockets for e2e source\n"); - return(-1); - } - } - - // Init the relevant variables - e2ep->e2e_msg_sent = 0; - e2ep->e2e_msg_sequence_number = 0; - e2ep->e2e_header_size = (int)(sizeof(xdd_e2e_header_t)); - - // Init the message header - // For End-to-End operations, the buffer pointers are as follows: - // +----------------+-----------------------------------------------------+ - // |<----1 page---->| transfer size (td_xfer_size) rounded up to N pages | - // |<-task_bufp |<-task_datap | - // | | | | - // | |<-Header->| E2E data buffer | - // +-----*----------*-----------------------------------------------------+ - // ^ ^ - // ^ +-e2e_datap - // +-e2e_hdrp - // - e2ep->e2e_hdrp->e2eh_worker_thread_number = 0; - e2ep->e2e_hdrp->e2eh_sequence_number = 0; - e2ep->e2e_hdrp->e2eh_send_time = 0; - e2ep->e2e_hdrp->e2eh_recv_time = 0; - e2ep->e2e_hdrp->e2eh_byte_offset = 0; - 
e2ep->e2e_hdrp->e2eh_data_length = 0; - - - return(0); - -} /* end of xdd_e2e_src_init() */ - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_setup_src_socket() - set up the source side - * This subroutine is called by xdd_e2e_src_init() and is passed a - * pointer to the Data Struct of the requesting Worker Thread. - * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_setup_src_socket(worker_data_t *wdp) { - int status; /* status of send/recv function calls */ - int type; - static const int connect_try_limit = 4; - int i; - - // The socket type is SOCK_STREAM because this is a TCP connection - type = SOCK_STREAM; - - // Create the socket - wdp->wd_e2ep->e2e_sd = socket(AF_INET, type, IPPROTO_TCP); - if (wdp->wd_e2ep->e2e_sd < 0) { - xdd_e2e_err(wdp,"xdd_e2e_setup_src_socket","ERROR: error openning socket\n"); - return(-1); - } - (void) xdd_e2e_set_socket_opts (wdp,wdp->wd_e2ep->e2e_sd); - - /* Now build the "name" of the DESTINATION machine socket thingy and connect to it. 
*/ - (void) memset(&wdp->wd_e2ep->e2e_sname, 0, sizeof(wdp->wd_e2ep->e2e_sname)); - wdp->wd_e2ep->e2e_sname.sin_family = AF_INET; - wdp->wd_e2ep->e2e_sname.sin_addr.s_addr = htonl(wdp->wd_e2ep->e2e_dest_addr); - wdp->wd_e2ep->e2e_sname.sin_port = htons(wdp->wd_e2ep->e2e_dest_port); - wdp->wd_e2ep->e2e_snamelen = sizeof(wdp->wd_e2ep->e2e_sname); - - // Attempt to connect to the server for roughly 10 seconds - i = 0; - status = 1; - while (i < connect_try_limit && 0 != status) { - - /* If this is a retry, sleep for 3 seconds before retrying */ - if (i > 0) { - struct timespec req; - memset(&req, 0, sizeof(req)); - req.tv_sec = 3; - fprintf(xgp->errout, - "Socket connection error, retrying in %d seconds: %d\n", - (int)req.tv_sec, status); - nanosleep(&req, (struct timespec *)NULL); - } - - status = connect(wdp->wd_e2ep->e2e_sd, - (struct sockaddr *) &wdp->wd_e2ep->e2e_sname, - sizeof(wdp->wd_e2ep->e2e_sname)); - i++; - } - - if (0 != status) { - xdd_e2e_err(wdp,"xdd_e2e_setup_src_socket","error connecting to socket for E2E destination\n"); - return(-1); - } - - return(0); - -} /* end of xdd_e2e_setup_src_socket() */ - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_dest_init() - init the destination side - * This routine is called by a Worker Thread on the "destination" side of an - * end_to_end operation and is passed a pointer to the Data Struct of the - * requesting Worker Thread. 
- * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_dest_init(worker_data_t *wdp) { - target_data_t *tdp; - int status; // status of various function calls - - - tdp = wdp->wd_tdp; - // Check to make sure that the destination target is actually *writing* the data it receives to a file or device - if (tdp->td_rwratio > 0.0) { // Something is wrong - the destination file/device is not 100% write - fprintf(xgp->errout,"%s: xdd_e2e_dest_init: Target %d Worker Thread %d: Error - E2E destination file/device '%s' is not being *written*: rwratio=%5.2f is not valid\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - tdp->td_target_full_pathname, - tdp->td_rwratio); - return(-1); - } - - /* Only setup sockets if not using XNI */ - xdd_plan_t *planp = tdp->td_planp; - if (!(PLAN_ENABLE_XNI & planp->plan_options)) { - status = xdd_e2e_setup_dest_socket(wdp); - if (status == -1){ - xdd_e2e_err(wdp,"xdd_e2e_dest_init","could not setup sockets for e2e destination\n"); - return(-1); - } - - // Set up the descriptor table for the select() call - // This section is used when we are using TCP - /* clear out the csd table */ - for (wdp->wd_e2ep->e2e_current_csd = 0; wdp->wd_e2ep->e2e_current_csd < FD_SETSIZE; wdp->wd_e2ep->e2e_current_csd++) - wdp->wd_e2ep->e2e_csd[wdp->wd_e2ep->e2e_current_csd] = 0; - - // Set the current and next csd indices to 0 - wdp->wd_e2ep->e2e_current_csd = wdp->wd_e2ep->e2e_next_csd = 0; - - /* Initialize the socket sets for select() */ - FD_ZERO(&wdp->wd_e2ep->e2e_readset); - FD_SET(wdp->wd_e2ep->e2e_sd, &wdp->wd_e2ep->e2e_readset); - wdp->wd_e2ep->e2e_active = wdp->wd_e2ep->e2e_readset; - wdp->wd_e2ep->e2e_current_csd = wdp->wd_e2ep->e2e_next_csd = 0; - - /* Find out how many sockets are in each set */ - wdp->wd_e2ep->e2e_nd = FD_SETSIZE; - } - - // Initialize the message counter and sequencer to 0 - wdp->wd_e2ep->e2e_msg_recv = 0; - wdp->wd_e2ep->e2e_msg_sequence_number = 0; - - return(0); - -} /* end of 
xdd_e2e_dest_init() */ - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_setup_dest_socket() - Set up the socket on the Destination side - * This subroutine is called by xdd_e2e_dest_init() and is passed a - * pointer to the Data Struct of the requesting Worker Thread. - * - * Return values: 0 is good, -1 is bad - * - */ -int32_t -xdd_e2e_setup_dest_socket(worker_data_t *wdp) { - int status; - int type; - char msg[256]; - - - // Set the "type" of socket being requested: for TCP, type=SOCK_STREAM - type = SOCK_STREAM; - - // Create the socket - wdp->wd_e2ep->e2e_sd = socket(AF_INET, type, IPPROTO_TCP); - if (wdp->wd_e2ep->e2e_sd < 0) { - xdd_e2e_err(wdp,"xdd_e2e_setup_dest_socket","ERROR: error openning socket\n"); - return(-1); - } - - (void) xdd_e2e_set_socket_opts (wdp, wdp->wd_e2ep->e2e_sd); - - /* Bind the name to the socket */ - (void) memset(&wdp->wd_e2ep->e2e_sname, 0, sizeof(wdp->wd_e2ep->e2e_sname)); - wdp->wd_e2ep->e2e_sname.sin_family = AF_INET; - wdp->wd_e2ep->e2e_sname.sin_addr.s_addr = htonl(wdp->wd_e2ep->e2e_dest_addr); - wdp->wd_e2ep->e2e_sname.sin_port = htons(wdp->wd_e2ep->e2e_dest_port); - wdp->wd_e2ep->e2e_snamelen = sizeof(wdp->wd_e2ep->e2e_sname); - if (bind(wdp->wd_e2ep->e2e_sd, (struct sockaddr *) &wdp->wd_e2ep->e2e_sname, wdp->wd_e2ep->e2e_snamelen)) { - sprintf(msg,"Error binding name to socket - addr=0x%08x, port=0x%08x, specified as %d \n", - wdp->wd_e2ep->e2e_sname.sin_addr.s_addr, - wdp->wd_e2ep->e2e_sname.sin_port, - wdp->wd_e2ep->e2e_dest_port); - xdd_e2e_err(wdp,"xdd_e2e_setup_dest_socket",msg); - return(-1); - } - - /* All set; prepare to accept connection requests */ - if (type == SOCK_STREAM) { // If this is a stream socket then we need to listen for incoming data - status = listen(wdp->wd_e2ep->e2e_sd, SOMAXCONN); - if (status) { - xdd_e2e_err(wdp,"xdd_e2e_setup_dest_socket","ERROR: bad status starting LISTEN on socket\n"); - return(-1); - } - } - - return(0); - -} /* end of 
xdd_e2e_setup_dest_socket() */ - -/*----------------------------------------------------------------------*/ -/* - * xdd_e2e_set_socket_opts() - set the options for specified socket. - * - */ -void -xdd_e2e_set_socket_opts(worker_data_t *wdp, int skt) { - target_data_t *tdp; - int status; - int level = SOL_SOCKET; - xdd_plan_t* planp = wdp->wd_tdp->td_planp; - -#if WIN32 - char optionvalue; -#else - int optionvalue; -#endif - - tdp = wdp->wd_tdp; - /* Create the socket and set some options */ - optionvalue = 1; - status = setsockopt(skt, IPPROTO_TCP, TCP_NODELAY, &optionvalue, sizeof(optionvalue)); - if (status != 0) { - xdd_e2e_err(wdp,"xdd_e2e_set_socket_opts","Error setting TCP_NODELAY \n"); - } - status = setsockopt(skt,level,SO_SNDBUF,(char *)&planp->e2e_TCP_Win,sizeof(planp->e2e_TCP_Win)); - if (status < 0) { - fprintf(xgp->errout,"%s: xdd_e2e_set_socket_opts: Target %d Worker Thread %d: WARNING: on setsockopt SO_SNDBUF: status %d: %s\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - status, - strerror(errno)); - } - status = setsockopt(skt,level,SO_RCVBUF,(char *)&planp->e2e_TCP_Win,sizeof(planp->e2e_TCP_Win)); - if (status < 0) { - fprintf(xgp->errout,"%s: xdd_e2e_set_socket_opts: Target %d Worker Thread %d: WARNING: on setsockopt SO_RCVBUF: status %d: %s\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - status, - strerror(errno)); - } - status = setsockopt(skt,level,SO_REUSEADDR,(char *)&planp->e2e_TCP_Win,sizeof(planp->e2e_TCP_Win)); - if (status < 0) { - fprintf(xgp->errout,"%s: xdd_e2e_set_socket_opts: Target %d Worker Thread %d: WARNING: on setsockopt SO_REUSEPORT: status %d: %s\n", - xgp->progname, - tdp->td_target_number, - wdp->wd_worker_number, - status, - strerror(errno)); - } - -} // End of xdd_e2e_set_socket_opts() -/*----------------------------------------------------------------------*/ -/* - * xdd_e2e_prt_socket_opts() - print the currnet options for specified - * socket. 
- * - */ -void -xdd_e2e_prt_socket_opts(int skt) { - int level = SOL_SOCKET; - int sockbuf_sizs; - int sockbuf_sizr; - int reuse_addr; - socklen_t optlen; - - - optlen = sizeof(sockbuf_sizs); - getsockopt(skt,level,SO_SNDBUF,(char *)&sockbuf_sizs,&optlen); - optlen = sizeof(sockbuf_sizr); - getsockopt(skt,level,SO_RCVBUF,(char *)&sockbuf_sizr,&optlen); - optlen = sizeof(reuse_addr); - getsockopt(skt,level,SO_REUSEADDR,(char *)&reuse_addr,&optlen); -} // End of xdd_e2e_prt_socket_opts() - -/*----------------------------------------------------------------------*/ -/* xdd_e2e_err(fmt...) - * - * General-purpose error handling routine. Prints the short message - * provided to standard error, along with some boilerplate information - * such as the program name and errno value. Any remaining arguments - * are used in printed message (the usage here takes the same form as - * printf()). - */ -void -xdd_e2e_err(worker_data_t *wdp, char const *whence, char const *fmt, ...) { -#ifdef WIN32 - LPVOID lpMsgBuf; - fprintf(xgp->errout, "last error was %d\n", WSAGetLastError()); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - WSAGetLastError(), - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language - (LPTSTR) &lpMsgBuf, - 0, - NULL); - fprintf(xgp->errout,"Reason: %s",lpMsgBuf); -#endif /* WIN32 */ - fprintf(xgp->errout,"\n%s: %s: Target %d Worker Thread %d: ", - xgp->progname, - whence, - wdp->wd_tdp->td_target_number, - wdp->wd_worker_number); - fprintf(xgp->errout, "%s", fmt); - perror(" Reason"); - return; -} /* end of xdd_e2e_err() */ - -/*----------------------------------------------------------------------*/ -/* xdd_sockets_init() - Windows Only - * - * Windows requires the WinSock startup routine to be run - * before running a bunch of socket routines. We encapsulate - * that here in case some other environment needs something similar. 
- * - * Return values: 0 is good - init successful, -1 is bad - * - * The sample code I based this on happened to be requesting - * (and verifying) a WinSock DLL version 2.2 environment was - * present, and it worked, so I kept it that way. - */ -int32_t xdd_sockets_init(void) { -#ifdef WIN32 - WSADATA wsaData; /* Data structure used by WSAStartup */ - int wsastatus; /* status returned by WSAStartup */ - char *reason; - wsastatus = WSAStartup(MAKEWORD(2, 2), &wsaData); - if (wsastatus != 0) { /* Error in starting the network */ - switch (wsastatus) { - case WSASYSNOTREADY: - reason = "Network is down"; - break; - case WSAVERNOTSUPPORTED: - reason = "Request version of sockets <2.2> is not supported"; - break; - case WSAEINPROGRESS: - reason = "Another Windows Sockets operation is in progress"; - break; - case WSAEPROCLIM: - reason = "The limit of the number of sockets tasks has been exceeded"; - break; - case WSAEFAULT: - reason = "Program error: pointer to wsaData is not valid"; - break; - default: - reason = "Unknown error code"; - break; - }; - fprintf(xgp->errout,"%s: Error initializing network connection\nReason: %s\n", - xgp->progname, reason); - fflush(xgp->errout); - WSACleanup(); - return(-1); - } -#endif - - return(0); - -} /* end of xdd_sockets_init() */ - -/*----------------------------------------------------------------------------*/ -/* xdd_get_e2ep() - return a pointer to the xdd_e2e Data Structure - */ -xint_e2e_t * -xdd_get_e2ep(void) { - xint_e2e_t *e2ep; - - e2ep = malloc(sizeof(xint_e2e_t)); - if (e2ep == NULL) { - fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for E2E data structure \n", - xgp->progname, (int)sizeof(xint_e2e_t)); - return(NULL); - } - memset(e2ep, 0, sizeof(xint_e2e_t)); - - return(e2ep); -} /* End of xdd_get_e2ep() */ - -/* - * Local variables: - * indent-tabs-mode: t - * default-tab-width: 4 - * c-indent-level: 4 - * c-basic-offset: 4 - * End: - * - * vim: ts=4 sts=4 sw=4 noexpandtab - */ diff --git 
a/src/net/module.mk b/src/net/module.mk index 00e3d0cf..d2b685a5 100644 --- a/src/net/module.mk +++ b/src/net/module.mk @@ -3,7 +3,5 @@ # DIR := src/net -NET_SRC := $(DIR)/end_to_end.c \ - $(DIR)/end_to_end_init.c \ - $(DIR)/read_after_write.c \ +NET_SRC := $(DIR)/read_after_write.c \ $(DIR)/net_utils.c diff --git a/src/public/libxdd.c b/src/public/libxdd.c index 06ad1027..cb392b9c 100644 --- a/src/public/libxdd.c +++ b/src/public/libxdd.c @@ -240,7 +240,7 @@ int local_target_init(target_data_t *tdp, // Setup a data pattern and e2e buffer before initialization tdp->td_dpp = malloc(sizeof(*tdp->td_dpp)); xdd_data_pattern_init(tdp->td_dpp); - tdp->td_e2ep = xdd_get_e2ep(); + tdp->td_e2ep = xint_get_e2ep(); // Now initialize the target data xdd_init_new_target_data(tdp, target_idx); diff --git a/src/tools/utils/matchadd_kernel_events.c b/src/tools/utils/matchadd_kernel_events.c index 98d718fe..98256a11 100644 --- a/src/tools/utils/matchadd_kernel_events.c +++ b/src/tools/utils/matchadd_kernel_events.c @@ -281,7 +281,7 @@ matchadd_kernel_events(int issource, int nthreads, int thread_id[], char *filesp xdd_data->tsh_tte[i].tte_net_start_k = ts_beg_op; xdd_data->tsh_tte[i].tte_net_end_k = ts_end_op; /* xdd eof stuff */ - if ( xdd_data->tsh_tte[i].tte_net_xfer_size != size_op && xdd_data->tsh_tte[i].tte_net_xfer_size != sizeof(xdd_e2e_header_t)) + if ( xdd_data->tsh_tte[i].tte_net_xfer_size != size_op ) fprintf(stderr, "xddop# %zd pid %d op %d size %d != %"PRId64"\n",i,thread_id[k],xdd_data->tsh_tte[i].tte_op_type,xdd_data->tsh_tte[i].tte_net_xfer_size,size_op); if ( xdd_data->tsh_tte[i].tte_net_xfer_calls != nops_op && nops_op > 0) fprintf(stderr, "xddop# %zd pid %d nops %d != %"PRId64"\n",i,thread_id[k],xdd_data->tsh_tte[i].tte_net_xfer_calls,nops_op); diff --git a/src/xnet/xnet_end_to_end.c b/src/xnet/xnet_end_to_end.c index 2d63474a..76ba2183 100644 --- a/src/xnet/xnet_end_to_end.c +++ b/src/xnet/xnet_end_to_end.c @@ -44,63 +44,104 @@ #define de2eprintf(...) 
#endif -int32_t xint_e2e_src_connect(target_data_t *tdp) { +// forward declarations +static int32_t do_connect(worker_data_t*, int); +static xdd_e2e_ate_t *worker_address_table_entry(worker_data_t*); + +static int32_t +do_connect(worker_data_t *wdp, int isdest) +{ + pthread_mutex_t * const mutex = xint_e2e_worker_connection_mutex(wdp); + (void)pthread_mutex_lock(mutex); + + xni_connection_t * const conn = xint_e2e_worker_connection(wdp); + //TODO: find a safer test (maybe add something to XNI?) + if (*conn) { + // Bail; some other worker has already connected + (void)pthread_mutex_unlock(mutex); + return 0; + } int rc = 0; - int e2e_idx = 0; - - /* Loop through the available addresses, and connect */ - while (0 == tdp->td_e2ep->e2e_address_table[e2e_idx].port_count) - e2e_idx++; - - /* Resolve name to an IP */ - rc = xint_lookup_addr(tdp->td_e2ep->e2e_address_table[e2e_idx].hostname, 0, - &tdp->td_e2ep->e2e_dest_addr); - struct in_addr addr = { .s_addr = tdp->td_e2ep->e2e_dest_addr }; + target_data_t * const tdp = wdp->wd_tdp; + + // Get this worker's assigned address entry + xdd_e2e_ate_t *ate = worker_address_table_entry(wdp); + + // Resolve name to an IP address + rc = xint_lookup_addr(ate->hostname, 0, &wdp->wd_e2ep->e2e_dest_addr); + assert(0 == rc); + struct in_addr addr = { .s_addr = wdp->wd_e2ep->e2e_dest_addr }; char* ip_string = inet_ntoa(addr); - fprintf(xgp->errout, "Dest host: %s Connect IP: %s Port: %d\n", tdp->td_e2ep->e2e_address_table[e2e_idx].hostname, ip_string, tdp->td_e2ep->e2e_address_table[e2e_idx].base_port); + + if (!isdest) { + fprintf(xgp->errout, "Dest host: %s Connect IP: %s Port: %d\n", + ate->hostname, ip_string, ate->base_port); + } - /* Create an XNI endpoint from the e2e spec */ - xni_endpoint_t xep = {.host = ip_string, - .port = tdp->td_e2ep->e2e_address_table[e2e_idx].base_port}; - rc = xni_connect(tdp->xni_ctx, &xep, &tdp->td_e2ep->xni_td_conn); - return rc; -} + // Create an XNI endpoint from the e2e spec + xni_endpoint_t xep 
= {.host = ip_string, .port = ate->base_port}; + + // Initialize the set of I/O buffers + xni_bufset_t bufset; + memset(&bufset, 0, sizeof(bufset)); + bufset.bufs = tdp->io_buffers; + // Find the first buffer to be owned by this worker's connection + for (const xdd_e2e_ate_t *p = tdp->td_e2ep->e2e_address_table; + p != ate; + p++) { + + bufset.bufs += p->port_count; + } + bufset.bufcount = ate->port_count; // one buffer per port + bufset.bufsize = tdp->io_buffer_size; + bufset.reserved = getpagesize(); -int32_t xint_e2e_src_disconnect(target_data_t *tdp) { + // Check for overflow + assert(bufset.bufs+bufset.bufcount <= tdp->io_buffers+tdp->io_buffers_count); + + if (isdest) { + rc = xni_accept_connection(tdp->xni_ctx, &xep, &bufset, conn); + } else { + rc = xni_connect(tdp->xni_ctx, &xep, &bufset, conn); + } + // Translate the error code + rc = (XNI_OK == rc) ? 0 : -1; - /* Perform XNI disconnect */ - int rc = xni_close_connection(&tdp->td_e2ep->xni_td_conn); + (void)pthread_mutex_unlock(mutex); return rc; } -int32_t xint_e2e_dest_connect(target_data_t *tdp) { - - int rc = 0; - int e2e_idx = 0; +int32_t xint_e2e_src_connect(worker_data_t *wdp) +{ + return do_connect(wdp, FALSE); +} - /* Loop through the available addresses, and connect */ - while (0 == tdp->td_e2ep->e2e_address_table[e2e_idx].port_count) - e2e_idx++; - - /* Resolve name to an IP */ - rc = xint_lookup_addr(tdp->td_e2ep->e2e_address_table[e2e_idx].hostname, 0, - &tdp->td_e2ep->e2e_dest_addr); - struct in_addr addr = { .s_addr = tdp->td_e2ep->e2e_dest_addr }; - char* ip_string = inet_ntoa(addr); - - /* Create an XNI endpoint from the e2e spec */ - xni_endpoint_t xep = {.host = ip_string, - .port = tdp->td_e2ep->e2e_address_table[e2e_idx].base_port}; - rc = xni_accept_connection(tdp->xni_ctx, &xep, &tdp->td_e2ep->xni_td_conn); - return rc; +int32_t xint_e2e_dest_connect(worker_data_t *wdp) +{ + return do_connect(wdp, TRUE); } -int32_t xint_e2e_dest_disconnect(target_data_t *tdp) { +/* + * 
xint_e2e_disconnect() - close connections and free resources + * This function will close all connections associated with the given + * target. + * + * Returns 0 on success, -1 on failure + */ +int32_t +xint_e2e_disconnect(target_data_t *tdp) +{ + xint_e2e_t * const e2ep = tdp->td_e2ep; + + // Close all connections + for (int i = 0; i < e2ep->xni_td_connections_count; i++) { + int rc = xni_close_connection(e2ep->xni_td_connections+i); + //TODO: handle errors + assert(XNI_OK == rc); + } - /* Perform XNI disconnect */ - int rc = xni_close_connection(&tdp->td_e2ep->xni_td_conn); - return rc; + return 0; } /* @@ -120,7 +161,6 @@ int32_t xint_e2e_dest_disconnect(target_data_t *tdp) { int32_t xint_e2e_xni_send(worker_data_t *wdp) { target_data_t *tdp; xint_e2e_t *e2ep; // Pointer to the E2E data struct - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header //int bytes_sent; // Cumulative number of bytes sent //int sento_calls; // Number of times sendto() has been called xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry @@ -128,9 +168,8 @@ int32_t xint_e2e_xni_send(worker_data_t *wdp) { /* Local aliases */ tdp = wdp->wd_tdp; e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; - de2eprintf("DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: ENTER: e2ep=%p: e2ehp=%p: e2e_datap=%p\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep, e2ehp, e2ep->e2e_datap); + de2eprintf("DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: ENTER: e2ep=%p\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep); /* Some timestamp code */ if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { @@ -138,46 +177,34 @@ int32_t xint_e2e_xni_send(worker_data_t *wdp) { ttep->tte_net_processor_start = xdd_get_processor(); } - /* Construct the e2e header */ - e2ehp->e2eh_worker_thread_number = wdp->wd_worker_number; - e2ehp->e2eh_sequence_number = wdp->wd_task.task_op_number; - e2ehp->e2eh_byte_offset = 
wdp->wd_task.task_byte_offset; - e2ehp->e2eh_data_length = wdp->wd_task.task_xfer_size; - e2ep->e2e_xfer_size = sizeof(xdd_e2e_header_t) + e2ehp->e2eh_data_length; - e2ep->e2e_xfer_size = getpagesize() + e2ehp->e2eh_data_length; + // Set XNI parameters and send + xni_target_buffer_set_sequence_number(wdp->wd_task.task_op_number, + wdp->wd_e2ep->xni_wd_buf); + xni_target_buffer_set_target_offset(wdp->wd_task.task_byte_offset, + wdp->wd_e2ep->xni_wd_buf); + xni_target_buffer_set_data_length(wdp->wd_task.task_xfer_size, + wdp->wd_e2ep->xni_wd_buf); - de2eprintf("DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: Preparing to send %d bytes: e2ep=%p: e2ehp=%p: e2e_datap=%p: e2e_xfer_size=%d: e2eh_data_length=%lld\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_xfer_size,e2ep,e2ehp,e2ep->e2e_datap,e2ep->e2e_xfer_size,(long long int)e2ehp->e2eh_data_length); - if (xgp->global_options & GO_DEBUG_E2E) xdd_show_e2e_header((xdd_e2e_header_t *)xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf)); + de2eprintf("DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: Preparing to send: e2ep=%p: e2eh_data_length=%lld\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number,e2ep,(long long int)xni_target_buffer_data_length(wdp->wd_e2ep->xni_wd_buf)); nclk_now(&wdp->wd_counters.tc_current_net_start_time); - /* Don't send magic eof across wire */ - if (XDD_E2E_EOF != e2ehp->e2eh_magic) { - /* Set XNI parameters and send */ - xni_target_buffer_set_data_length(e2ep->e2e_xfer_size, - e2ep->xni_wd_buf); - xni_target_buffer_set_target_offset(e2ehp->e2eh_byte_offset, - e2ep->xni_wd_buf); - e2ep->e2e_send_status = xni_send_target_buffer(tdp->td_e2ep->xni_td_conn, - &e2ep->xni_wd_buf); - /* Request a fresh buffer from XNI */ - xni_request_target_buffer(tdp->xni_ctx, &wdp->wd_e2ep->xni_wd_buf); - - /* The first page is XNI, the second page is E2E header */ - uintptr_t bufp = - (uintptr_t) 
xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); - wdp->wd_task.task_datap = (unsigned char*)(bufp + getpagesize()); - wdp->wd_e2ep->e2e_hdrp = (xdd_e2e_header_t *)(bufp + (getpagesize() - sizeof(xdd_e2e_header_t))); - wdp->wd_e2ep->e2e_datap = wdp->wd_task.task_datap; - } else { - fprintf(stderr, "Triggered EOF\n"); - e2ep->e2e_send_status = e2ep->e2e_xfer_size; - } + + xni_connection_t * const connp = xint_e2e_worker_connection(wdp); + + e2ep->e2e_send_status = xni_send_target_buffer(*connp, + &e2ep->xni_wd_buf); + // Request a fresh buffer from XNI + xni_request_target_buffer(*connp, &wdp->wd_e2ep->xni_wd_buf); + + // Keep a pointer to the data portion of the buffer + wdp->wd_task.task_datap = xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); + nclk_now(&wdp->wd_counters.tc_current_net_end_time); // Time stamp if requested if (tdp->td_ts_table.ts_options & (TS_ON | TS_TRIGGERED)) { ttep = &tdp->td_ts_table.ts_hdrp->tsh_tte[wdp->wd_ts_entry]; - ttep->tte_net_xfer_size = e2ep->e2e_xfer_size; + ttep->tte_net_xfer_size = xni_target_buffer_data_length(e2ep->xni_wd_buf); ttep->tte_net_start = wdp->wd_counters.tc_current_net_start_time; ttep->tte_net_end = wdp->wd_counters.tc_current_net_end_time; ttep->tte_net_processor_end = xdd_get_processor(); @@ -187,17 +214,37 @@ int32_t xint_e2e_xni_send(worker_data_t *wdp) { // Calculate the Send/Receive time by the time it took the last sendto() to run e2ep->e2e_sr_time = (wdp->wd_counters.tc_current_net_end_time - wdp->wd_counters.tc_current_net_start_time); - //if (bytes_sent != e2ep->e2e_xfer_size) { - // xdd_e2e_err(wdp,"xdd_e2e_src_send","ERROR: could not send header+data from e2e source\n"); - // return(-1); - //} - de2eprintf("DEBUG_E2E: %lld: xdd_e2e_src_send: Target: %d: Worker: %d: EXIT...\n",(long long int)pclk_now(),tdp->td_target_number, wdp->wd_worker_number); return(0); } /* end of xdd_e2e_src_send() */ +/*----------------------------------------------------------------------*/ +/* xint_e2e_eof_source_side() - 
End-Of-File processing for Source + * Return values: 0 is good, -1 is bad + */ +int32_t +xint_e2e_eof_source_side(worker_data_t *wdp) { + target_data_t *tdp; + xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker + + tdp = wdp->wd_tdp; + e2ep = wdp->wd_e2ep; + +if (xgp->global_options & GO_DEBUG_E2E) fprintf(stderr,"DEBUG_E2E: %lld: xdd_e2e_eof_source_side: Target %d Worker: %d: ENTER: \n", (long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number); + + /* If this is XNI, just short circuit */ + if (PLAN_ENABLE_XNI & tdp->td_planp->plan_options) { + e2ep->e2e_send_status = 0; + e2ep->e2e_sr_time = 0; + return 0; + } + + // we only support XNI now + return -1; +} /* end of xdd_e2e_eof_source_side() */ + /* * xint_e2e_xni_recv() - recv the data from source at destination * @@ -211,55 +258,33 @@ int32_t xint_e2e_xni_send(worker_data_t *wdp) { int32_t xint_e2e_xni_recv(worker_data_t *wdp) { target_data_t *tdp; // Pointer to the Target Data xint_e2e_t *e2ep; // Pointer to the E2E struct for this worker - xdd_e2e_header_t *e2ehp; // Pointer to the E2E Header int32_t status; // Status of the call to xdd_e2e_dst_connection() nclk_t e2e_wait_1st_msg_start_time; // This is the time stamp of when the first message arrived xdd_ts_tte_t *ttep; // Pointer to a time stamp table entry - /* Release the current target buffer to XNI */ - xni_release_target_buffer(&wdp->wd_e2ep->xni_wd_buf); - /* Collect the begin time */ nclk_now(&e2e_wait_1st_msg_start_time); wdp->wd_counters.tc_current_net_start_time = e2e_wait_1st_msg_start_time; /* Receive a target buffer and assemble it into the wdp */ tdp = wdp->wd_tdp; - status = xni_receive_target_buffer(tdp->td_e2ep->xni_td_conn, + status = xni_receive_target_buffer(*xint_e2e_worker_connection(wdp), &wdp->wd_e2ep->xni_wd_buf); if (XNI_OK == status) { /* Assemble pointers into the worker's target buffer */ - uintptr_t bufp = - (uintptr_t)xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); - //for (int i = 0; i < 256; 
i++) { - // printf("8 bytes %ld: %d\n", bufp + i, *((int*)bufp + i)); - //} - wdp->wd_task.task_datap = (unsigned char*)(bufp + (1*getpagesize())); - wdp->wd_e2ep->e2e_hdrp = (xdd_e2e_header_t *)(bufp + (1*getpagesize() - sizeof(xdd_e2e_header_t))); - wdp->wd_e2ep->e2e_datap = wdp->wd_task.task_datap; + wdp->wd_task.task_datap = xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); } else if (XNI_EOF == status) { - /* No buffer set on EOF, so just create a static one */ - xdd_e2e_header_t *eof_header = malloc(sizeof(*eof_header)); - wdp->wd_e2ep->e2e_hdrp = eof_header; - wdp->wd_task.task_datap = 0; - wdp->wd_e2ep->e2e_datap = 0; - - /* Perform EOF Assembly */ - wdp->wd_e2ep->e2e_hdrp->e2eh_magic = XDD_E2E_EOF; + wdp->wd_task.task_datap = NULL; + wdp->wd_e2ep->received_eof = TRUE; } else { fprintf(xgp->errout, "Error receiving data via XNI."); return -1; } - //de2eprintf("DEBUG_E2E: %lld: xdd_e2e_dest_recv: Target: %d: Worker: %d: Preparing to send %d bytes: e2ep=%p: e2ehp=%p: e2e_datap=%p: e2e_xfer_size=%d: e2eh_data_length=%lld\n",(long long int)pclk_now(), tdp->td_target_number, wdp->wd_worker_number, e2ep->e2e_xfer_size,e2ep,e2ehp,e2ep->e2e_datap,e2ep->e2e_xfer_size,(long long int)e2ehp->e2eh_data_length); - //xgp->global_options |= GO_DEBUG_E2E; - //if (xgp->global_options & GO_DEBUG_E2E) xdd_show_e2e_header(wdp->wd_e2ep->e2e_hdrp); - /* Local aliases */ e2ep = wdp->wd_e2ep; - e2ehp = e2ep->e2e_hdrp; /* Collect the end time */ nclk_now(&wdp->wd_counters.tc_current_net_end_time); @@ -274,19 +299,69 @@ int32_t xint_e2e_xni_recv(worker_data_t *wdp) { ttep->tte_net_start = wdp->wd_counters.tc_current_net_start_time; ttep->tte_net_end = wdp->wd_counters.tc_current_net_end_time; ttep->tte_net_processor_end = xdd_get_processor(); - ttep->tte_net_xfer_size = e2ep->e2e_recv_status; - ttep->tte_byte_offset = e2ehp->e2eh_byte_offset; - ttep->tte_disk_xfer_size = e2ehp->e2eh_data_length; - ttep->tte_op_number = e2ehp->e2eh_sequence_number; - if (e2ehp->e2eh_magic == 
XDD_E2E_EOF) + ttep->tte_net_xfer_size = xni_target_buffer_data_length(e2ep->xni_wd_buf); + ttep->tte_byte_offset = xni_target_buffer_target_offset(e2ep->xni_wd_buf); + ttep->tte_disk_xfer_size = xni_target_buffer_data_length(e2ep->xni_wd_buf); + ttep->tte_op_number = xni_target_buffer_sequence_number(e2ep->xni_wd_buf); + if (wdp->wd_e2ep->received_eof) { ttep->tte_op_type = SO_OP_EOF; - else ttep->tte_op_type = SO_OP_WRITE; + } else { + ttep->tte_op_type = SO_OP_WRITE; + } } return(0); } /* end of xint_e2e_xni_recv() */ +int +xint_is_e2e(const target_data_t *tdp) +{ + return (TO_ENDTOEND == (TO_ENDTOEND & tdp->td_target_options)); +} + +xni_connection_t* +xint_e2e_worker_connection(worker_data_t *wdp) +{ + const int idx = wdp->wd_e2ep->address_table_index; + xni_connection_t * const conn = idx >= 0 + ? wdp->wd_tdp->td_e2ep->xni_td_connections+idx + : NULL; + + return conn; +} + +pthread_mutex_t* +xint_e2e_worker_connection_mutex(worker_data_t *wdp) +{ + const int idx = wdp->wd_e2ep->address_table_index; + pthread_mutex_t * const mutex = idx >= 0 + ? wdp->wd_tdp->td_e2ep->xni_td_connection_mutexes+idx + : NULL; + + return mutex; +} + +static xdd_e2e_ate_t* +worker_address_table_entry(worker_data_t *wdp) +{ + const int idx = wdp->wd_e2ep->address_table_index; + xdd_e2e_ate_t * const atep = idx >= 0 + ? 
wdp->wd_tdp->td_e2ep->e2e_address_table+idx + : NULL; + + return atep; +} + +const char* +xint_e2e_worker_dest_hostname(worker_data_t *wdp) +{ + const xdd_e2e_ate_t * const atep = worker_address_table_entry(wdp); + const char *hostname = atep->hostname; + + return hostname; +} + /* * Local variables: * indent-tabs-mode: t diff --git a/src/xnet/xnet_end_to_end_init.c b/src/xnet/xnet_end_to_end_init.c index 934a8ba5..886d551f 100644 --- a/src/xnet/xnet_end_to_end_init.c +++ b/src/xnet/xnet_end_to_end_init.c @@ -36,21 +36,53 @@ #include "xni.h" +// forward declarations +static int init_xni(target_data_t*); +static int init_src_worker(worker_data_t*); +static int init_dest_worker(worker_data_t*); + + /*----------------------------------------------------------------------*/ -/* xdd_e2e_target_init() - init socket library +/* xint_e2e_target_init() - init target structure for E2E * This routine is called during target initialization to initialize the - * socket library. + * target data structure for end-to-end. 
* * Return values: 0 is good, -1 is bad * */ -int32_t xint_e2e_xni_init(target_data_t *tdp) { +int32_t +xint_e2e_target_init(target_data_t *tdp) { + xint_restart_t *rp; // pointer to a restart structure + int status; + + // Perform XNI initialization if required + status = init_xni(tdp); + if (status == -1) { + fprintf(xgp->errout,"%s: xint_e2e_target_init: could not initialize XNI for e2e target\n",xgp->progname); + return(-1); + } + + // Restart processing if necessary + if ((tdp->td_target_options & TO_RESTART_ENABLE) && (tdp->td_restartp)) { // Check to see if restart was requested + // Set the last_committed_byte_offset to 0 + rp = tdp->td_restartp; + rp->last_committed_byte_offset = rp->byte_offset; + rp->last_committed_length = 0; + } + + return(0); +} + +static int +init_xni(target_data_t *tdp) +{ int rc = 0; /* Create the XNI control block */ size_t num_threads = tdp->td_planp->number_of_iothreads; if (xni_protocol_tcp == tdp->xni_pcl) rc = xni_allocate_tcp_control_block(num_threads, tdp->xni_tcp_congestion, + tdp->td_planp->e2e_TCP_Win, &tdp->xni_cb); #if HAVE_ENABLE_IB else if (xni_protocol_ib == tdp->xni_pcl) @@ -65,9 +97,166 @@ int32_t xint_e2e_xni_init(target_data_t *tdp) { /* Create the XNI context */ rc = xni_context_create(tdp->xni_pcl, tdp->xni_cb, &tdp->xni_ctx); assert(0 == rc); + + struct xint_e2e * const e2ep = tdp->td_e2ep; + const int conncnt = (int)e2ep->e2e_address_table_host_count; + + // Initialize mutexes that protect connection establishment + e2ep->xni_td_connection_mutexes = calloc(conncnt, + sizeof(*e2ep->xni_td_connection_mutexes)); + for (int i = 0; i < conncnt; i++) { + rc = pthread_mutex_init(e2ep->xni_td_connection_mutexes+i, NULL); + assert(0 == rc); + } + + // Allocate XNI connections, one per e2e host + e2ep->xni_td_connections = calloc(conncnt, + sizeof(*e2ep->xni_td_connections)); + e2ep->xni_td_connections_count = conncnt; + + return(0); +} + +/*----------------------------------------------------------------------*/ +/* 
xint_e2e_worker_init() - init source and destination sides + * This routine is called during Worker Thread initialization to initialize + * a source or destination Worker Thread. + * + * Return values: 0 is good, -1 is bad + * + */ +int32_t +xint_e2e_worker_init(worker_data_t *wdp) { + target_data_t *tdp; + int status; + in_addr_t addr; + + tdp = wdp->wd_tdp; + wdp->wd_e2ep->e2e_sr_time = 0; + + if(xint_e2e_worker_dest_hostname(wdp) == NULL) { + fprintf(xgp->errout,"%s: xint_e2e_worker_init: Target %d Worker Thread %d: No DESTINATION host name or IP address specified for this end-to-end operation.\n", + xgp->progname, + tdp->td_target_number, + wdp->wd_worker_number); + fprintf(xgp->errout,"%s: xint_e2e_worker_init: Target %d Worker Thread %d: Use the '-e2e destination' option to specify the DESTINATION host name or IP address.\n", + xgp->progname, + tdp->td_target_number, + wdp->wd_worker_number); + return(-1); + } + + // Get the IP address of the destination host + status = xint_lookup_addr(xint_e2e_worker_dest_hostname(wdp), 0, &addr); + if (status) { + fprintf(xgp->errout, "%s: xint_e2e_worker_init: unable to identify host '%s'\n", + xgp->progname, xint_e2e_worker_dest_hostname(wdp)); + return(-1); + } + + // Convert to host byte order + wdp->wd_e2ep->e2e_dest_addr = ntohl(addr); + + if (tdp->td_target_options & TO_E2E_DESTINATION) { // This is the Destination side of an End-to-End + status = init_dest_worker(wdp); + } else if (tdp->td_target_options & TO_E2E_SOURCE) { // This is the Source side of an End-to-End + status = init_src_worker(wdp); + } else { // Should never reach this point + status = -1; + } + + return status; +} // xint_e2e_worker_init() + +static int +init_src_worker(worker_data_t *wdp) +{ + target_data_t *tdp; + xint_e2e_t *e2ep; // Pointer to the E2E data struct + + + tdp = wdp->wd_tdp; + e2ep = wdp->wd_e2ep; + + // Check to make sure that the source target is actually *reading* the data from a file or device + if (tdp->td_rwratio < 1.0) { // 
Something is wrong - the source file/device is not 100% read + fprintf(xgp->errout,"%s: xint_e2e_src_init: Target %d Worker Thread %d: Error - E2E source file '%s' is not being *read*: rwratio=%5.2f is not valid\n", + xgp->progname, + tdp->td_target_number, + wdp->wd_worker_number, + tdp->td_target_full_pathname, + tdp->td_rwratio); + return(-1); + } + + // Init the relevant variables + e2ep->e2e_msg_sequence_number = 0; + + int status = xint_e2e_src_connect(wdp); + if (0 != status) { + fprintf(xgp->errout, "Failure during XNI connection.\n"); + return -1; + } + + // Request an I/O buffer from XNI + xni_request_target_buffer(*xint_e2e_worker_connection(wdp), + &wdp->wd_e2ep->xni_wd_buf); + wdp->wd_task.task_datap = xni_target_buffer_data(wdp->wd_e2ep->xni_wd_buf); + + return(0); +} + +static int +init_dest_worker(worker_data_t *wdp) +{ + target_data_t *tdp; + + + tdp = wdp->wd_tdp; + // Check to make sure that the destination target is actually *writing* the data it receives to a file or device + if (tdp->td_rwratio > 0.0) { // Something is wrong - the destination file/device is not 100% write + fprintf(xgp->errout,"%s: xdd_e2e_dest_init: Target %d Worker Thread %d: Error - E2E destination file/device '%s' is not being *written*: rwratio=%5.2f is not valid\n", + xgp->progname, + tdp->td_target_number, + wdp->wd_worker_number, + tdp->td_target_full_pathname, + tdp->td_rwratio); + return(-1); + } + + // Initialize the message counter and sequencer to 0 + wdp->wd_e2ep->e2e_msg_sequence_number = 0; + + // Clear the end-of-file flag + wdp->wd_e2ep->received_eof = FALSE; + + int status = xint_e2e_dest_connect(wdp); + if (0 != status) { + fprintf(xgp->errout, "Failure during XNI connection.\n"); + return -1; + } + return(0); } +/*----------------------------------------------------------------------------*/ +/* xint_get_e2ep() - allocate a new E2E Data Structure + */ +xint_e2e_t * +xint_get_e2ep(void) { + xint_e2e_t *e2ep; + + e2ep = malloc(sizeof(xint_e2e_t)); + if (e2ep 
== NULL) { + fprintf(xgp->errout,"%s: ERROR: Cannot allocate %d bytes of memory for E2E data structure \n", + xgp->progname, (int)sizeof(xint_e2e_t)); + return(NULL); + } + memset(e2ep, 0, sizeof(xint_e2e_t)); + + return(e2ep); +} /* End of xint_get_e2ep() */ + /* * Local variables: * indent-tabs-mode: t diff --git a/src/xni/xni.c b/src/xni/xni.c index c5835927..f912313f 100644 --- a/src/xni/xni.c +++ b/src/xni/xni.c @@ -25,26 +25,15 @@ int xni_context_destroy(xni_context_t* ctx) return (*ctx)->protocol->context_destroy(ctx); } -int xni_accept_connection(xni_context_t ctx, struct xni_endpoint* local, xni_connection_t* conn) +int xni_accept_connection(xni_context_t ctx, struct xni_endpoint* local, xni_bufset_t *bufset, xni_connection_t* conn) { - return ctx->protocol->accept_connection(ctx, local, conn); -} - -int xni_register_buffer(xni_context_t ctx, void* buf, size_t nbytes, size_t reserved, - xni_target_buffer_t* tbp) -{ - return ctx->protocol->register_buffer(ctx, buf, nbytes, reserved, tbp); -} - -int xni_unregister_buffer(xni_context_t ctx, void* buf) -{ - return ctx->protocol->unregister_buffer(ctx, buf); + return ctx->protocol->accept_connection(ctx, local, bufset, conn); } //TODO: local_endpoint -int xni_connect(xni_context_t ctx, struct xni_endpoint* remote, xni_connection_t* conn) +int xni_connect(xni_context_t ctx, struct xni_endpoint* remote, xni_bufset_t *bufset, xni_connection_t* conn) { - return ctx->protocol->connect(ctx, remote, conn); + return ctx->protocol->connect(ctx, remote, bufset, conn); } int xni_close_connection(xni_connection_t* conn) @@ -52,10 +41,10 @@ int xni_close_connection(xni_connection_t* conn) return (*conn)->context->protocol->close_connection(conn); } -int xni_request_target_buffer(xni_context_t context, +int xni_request_target_buffer(xni_connection_t conn, xni_target_buffer_t* buffer) { - return context->protocol->request_target_buffer(context, buffer); + return conn->context->protocol->request_target_buffer(conn, buffer); } 
int xni_send_target_buffer(xni_connection_t conn, xni_target_buffer_t* buffer) @@ -70,7 +59,7 @@ int xni_receive_target_buffer(xni_connection_t conn, xni_target_buffer_t* buffer int xni_release_target_buffer(xni_target_buffer_t* buffer) { - return (*buffer)->context->protocol->release_target_buffer(buffer); + return (*buffer)->connection->context->protocol->release_target_buffer(buffer); } void *xni_target_buffer_data(xni_target_buffer_t tb) @@ -97,3 +86,13 @@ void xni_target_buffer_set_data_length(int length, xni_target_buffer_t tb) { tb->data_length = length; } + +int64_t xni_target_buffer_sequence_number(xni_target_buffer_t tb) +{ + return tb->sequence_number; +} + +void xni_target_buffer_set_sequence_number(int64_t seq, xni_target_buffer_t tb) +{ + tb->sequence_number = seq; +} diff --git a/src/xni/xni.h b/src/xni/xni.h index ed65b4d5..8f55b730 100644 --- a/src/xni/xni.h +++ b/src/xni/xni.h @@ -88,6 +88,19 @@ struct xni_target_buffer; */ typedef struct xni_target_buffer *xni_target_buffer_t; +struct xni_bufset { + /*! An array of buffer base addresses. */ + unsigned char **bufs; + /*! The number of elements in \c bufs */ + size_t bufcount; + /*! The size of each buffer in bytes. */ + size_t bufsize; + /*! The offset into the buffer at which the caller will insert + application data. */ + size_t reserved; +}; +/*! \brief Set of data buffers. */ +typedef struct xni_bufset xni_bufset_t; /*! \brief Perform library-specific initialization. * @@ -145,48 +158,15 @@ int xni_context_create(xni_protocol_t protocol, xni_control_block_t control_bloc */ int xni_context_destroy(xni_context_t *context); -/*! \brief Register memory with XNI. - * - * This function provides XNI drivers to perform optimizations based on - * the adress of the memory buffers in use. - * - * \param[in,out] context The context to register the buffer with. - * \param[in] buf The memory buffer to register. - * \param[in] nbytes The total size of the buffer in bytes. 
- * \param[in] reserved The offset into the buffer at which the caller will - * insert application data. Although this seems backwards, it ensures both - * the caller and XNI can align data per their own requirements. - * \param[out] tb The xni target buffer to use for send/recvs. - * - * \return #XNI_OK if registration was successful. - * \return #XNI_ERR if registration failed. - * - * \sa xni_unregister() - */ -int xni_register_buffer(xni_context_t context, void* buf, size_t nbytes, size_t reserved, xni_target_buffer_t* tb); -/*! \brief Free resources associated with registering memory with XNI. - * - * This function frees any resources used to register memory for use with - * XNI. - * - * \return #XNI_OK if the cleanup was successful. - * \return #XNI_ERR if the cleanup failed. - * - * \sa xni_register() - */ -int xni_unregister_buffer(xni_context_t context, void* buf); - /*! \brief Wait for a connection from a remote process. * * This function creates a destination-side connection by * listening on the address specified by \e local for a connection * from a remote process. * - * It is forbidden for the \e num_buffers and \e buffer_size arguments - * to differ from those specified at the remote end to xni_connect(). - * * \param context The network context under which to create the connection. * \param[in] local The local address to listen on. + * \param[in] bufset The buffers to be used for communication with the remote. * \param[out] connection The newly created destination-side connection. * * \return #XNI_OK if the connection was successfully created. @@ -195,7 +175,7 @@ int xni_unregister_buffer(xni_context_t context, void* buf); * \sa xni_close_connection() * \sa xni_receive_target_buffer() */ -int xni_accept_connection(xni_context_t context, struct xni_endpoint *local, xni_connection_t *connection); +int xni_accept_connection(xni_context_t context, struct xni_endpoint *local, xni_bufset_t *bufset, xni_connection_t *connection); /*! 
\brief Initiate a connection to a remote process. * * This function creates a source-side connection by @@ -205,12 +185,9 @@ int xni_accept_connection(xni_context_t context, struct xni_endpoint *local, xni * context was created. These buffers will be aligned on 512-byte * boundaries. * - * It is forbidden for the \e num_buffers and \e buffer_size arguments - * to differ from those specified at the remote end to - * xni_accept_connection(). - * * \param context The network context under which to create the connection. * \param[in] remote The remote address to connect to. + * \param[in] bufset The buffers to be used for communication with the remote. * \param[out] connection The newly created source-side connection. * * \return #XNI_OK if the connection was successfully created. @@ -220,7 +197,7 @@ int xni_accept_connection(xni_context_t context, struct xni_endpoint *local, xni * \sa xni_request_target_buffer() */ //TODO: local_endpoint -int xni_connect(xni_context_t context, struct xni_endpoint *remote, xni_connection_t *connection); +int xni_connect(xni_context_t context, struct xni_endpoint *remote, xni_bufset_t *bufset, xni_connection_t *connection); /*! \brief Close a connection and free its resources. * * This function closes a connection and frees all allocated target @@ -251,8 +228,8 @@ int xni_close_connection(xni_connection_t *connection); * temporarily owned by the caller until the buffer is passed to * xni_send_target_buffer() or xni_release_target_buffer(). * - * \param contest The source-side context from which to request - * the buffer. + * \param connection The source-side connection from which to + * request the buffer. * \param[out] buffer The requested target buffer. * * \return #XNI_OK if a target buffer was reserved. 
@@ -260,7 +237,7 @@ int xni_close_connection(xni_connection_t *connection); * * \sa xni_send_target_buffer(); */ -int xni_request_target_buffer(xni_context_t ctx, xni_target_buffer_t *buffer); +int xni_request_target_buffer(xni_connection_t conn, xni_target_buffer_t *buffer); /*! \brief Send a target buffer to the remote process. * * This function transfers the target buffer \e buffer to the remote @@ -321,9 +298,6 @@ int xni_receive_target_buffer(xni_connection_t connection, xni_target_buffer_t * int xni_release_target_buffer(xni_target_buffer_t *buffer); /*! \brief Get a target buffer's data pointer. - * - * The data pointer will point to a block of memory aligned on a - * 512-byte boundary. * * \param buffer The buffer to inspect. * @@ -376,6 +350,23 @@ int xni_target_buffer_data_length(xni_target_buffer_t buffer); * \sa xni_target_buffer_data_length() */ void xni_target_buffer_set_data_length(int length, xni_target_buffer_t buffer); +/*! \brief Get the operation sequence number. + * + * \param buffer The buffer to inspect. + * + * \return The operation sequence number. + * + * \sa xni_target_buffer_set_sequence_number() + */ +int64_t xni_target_buffer_sequence_number(xni_target_buffer_t buffer); +/*! \brief Set the operation sequence number. + * + * \param sequence_number The operation sequence number. + * \param buffer The buffer to modify. + * + * \sa xni_target_buffer_sequence_number() + */ +void xni_target_buffer_set_sequence_number(int64_t sequence_number, xni_target_buffer_t buffer); /*! @} */ @@ -388,6 +379,7 @@ enum { XNI_TCP_DEFAULT_NUM_SOCKETS = 0, /*!< \brief Use the default number of sockets. */ }; extern const char *XNI_TCP_DEFAULT_CONGESTION; /*!< \brief Use the default TCP congestion avoidance algorithm. */ +enum { XNI_TCP_DEFAULT_WINDOW_SIZE = 0 }; /*!< \brief Use the operating system default TCP window size */ /*! \brief Create a control block for the TCP implementation. 
* * If \e num_sockets is #XNI_TCP_DEFAULT_NUM_SOCKETS then the number @@ -397,8 +389,12 @@ extern const char *XNI_TCP_DEFAULT_CONGESTION; /*!< \brief Use the default TCP * If \e congestion is #XNI_TCP_DEFAULT_CONGESTION then the system * default congestion avoidance algorithm will be used. * + * If \e window_size is #XNI_TCP_DEFAULT_WINDOW_SIZE then the + * operating system default TCP window size will be used. + * * \param num_sockets The number of TCP sockets to create per connection. * \param congestion the congestion control algorithm to use + * \param window_size the TCP window size * \param[out] control_block The newly allocated control block. * * \return #XNI_OK if the control block was successfully created. @@ -406,7 +402,7 @@ extern const char *XNI_TCP_DEFAULT_CONGESTION; /*!< \brief Use the default TCP * * \sa xni_free_tcp_control_block() */ -int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, xni_control_block_t *control_block); + int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, int window_size, xni_control_block_t *control_block); /*! \brief Free a TCP control block. 
* * It is forbidden to call this function more than once with the same diff --git a/src/xni/xni_ib.c b/src/xni/xni_ib.c index 9c5567df..58cb3683 100644 --- a/src/xni/xni_ib.c +++ b/src/xni/xni_ib.c @@ -21,7 +21,7 @@ #include "xni_internal.h" //#define XNI_TRACE 1 -#define PROTOCOL_NAME "ib-nlmills-20120809" +#define PROTOCOL_NAME "ib-nlmills-20140602" #define ALIGN(val,align) (((val)+(align)-1UL) & ~((align)-1UL)) @@ -38,17 +38,6 @@ struct ib_context { struct ib_control_block control_block; struct ibv_context *verbs_context; struct ibv_pd *domain; - - // target buffer registration data - struct ib_target_buffer *target_buffers; - size_t num_registered; - - // locks - pthread_mutex_t target_buffers_mutex; - pthread_cond_t target_buffers_cond; - pthread_mutex_t busy_flag_mutex; - pthread_cond_t busy_flag_cond; - }; struct ib_connection { @@ -74,9 +63,21 @@ struct ib_connection { uint32_t remote_qpnum; uint16_t remote_lid; + + // target buffer registration data + struct ib_target_buffer *target_buffers; + size_t num_registered; + + // locks + pthread_mutex_t target_buffers_mutex; + pthread_cond_t target_buffers_cond; + pthread_mutex_t busy_flag_mutex; + pthread_cond_t busy_flag_cond; }; -#define IB_DATA_MESSAGE_HEADER_SIZE 12 // = tag(4) + target_offset(8) +#define IB_DATA_MESSAGE_HEADER_SIZE 20 // = tag(4) + + // sequence_number(8) + + // target_offset(8) enum send_state { QUEUED, SENT, @@ -84,8 +85,9 @@ enum send_state { }; struct ib_target_buffer { // inherited from xni_target_buffer - struct ib_context *context; + struct ib_connection *connection; void *data; + int64_t sequence_number; size_t target_offset; int data_length; @@ -95,7 +97,6 @@ struct ib_target_buffer { enum send_state send_state; struct ibv_mr *memory_region; void *header; - struct ib_connection *connection; }; #define IB_CREDIT_MESSAGE_SIZE 8 // = tag(4) + credits(4) @@ -180,12 +181,6 @@ static int ib_context_create(xni_protocol_t proto_, xni_control_block_t cb_, xni tmp->control_block = *cb; 
tmp->verbs_context = verbsctx; tmp->domain = pd; - tmp->target_buffers = calloc(cb->num_buffers, sizeof(*tmp->target_buffers)); - tmp->num_registered = 0; - pthread_mutex_init(&tmp->target_buffers_mutex, NULL); - pthread_cond_init(&tmp->target_buffers_cond, NULL); - pthread_mutex_init(&tmp->busy_flag_mutex, NULL); - pthread_cond_init(&tmp->busy_flag_cond, NULL); *ctx = tmp; return XNI_OK; @@ -204,56 +199,41 @@ static int ib_context_destroy(xni_context_t *ctx_) return XNI_OK; } -static int ib_register_buffer(xni_context_t ctx_, void* buf, size_t nbytes, size_t reserved, - xni_target_buffer_t* xtb) +static int register_buffer(struct ib_connection *conn, void* buf, size_t nbytes, size_t reserved) { - struct ib_context* ctx = (struct ib_context*)ctx_; uintptr_t beginp = (uintptr_t)buf; uintptr_t datap = (uintptr_t)buf + (uintptr_t)(reserved); size_t avail = (size_t)(datap - beginp); - // Make sure space exists in the registered buffer array - if (ctx->control_block.num_buffers <= ctx->num_registered) - return XNI_ERR; - // Make sure enough padding exists if (avail < IB_DATA_MESSAGE_HEADER_SIZE) return XNI_ERR; // Register the memory with verbs - struct ibv_mr *mr = ibv_reg_mr(ctx->domain, buf, nbytes, + struct ibv_mr *mr = ibv_reg_mr(conn->context->domain, buf, nbytes, IBV_ACCESS_LOCAL_WRITE); if (NULL == mr) return XNI_ERR; // Add the buffer into the array of registered buffers - pthread_mutex_lock(&ctx->target_buffers_mutex); - struct ib_target_buffer* tb = ctx->target_buffers + ctx->num_registered; - tb->context = ctx; + pthread_mutex_lock(&conn->target_buffers_mutex); + struct ib_target_buffer* tb = conn->target_buffers + conn->num_registered; + tb->connection = conn; tb->data = (void*)datap; + tb->sequence_number = 0; + tb->target_offset = 0; tb->data_length = -1; tb->buffer_size = nbytes - reserved; tb->busy = 0; - tb->send_state = 0; + tb->send_state = QUEUED; tb->memory_region = mr; tb->header = (void*)(datap - IB_DATA_MESSAGE_HEADER_SIZE); - tb->connection = 
NULL; - ctx->num_registered++; - pthread_mutex_unlock(&ctx->target_buffers_mutex); + conn->num_registered++; + pthread_mutex_unlock(&conn->target_buffers_mutex); - // Set the outbound target buffer - *xtb= (xni_target_buffer_t)&tb; return XNI_OK; } -static int ib_unregister_buffer(xni_context_t ctx_, void* buf) -{ - struct ib_context* ctx = (struct ib_context*)ctx_; - pthread_mutex_lock(&ctx->target_buffers_mutex); - pthread_mutex_unlock(&ctx->target_buffers_mutex); - return XNI_OK; -} - static struct ib_credit_buffer **allocate_credit_buffers(struct ib_context *ctx, int nbuf) { struct ib_credit_buffer **credit_buffers = calloc((nbuf + 1), sizeof(*credit_buffers)); @@ -410,7 +390,7 @@ static int send_credits(struct ib_connection *conn, int ncredits) // otherwise mark any that have become free if (cb == NULL) { - size_t num_bufs = conn->context->num_registered; + size_t num_bufs = conn->num_registered; struct ibv_wc wc[num_bufs]; int completed = ibv_poll_cq(conn->send_cq, num_bufs, wc); if (completed < 0) { @@ -432,9 +412,8 @@ static int send_credits(struct ib_connection *conn, int ncredits) // // encode and send the credit message // - //TODO: NBO? memcpy(cb->msgbuf, CREDIT_MESSAGE_TAG, TAG_LENGTH); - uint32_t tmp32 = (uint32_t)ncredits; + uint32_t tmp32 = htonl((uint32_t)ncredits); memcpy(cb->msgbuf+TAG_LENGTH, &tmp32, 4); struct ibv_sge sge; @@ -489,8 +468,8 @@ static int consume_credit(struct ib_connection *conn) if (completed > 0) { struct ib_credit_buffer *cb = (struct ib_credit_buffer*)wc.wr_id; uint32_t tmp32 = 0; - //TODO: NBO? 
memcpy(&tmp32, cb->msgbuf+TAG_LENGTH, 4); + tmp32 = ntohl(tmp32); //TODO: check for deadlock if receive can't be posted post_receive(conn->queue_pair, cb->memory_region, cb->msgbuf, IB_CREDIT_MESSAGE_SIZE, (uintptr_t)cb); @@ -508,7 +487,7 @@ static int send_eof(struct ib_connection *conn) { //TODO: use a better buffer //XXX: for now, just hijack the first target bufffer - struct ib_target_buffer *tb = conn->context->target_buffers; + struct ib_target_buffer *tb = conn->target_buffers; // encode the message memcpy(tb->header, EOF_MESSAGE_TAG, TAG_LENGTH); @@ -548,7 +527,7 @@ static int send_eof(struct ib_connection *conn) return 0; } -static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, xni_connection_t* conn_) +static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, xni_bufset_t *bufset, xni_connection_t* conn_) { struct ib_context *ctx = (struct ib_context*)ctx_; struct ib_connection **conn = (struct ib_connection**)conn_; @@ -559,23 +538,23 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, struct ibv_qp *qp = NULL; int server=-1, client=-1; - // Ensure a registered buffer exists - if (ctx->num_registered < 1) + // Ensure at least one buffer exists + if (bufset->bufcount < 1) return XNI_ERR; tmpconn = calloc(1, sizeof(*tmpconn)); tmpconn->context = ctx; - credit_buffers = allocate_credit_buffers(ctx, ctx->num_registered); + credit_buffers = allocate_credit_buffers(ctx, bufset->bufcount); if (credit_buffers == NULL) goto error_out; - if ((sendcq = ibv_create_cq(ctx->verbs_context, ctx->num_registered, NULL, NULL, 0)) == NULL) + if ((sendcq = ibv_create_cq(ctx->verbs_context, bufset->bufcount, NULL, NULL, 0)) == NULL) goto error_out; - if ((recvcq = ibv_create_cq(ctx->verbs_context, ctx->num_registered, NULL, NULL, 0)) == NULL) + if ((recvcq = ibv_create_cq(ctx->verbs_context, bufset->bufcount, NULL, NULL, 0)) == NULL) goto error_out; - if ((qp = create_queue_pair(ctx, sendcq, 
recvcq, ctx->num_registered)) == NULL) + if ((qp = create_queue_pair(ctx, sendcq, recvcq, bufset->bufcount)) == NULL) goto error_out; // start listening for a client @@ -621,7 +600,9 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, goto error_out; memcpy(&remote_qpnum, msgbuf, 4); + remote_qpnum = ntohl(remote_qpnum); memcpy(&remote_lid, msgbuf+4, 2); + remote_lid = ntohs(remote_lid); #ifdef XNI_TRACE printf("Remote QPN=%u, LID=%u\n", @@ -629,13 +610,13 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, (unsigned int)remote_lid); #endif // XNI_TRACE - uint32_t tmp32 = qp->qp_num; + uint32_t tmp32 = htonl(qp->qp_num); memcpy(msgbuf, &tmp32, 4); struct ibv_port_attr portattr; memset(&portattr, 0, sizeof(portattr)); if (ibv_query_port(ctx->verbs_context, 1, &portattr)) goto error_out; - uint16_t tmp16 = portattr.lid; + uint16_t tmp16 = htons(portattr.lid); memcpy(msgbuf+4, &tmp16, 2); #ifdef XNI_TRACE @@ -653,24 +634,41 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, if (move_qp_to_init(qp)) goto error_out; - for (size_t i =0; i < ctx->num_registered; i++) { - struct ib_target_buffer* tbp = ctx->target_buffers + i; - tbp->connection = tmpconn; - size_t bufsiz = tbp->buffer_size; + // Prepare for target buffer registration + tmpconn->target_buffers = calloc(bufset->bufcount, sizeof(*tmpconn->target_buffers)); + tmpconn->num_registered = 0; + pthread_mutex_init(&tmpconn->target_buffers_mutex, NULL); + pthread_cond_init(&tmpconn->target_buffers_cond, NULL); + pthread_mutex_init(&tmpconn->busy_flag_mutex, NULL); + pthread_cond_init(&tmpconn->busy_flag_cond, NULL); + + // Register the target buffers + for (size_t i =0; i < bufset->bufcount; i++) { + int rc = register_buffer(tmpconn, bufset->bufs[i], + bufset->bufsize, bufset->reserved); + if (rc != XNI_OK) { + goto error_out; + } + } + + // Post the receives + for (size_t i = 0; i < tmpconn->num_registered; i++) { + struct 
ib_target_buffer *tbp = tmpconn->target_buffers + i; + const size_t bufsiz = tbp->buffer_size; if (post_receive(qp, tbp->memory_region, tbp->header, (int)((char*)(tbp->data) - (char*)(tbp->header) + bufsiz), (uintptr_t)tbp)) goto error_out; } - if (move_qp_to_rtr(qp, remote_qpnum, remote_lid, ctx->num_registered) || - move_qp_to_rts(qp, ctx->num_registered)) + + if (move_qp_to_rtr(qp, remote_qpnum, remote_lid, tmpconn->num_registered) || + move_qp_to_rts(qp, tmpconn->num_registered)) goto error_out; #ifdef XNI_TRACE puts("Connected."); #endif // XNI_TRACE - tmpconn->context = ctx; tmpconn->credit_buffers = credit_buffers; tmpconn->eof = 0; pthread_mutex_init(&tmpconn->credit_mutex, NULL); @@ -682,7 +680,7 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, tmpconn->remote_lid = remote_lid; // send the initial credits - if (send_credits(tmpconn, ctx->num_registered)) + if (send_credits(tmpconn, tmpconn->num_registered)) goto error_out; *conn = tmpconn; @@ -708,7 +706,7 @@ static int ib_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, return XNI_ERR; } -static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_connection_t* conn_) +static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_bufset_t *bufset, xni_connection_t* conn_) { struct ib_context *ctx = (struct ib_context*)ctx_; struct ib_connection **conn = (struct ib_connection**)conn_; @@ -719,8 +717,8 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne struct ibv_qp *qp = NULL; int server=-1, client=-1; - // Ensure a registered buffer exists - if (ctx->num_registered < 1) + // Ensure at least one buffer exists + if (bufset->bufcount < 1) return XNI_ERR; #ifdef XNI_TRACE @@ -734,19 +732,19 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne #ifdef XNI_TRACE puts("3"); #endif // XNI_TRACE - credit_buffers = allocate_credit_buffers(ctx, ctx->num_registered); + 
credit_buffers = allocate_credit_buffers(ctx, bufset->bufcount); if (credit_buffers == NULL) goto error_out; #ifdef XNI_TRACE puts("4"); #endif // XNI_TRACE - if ((sendcq = ibv_create_cq(ctx->verbs_context, ctx->num_registered, NULL, NULL, 0)) == NULL) + if ((sendcq = ibv_create_cq(ctx->verbs_context, bufset->bufcount, NULL, NULL, 0)) == NULL) goto error_out; - if ((recvcq = ibv_create_cq(ctx->verbs_context, ctx->num_registered, NULL, NULL, 0)) == NULL) + if ((recvcq = ibv_create_cq(ctx->verbs_context, bufset->bufcount, NULL, NULL, 0)) == NULL) goto error_out; - if ((qp = create_queue_pair(ctx, sendcq, recvcq, ctx->num_registered)) == NULL) + if ((qp = create_queue_pair(ctx, sendcq, recvcq, bufset->bufcount)) == NULL) goto error_out; #ifdef XNI_TRACE puts("Create qps"); @@ -774,13 +772,13 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne uint32_t remote_qpnum; uint16_t remote_lid; - uint32_t tmp32 = qp->qp_num; + uint32_t tmp32 = htonl(qp->qp_num); memcpy(msgbuf, &tmp32, 4); struct ibv_port_attr portattr; memset(&portattr, 0, sizeof(portattr)); if (ibv_query_port(ctx->verbs_context, 1, &portattr)) goto error_out; - uint16_t tmp16 = portattr.lid; + uint16_t tmp16 = htons(portattr.lid); memcpy(msgbuf+4, &tmp16, 2); #ifdef XNI_TRACE @@ -798,7 +796,9 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne client = -1; memcpy(&remote_qpnum, msgbuf, 4); + remote_qpnum = ntohl(remote_qpnum); memcpy(&remote_lid, msgbuf+4, 2); + remote_lid = ntohs(remote_lid); #ifdef XNI_TRACE printf("Remote QPN=%u, LID=%u\n", @@ -815,8 +815,8 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne IB_CREDIT_MESSAGE_SIZE, (uintptr_t)*cbptr)) goto error_out; - if (move_qp_to_rtr(qp, remote_qpnum, remote_lid, ctx->num_registered) || - move_qp_to_rts(qp, ctx->num_registered)) + if (move_qp_to_rtr(qp, remote_qpnum, remote_lid, bufset->bufcount) || + move_qp_to_rts(qp, bufset->bufcount)) goto error_out; 
tmpconn->context = ctx; @@ -831,9 +831,22 @@ static int ib_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conne tmpconn->remote_qpnum = remote_qpnum; tmpconn->remote_lid = remote_lid; - // Add the connection to the registered buffers - for (size_t i = 0; i < ctx->num_registered; i++) - ctx->target_buffers[i].connection = tmpconn; + // Prepare for target buffer registration + tmpconn->target_buffers = calloc(bufset->bufcount, sizeof(*tmpconn->target_buffers)); + tmpconn->num_registered = 0; + pthread_mutex_init(&tmpconn->target_buffers_mutex, NULL); + pthread_cond_init(&tmpconn->target_buffers_cond, NULL); + pthread_mutex_init(&tmpconn->busy_flag_mutex, NULL); + pthread_cond_init(&tmpconn->busy_flag_cond, NULL); + + // Register the target buffers + for (size_t i =0; i < bufset->bufcount; i++) { + int rc = register_buffer(tmpconn, bufset->bufs[i], + bufset->bufsize, bufset->reserved); + if (rc != XNI_OK) { + goto error_out; + } + } #ifdef XNI_TRACE puts("Connected."); @@ -885,16 +898,16 @@ static int ib_close_connection(xni_connection_t *conn_) return XNI_OK; } -static int ib_request_target_buffer(xni_context_t ctx_, xni_target_buffer_t *targetbuf_) +static int ib_request_target_buffer(xni_connection_t conn_, xni_target_buffer_t *targetbuf_) { - struct ib_context *ctx = (struct ib_context*)ctx_; + struct ib_connection *conn = (struct ib_connection*)conn_; struct ib_target_buffer **targetbuf = (struct ib_target_buffer**)targetbuf_; struct ib_target_buffer *tb = NULL; - pthread_mutex_lock(&ctx->busy_flag_mutex); + pthread_mutex_lock(&conn->busy_flag_mutex); while (tb == NULL) { - for (size_t i = 0; i < ctx->num_registered; i++) { - struct ib_target_buffer* ptr = ctx->target_buffers + i; + for (size_t i = 0; i < conn->num_registered; i++) { + struct ib_target_buffer* ptr = conn->target_buffers + i; if (!ptr->busy) { tb = ptr; @@ -903,10 +916,10 @@ static int ib_request_target_buffer(xni_context_t ctx_, xni_target_buffer_t *tar } } if (tb == NULL) - 
pthread_cond_wait(&ctx->busy_flag_cond, - &ctx->busy_flag_mutex); + pthread_cond_wait(&conn->busy_flag_cond, + &conn->busy_flag_mutex); } - pthread_mutex_unlock(&ctx->busy_flag_mutex); + pthread_mutex_unlock(&conn->busy_flag_mutex); *targetbuf = tb; return XNI_OK; @@ -926,10 +939,11 @@ static int ib_send_target_buffer(xni_connection_t conn_, xni_target_buffer_t *ta goto free_out; // encode the message - //TODO: NBO? memcpy(tb->header, DATA_MESSAGE_TAG, TAG_LENGTH); - uint64_t tmp64 = tb->target_offset; + uint64_t tmp64 = htonll(tb->sequence_number); memcpy(((char*)tb->header)+TAG_LENGTH, &tmp64, 8); + tmp64 = htonll(tb->target_offset); + memcpy(((char*)tb->header)+TAG_LENGTH+8, &tmp64, 8); // send the message struct ibv_sge sge; @@ -955,7 +969,7 @@ static int ib_send_target_buffer(xni_connection_t conn_, xni_target_buffer_t *ta // wait for send completion pthread_mutex_lock(&conn->send_state_mutex); while (tb->send_state == QUEUED) { - size_t num_bufs = conn->context->num_registered; + size_t num_bufs = conn->num_registered; struct ibv_wc wc[num_bufs]; int completed = ibv_poll_cq(conn->send_cq, num_bufs, wc); if (completed < 0) { @@ -979,10 +993,10 @@ static int ib_send_target_buffer(xni_connection_t conn_, xni_target_buffer_t *ta free_out: // mark the buffer as free - pthread_mutex_lock(&conn->context->busy_flag_mutex); + pthread_mutex_lock(&conn->busy_flag_mutex); tb->busy = 0; - pthread_cond_signal(&conn->context->busy_flag_cond); - pthread_mutex_unlock(&conn->context->busy_flag_mutex); + pthread_cond_signal(&conn->busy_flag_cond); + pthread_mutex_unlock(&conn->busy_flag_mutex); return return_code; } @@ -1010,7 +1024,10 @@ static int ib_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t // decode the message tb = (struct ib_target_buffer*)wc.wr_id; if (memcmp(tb->header, DATA_MESSAGE_TAG, TAG_LENGTH) == 0) { - memcpy(&tb->target_offset, ((char*)tb->header)+TAG_LENGTH, 8); + memcpy(&tb->sequence_number, ((char*)tb->header)+TAG_LENGTH, 8); + 
tb->sequence_number = ntohll(tb->sequence_number); + memcpy(&tb->target_offset, ((char*)tb->header)+TAG_LENGTH+8, 8); + tb->target_offset = ntohll(tb->target_offset); tb->data_length = wc.byte_len - (int)((char*)tb->data - (char*)tb->header); } else if (memcmp(tb->header, EOF_MESSAGE_TAG, TAG_LENGTH) == 0) conn->eof = 1; @@ -1046,10 +1063,10 @@ static int ib_release_target_buffer(xni_target_buffer_t *targetbuf_) if (send_credits(tb->connection, 1)) return XNI_ERR; } else { - pthread_mutex_lock(&tb->connection->context->busy_flag_mutex); + pthread_mutex_lock(&tb->connection->busy_flag_mutex); tb->busy = 0; - pthread_cond_signal(&tb->connection->context->busy_flag_cond); - pthread_mutex_unlock(&tb->connection->context->busy_flag_mutex); + pthread_cond_signal(&tb->connection->busy_flag_cond); + pthread_mutex_unlock(&tb->connection->busy_flag_mutex); } *targetbuf = NULL; @@ -1061,8 +1078,6 @@ static struct xni_protocol protocol_ib = { .name = PROTOCOL_NAME, .context_create = ib_context_create, .context_destroy = ib_context_destroy, - .register_buffer = ib_register_buffer, - .unregister_buffer = ib_unregister_buffer, .accept_connection = ib_accept_connection, .connect = ib_connect, .close_connection = ib_close_connection, diff --git a/src/xni/xni_internal.h b/src/xni/xni_internal.h index fb7b8c81..f2979365 100644 --- a/src/xni/xni_internal.h +++ b/src/xni/xni_internal.h @@ -2,19 +2,19 @@ #define XDD_XNI_INTERNAL_H +#include // for ntohl() + + struct xni_protocol { const char *name; int (*context_create)(xni_protocol_t, xni_control_block_t, xni_context_t*); int (*context_destroy)(xni_context_t*); - int (*register_buffer)(xni_context_t, void*, size_t, size_t, xni_target_buffer_t*); - int (*unregister_buffer)(xni_context_t, void*); - - int (*accept_connection)(xni_context_t, struct xni_endpoint*, xni_connection_t*); - int (*connect)(xni_context_t, struct xni_endpoint*, xni_connection_t*); + int (*accept_connection)(xni_context_t, struct xni_endpoint*, xni_bufset_t*, 
xni_connection_t*); + int (*connect)(xni_context_t, struct xni_endpoint*, xni_bufset_t*, xni_connection_t*); int (*close_connection)(xni_connection_t*); - int (*request_target_buffer)(xni_context_t, xni_target_buffer_t*); + int (*request_target_buffer)(xni_connection_t, xni_target_buffer_t*); int (*send_target_buffer)(xni_connection_t, xni_target_buffer_t*); int (*receive_target_buffer)(xni_connection_t, xni_target_buffer_t*); int (*release_target_buffer)(xni_target_buffer_t*); @@ -29,12 +29,40 @@ struct xni_connection { }; struct xni_target_buffer { - struct xni_context *context; + struct xni_connection *connection; void *data; + int64_t sequence_number; size_t target_offset; int data_length; }; + +/** + * Convert from network byte-order (big endian) to host order + */ +static inline uint64_t ntohll(uint64_t value) +{ + int endian_test = 1; + + // Determine if host order is little endian + if (endian_test == *((char*)(&endian_test))) { + // Swap the bytes + uint32_t low = ntohl((uint32_t)(value & 0xFFFFFFFFLL)); + uint32_t high = ntohl((uint32_t)(value >> 32)); + value = ((uint64_t)(low) << 32) | (uint64_t)(high); + } + return value; +} + +/** + * Convert from host byte-order to network byte-order (big endian) + */ +static inline uint64_t htonll(uint64_t value) +{ + // Re-use the ntohll implementation to swap the bytes + return ntohll(value); +} + #endif // XDD_XNI_INTERNAL_H /* diff --git a/src/xni/xni_tcp.c b/src/xni/xni_tcp.c index 85525097..4a708dd4 100644 --- a/src/xni/xni_tcp.c +++ b/src/xni/xni_tcp.c @@ -20,16 +20,19 @@ #include "xni_internal.h" -#define PROTOCOL_NAME "tcp-nlmills-20120809" +#define PROTOCOL_NAME "tcp-nlmills-20140602" #define ALIGN(val,align) (((val)+(align)-1UL) & ~((align)-1UL)) const char *XNI_TCP_DEFAULT_CONGESTION = ""; -static const size_t TCP_DATA_MESSAGE_HEADER_SIZE = 12; +static const size_t TCP_DATA_MESSAGE_HEADER_SIZE = 20; // = sequence_number(8) + + // target_offset(8) + + // data_length(4) struct tcp_control_block { size_t 
num_sockets; char congestion[16]; + int window_size; }; struct tcp_context { @@ -38,13 +41,6 @@ struct tcp_context { // added by struct tcp_context struct tcp_control_block control_block; - - // added by struct tcp_connection - struct tcp_target_buffer *registered_buffers; // NULL-terminated - size_t num_registered; - pthread_mutex_t buffer_mutex; - pthread_cond_t buffer_cond; - }; struct tcp_socket { @@ -57,18 +53,29 @@ struct tcp_connection { // inherited from struct xni_connection struct tcp_context *context; + // added by struct tcp_connection + // 1 = destination side, 0 = source side int destination; struct tcp_socket *sockets; int num_sockets; pthread_mutex_t socket_mutex; pthread_cond_t socket_cond; + + // size is equal to num_sockets + struct tcp_target_buffer *registered_buffers; + // count of registered buffers + size_t num_registered; + // these protect registered_buffers and num_registered + pthread_mutex_t buffer_mutex; + pthread_cond_t buffer_cond; }; struct tcp_target_buffer { // inherited from xni_target_buffer - struct tcp_context *context; + struct tcp_connection *connection; void *data; + int64_t sequence_number; size_t target_offset; int data_length; @@ -78,7 +85,7 @@ struct tcp_target_buffer { }; -int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, xni_control_block_t *cb_) +int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, int window_size, xni_control_block_t *cb_) { struct tcp_control_block **cb = (struct tcp_control_block**)cb_; @@ -90,9 +97,14 @@ int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, xni_ if (strlen(congestion) >= sizeof((*cb)->congestion)) return XNI_ERR; + // sanity check + if (window_size < 1 && window_size != XNI_TCP_DEFAULT_WINDOW_SIZE) + return XNI_ERR; + struct tcp_control_block *tmp = calloc(1, sizeof(*tmp)); tmp->num_sockets = num_sockets; strncpy(tmp->congestion, congestion, (sizeof(tmp->congestion) - 1)); + tmp->window_size = window_size; *cb 
= tmp; return XNI_OK; } @@ -100,28 +112,22 @@ int xni_allocate_tcp_control_block(int num_sockets, const char *congestion, xni_ int xni_free_tcp_control_block(xni_control_block_t *cb_) { struct tcp_control_block **cb = (struct tcp_control_block**)cb_; - free(*cb); *cb = NULL; return XNI_OK; } -static int tcp_context_create(xni_protocol_t proto_, xni_control_block_t cb_, xni_context_t *ctx_) +static int tcp_context_create(xni_protocol_t proto, xni_control_block_t cb_, xni_context_t *ctx_) { - struct xni_protocol *proto = proto_; struct tcp_control_block *cb = (struct tcp_control_block*)cb_; struct tcp_context **ctx = (struct tcp_context**)ctx_; - size_t num_buffers = cb->num_sockets; assert(strcmp(proto->name, PROTOCOL_NAME) == 0); // fill in a new context struct tcp_context *tmp = calloc(1, sizeof(*tmp)); tmp->protocol = proto; tmp->control_block = *cb; - tmp->registered_buffers = calloc(num_buffers, sizeof(*tmp->registered_buffers)); - pthread_mutex_init(&tmp->buffer_mutex, NULL); - pthread_cond_init(&tmp->buffer_cond, NULL); // Swap in the context *ctx = tmp; @@ -132,23 +138,23 @@ static int tcp_context_create(xni_protocol_t proto_, xni_control_block_t cb_, xn static int tcp_context_destroy(xni_context_t *ctx_) { struct tcp_context **ctx = (struct tcp_context **)ctx_; - pthread_mutex_destroy(&(*ctx)->buffer_mutex); - pthread_cond_destroy(&(*ctx)->buffer_cond); - free((*ctx)->registered_buffers); free(*ctx); *ctx = NULL; return XNI_OK; } -static int tcp_register_buffer(xni_context_t ctx_, void* buf, size_t nbytes, size_t reserved, xni_target_buffer_t* tbp) { - struct tcp_context* ctx = (struct tcp_context*) ctx_; +static int register_buffer(struct tcp_connection *conn, void* buf, size_t nbytes, size_t reserved) { + // buffer base address uintptr_t beginp = (uintptr_t)buf; + // buffer data area base address uintptr_t datap = (uintptr_t)buf + (uintptr_t)(reserved); + // number of bytes available for headers + //TODO: isn't this just the same as `reserved' ? 
size_t avail = (size_t)(datap - beginp); // Make sure space exists in the registered buffers array - if (ctx->control_block.num_sockets <= ctx->num_registered) + if (conn->context->control_block.num_sockets <= conn->num_registered) return XNI_ERR; // Make sure enough padding exists @@ -156,28 +162,22 @@ static int tcp_register_buffer(xni_context_t ctx_, void* buf, size_t nbytes, siz return XNI_ERR; // Add the buffer into the array of registered buffers - pthread_mutex_lock(&ctx->buffer_mutex); - struct tcp_target_buffer *tb = ctx->registered_buffers + ctx->num_registered; - tb->context = ctx; + pthread_mutex_lock(&conn->buffer_mutex); + struct tcp_target_buffer *tb = conn->registered_buffers + conn->num_registered; + tb->connection = conn; tb->data = (void*)datap; + tb->sequence_number = 0; tb->target_offset = 0; tb->data_length = -1; tb->busy = 0; tb->header = (void*)(datap - TCP_DATA_MESSAGE_HEADER_SIZE); - ctx->registered_buffers[ctx->num_registered] = *tb; - ctx->num_registered++; - pthread_mutex_unlock(&ctx->buffer_mutex); - - // Set the user's target buffer - *tbp = (xni_target_buffer_t)&(tb); - return XNI_OK; -} + conn->num_registered++; + pthread_mutex_unlock(&conn->buffer_mutex); -static int tcp_unregister_buffer(xni_context_t ctx, void* buf) { return XNI_OK; } -static int tcp_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, xni_connection_t* conn_) +static int tcp_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, xni_bufset_t *bufset, xni_connection_t* conn_) { struct tcp_context *ctx = (struct tcp_context*)ctx_; struct tcp_connection **conn = (struct tcp_connection**)conn_; @@ -226,6 +226,29 @@ static int tcp_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, #endif // HAVE_DECL_TCP_CONGESTION } + // optionally set the window size + if (ctx->control_block.window_size != XNI_TCP_DEFAULT_WINDOW_SIZE) { + optval = ctx->control_block.window_size; + int rc = setsockopt(servers[i], + SOL_SOCKET, + SO_SNDBUF, + 
&optval, + sizeof(optval)); + if (rc) { + perror("setsockopt"); + } + + optval = ctx->control_block.window_size; + rc = setsockopt(servers[i], + SOL_SOCKET, + SO_RCVBUF, + &optval, + sizeof(optval)); + if (rc) { + perror("setsockopt"); + } + } + struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; @@ -258,6 +281,18 @@ static int tcp_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, tmpconn->num_sockets = num_sockets; pthread_mutex_init(&tmpconn->socket_mutex, NULL); pthread_cond_init(&tmpconn->socket_cond, NULL); + tmpconn->registered_buffers = calloc(ctx->control_block.num_sockets, + sizeof(*tmpconn->registered_buffers)); + tmpconn->num_registered = 0; + pthread_mutex_init(&tmpconn->buffer_mutex, NULL); + pthread_cond_init(&tmpconn->buffer_cond, NULL); + + for (size_t i = 0; i < bufset->bufcount; i++) { + int rc = register_buffer(tmpconn, bufset->bufs[i], bufset->bufsize, bufset->reserved); + if (rc != XNI_OK) { + goto error_out; + } + } *conn = tmpconn; return XNI_OK; @@ -273,18 +308,17 @@ static int tcp_accept_connection(xni_context_t ctx_, struct xni_endpoint* local, if (servers[i] != -1) close(servers[i]); - /*// free any allocated target buffers - for (struct tcp_target_buffer **ptr = target_buffers; *ptr; ptr++) - ctx->control_block.free_fn(*ptr); + if (tmpconn) { + free(tmpconn->registered_buffers); + free(tmpconn); + } - free(target_buffers);*/ - free(tmpconn); free(clients); return XNI_ERR; } -static int tcp_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_connection_t* conn_) +static int tcp_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_bufset_t *bufset, xni_connection_t* conn_) { struct tcp_context *ctx = (struct tcp_context*)ctx_; struct tcp_connection **conn = (struct tcp_connection**)conn_; @@ -324,6 +358,29 @@ static int tcp_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conn #endif // HAVE_DECL_TCP_CONGESTION } + // optionally set the window size + if 
(ctx->control_block.window_size != XNI_TCP_DEFAULT_WINDOW_SIZE) { + int optval = ctx->control_block.window_size; + int rc = setsockopt(servers[i].sockd, + SOL_SOCKET, + SO_SNDBUF, + &optval, + sizeof(optval)); + if (rc) { + perror("setsockopt"); + } + + optval = ctx->control_block.window_size; + rc = setsockopt(servers[i].sockd, + SOL_SOCKET, + SO_RCVBUF, + &optval, + sizeof(optval)); + if (rc) { + perror("setsockopt"); + } + } + struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; @@ -335,14 +392,24 @@ static int tcp_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conn } } - //TODO: allocate target buffer list and attach - tmpconn->context = ctx; tmpconn->destination = 0; tmpconn->sockets = servers; tmpconn->num_sockets = num_sockets; pthread_mutex_init(&tmpconn->socket_mutex, NULL); pthread_cond_init(&tmpconn->socket_cond, NULL); + tmpconn->registered_buffers = calloc(ctx->control_block.num_sockets, + sizeof(*tmpconn->registered_buffers)); + tmpconn->num_registered = 0; + pthread_mutex_init(&tmpconn->buffer_mutex, NULL); + pthread_cond_init(&tmpconn->buffer_cond, NULL); + + for (size_t i = 0; i < bufset->bufcount; i++) { + int rc = register_buffer(tmpconn, bufset->bufs[i], bufset->bufsize, bufset->reserved); + if (rc != XNI_OK) { + goto error_out; + } + } *conn = tmpconn; return XNI_OK; @@ -353,12 +420,11 @@ static int tcp_connect(xni_context_t ctx_, struct xni_endpoint* remote, xni_conn if (servers[i].sockd != -1) close(servers[i].sockd); - // free any allocated target buffers - //for (struct tcp_target_buffer **ptr = target_buffers; *ptr; ptr++) - // ctx->control_block.free_fn(*ptr); + if (tmpconn) { + free(tmpconn->registered_buffers); + free(tmpconn); + } - //free(target_buffers); - free(tmpconn); free(servers); return XNI_ERR; @@ -377,27 +443,26 @@ static int tcp_close_connection(xni_connection_t *conn_) if (c->sockets[i].sockd != -1) close(c->sockets[i].sockd); - /*BWS - struct tcp_target_buffer **buffers = 
(c->destination ? c->receive_buffers : c->send_buffers); - while (*buffers++) - c->context->control_block.free_fn(*buffers); - */ + pthread_mutex_destroy(&c->buffer_mutex); + pthread_cond_destroy(&c->buffer_cond); + free(c->registered_buffers); + free(c); *conn = NULL; return XNI_OK; } -static int tcp_request_target_buffer(xni_context_t ctx_, xni_target_buffer_t *targetbuf_) +static int tcp_request_target_buffer(xni_connection_t conn_, xni_target_buffer_t *targetbuf_) { - struct tcp_context *ctx = (struct tcp_context*)ctx_; + struct tcp_connection *conn = (struct tcp_connection*)conn_; struct tcp_target_buffer **targetbuf = (struct tcp_target_buffer**)targetbuf_; struct tcp_target_buffer *tb = NULL; - pthread_mutex_lock(&ctx->buffer_mutex); + pthread_mutex_lock(&conn->buffer_mutex); while (tb == NULL) { - for (size_t i = 0; i < ctx->num_registered; i++) { - struct tcp_target_buffer *ptr = ctx->registered_buffers + i; + for (size_t i = 0; i < conn->num_registered; i++) { + struct tcp_target_buffer *ptr = conn->registered_buffers + i; if (!ptr->busy) { tb = ptr; tb->busy = 1; @@ -405,9 +470,9 @@ static int tcp_request_target_buffer(xni_context_t ctx_, xni_target_buffer_t *ta } } if (tb == NULL) - pthread_cond_wait(&ctx->buffer_cond, &ctx->buffer_mutex); + pthread_cond_wait(&conn->buffer_cond, &conn->buffer_mutex); } - pthread_mutex_unlock(&ctx->buffer_mutex); + pthread_mutex_unlock(&conn->buffer_mutex); *targetbuf = tb; return XNI_OK; @@ -424,10 +489,12 @@ static int tcp_send_target_buffer(xni_connection_t conn_, xni_target_buffer_t *t return XNI_ERR; // encode the message header - uint64_t tmp64 = tb->target_offset; + uint64_t tmp64 = htonll(tb->sequence_number); memcpy(tb->header, &tmp64, 8); - uint32_t tmp32 = tb->data_length; - memcpy(((char*)tb->header)+8, &tmp32, 4); + tmp64 = htonll(tb->target_offset); + memcpy(((char*)tb->header)+8, &tmp64, 8); + uint32_t tmp32 = htonl(tb->data_length); + memcpy(((char*)tb->header)+16, &tmp32, 4); // locate a free socket 
struct tcp_socket *socket = NULL; @@ -464,10 +531,10 @@ static int tcp_send_target_buffer(xni_connection_t conn_, xni_target_buffer_t *t pthread_mutex_unlock(&conn->socket_mutex); // mark the buffer as free - pthread_mutex_lock(&tb->context->buffer_mutex); + pthread_mutex_lock(&conn->buffer_mutex); tb->busy = 0; - pthread_cond_signal(&tb->context->buffer_cond); - pthread_mutex_unlock(&tb->context->buffer_mutex); + pthread_cond_signal(&conn->buffer_cond); + pthread_mutex_unlock(&conn->buffer_mutex); *targetbuf = NULL; return XNI_OK; @@ -481,10 +548,10 @@ static int tcp_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t // grab a free buffer struct tcp_target_buffer *tb = NULL; - pthread_mutex_lock(&conn->context->buffer_mutex); + pthread_mutex_lock(&conn->buffer_mutex); while (tb == NULL) { - for (size_t i = 0; i < conn->context->num_registered; i++) { - struct tcp_target_buffer *ptr = conn->context->registered_buffers + i; + for (size_t i = 0; i < conn->num_registered; i++) { + struct tcp_target_buffer *ptr = conn->registered_buffers + i; if (!ptr->busy) { tb = ptr; tb->busy = 1; @@ -492,9 +559,9 @@ static int tcp_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t } } if (tb == NULL) - pthread_cond_wait(&conn->context->buffer_cond, &conn->context->buffer_mutex); + pthread_cond_wait(&conn->buffer_cond, &conn->buffer_mutex); } - pthread_mutex_unlock(&conn->context->buffer_mutex); + pthread_mutex_unlock(&conn->buffer_mutex); struct tcp_socket *socket = NULL; while (socket == NULL) { @@ -551,10 +618,16 @@ static int tcp_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t received += cnt; } + // decode the header + uint64_t sequence_number; + memcpy(&sequence_number, recvbuf, 8); + sequence_number = ntohll(sequence_number); uint64_t target_offset; - memcpy(&target_offset, recvbuf, 8); + memcpy(&target_offset, recvbuf+8, 8); + target_offset = ntohll(target_offset); uint32_t data_length; - memcpy(&data_length, recvbuf+8, 4); + 
memcpy(&data_length, recvbuf+16, 4); + data_length = ntohl(data_length); recvbuf = (char*)tb->data; total = data_length; @@ -570,6 +643,7 @@ static int tcp_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t } //TODO: sanity checks (e.g. tb->connection) + tb->sequence_number = sequence_number; tb->target_offset = target_offset; tb->data_length = (int)data_length; @@ -589,10 +663,10 @@ static int tcp_receive_target_buffer(xni_connection_t conn_, xni_target_buffer_t buffer_out: if (return_code != XNI_OK) { // mark the buffer as free - pthread_mutex_lock(&conn->context->buffer_mutex); + pthread_mutex_lock(&conn->buffer_mutex); tb->busy = 0; - pthread_cond_signal(&conn->context->buffer_cond); - pthread_mutex_unlock(&conn->context->buffer_mutex); + pthread_cond_signal(&conn->buffer_cond); + pthread_mutex_unlock(&conn->buffer_mutex); } return return_code; @@ -606,10 +680,12 @@ static int tcp_release_target_buffer(xni_target_buffer_t *targetbuf_) tb->target_offset = 0; tb->data_length = -1; - pthread_mutex_lock(&tb->context->buffer_mutex); + //TODO: is it really OK to access the lock protecting an object + // through the object itself? 
+ pthread_mutex_lock(&tb->connection->buffer_mutex); tb->busy = 0; - pthread_cond_signal(&tb->context->buffer_cond); - pthread_mutex_unlock(&tb->context->buffer_mutex); + pthread_cond_signal(&tb->connection->buffer_cond); + pthread_mutex_unlock(&tb->connection->buffer_mutex); *targetbuf = NULL; return XNI_OK; @@ -620,8 +696,6 @@ static struct xni_protocol protocol_tcp = { .name = PROTOCOL_NAME, .context_create = tcp_context_create, .context_destroy = tcp_context_destroy, - .register_buffer = tcp_register_buffer, - .unregister_buffer = tcp_unregister_buffer, .accept_connection = tcp_accept_connection, .connect = tcp_connect, .close_connection = tcp_close_connection, diff --git a/tests/other/xni/basic-ib-test.c b/tests/other/xni/basic-ib-test.c index 6a32031f..7fe2e494 100644 --- a/tests/other/xni/basic-ib-test.c +++ b/tests/other/xni/basic-ib-test.c @@ -60,7 +60,6 @@ int start_server() xni_release_target_buffer(&xtb); // XNI cleanup stuff - xni_unregister_buffer(xni_ctx, buf); xni_finalize(); free(buf); #else diff --git a/tests/other/xni/basic-xni-test.c b/tests/other/xni/basic-xni-test.c index eb014a13..17e918fa 100644 --- a/tests/other/xni/basic-xni-test.c +++ b/tests/other/xni/basic-xni-test.c @@ -46,7 +46,6 @@ int start_server() xni_release_target_buffer(&xtb); // XNI cleanup stuff - xni_unregister_buffer(xni_ctx, buf); xni_finalize(); free(buf); return rc; diff --git a/tests/paper/config.sh b/tests/paper/config.sh index 1c962427..9ab2b9a8 100755 --- a/tests/paper/config.sh +++ b/tests/paper/config.sh @@ -49,6 +49,9 @@ E2EPORT=40010 # number of network threads (also number of I/O threads) E2ETHREADS=1 +# TCP port for iperf to listen on/connect to +IPERFPORT=40310 + # device to use for XNI InfiniBand IBDEVICE="mlx4_0" diff --git a/tests/paper/drive.sh b/tests/paper/drive.sh new file mode 100755 index 00000000..f0abe051 --- /dev/null +++ b/tests/paper/drive.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +source ./functions.sh + +declare -r ONEGB=1000000000 +declare -r 
TENGB=$((10*${ONEGB})) +declare -r TWENTYGB=$((20*${ONEGB})) +declare -r FORTYGB=$((40*${ONEGB})) +declare -r EIGHTYGB=$((80*${ONEGB})) + +NITER=10 + +OUTFILE=results.csv + +HEADER="proto,pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize" + +echo ${HEADER} >${OUTFILE} + +for size in ${TENGB} ${TWENTYGB} ${FORTYGB} ${EIGHTYGB} +do + for i in `seq ${NITER}` + do + echo -n "iperf," >>${OUTFILE} + run_remote_iperf ${size} >>${OUTFILE} + sleep 4 + + echo -n "tcp," >>${OUTFILE} + run_remote_xdd tcp ${size} >>${OUTFILE} + sleep 4 + + echo -n "xtcp," >>${OUTFILE} + run_remote_xdd xtcp ${size} >>${OUTFILE} + sleep 4 + done +done diff --git a/tests/paper/functions.sh b/tests/paper/functions.sh index b6fbfaab..88b21033 100755 --- a/tests/paper/functions.sh +++ b/tests/paper/functions.sh @@ -9,120 +9,7 @@ abort () { exit 101 } -#TODO: NUMA -#TODO: congestion -# Arguments: transport [bytes] -# transport can be tcp, xtcp, or ib -run_local_xdd () { - # override configuration values - E2ESRC=localhost - E2EDEST=localhost - - # set option variables - if [ "$1" = 'tcp' ] - then - local XNIOPT='' - elif [ "$1" = 'xtcp' ] - then - local XNIOPT='-xni tcp' - elif [ "$1" = 'ib' ] - then - local XNIOPT='-xni ib' - local IBDEVICEOPT="-ibdevice ${IBDEVICE}" - else - abort "unknown transport '$1'" - fi - - if [ -n "$2" ] - then - local BYTES="$2" - fi - - local TARGETOPT="-targets 1 null" # reads/writes are no-ops - local E2EOPT="-e2e dest ${E2ESRC}:${E2EPORT},${E2ETHREADS}" - local REQSIZEOPT="-reqsize ${REQSIZE}" - local BYTESOPT="-bytes ${BYTES}" - - # start destination side in background and ignore output - ${XDD} \ - ${XNIOPT} \ - ${IBDEVICEOPT} \ - ${TARGETOPT} \ - -op write -e2e isdest \ - ${E2EOPT} \ - ${BYTESOPT} \ - ${REQSIZEOPT} \ - >/dev/null \ - & - - # wait for destination side to start - sleep 3 - - # start source side and output - # pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize - ${XDD} \ - ${XNIOPT} \ - ${IBDEVICEOPT} \ - 
${TARGETOPT} \ - -op read -e2e issrc \ - ${E2EOPT} \ - ${BYTESOPT} \ - ${REQSIZEOPT} \ - | grep -o 'COMBINED .*' \ - | sed 's/ */,/g' \ - | cut -d ',' -f 2-13 -} - -#TODO: NUMA -#TODO: congestion -# Arguments: [bytes] -run_local_iperf () { - # override configuration values - E2ESRC=localhost - E2EDEST=localhost - - # set option variables - if [ -n "$1" ] - then - local BYTES="$1" - fi - - local CLIENTOPT="-c ${E2EDEST}" - local CSVOPT="-y c" - local INTERVALOPT="-i 3600" # large interval so only the total is output - local BUFLENOPT="-l $((${REQSIZE}*1024))" - local NUMOPT="-n ${BYTES}" - - # start destination side in background and ignore output - ${IPERF} \ - -s \ - >/dev/null 2>/dev/null \ - & - - # wait for destination side to start - sleep 3 - - # start source side and save output - local CSVOUT=$( - ${IPERF} \ - ${CLIENTOPT} \ - ${CSVOPT} \ - ${INTERVALOPT} \ - ${BUFLENOPT} \ - ${NUMOPT} \ - | tail -1 - ) - - # parse output values - local ELAPSEDOUT=`cut -d , -f 7 <<<${CSVOUT} | cut -d - -f 2` - local BANDWIDTHOUT=$((`cut -d , -f 9 <<<${CSVOUT}`/8000000)) - - # output - # pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize - echo ,,,${BYTES},,${ELAPSEDOUT},${BANDWIDTHOUT},,,,,${REQSIZE} -} - -# Arguments: transport [bytes] +# Arguments: transport [bytes] [reqsize] # transport can be tcp, xtcp, or ib run_remote_xdd () { # set option variables @@ -151,6 +38,11 @@ run_remote_xdd () { local BYTES="$2" fi + if [ -n "$3" ] + then + local REQSIZE="$3" + fi + local TARGETOPT="-targets 1 null" # reads/writes are no-ops local E2EOPT="-e2e dest ${E2EDEST}:${E2EPORT},${E2ETHREADS}" [ -n "${CONGESTION}" ] && local CONGESTIONOPT="-congestion ${CONGESTION}" @@ -196,8 +88,24 @@ run_remote_xdd () { | cut -d ',' -f 2-13 } -# Arguments: [bytes] +function ceiling() { + local dividend=$1 + local divisor=$2 + local result=$(( (${dividend} - 1)/${divisor} + 1)) + echo ${result} +} + +# Arguments: [bytes] [reqsize] run_remote_iperf () { + # ensure that Iperf can 
understand our command line and that we + # can understand Iperf's output + local VERSION=`${IPERF} --version 2>&1 | cut -d ' ' -f 1-3` + if [ "$VERSION" != 'iperf version 2.0.5' ] + then + abort "unsupported iperf: ${VERSION}" + fi + + # set option variables local NUMACMD="" if [ "$NUMA" == 'true' ] @@ -210,11 +118,17 @@ run_remote_iperf () { local BYTES="$1" fi + if [ -n "$2" ] + then + local REQSIZE="$2" + fi + local CLIENTOPT="-c ${E2EDEST}" local CSVOPT="-y c" - local INTERVALOPT="-i 3600" # large interval so only the total is output local BUFLENOPT="-l $((${REQSIZE}*1024))" - local NUMOPT="-n ${BYTES}" + local NUMOPT="-n `ceiling ${BYTES} ${E2ETHREADS}`" + local PORTOPT="-p ${IPERFPORT}" + local PARALLELOPT="-P ${E2ETHREADS}" [ -n "${CONGESTION}" ] && local CONGESTIONOPT="-Z ${CONGESTION}" local SSHOPT="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"\ " -o BatchMode=yes" @@ -223,9 +137,11 @@ run_remote_iperf () { ${NUMACMD} \ ${IPERF} \ -s \ + ${PORTOPT} \ ${CONGESTIONOPT} \ >/dev/null 2>/dev/null \ & + local IPERFPID=$! 
# wait for destination side to start sleep 3 @@ -238,10 +154,11 @@ run_remote_iperf () { ${NUMACMD} \ ${IPERF} \ ${CLIENTOPT} \ + ${PORTOPT} \ ${CSVOPT} \ - ${INTERVALOPT} \ ${BUFLENOPT} \ ${NUMOPT} \ + ${PARALLELOPT} \ ${CONGESTIONOPT} \ | tail -1 ) @@ -253,4 +170,7 @@ run_remote_iperf () { # output # pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize echo ,,,${BYTES},,${ELAPSEDOUT},${BANDWIDTHOUT},,,,,${REQSIZE} + + # cleanup by killing iperf server + kill "${IPERFPID}" } diff --git a/tests/paper/local-iperf-null.sh b/tests/paper/local-iperf-null.sh deleted file mode 100755 index 4003d21b..00000000 --- a/tests/paper/local-iperf-null.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# load support functions -source ./functions.sh - -run_local_iperf diff --git a/tests/paper/local-tcp-null.sh b/tests/paper/local-tcp-null.sh deleted file mode 100755 index ed754f99..00000000 --- a/tests/paper/local-tcp-null.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# load support functions -source ./functions.sh - -run_local_xdd tcp diff --git a/tests/paper/local-xtcp-null.sh b/tests/paper/local-xtcp-null.sh deleted file mode 100755 index 9a1f8fe8..00000000 --- a/tests/paper/local-xtcp-null.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# load support functions -source ./functions.sh - -run_local_xdd xtcp diff --git a/tests/paper/localdriver.sh b/tests/paper/localdriver.sh deleted file mode 100755 index 9a7f356b..00000000 --- a/tests/paper/localdriver.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -NITER=10 - -TCPOUT=tcp-10g.csv -XTCPOUT=xtcp-10g.csv -IPERFOUT=iperf-10g.csv - -HEADER="pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize" -echo ${HEADER} >${TCPOUT} -echo ${HEADER} >${XTCPOUT} -echo ${HEADER} >${IPERFOUT} -for i in `seq $NITER` -do - ./local-tcp-null.sh >>${TCPOUT} - sleep 4 - ./local-xtcp-null.sh >>${XTCPOUT} - sleep 4 - ./local-iperf-null.sh >>${IPERFOUT} - sleep 4 -done diff --git a/tests/paper/numadrive.sh 
b/tests/paper/numadrive.sh new file mode 100755 index 00000000..6223145f --- /dev/null +++ b/tests/paper/numadrive.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +source ./functions.sh + +declare -r ONEGB=1000000000 +declare -r TENGB=$((10*${ONEGB})) +declare -r TWENTYGB=$((20*${ONEGB})) +declare -r FORTYGB=$((40*${ONEGB})) +declare -r EIGHTYGB=$((80*${ONEGB})) + +NITER=10 + +OUTFILE="numa-node" +if [ "$NUMA" == 'true' ] +then + OUTFILE="${OUTFILE}${NUMANODE}" +else + OUTFILE="${OUTFILE}default" +fi +OUTFILE="${OUTFILE}-${E2ETHREADS}stream.csv" + +HEADER="proto,pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize" + +echo ${HEADER} >${OUTFILE} + +for reqsize in 1024 2048 4096 8192 16384 +do + for i in `seq ${NITER}` + do + echo -n "iperf," >>${OUTFILE} + run_remote_iperf ${EIGHTYGB} ${reqsize} >>${OUTFILE} + sleep 4 + + echo -n "xtcp," >>${OUTFILE} + run_remote_xdd xtcp ${EIGHTYGB} ${reqsize} >>${OUTFILE} + sleep 4 + done +done diff --git a/tests/paper/local-ib-null.sh b/tests/paper/remote-ib-null.sh similarity index 77% rename from tests/paper/local-ib-null.sh rename to tests/paper/remote-ib-null.sh index 90e6ad91..295e4d0d 100755 --- a/tests/paper/local-ib-null.sh +++ b/tests/paper/remote-ib-null.sh @@ -3,4 +3,4 @@ # load support functions source ./functions.sh -run_local_xdd ib +run_remote_xdd ib diff --git a/tests/paper/remotedriver.sh b/tests/paper/remotedriver.sh deleted file mode 100755 index 32193f39..00000000 --- a/tests/paper/remotedriver.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -NITER=10 - -TCPOUT=tcp-10g.csv -XTCPOUT=xtcp-10g.csv -IPERFOUT=iperf-10g.csv - -HEADER="pass,target,queue,size,ops,elapsed,bandwidth,iops,latency,cpu,op,reqsize" -echo ${HEADER} >${TCPOUT} -echo ${HEADER} >${XTCPOUT} -echo ${HEADER} >${IPERFOUT} -for i in `seq $NITER` -do - ./remote-tcp-null.sh >>${TCPOUT} - sleep 4 - ./remote-xtcp-null.sh >>${XTCPOUT} - sleep 4 - ./remote-iperf-null.sh >>${IPERFOUT} - sleep 4 -done