From be771a7332d8fda451169fd1bbce5b7cabeaa489 Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Thu, 21 Nov 2019 11:05:31 +0800 Subject: [PATCH 01/14] =?UTF-8?q?dpdk-iface-kmod:=20remove=20warning=20of?= =?UTF-8?q?=20=E2=80=98rte=5Feth=5Fdev=5Fcount=E2=80=99=20is=20deprecated.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rte_eth_dev_count is deprecated and use rte_eth_dev_count_avail instead. Signed-off-by: Qingmin Liu --- dpdk-iface-kmod/dpdk_iface_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpdk-iface-kmod/dpdk_iface_main.c b/dpdk-iface-kmod/dpdk_iface_main.c index 143cea4d1..53278b2e5 100644 --- a/dpdk-iface-kmod/dpdk_iface_main.c +++ b/dpdk-iface-kmod/dpdk_iface_main.c @@ -260,7 +260,11 @@ main(int argc, char **argv) ret = rte_eal_init(rte_argc, rte_argv); /* get total count of detected ethernet ports */ +#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) num_devices = rte_eth_dev_count(); +#else + num_devices = rte_eth_dev_count_avail(); +#endif if (num_devices == 0) { fprintf(stderr, "No Ethernet port detected!\n"); exit(EXIT_FAILURE); From d63af3e94478c666fa6a2444f71d7e356ee0cfbd Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 28 Oct 2019 18:17:01 +0800 Subject: [PATCH 02/14] dpdk-iface-kmod: add cross-compile support - Use variable CROSS to specify toolchain prefix - Use variable RTE_KERNELDIR to specify kernel build dir if RTE_KERNELDIR is not defined. 
Signed-off-by: Qingmin Liu --- dpdk-iface-kmod/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dpdk-iface-kmod/Makefile b/dpdk-iface-kmod/Makefile index 93da3030c..ab6b6fd3d 100644 --- a/dpdk-iface-kmod/Makefile +++ b/dpdk-iface-kmod/Makefile @@ -6,9 +6,12 @@ endif ifeq ($(RTE_TARGET),) $(error "Please define RTE_TARGET environment variable") endif + +RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build/ + #-------------------------------------------------------------------------# include $(RTE_SDK)/mk/rte.vars.mk -CC=gcc +CC=$(CROSS)gcc obj-m=dpdk_iface.o DPDK_MACHINE_LINKER_FLAGS=$${RTE_SDK}/$${RTE_TARGET}/lib/ldflags.txt DPDK_MACHINE_LDFLAGS=$(shell cat ${DPDK_MACHINE_LINKER_FLAGS}) @@ -25,7 +28,7 @@ else endif #-------------------------------------------------------------------------# all: dpdk_iface.c $(appname) $(appname).c - make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules + make -C ${RTE_KERNELDIR} M=$(PWD) modules $(appname): $(appname).c $(MSG) " CC $<" @@ -34,7 +37,7 @@ $(appname): $(appname).c -L$(DPDK_LIB) ${DPDK_MACHINE_LDFLAGS} -lpthread clean: - make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean + make -C ${RTE_KERNELDIR} M=$(PWD) clean $(MSG) " CLEAN $(appname)" $(HIDE) rm -rf *~ *.o *.ko dpdk_iface_main From 8a4fed4afaa98791c6eee08ffd0473769d076bff Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 28 Oct 2019 18:19:26 +0800 Subject: [PATCH 03/14] mtcp: use ARCH to specify the build arch - If ARCH is not defined, retrieve it from system. 
- Only x86_64 adds -m64 Signed-off-by: Qingmin Liu --- apps/example/Makefile.in | 5 ++++- mtcp/src/Makefile.in | 5 ++++- util/Makefile.in | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/apps/example/Makefile.in b/apps/example/Makefile.in index 23544f309..10c0a486a 100644 --- a/apps/example/Makefile.in +++ b/apps/example/Makefile.in @@ -8,8 +8,11 @@ NETMAP=@NETMAP@ ONVM=@ONVM@ CFLAGS=@CFLAGS@ +# If ARCH is not defined, retrive from system +ARCH ?= $(shell uname -m) + # Add arch-specific optimization -ifeq ($(shell uname -m),x86_64) +ifeq ($(ARCH),x86_64) LIBS += -m64 endif diff --git a/mtcp/src/Makefile.in b/mtcp/src/Makefile.in index c703ae213..711250d11 100644 --- a/mtcp/src/Makefile.in +++ b/mtcp/src/Makefile.in @@ -19,7 +19,10 @@ MTCP_HDR = mtcp_api.h mtcp_epoll.h GCC=@CC@ ### FLAGS ### -ifeq ($(shell uname -m),x86_64) + +# If ARCH is not defined, retrive from system +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) GCC_OPT = -m64 else GCC_OPT = diff --git a/util/Makefile.in b/util/Makefile.in index 0d03bf271..f48fcafe2 100644 --- a/util/Makefile.in +++ b/util/Makefile.in @@ -1,7 +1,9 @@ ### GCC ### GCC=@CC@ -ifeq ($(shell uname -m),x86_64) +# If ARCH is not defined, retrive from system +ARCH ?= $(shell uname -m) +ifeq ($(ARCH),x86_64) GCC_OPT = -m64 else GCC_OPT = From 49cab59d8be9f56b0a267c9f8fbe9e4d7891004d Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 28 Oct 2019 18:24:22 +0800 Subject: [PATCH 04/14] apps: add external LDFLAGS inherit - If not it cannot find external lib Signed-off-by: Qingmin Liu --- apps/example/Makefile.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/example/Makefile.in b/apps/example/Makefile.in index 10c0a486a..e9086bebd 100644 --- a/apps/example/Makefile.in +++ b/apps/example/Makefile.in @@ -7,6 +7,7 @@ PS=@PSIO@ NETMAP=@NETMAP@ ONVM=@ONVM@ CFLAGS=@CFLAGS@ +LDFLAGS=@LDFLAGS@ # If ARCH is not defined, retrive from system ARCH ?= $(shell uname -m) @@ -47,7 +48,7 @@ endif ifeq 
($(DPDK),1) DPDK_MACHINE_LINKER_FLAGS=$${RTE_SDK}/$${RTE_TARGET}/lib/ldflags.txt DPDK_MACHINE_LDFLAGS=$(shell cat ${DPDK_MACHINE_LINKER_FLAGS}) -LIBS += -g -O3 -pthread -lrt -march=native ${MTCP_FLD}/lib/libmtcp.a -lnuma -lmtcp -lpthread -lrt -ldl -lgmp -L${RTE_SDK}/${RTE_TARGET}/lib ${DPDK_MACHINE_LDFLAGS} +LIBS += -g -O3 -pthread -lrt -march=native ${MTCP_FLD}/lib/libmtcp.a -lnuma -lmtcp -lpthread -lrt -ldl -lgmp -L${RTE_SDK}/${RTE_TARGET}/lib ${DPDK_MACHINE_LDFLAGS} ${LDFLAGS} endif # onvm-specific variables From c5e5d52e1c127c79263619f4a2430d732b157e5f Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 28 Oct 2019 18:25:01 +0800 Subject: [PATCH 05/14] build: add cross-compile build script for mtcp - Verified at ARM64 Platform. - For dependencies, introduce ext_lib to specify the dir. Signed-off-by: Qingmin Liu --- mtcp_crossbuild.sh | 101 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 mtcp_crossbuild.sh diff --git a/mtcp_crossbuild.sh b/mtcp_crossbuild.sh new file mode 100755 index 000000000..f3e7701a3 --- /dev/null +++ b/mtcp_crossbuild.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +args="$*" + +usage () +{ + echo "args=$args" + echo + echo "`basename $0` -h -c -k -i -d " + echo + echo "Helper script, used to build dpdk." + echo + echo " -h Help Usage" + echo " -c specify build platform" + echo " -k Directory that kernel builds if enable LKM build option" + echo " -i If needed introduce external lib dependencies" + echo " -d Directory that dpdk builds" + echo +} + +while getopts "hc:k:i:d:" opt; do + case $opt in + h) show_usage=1 + ;; + c) build_arch="$OPTARG" + ;; + k) kernel_build="$OPTARG" + ;; + i) ext_lib="$OPTARG" + ;; + d) dpdk_build="$OPTARG" + ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + show_usage=1 + ;; + esac +done + +if [ "$show_usage" == "1" ]; then + usage + exit 1 +fi + +CUR_PATH=`pwd` + +if [ "$build_arch" == "aarch64" ]; then + CROSS=aarch64-linux-gnu- + RTE_TARGET=arm64-stingray-linuxapp-gcc +else + RTE_TARGET=x86_64-native-linuxapp-gcc +fi + +if [ -z "$kernel_build" ]; then + RTE_KERNELDIR=/lib/modules/`uname -r`/build +else + RTE_KERNELDIR=$kernel_build +fi + +if [ -z "$ext_lib" ]; then + EXT_LIB_DIR=$CUR_PATH/../ext_lib +else + EXT_LIB_DIR=$ext_lib +fi + +if [ -z $dpdk_build ]; then + RTE_SDK=$CUR_PATH/../dpdk +else + RTE_SDK=$dpdk_build +fi +DPDK_BUILD=$RTE_SDK/$RTE_TARGET +export RTE_SDK RTE_TARGET DPDK_BUILD RTE_KERNELDIR + +# check ldflags.txt +if grep "ldflags.txt" $RTE_SDK/mk/rte.app.mk > /dev/null +then + : +else + sed -i -e 's/O_TO_EXE_STR =/\$(shell if [ \! -d \${RTE_SDK}\/\${RTE_TARGET}\/lib ]\; then mkdir \${RTE_SDK}\/\${RTE_TARGET}\/lib\; fi)\nLINKER_FLAGS = \$(call linkerprefix,\$(LDLIBS))\n\$(shell echo \${LINKER_FLAGS} \> \${RTE_SDK}\/\${RTE_TARGET}\/lib\/ldflags\.txt)\nO_TO_EXE_STR =/g' $RTE_SDK/mk/rte.app.mk + echo "Need to rebuild dpdk." 
+ exit 1 +fi + +echo +echo "======================================" +echo "Build_arch : $build_arch" +echo "RTE_TARGET : $RTE_TARGET" +echo "DPDK Build : $RTE_SDK" +echo "======================================" +echo + +CUR_PATH=`pwd` + +# build kernel module +cd $CUR_PATH/dpdk-iface-kmod && make CROSS=$CROSS RTE_KERNELDIR=$RTE_KERNELDIR V=1 + +# build application +cd $CUR_PATH && autoreconf -f -i && ./configure --host=aarch64 CC=${CROSS}gcc LD=${CROSS}ld --with-dpdk=$RTE_SDK/$RTE_TARGET --with-dpdk-lib=$RTE_SDK/$RTE_TARGET/lib CFLAGS="-I$EXT_LIB_DIR/include" LDFLAGS="-L$EXT_LIB_DIR/lib64" +make -j `grep -c ^processor /proc/cpuinfo` ARCH=arm64 CC=${CROSS}gcc LD=${CROSS}ld V=0 + +# end of file From 0ffd3d46bc8dfb3553bdec856f755b9a030e6b88 Mon Sep 17 00:00:00 2001 From: Kailiang Zhou <13405859891@163.com> Date: Fri, 22 Nov 2019 02:37:32 -0500 Subject: [PATCH 06/14] mtcp: When DPDK is initiated by APP, no need to exit when fail APP has already done initialization. However mtcp will call it again and report error. It should be avoided otherwise APP will also exit unexpectly. --- mtcp/src/io_module.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mtcp/src/io_module.c b/mtcp/src/io_module.c index 126c94f46..1b8e5e373 100644 --- a/mtcp/src/io_module.c +++ b/mtcp/src/io_module.c @@ -343,8 +343,10 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) /* initialize the dpdk eal env */ ret = rte_eal_init(argc, argv); if (ret < 0) { - TRACE_ERROR("Invalid EAL args!\n"); - exit(EXIT_FAILURE); + if(rte_errno != EALREADY) { + TRACE_ERROR("Invalid EAL args!\n"); + exit(EXIT_FAILURE); + } } /* give me the count of 'detected' ethernet ports */ #if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) From 52b1955e142f99c87195eb155c076b5cebfb4eb6 Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 25 Nov 2019 11:34:45 +0800 Subject: [PATCH 07/14] mtcp: cfg: add configure for spdk support 1. Add --enable-spdk suppport. 2. Pass ENABLE_SPDK based on cfg. 
--- configure.ac | 11 +++++++++++ mtcp/src/Makefile.in | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/configure.ac b/configure.ac index 69f60c584..48d6fa680 100644 --- a/configure.ac +++ b/configure.ac @@ -63,6 +63,17 @@ AC_FUNC_MMAP AC_CHECK_FUNC([clock_gettime],,AC_MSG_ERROR([librt library is missing])) AC_CHECK_FUNCS([bzero getpagesize gettimeofday memmove memset munmap select socket strchr strerror strstr strtol],,AC_MSG_ERROR([glibc library is missing])) +# Reset SPDK to 0 +AC_SUBST(SPDK, 0) + +dnl Example of default-disabled feature +AC_ARG_ENABLE([spdk], + AS_HELP_STRING([--enable-spdk], [Enable SPDK Support])) + +AS_IF([test "x$enable_spdk" = "xyes"], [ + AC_SUBST(SPDK, 1) +]) + # Reset DPDK to 0 AC_SUBST(DPDK, 0) # Reset enforcement value diff --git a/mtcp/src/Makefile.in b/mtcp/src/Makefile.in index 711250d11..04936f1b9 100644 --- a/mtcp/src/Makefile.in +++ b/mtcp/src/Makefile.in @@ -4,6 +4,7 @@ ### TARGET ### PS=@PSIO@ +SPDK=@SPDK@ DPDK=@DPDK@ ENFORCE_RX_IDLE=@ENFORCE_RX_IDLE@ NETMAP=@NETMAP@ @@ -53,6 +54,10 @@ GCC_OPT += -DNDEBUG -g -O3 -DNETSTAT -DINFO -DDBGERR -DDBGCERR #GCC_OPT += -DNDEBUG -g -DNETSTAT -DINFO -DDBGERR -DDBGCERR GCC_OPT += $(DBG_OPT) +ifeq ($(SPDK),1) +GCC_OPT += -DENABLE_SPDK +endif + ifeq ($(LRO),1) GCC_OPT += -DENABLELRO endif From da3354e7f3417b16525409e7d274ebf81038675c Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 16 Dec 2019 10:44:40 +0800 Subject: [PATCH 08/14] mtcp: src: limit RSS key len to 40 Bytes. Stingray has only 320-bit RSS key. So limit to 40B at the max. --- mtcp/src/dpdk_module.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mtcp/src/dpdk_module.c b/mtcp/src/dpdk_module.c index ebf124ecb..132282a16 100644 --- a/mtcp/src/dpdk_module.c +++ b/mtcp/src/dpdk_module.c @@ -702,6 +702,10 @@ dpdk_load_module(void) /* init port */ printf("Initializing port %u... 
", (unsigned) portid); fflush(stdout); + + if (!strncmp(dev_info[portid].driver_name, "net_bnxt", 8)) + port_conf.rx_adv_conf.rss_conf.rss_key_len = 40; + ret = rte_eth_dev_configure(portid, CONFIG.num_cores, CONFIG.num_cores, &port_conf); if (ret < 0) rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u, cores: %d\n", From 5e0f482f276558810d654807ace017e8d01acb59 Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 16 Dec 2019 15:15:57 +0800 Subject: [PATCH 09/14] mtcp: dpdk-iface-kmod: fix segment fault with 4.14.x kernel with cross compile Specify ARCH/CROSS_COMPILE when build kernel module. --- dpdk-iface-kmod/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdk-iface-kmod/Makefile b/dpdk-iface-kmod/Makefile index ab6b6fd3d..a84732157 100644 --- a/dpdk-iface-kmod/Makefile +++ b/dpdk-iface-kmod/Makefile @@ -28,7 +28,7 @@ else endif #-------------------------------------------------------------------------# all: dpdk_iface.c $(appname) $(appname).c - make -C ${RTE_KERNELDIR} M=$(PWD) modules + make ARCH=${ARCH} CROSS_COMPILE=${CROSS} -C ${RTE_KERNELDIR} M=$(PWD) modules $(appname): $(appname).c $(MSG) " CC $<" From 93c72bdc444a18d5464da44d8a52e898f7b31b4d Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 25 Nov 2019 16:40:43 +0800 Subject: [PATCH 10/14] mtcp: get rid of warning msg 1. mtcp_destroy strict prototype 2. 
mtcp_read no inline --- mtcp/src/api.c | 2 +- mtcp/src/include/mtcp_api.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mtcp/src/api.c b/mtcp/src/api.c index c53714bcf..3429f4476 100644 --- a/mtcp/src/api.c +++ b/mtcp/src/api.c @@ -1284,7 +1284,7 @@ mtcp_recv(mctx_t mctx, int sockid, char *buf, size_t len, int flags) return ret; } /*----------------------------------------------------------------------------*/ -inline ssize_t +ssize_t mtcp_read(mctx_t mctx, int sockid, char *buf, size_t len) { return mtcp_recv(mctx, sockid, buf, len, 0); diff --git a/mtcp/src/include/mtcp_api.h b/mtcp/src/include/mtcp_api.h index 0a1ee40fd..5eda81010 100644 --- a/mtcp/src/include/mtcp_api.h +++ b/mtcp/src/include/mtcp_api.h @@ -46,7 +46,7 @@ int mtcp_init(const char *config_file); void -mtcp_destroy(); +mtcp_destroy(void); int mtcp_getconf(struct mtcp_conf *conf); @@ -124,7 +124,7 @@ int mtcp_getpeername(mctx_t mctx, int sockid, struct sockaddr *addr, socklen_t *addrlen); -inline ssize_t +ssize_t mtcp_read(mctx_t mctx, int sockid, char *buf, size_t len); ssize_t From a9fa7841c3135b011929180d0ae24feecae0fea7 Mon Sep 17 00:00:00 2001 From: Kailiang Zhou <13405859891@163.com> Date: Mon, 25 Nov 2019 02:56:17 -0500 Subject: [PATCH 11/14] mtcp: core: Add SPDK instance function SPDK poller will register separately with mtcp_run_instance. Modify MTCPRunThread for SPDK scenario. 
--- mtcp/src/core.c | 229 ++++++++++++++++++++---------------- mtcp/src/include/mtcp_api.h | 6 + 2 files changed, 136 insertions(+), 99 deletions(-) diff --git a/mtcp/src/core.c b/mtcp/src/core.c index 7ce22c805..93b93634a 100644 --- a/mtcp/src/core.c +++ b/mtcp/src/core.c @@ -750,123 +750,140 @@ InterruptApplication(mtcp_manager_t mtcp) } } /*----------------------------------------------------------------------------*/ +#ifndef ENABLE_SPDK static void RunMainLoop(struct mtcp_thread_context *ctx) { mtcp_manager_t mtcp = ctx->mtcp_manager; - int i; - int recv_cnt; - int rx_inf, tx_inf; - struct timeval cur_ts = {0}; - uint32_t ts, ts_prev; - int thresh; - gettimeofday(&cur_ts, NULL); - TRACE_DBG("CPU %d: mtcp thread running.\n", ctx->cpu); - ts = ts_prev = 0; while ((!ctx->done || mtcp->flow_cnt) && !ctx->exit) { - - STAT_COUNT(mtcp->runstat.rounds); - recv_cnt = 0; - - gettimeofday(&cur_ts, NULL); - ts = TIMEVAL_TO_TS(&cur_ts); - mtcp->cur_ts = ts; - - for (rx_inf = 0; rx_inf < CONFIG.eths_num; rx_inf++) { - - static uint16_t len; - static uint8_t *pktbuf; - recv_cnt = mtcp->iom->recv_pkts(ctx, rx_inf); - STAT_COUNT(mtcp->runstat.rounds_rx_try); + mtcp_run_instance(ctx); + } - for (i = 0; i < recv_cnt; i++) { - pktbuf = mtcp->iom->get_rptr(mtcp->ctx, rx_inf, i, &len); - if (pktbuf != NULL) - ProcessPacket(mtcp, rx_inf, ts, pktbuf, len); -#ifdef NETSTAT - else - mtcp->nstat.rx_errors[rx_inf]++; +#if TESTING + DestroyRemainingFlows(mtcp); #endif - } - } - STAT_COUNT(mtcp->runstat.rounds_rx); - /* interaction with application */ - if (mtcp->flow_cnt > 0) { - - /* check retransmission timeout and timewait expire */ -#if 0 - thresh = (int)mtcp->flow_cnt / (TS_TO_USEC(PER_STREAM_TCHECK)); - assert(thresh >= 0); - if (thresh == 0) - thresh = 1; - if (recv_cnt > 0 && thresh > recv_cnt) - thresh = recv_cnt; + TRACE_DBG("MTCP thread %d out of main loop.\n", ctx->cpu); + /* flush logs */ + flush_log_data(mtcp); + TRACE_DBG("MTCP thread %d flushed logs.\n", ctx->cpu); + 
InterruptApplication(mtcp); + TRACE_INFO("MTCP thread %d finished.\n", ctx->cpu); +} #endif - thresh = CONFIG.max_concurrency; - - /* Eunyoung, you may fix this later - * if there is no rcv packet, we will send as much as possible - */ - if (thresh == -1) - thresh = CONFIG.max_concurrency; - - CheckRtmTimeout(mtcp, ts, thresh); - CheckTimewaitExpire(mtcp, ts, CONFIG.max_concurrency); - - if (CONFIG.tcp_timeout > 0 && ts != ts_prev) { - CheckConnectionTimeout(mtcp, ts, thresh); - } - } - - /* if epoll is in use, flush all the queued events */ - if (mtcp->ep) { - FlushEpollEvents(mtcp, ts); - } - - if (mtcp->flow_cnt > 0) { - /* hadnle stream queues */ - HandleApplicationCalls(mtcp, ts); - } - - WritePacketsToChunks(mtcp, ts); - /* send packets from write buffer */ - /* send until tx is available */ - for (tx_inf = 0; tx_inf < CONFIG.eths_num; tx_inf++) { - mtcp->iom->send_pkts(ctx, tx_inf); - } +RTE_DEFINE_PER_LCORE(uint32_t , ts_prev) = 0; - if (ts != ts_prev) { - ts_prev = ts; - if (ctx->cpu == mtcp_master) { - ARPTimer(mtcp, ts); +int mtcp_run_instance(void *tmp) +{ + struct mtcp_thread_context *ctx= (struct mtcp_thread_context *)tmp; + mtcp_manager_t mtcp = ctx->mtcp_manager; + int i; + int recv_cnt; + int rx_inf, tx_inf; + struct timeval cur_ts = {0}; + int thresh; + uint32_t ts=0; + + if ((!ctx->done || mtcp->flow_cnt) && !ctx->exit) { + + STAT_COUNT(mtcp->runstat.rounds); + recv_cnt = 0; + + gettimeofday(&cur_ts, NULL); + ts = TIMEVAL_TO_TS(&cur_ts); + mtcp->cur_ts = ts; + + for (rx_inf = 0; rx_inf < CONFIG.eths_num; rx_inf++) { + + static uint16_t len; + static uint8_t *pktbuf; + recv_cnt = mtcp->iom->recv_pkts(ctx, rx_inf); + STAT_COUNT(mtcp->runstat.rounds_rx_try); + + for (i = 0; i < recv_cnt; i++) { + pktbuf = mtcp->iom->get_rptr(mtcp->ctx, rx_inf, i, &len); + if (pktbuf != NULL) + ProcessPacket(mtcp, rx_inf, ts, pktbuf, len); #ifdef NETSTAT - PrintNetworkStats(mtcp, ts); + else + mtcp->nstat.rx_errors[rx_inf]++; #endif - } - } + } + } + 
STAT_COUNT(mtcp->runstat.rounds_rx); - mtcp->iom->select(ctx); + /* interaction with application */ + if (mtcp->flow_cnt > 0) { - if (ctx->interrupt) { - InterruptApplication(mtcp); - } - } + /* check retransmission timeout and timewait expire */ +#if 0 + thresh = (int)mtcp->flow_cnt / (TS_TO_USEC(PER_STREAM_TCHECK)); + assert(thresh >= 0); + if (thresh == 0) + thresh = 1; + if (recv_cnt > 0 && thresh > recv_cnt) + thresh = recv_cnt; +#endif + thresh = CONFIG.max_concurrency; + + /* Eunyoung, you may fix this later + * if there is no rcv packet, we will send as much as possible + */ + if (thresh == -1) + thresh = CONFIG.max_concurrency; + + CheckRtmTimeout(mtcp, ts, thresh); + CheckTimewaitExpire(mtcp, ts, CONFIG.max_concurrency); + + if (CONFIG.tcp_timeout > 0 && ts != RTE_PER_LCORE(ts_prev)) { + CheckConnectionTimeout(mtcp, ts, thresh); + } + } + + /* if epoll is in use, flush all the queued events */ + if (mtcp->ep) { + FlushEpollEvents(mtcp, ts); + } + + if (mtcp->flow_cnt > 0) { + /* hadnle stream queues */ + HandleApplicationCalls(mtcp, ts); + } + + WritePacketsToChunks(mtcp, ts); + + /* send packets from write buffer */ + /* send until tx is available */ + for (tx_inf = 0; tx_inf < CONFIG.eths_num; tx_inf++) { + mtcp->iom->send_pkts(ctx, tx_inf); + } + + if (ts != RTE_PER_LCORE(ts_prev)) { + RTE_PER_LCORE(ts_prev) = ts; + if (ctx->cpu == mtcp_master) { + ARPTimer(mtcp, ts); +#ifndef ENABLE_SPDK +#ifdef NETSTAT -#if TESTING - DestroyRemainingFlows(mtcp); + PrintNetworkStats(mtcp, ts); +#endif #endif + } + } - TRACE_DBG("MTCP thread %d out of main loop.\n", ctx->cpu); - /* flush logs */ - flush_log_data(mtcp); - TRACE_DBG("MTCP thread %d flushed logs.\n", ctx->cpu); - InterruptApplication(mtcp); - TRACE_INFO("MTCP thread %d finished.\n", ctx->cpu); + mtcp->iom->select(ctx); + + if (ctx->interrupt) { + InterruptApplication(mtcp); + } + } + + return 0; } + /*----------------------------------------------------------------------------*/ struct mtcp_sender * 
CreateMTCPSender(int ifidx) @@ -1075,7 +1092,7 @@ InitializeMTCPManager(struct mtcp_thread_context* ctx) return mtcp; } /*----------------------------------------------------------------------------*/ -static void * +void * MTCPRunThread(void *arg) { mctx_t mctx = (mctx_t)arg; @@ -1152,7 +1169,7 @@ MTCPRunThread(void *arg) fprintf(stderr, "CPU %d: initialization finished.\n", cpu); sem_post(&g_init_sem[ctx->cpu]); - +#ifndef ENABLE_SPDK /* start the main loop */ RunMainLoop(ctx); @@ -1164,8 +1181,20 @@ MTCPRunThread(void *arg) DestroyHashtable(g_mtcp[cpu]->listeners); TRACE_DBG("MTCP thread %d finished.\n", ctx->cpu); - return 0; +#else + sem_wait(&g_init_sem[ctx->cpu]); + sem_destroy(&g_init_sem[ctx->cpu]); + + running[ctx->cpu] = TRUE; + + if (mtcp_master < 0) { + mtcp_master = ctx->cpu; + TRACE_INFO("CPU %d is now the master thread.\n", mtcp_master); + } + + return ctx; +#endif } /*----------------------------------------------------------------------------*/ #ifndef DISABLE_DPDK @@ -1231,6 +1260,7 @@ mtcp_create_context(int cpu) return NULL; } #endif +#ifndef ENABLE_SPDK #ifndef DISABLE_DPDK /* Wake up mTCP threads (wake up I/O threads) */ if (current_iomodule_func == &dpdk_module_func) { @@ -1268,6 +1298,7 @@ mtcp_create_context(int cpu) TRACE_INFO("CPU %d is now the master thread.\n", mtcp_master); } +#endif return mctx; } /*----------------------------------------------------------------------------*/ diff --git a/mtcp/src/include/mtcp_api.h b/mtcp/src/include/mtcp_api.h index 5eda81010..017cb51d7 100644 --- a/mtcp/src/include/mtcp_api.h +++ b/mtcp/src/include/mtcp_api.h @@ -68,6 +68,9 @@ typedef void (*mtcp_sighandler_t)(int); mtcp_sighandler_t mtcp_register_signal(int signum, mtcp_sighandler_t handler); +void * +MTCPRunThread(void *arg); + int mtcp_pipe(mctx_t mctx, int pipeid[2]); @@ -87,6 +90,9 @@ mtcp_setsock_nonblock(mctx_t mctx, int sockid); int mtcp_socket_ioctl(mctx_t mctx, int sockid, int request, void *argp); +int +mtcp_run_instance(void *tmp); + 
int mtcp_socket(mctx_t mctx, int domain, int type, int protocol); From d77863ba0547c41e77394b751e6b1cccf8d950ca Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Thu, 9 Jan 2020 15:29:39 +0800 Subject: [PATCH 12/14] scripts: add option to enable spdk build Introduce '-e' option to enable spdk. - Disable spdk support without '-e'. - Enable spdk support with '-e'. --- mtcp_crossbuild.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/mtcp_crossbuild.sh b/mtcp_crossbuild.sh index f3e7701a3..64bf43ec0 100755 --- a/mtcp_crossbuild.sh +++ b/mtcp_crossbuild.sh @@ -6,7 +6,7 @@ usage () { echo "args=$args" echo - echo "`basename $0` -h -c -k -i -d " + echo "`basename $0` -h -c -k -i -d -e" echo echo "Helper script, used to build dpdk." echo @@ -15,10 +15,11 @@ usage () echo " -k Directory that kernel builds if enable LKM build option" echo " -i If needed introduce external lib dependencies" echo " -d Directory that dpdk builds" + echo " -e Enable SPDK Support" echo } -while getopts "hc:k:i:d:" opt; do +while getopts "hc:k:i:d:e" opt; do case $opt in h) show_usage=1 ;; @@ -30,6 +31,8 @@ while getopts "hc:k:i:d:" opt; do ;; d) dpdk_build="$OPTARG" ;; + e) spdk_enable=1 + ;; \?) 
echo "Invalid option: -$OPTARG" >&2 show_usage=1 @@ -63,6 +66,10 @@ else EXT_LIB_DIR=$ext_lib fi +if [ "$spdk_enable" == "1" ]; then + SPDK_OPTION="--enable-spdk" +fi + if [ -z $dpdk_build ]; then RTE_SDK=$CUR_PATH/../dpdk else @@ -86,6 +93,7 @@ echo "======================================" echo "Build_arch : $build_arch" echo "RTE_TARGET : $RTE_TARGET" echo "DPDK Build : $RTE_SDK" +echo "SPDK Support: $spdk_enable" echo "======================================" echo @@ -95,7 +103,8 @@ CUR_PATH=`pwd` cd $CUR_PATH/dpdk-iface-kmod && make CROSS=$CROSS RTE_KERNELDIR=$RTE_KERNELDIR V=1 # build application -cd $CUR_PATH && autoreconf -f -i && ./configure --host=aarch64 CC=${CROSS}gcc LD=${CROSS}ld --with-dpdk=$RTE_SDK/$RTE_TARGET --with-dpdk-lib=$RTE_SDK/$RTE_TARGET/lib CFLAGS="-I$EXT_LIB_DIR/include" LDFLAGS="-L$EXT_LIB_DIR/lib64" +cd $CUR_PATH && autoreconf -f -i && \ + ./configure --host=aarch64 CC=${CROSS}gcc LD=${CROSS}ld --with-dpdk=$RTE_SDK/$RTE_TARGET --with-dpdk-lib=$RTE_SDK/$RTE_TARGET/lib $SPDK_OPTION CFLAGS="-I$EXT_LIB_DIR/include" LDFLAGS="-L$EXT_LIB_DIR/lib64 -L$EXT_LIB_DIR/lib" make -j `grep -c ^processor /proc/cpuinfo` ARCH=arm64 CC=${CROSS}gcc LD=${CROSS}ld V=0 # end of file From 0feda6593571fd0280eea441442cbd3e64687ff4 Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Mon, 13 Jan 2020 11:16:01 +0800 Subject: [PATCH 13/14] README: add doc README.spdk Add README.spdk for mtcp+spdk integration. Signed-off-by: Qingmin Liu --- README.spdk | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 README.spdk diff --git a/README.spdk b/README.spdk new file mode 100644 index 000000000..736f904f0 --- /dev/null +++ b/README.spdk @@ -0,0 +1,37 @@ +======================================================================== + README for mTCP development with spdk +======================================================================== + +1. Why mTCP changes + +mTCP is a highly scalable user-level TCP stack for multicore systems. 
+It uses a per-thread architecture and starts "RunMainLoop" on each +thread, which then runs as an endless loop. + +However, spdk registers a poller per thread to handle receiving and +events, so it requires mtcp to supply a separate recv/send handler. +It's necessary to split RunMainLoop into two APIs: RunMainLoop and +mtcp_run_instance. Original mTCP apps still use RunMainLoop and are not +affected, while spdk uses mtcp_run_instance for its poller. + +2. How to enable spdk support + +Pass "--enable-spdk" to configure to enable spdk support (off by default). +See mtcp_crossbuild.sh (option -e) for reference. + +3. SPDK changes + +SPDK itself needs more changes. + +- Add mtcp sock interface. Currently it only supports posix/vpp sock + interface. Just follow the framework to add mtcp sock interface. +- Change connection schedule mode to support mtcp per-thread arch. + +Upstreaming of these changes is ongoing. + +======================================================================== + + Contact: mtcp-user at list.ndsl.kaist.edu + April 2, 2015. + EunYoung Jeong + M. Asim Jamshed From 1dd9131e42927cd513f096272bdb40fa2ad52fd1 Mon Sep 17 00:00:00 2001 From: Qingmin Liu Date: Fri, 17 Jan 2020 16:28:38 +0800 Subject: [PATCH 14/14] mtcp: add dpdk 19.11 support. - dpdk 19.11 added the rte_ prefix to some variables and structs. - adapt the code to support this. 
--- dpdk-iface-kmod/dpdk_iface_main.c | 4 ++++ mtcp/src/dpdk_module.c | 30 +++++++++++++++--------------- mtcp/src/include/mtcp.h | 16 ++++++++++++++++ mtcp/src/io_module.c | 4 ++++ mtcp/src/onvm_module.c | 4 ++++ 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/dpdk-iface-kmod/dpdk_iface_main.c b/dpdk-iface-kmod/dpdk_iface_main.c index 53278b2e5..89f579a90 100644 --- a/dpdk-iface-kmod/dpdk_iface_main.c +++ b/dpdk-iface-kmod/dpdk_iface_main.c @@ -22,7 +22,11 @@ typedef struct { PciDevice pd; struct rte_eth_dev_info dev_details; +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) struct ether_addr ports_eth_addr; +#else + struct rte_ether_addr ports_eth_addr; +#endif } DevInfo; static DevInfo di[RTE_MAX_ETHPORTS]; diff --git a/mtcp/src/dpdk_module.c b/mtcp/src/dpdk_module.c index 132282a16..0688a2098 100644 --- a/mtcp/src/dpdk_module.c +++ b/mtcp/src/dpdk_module.c @@ -87,7 +87,7 @@ #define ETHER_IFG 12 #define ETHER_PREAMBLE 8 -#define ETHER_OVR (ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG) +#define ETHER_OVR (MTCP_ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG) static const uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; static const uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; @@ -98,7 +98,7 @@ static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; //#define DEBUG 1 #ifdef DEBUG /* ethernet addresses of ports */ -static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +static MTCP_ETHER_ADDR ports_eth_addr[RTE_MAX_ETHPORTS]; #endif static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; @@ -106,7 +106,7 @@ static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; static struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, - .max_rx_pkt_len = ETHER_MAX_LEN, + .max_rx_pkt_len = MTCP_ETHER_MAX_LEN, #if RTE_VERSION > RTE_VERSION_NUM(17, 8, 0, 0) .offloads = ( #if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0) @@ -412,7 +412,7 @@ dpdk_get_wptr(struct mtcp_thread_context *ctxt, int ifidx, uint16_t pktsize) m = 
dpc->wmbufs[ifidx].m_table[len_of_mbuf]; /* retrieve the right write offset */ - ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); + ptr = (void *)rte_pktmbuf_mtod(m, MTCP_ETHER_HDR *); m->pkt_len = m->data_len = pktsize; m->nb_segs = 1; m->next = NULL; @@ -467,16 +467,16 @@ dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) struct rte_mbuf * ip_reassemble(struct dpdk_private_context *dpc, struct rte_mbuf *m) { - struct ether_hdr *eth_hdr; + MTCP_ETHER_HDR *eth_hdr; struct rte_ip_frag_tbl *tbl; struct rte_ip_frag_death_row *dr; /* if packet is IPv4 */ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { - struct ipv4_hdr *ip_hdr; + MTCP_IPV4_HDR *ip_hdr; - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1); + eth_hdr = rte_pktmbuf_mtod(m, MTCP_ETHER_HDR *); + ip_hdr = (MTCP_IPV4_HDR *)(eth_hdr + 1); /* if it is a fragmented packet, then try to reassemble. */ if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) { @@ -826,7 +826,7 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) goto dev_ioctl_err; m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; - m->l2_len = sizeof(struct ether_hdr); + m->l2_len = sizeof(MTCP_ETHER_HDR); m->l3_len = (iph->ihl<<2); break; case PKT_TX_TCP_CSUM: @@ -835,19 +835,19 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); m->ol_flags |= PKT_TX_TCP_CKSUM; - tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); + tcph->check = rte_ipv4_phdr_cksum((MTCP_IPV4_HDR *)iph, m->ol_flags); break; #ifdef ENABLELRO case PKT_RX_TCP_LROSEG: m = dpc->cur_rx_m; //if (m->next != NULL) // rte_prefetch0(rte_pktmbuf_mtod(m->next, void *)); - iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(struct ether_hdr)); + iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(MTCP_ETHER_HDR)); 
tcph = (struct tcphdr *)((u_char *)iph + (iph->ihl << 2)); payload = (uint8_t *)tcph + (tcph->doff << 2); seg_off = m->data_len - - sizeof(struct ether_hdr) - (iph->ihl << 2) - + sizeof(MTCP_ETHER_HDR) - (iph->ihl << 2) - (tcph->doff << 2); to = (uint8_t *) argp; @@ -870,13 +870,13 @@ dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) if ((dev_info[nif].tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) goto dev_ioctl_err; m = dpc->wmbufs[eidx].m_table[len_of_mbuf - 1]; - iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(struct ether_hdr)); + iph = rte_pktmbuf_mtod_offset(m, struct iphdr *, sizeof(MTCP_ETHER_HDR)); tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2)); - m->l2_len = sizeof(struct ether_hdr); + m->l2_len = sizeof(MTCP_ETHER_HDR); m->l3_len = (iph->ihl<<2); m->l4_len = (tcph->doff<<2); m->ol_flags = PKT_TX_TCP_CKSUM | PKT_TX_IP_CKSUM | PKT_TX_IPV4; - tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); + tcph->check = rte_ipv4_phdr_cksum((MTCP_IPV4_HDR *)iph, m->ol_flags); break; case PKT_RX_IP_CSUM: if ((dev_info[nif].rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0) diff --git a/mtcp/src/include/mtcp.h b/mtcp/src/include/mtcp.h index ba07fd773..1e92c3433 100644 --- a/mtcp/src/include/mtcp.h +++ b/mtcp/src/include/mtcp.h @@ -39,6 +39,22 @@ #define ERROR (-1) #endif +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) +typedef struct ether_addr MTCP_ETHER_ADDR; +typedef struct ipv4_hdr MTCP_IPV4_HDR; +typedef struct ether_hdr MTCP_ETHER_HDR; + +#define MTCP_ETHER_MAX_LEN ETHER_MAX_LEN +#define MTCP_ETHER_CRC_LEN ETHER_CRC_LEN +#else +typedef struct rte_ether_addr MTCP_ETHER_ADDR; +typedef struct rte_ipv4_hdr MTCP_IPV4_HDR; +typedef struct rte_ether_hdr MTCP_ETHER_HDR; + +#define MTCP_ETHER_MAX_LEN RTE_ETHER_MAX_LEN +#define MTCP_ETHER_CRC_LEN RTE_ETHER_CRC_LEN +#endif + #define ETHERNET_HEADER_LEN 14 // sizeof(struct ethhdr) #define IP_HEADER_LEN 20 // sizeof(struct iphdr) #define TCP_HEADER_LEN 20 // 
sizeof(struct tcphdr) diff --git a/mtcp/src/io_module.c b/mtcp/src/io_module.c index 1b8e5e373..9864ad12a 100644 --- a/mtcp/src/io_module.c +++ b/mtcp/src/io_module.c @@ -260,7 +260,11 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) char socket_mem_str[32] = ""; // int i; int ret, socket_mem; +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#else + static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#endif /* STEP 1: first determine CPU mask */ mpz_init(_cpumask); diff --git a/mtcp/src/onvm_module.c b/mtcp/src/onvm_module.c index eed55f263..4d8a8a5ea 100644 --- a/mtcp/src/onvm_module.c +++ b/mtcp/src/onvm_module.c @@ -63,7 +63,11 @@ static struct rte_mempool *pktmbuf_pool = NULL; //#define DEBUG 1 #ifdef DEBUG /* ethernet addresses of ports */ +#if RTE_VERSION < RTE_VERSION_NUM(19, 8, 0, 0) static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#else +static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; +#endif #endif static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS];