From f601c9f8eee1585892530f6e4d847c6801b3bd2d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 11 Jun 2019 12:26:55 -0400 Subject: [PATCH 01/28] net-tcp_bbr: broaden app-limited rate sample detection This commit is a bug fix for the Linux TCP app-limited (application-limited) logic that is used for collecting rate (bandwidth) samples. Previously the app-limited logic only looked for "bubbles" of silence in between application writes, by checking at the start of each sendmsg. But "bubbles" of silence can also happen before retransmits: e.g. bubbles can happen between an application write and a retransmit, or between two retransmits. Retransmits are triggered by ACKs or timers. So this commit checks for bubbles of app-limited silence upon ACKs or timers. Why does this commit check for app-limited state at the start of ACKs and timer handling? Because at that point we know whether inflight was fully using the cwnd. During processing the ACK or timer event we often change the cwnd; after changing the cwnd we can't know whether inflight was fully using the old cwnd. Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc Change-Id: I37221506f5166877c2b110753d39bb0757985e68 --- net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_timer.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 57c8af1859c1..c01c6b0d8ee5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3819,6 +3819,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 470f581eedd4..2b8d7e94a369 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -624,6 +624,7 @@ void tcp_write_timer_handler(struct sock *sk) return; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; From 9cb2d74a55ce4d621666a93c59f8635a91c03975 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 24 Jun 2018 21:55:59 -0400 Subject: [PATCH 02/28] net-tcp_bbr: v2: shrink delivered_mstamp, first_tx_mstamp to u32 to free up 8 bytes Free up some space for tracking inflight and losses for each bw sample, in upcoming commits. These timestamps are in microseconds, and are now stored in 32 bits. So they can only hold time intervals up to roughly 2^12 = 4096 seconds. But Linux TCP RTT and RTO tracking has the same 32-bit microsecond implementation approach and resulting deployment limitations. So this is not introducing a new limit. And these should not be a limitation for the foreseeable future. Effort: net-tcp_bbr Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55 Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c --- include/net/tcp.h | 9 +++++++-- net/ipv4/tcp_rate.c | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 226bce6d1e8c..e7510d7e4fd7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -819,6 +819,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } +static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) +{ + return max_t(s32, t1 - t0, 0); +} + static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { return tcp_ns_to_ts(skb->skb_mstamp_ns); @@ -894,9 +899,9 @@ struct tcp_skb_cb { /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - u64 first_tx_mstamp; + u32 first_tx_mstamp; /* when we reached the "delivered" count */ - u64 delivered_mstamp; + u32 delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index a8f6d9d06f2e..763362159fe9 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); From 767930979dacb584aa07b9f492f521d1f06a9bc3 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 5 Aug 2017 11:49:50 -0400 Subject: [PATCH 03/28] net-tcp_bbr: v2: snapshot packets in flight at transmit time and pass in rate_sample CC algorithms may want to snapshot the number of packets in flight at transmit time and pass in rate_sample, to understand the relationship between inflight and losses or ECN signals, to try to find the highest inflight value that has acceptable levels of loss/ECN marking. We split out the code to set an skb's tx.in_flight field into its own function, so that this code can be used for the TCP_REPAIR "fake send" code path that inserts skbs into the rtx queue without sending them. Effort: net-tcp_bbr Origin-9xx-SHA1: b3eb4f2d20efab4ca001f32c9294739036c493ea Origin-9xx-SHA1: e880fc907d06ea7354333f60f712748ebce9497b Origin-9xx-SHA1: 330f825a08a6fe92cef74d799cc468864c479f63 Change-Id: I7314047d0ff14dd261a04b1969a46dc658c8836a --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_output.c | 1 + net/ipv4/tcp_rate.c | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index e7510d7e4fd7..8a95b9015ee6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -902,6 +902,10 @@ struct tcp_skb_cb { u32 first_tx_mstamp; /* when we reached the "delivered" count */ u32 delivered_mstamp; +#define TCPCB_IN_FLIGHT_BITS 20 +#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) + u32 in_flight:20, /* packets in flight at transmit */ + unused2:12; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1049,6 +1053,7 @@ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ s32 delivered; /* number of packets delivered over interval */ s32 delivered_ce; /* number of packets delivered w/ CE marks*/ long interval_us; /* time for tp->delivered to incr "delivered" */ @@ -1171,6 +1176,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) void tcp_set_ca_state(struct sock *sk, const u8 ca_state); /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2cb39b6dad02..369428af806f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2674,6 +2674,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 763362159fe9..1424911b1b9e 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -67,6 +85,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -96,6 +115,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ From 4e589c6069b75bf559f59e09fa19871fd92fb44d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 12 Oct 2017 23:44:27 -0400 Subject: [PATCH 04/28] net-tcp_bbr: v2: count packets lost over TCP rate sampling interval For understanding the relationship between inflight and packet loss signals, to try to find the highest inflight value that has acceptable levels of packet losses. Effort: net-tcp_bbr Origin-9xx-SHA1: 4527e26b2bd7756a88b5b9ef1ada3da33dd609ab Change-Id: I594c2500868d9c530770e7ddd68ffc87c57f4fd5 --- include/net/tcp.h | 5 ++++- net/ipv4/tcp_rate.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 8a95b9015ee6..819d0af40bec 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -906,6 +906,7 @@ struct tcp_skb_cb { #define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) u32 in_flight:20, /* packets in flight at transmit */ unused2:12; + u32 lost; /* packets lost so far upon tx of skb */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1051,11 +1052,13 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ - s32 delivered_ce; /* number of packets delivered w/ CE marks*/ + s32 delivered_ce; /* packets delivered w/ CE mark over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 1424911b1b9e..8737f2134648 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -84,6 +84,7 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; tcp_set_tx_in_flight(sk, skb); } @@ -110,6 +111,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, scb->end_seq, rs->last_end_seq)) { + rs->prior_lost = scb->tx.lost; rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; @@ -165,6 +167,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ From ade2a0e3f26b45f0de9fe9b368c9bef6609a2c8f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 19 Nov 2018 13:48:36 -0500 Subject: [PATCH 05/28] net-tcp_bbr: v2: export FLAG_ECE in rate_sample.is_ece For understanding the relationship between inflight and ECN signals, to try to find the highest inflight value that has acceptable levels ECN marking. Effort: net-tcp_bbr Origin-9xx-SHA1: 3eba998f2898541406c2666781182200934965a8 Change-Id: I3a964e04cee83e11649a54507043d2dfe769a3b3 --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index 819d0af40bec..0e2632feb2e8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1070,6 +1070,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c01c6b0d8ee5..44c1c2c12bb4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3918,6 +3918,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); From 1f4015e7004cea97a458feb4bb847a78a3367607 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 7 Aug 2018 21:52:06 -0400 Subject: [PATCH 06/28] net-tcp_bbr: v2: introduce ca_ops->skb_marked_lost() CC module callback API For connections experiencing reordering, RACK can mark packets lost long after we receive the SACKs/ACKs hinting that the packets were actually lost. This means that CC modules cannot easily learn the volume of inflight data at which packet loss happens by looking at the current inflight or even the packets in flight when the most recently SACKed packet was sent. To learn this, CC modules need to know how many packets were in flight at the time lost packets were sent. This new callback, combined with TCP_SKB_CB(skb)->tx.in_flight, allows them to learn this. This also provides a consistent callback that is invoked whether packets are marked lost upon ACK processing, using the RACK reordering timer, or at RTO time. Effort: net-tcp_bbr Origin-9xx-SHA1: afcbebe3374e4632ac6714d39e4dc8a8455956f4 Change-Id: I54826ab53df636be537e5d3c618a46145d12d51a --- include/net/tcp.h | 3 +++ net/ipv4/tcp_input.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0e2632feb2e8..4141a2751cc4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1097,6 +1097,9 @@ struct tcp_congestion_ops { /* override sysctl_tcp_min_tso_segs */ u32 (*min_tso_segs)(struct sock *sk); + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); + /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 44c1c2c12bb4..ce0d74d52590 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1079,7 +1079,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) From f82a3d3f940c5220b37a2e0884ef399fc8b952c2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 May 2019 20:16:33 -0400 Subject: [PATCH 07/28] net-tcp_bbr: v2: adjust skb tx.in_flight upon merge in tcp_shifted_skb() When tcp_shifted_skb() updates state as adjacent SACKed skbs are coalesced, previously the tx.in_flight was not adjusted, so we could get contradictory state where the skb's recorded pcount was bigger than the tx.in_flight (the number of segments that were in_flight after sending the skb). Normally have a SACKed skb with contradictory pcount/tx.in_flight would not matter. However, with SACK reneging, the SACKed bit is removed, and an skb once again becomes eligible for retransmitting, fragmenting, SACKing, etc. Packetdrill testing verified the following sequence is possible in a kernel that does not have this commit: - skb N is SACKed - skb N+1 is SACKed and combined with skb N using tcp_shifted_skb() - tcp_shifted_skb() will increase the pcount of prev, but leave tx.in_flight as-is - so prev skb can have pcount > tx.in_flight - RTO, tcp_timeout_mark_lost(), detect reneg, remove "SACKed" bit, mark skb N as lost - find pcount of skb N is greater than its tx.in_flight I suspect this issue iw what caused the bbr2_inflight_hi_from_lost_skb(): WARN_ON_ONCE(inflight_prev < 0) to fire in production machines using bbr2. Effort: net-tcp_bbr Origin-9xx-SHA1: 1a3e997e613d2dcf32b947992882854ebe873715 Change-Id: I1b0b75c27519953430c7db51c6f358f104c7af55 --- net/ipv4/tcp_input.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ce0d74d52590..7c3824fb0cd8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1465,6 +1465,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep From bfa26db027f29177f05a9772094ed16c8c88c488 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 May 2019 20:16:25 -0400 Subject: [PATCH 08/28] net-tcp_bbr: v2: adjust skb tx.in_flight upon split in tcp_fragment() When we fragment an skb that has already been sent, we need to update the tx.in_flight for the first skb in the resulting pair ("buff"). Because we were not updating the tx.in_flight, the tx.in_flight value was inconsistent with the pcount of the "buff" skb (tx.in_flight would be too high). That meant that if the "buff" skb was lost, then bbr2_inflight_hi_from_lost_skb() would calculate an inflight_hi value that is too high. This could result in longer queues and higher packet loss. Packetdrill testing verified that without this commit, when the second half of an skb is SACKed and then later the first half of that skb is marked lost, the calculated inflight_hi was incorrect. Effort: net-tcp_bbr Origin-9xx-SHA1: 385f1ddc610798fab2837f9f372857438b25f874 Origin-9xx-SHA1: a0eb099690af net-tcp_bbr: v2: fix tcp_fragment() tx.in_flight recomputation [prod feb 8 2021; use as a fixup] Origin-9xx-SHA1: 885503228153ff0c9114e net-tcp_bbr: v2: introduce tcp_skb_tx_in_flight_is_suspicious() helper for warnings Change-Id: I617f8cab4e9be7a0b8e8d30b047bf8645393354d --- include/net/tcp.h | 15 +++++++++++++++ net/ipv4/tcp_output.c | 26 +++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4141a2751cc4..720472317770 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1196,6 +1196,21 @@ static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) return t1 > t2 || (t1 == t2 && after(seq1, seq2)); } +/* If a retransmit failed due to local qdisc congestion or other local issues, + * then we may have called tcp_set_skb_tso_segs() to increase the number of + * segments in the skb without increasing the tx.in_flight. In all other cases, + * the tx.in_flight should be at least as big as the pcount of the sk_buff. We + * do not have the state to know whether a retransmit failed due to local qdisc + * congestion or other local issues, so to avoid spurious warnings we consider + * that any skb marked lost may have suffered that fate. + */ +static inline bool tcp_skb_tx_in_flight_is_suspicious(u32 skb_pcount, + u32 skb_sacked_flags, + u32 tx_in_flight) +{ + return (skb_pcount > tx_in_flight) && !(skb_sacked_flags & TCPCB_LOST); +} + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 369428af806f..3c16966afc70 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1532,7 +1532,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int old_factor; + int old_factor, inflight_prev; long limit; int nlen; u8 flags; @@ -1607,6 +1607,30 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (inflight_prev < 0) { + WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( + old_factor, + TCP_SKB_CB(skb)->sacked, + TCP_SKB_CB(skb)->tx.in_flight), + "inconsistent: tx.in_flight: %u " + "old_factor: %d mss: %u sacked: %u " + "1st pcount: %d 2nd pcount: %d " + "1st len: %u 2nd len: %u ", + TCP_SKB_CB(skb)->tx.in_flight, old_factor, + mss_now, TCP_SKB_CB(skb)->sacked, + tcp_skb_pcount(skb), tcp_skb_pcount(buff), + skb->len, buff->len); + inflight_prev = 0; + } + /* Set 1st tx.in_flight as if 1st were sent by itself: */ + TCP_SKB_CB(skb)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb); + /* Set 2nd tx.in_flight with new 1st and 2nd pcounts: */ + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(skb) + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. */ From 0fa4869b2177bafeadc4a15a5d9c37dad13b147b Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Wed, 23 May 2018 17:55:54 -0700 Subject: [PATCH 09/28] net-tcp: add new ca opts flag TCP_CONG_WANTS_CE_EVENTS Add a a new ca opts flag TCP_CONG_WANTS_CE_EVENTS that allows a congestion control module to receive CE events. Currently congestion control modules have to set the TCP_CONG_NEEDS_ECN bit in opts flag to receive CE events but this may incur changes in ECN behavior elsewhere. This patch adds a new bit TCP_CONG_WANTS_CE_EVENTS that allows congestion control modules to receive CE events independently of TCP_CONG_NEEDS_ECN. Effort: net-tcp Origin-9xx-SHA1: 9f7e14716cde760bc6c67ef8ef7e1ee48501d95b Change-Id: I2255506985242f376d910c6fd37daabaf4744f24 --- include/net/tcp.h | 14 +++++++++++++- net/ipv4/tcp_input.c | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 720472317770..a3459ee0cfb7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1032,7 +1032,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */ +#define TCP_CONG_WANTS_CE_EVENTS 0x4 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1164,6 +1168,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7c3824fb0cd8..26c4ee66b6a2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -349,7 +349,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { @@ -360,7 +360,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; From c20e56d9661031647ddc99c47ae8971d0a7b99b7 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 27 Sep 2019 17:10:26 -0400 Subject: [PATCH 10/28] net-tcp: re-generalize TSO sizing in TCP CC module API Reorganize the API for CC modules so that the CC module once again gets complete control of the TSO sizing decision. This is how the API was set up around 2016 and the initial BBRv1 upstreaming. Later Eric Dumazet simplified it. But with wider testing it now seems that to avoid CPU regressions BBR needs to have a different TSO sizing function. This is necessary to handle cases where there are many flows bottlenecked on the sender host's NIC, in which case BBR's pacing rate is much lower than CUBIC/Reno/DCTCP's. Why does this happen? Because BBR's pacing rate adapts to the low bandwidth share each flow sees. By contrast, CUBIC/Reno/DCTCP see no loss or ECN, so they grow a very large cwnd, and thus large pacing rate and large TSO burst size. Change-Id: Ic8ccfdbe4010ee8d4bf6a6334c48a2fceb2171ea --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_bbr.c | 37 ++++++++++++++++++++++++++----------- net/ipv4/tcp_output.c | 11 +++++------ 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a3459ee0cfb7..b620e246f91a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1098,8 +1098,8 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); /* react to a specific lost skb (optional) */ void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 146792cd26fe..cd31cd8cdc69 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -300,20 +300,35 @@ __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + u32 segs; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. - */ - bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -1149,7 +1164,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3c16966afc70..ba6865731dfe 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2006,13 +2006,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; + u32 tso_segs; - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); - - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } From a5cc0063dc64f3d43c82390567ec9ddf16f4727c Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 16 Nov 2019 13:16:25 -0500 Subject: [PATCH 11/28] net-tcp: add fast_ack_mode=1: skip rwin check in tcp_fast_ack_mode__tcp_ack_snd_check() Add logic for an experimental TCP connection behavior, enabled with tp->fast_ack_mode = 1, which disables checking the receive window before sending an ack in __tcp_ack_snd_check(). If this behavior is enabled, the data receiver sends an ACK if the amount of data is > RCV.MSS. Change-Id: Iaa0a0fd7108221f883137a79d5bfa724f1b096d4 --- include/linux/tcp.h | 3 ++- net/ipv4/tcp.c | 1 + net/ipv4/tcp_cong.c | 1 + net/ipv4/tcp_input.c | 5 +++-- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b4c08ac86983..4297c9176435 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -255,7 +255,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + fast_ack_mode:2, /* which fast ack mode ? */ + unused:3; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e03e08745308..326b2c4bacf6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3083,6 +3083,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1b34050a7538..66d40449b3f4 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -241,6 +241,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; + tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 26c4ee66b6a2..3193ef5aac61 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5545,13 +5545,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ From 4fef7ac2a9ccc4402d8d079002c65e42e9187068 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Fri, 19 Jun 2020 17:33:45 +0000 Subject: [PATCH 12/28] net-tcp_bbr: v2: record app-limited status of TLP-repaired flight When sending a TLP retransmit, record whether the outstanding flight of data is application limited. This is important for congestion control modules that want to respond to losses repaired by TLP retransmits. This is important because the following scenarios convey very different information: (1) a packet loss with a small number of packets in flight; (2) a packet loss with the maximum amount of data in flight allowed by the CC module; Effort: net-tcp_bbr Change-Id: Ic8ae567caa4e4bfd5fd82c3d4be12a5d9171655e --- include/linux/tcp.h | 3 ++- net/ipv4/tcp_output.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4297c9176435..40e08abc6aa1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -256,7 +256,8 @@ struct tcp_sock { u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ fast_ack_mode:2, /* which fast ack mode ? */ - unused:3; + tlp_orig_data_app_limited:1, /* app-limited before TLP rtx? */ + unused:2; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ba6865731dfe..a3022028d291 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2910,6 +2910,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; + tp->tlp_orig_data_app_limited = TCP_SKB_CB(skb)->tx.is_app_limited; if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; From dc4a1f8de1f074d505b3f539df47635af22621f9 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Tue, 16 Jun 2020 17:41:19 +0000 Subject: [PATCH 13/28] net-tcp_bbr: v2: inform CC module of losses repaired by TLP probe Before this commit, when there is a packet loss that creates a sequence hole that is filled by a TLP loss probe, then tcp_process_tlp_ack() only informs the congestion control (CC) module via a back-to-back entry and exit of CWR. But some congestion control modules (e.g. BBR) do not respond to CWR events. This commit adds a new CA event with which the core TCP stack notifies the CC module when a loss is repaired by a TLP. This will allow CC modules that do not use the CWR mechanism to have a custom handler for such TLP recoveries. Effort: net-tcp_bbr Change-Id: Ieba72332b401b329bff5a641d2b2043a3fb8f632 --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/tcp.h b/include/net/tcp.h index b620e246f91a..a41124044566 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1010,6 +1010,7 @@ enum tcp_ca_event { CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ + CA_EVENT_TLP_RECOVERY, /* a lost segment was repaired by TLP probe */ }; /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3193ef5aac61..f89cafa741a5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3721,6 +3721,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) /* ACK advances: there was a loss, so reduce cwnd. Reset * tlp_high_seq in tcp_init_cwnd_reduction() */ + tcp_ca_event(sk, CA_EVENT_TLP_RECOVERY); tcp_init_cwnd_reduction(sk); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); From 9f5cbd8717f7c95c7def5af51972316ea92cbf7b Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 21 Sep 2020 14:46:26 -0400 Subject: [PATCH 14/28] net-tcp_bbr: v2: introduce is_acking_tlp_retrans_seq into rate_sample Introduce is_acking_tlp_retrans_seq into rate_sample. This bool will export to the CC module the knowledge of whether the current ACK matched a TLP retransmit. Note that when this bool is true, we cannot yet tell (in general) whether this ACK is for the original or the TLP retransmit. Effort: net-tcp_bbr Change-Id: I2e6494332167e75efcbdc99bd5c119034e9c39b4 --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index a41124044566..051e0f72281e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1074,6 +1074,7 @@ struct rate_sample { u32 last_end_seq; /* end_seq of most recently ACKed packet */ bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ + bool is_acking_tlp_retrans_seq; /* ACKed a TLP retransmit sequence? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ bool is_ece; /* did this ACK have ECN marked? */ }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f89cafa741a5..2195ba488142 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3704,7 +3704,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) /* This routine deals with acks during a TLP episode and ends an episode by * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ -static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag, + struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); @@ -3732,6 +3733,11 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) FLAG_NOT_DUP | FLAG_DATA_SACKED))) { /* Pure dupack: original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; + } else { + /* This ACK matches a TLP retransmit. We cannot yet tell if + * this ACK is for the original or the TLP retransmit. + */ + rs->is_acking_tlp_retrans_seq = 1; } } @@ -3911,7 +3917,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_rack_update_reo_wnd(sk, &rs); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | @@ -3955,7 +3961,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_ack_probe(sk); if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); + tcp_process_tlp_ack(sk, ack, flag, &rs); return 1; old_ack: From 40f1ce936f1a1732add87dd3518fdd6e5fa0982c Mon Sep 17 00:00:00 2001 From: David Morley Date: Fri, 14 Jul 2023 11:07:56 -0400 Subject: [PATCH 15/28] tcp: introduce per-route feature RTAX_FEATURE_ECN_LOW Define and implement a new per-route feature, RTAX_FEATURE_ECN_LOW. This feature indicates that the given destination network is a low-latency ECN environment, meaning both that ECN CE marks are applied by the network using a low-latency marking threshold and also that TCP endpoints provide precise per-data-segment ECN feedback in ACKs (where the ACK ECE flag echoes the received CE status of all newly-acknowledged data segments). This feature indication can be used by congestion control algorithms to decide how to interpret ECN signals over the given destination network. This feature is appropriate for datacenter-style ECN marking, such as the ECN marking approach expected by DCTCP or BBR congestion control modules. Signed-off-by: David Morley Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Tested-by: David Morley Change-Id: I6bc06e9c6cb426fbae7243fc71c9a8c18175f5d3 --- include/net/tcp.h | 10 ++++++++++ include/uapi/linux/rtnetlink.h | 4 +++- net/ipv4/tcp_minisocks.c | 2 ++ net/ipv4/tcp_output.c | 6 ++++-- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 051e0f72281e..33c53e65bda4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,6 +370,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 +#define TCP_ECN_LOW 16 enum tcp_tw_status { TCP_TW_SUCCESS = 0, @@ -723,6 +724,15 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +static inline void tcp_set_ecn_low_from_dst(struct sock *sk, + const struct dst_entry *dst) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (dst_feature(dst, RTAX_FEATURE_ECN_LOW)) + tp->ecn_flags |= TCP_ECN_LOW; +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 51c13cf9c5ae..de8dcba26bec 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -506,9 +506,11 @@ enum { #define RTAX_FEATURE_SACK (1 << 1) #define RTAX_FEATURE_TIMESTAMP (1 << 2) #define RTAX_FEATURE_ALLFRAG (1 << 3) +#define RTAX_FEATURE_ECN_LOW (1 << 4) #define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | RTAX_FEATURE_SACK | \ - RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG) + RTAX_FEATURE_TIMESTAMP | RTAX_FEATURE_ALLFRAG \ + | RTAX_FEATURE_ECN_LOW) struct rta_session { __u8 proto; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 04fc328727e6..39e5298a8fe0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -440,6 +440,8 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); bool ca_got_dst = false; + tcp_set_ecn_low_from_dst(sk, dst); + if (ca_key != TCP_CA_UNSPEC) { const struct tcp_congestion_ops *ca; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a3022028d291..a5d117ed62bf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -325,10 +325,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 || tcp_ca_needs_ecn(sk) || bpf_needs_ecn; + const struct dst_entry *dst = __sk_dst_get(sk); if (!use_ecn) { - const struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } @@ -340,6 +339,9 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = TCP_ECN_OK; if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn) INET_ECN_xmit(sk); + + if (dst) + tcp_set_ecn_low_from_dst(sk, dst); } } From aa27c22a2ebe5696b5b42002337425e2a53b2f79 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 11 Jun 2019 12:54:22 -0400 Subject: [PATCH 16/28] net-tcp_bbr: v3: update TCP "bbr" congestion control module to BBRv3 BBR v3 is an enhacement to the BBR v1 algorithm. It's designed to aim for lower queues, lower loss, and better Reno/CUBIC coexistence than BBR v1. BBR v3 maintains the core of BBR v1: an explicit model of the network path that is two-dimensional, adapting to estimate the (a) maximum available bandwidth and (b) maximum safe volume of data a flow can keep in-flight in the network. It maintains the estimated BDP as a core guide for estimating an appropriate level of in-flight data. BBR v3 makes several key enhancements: o Its bandwidth-probing time scale is adapted, within bounds, to allow improved coexistence with Reno and CUBIC. The bandwidth-probing time scale is (a) extended dynamically based on estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by an interactive wall-clock time-scale to be more scalable and responsive than Reno and CUBIC. o Rather than being largely agnostic to loss and ECN marks, it explicitly uses loss and (DCTCP-style) ECN signals to maintain its model. o It aims for lower losses than v1 by adjusting its model to attempt to stay within loss rate and ECN mark rate bounds (loss_thresh and ecn_thresh, respectively). o It adapts to loss/ECN signals even when the application is running out of data ("application-limited"), in case the "application-limited" flow is also "network-limited" (the bw and/or inflight available to this flow is lower than previously estimated when the flow ran out of data). o It has a three-part model: the model explicit three tracks operating points, where an operating point is a tuple: (bandwidth, inflight). The three operating points are: o latest: the latest measurement from the current round trip o upper bound: robust, optimistic, long-term upper bound o lower bound: robust, conservative, short-term lower bound These are stored in the following state variables: o latest: bw_latest, inflight_latest o lo: bw_lo, inflight_lo o hi: bw_hi[2], inflight_hi To gain intuition about the meaning of the three operating points, it may help to consider the analogs in CUBIC, which has a somewhat analogous three-part model used by its probing state machine: BBR param CUBIC param ----------- ------------- latest ~ cwnd lo ~ ssthresh hi ~ last_max_cwnd The analogy is only a loose one, though, since the BBR operating points are calculated differently, and are 2-dimensional (bw,inflight) rather than CUBIC's one-dimensional notion of operating point (inflight). o It uses the three-part model to adapt the magnitude of its bandwidth to match the estimated space available in the buffer, rather than (as in BBR v1) assuming that it was always acceptable to place 0.25*BDP in the bottleneck buffer when probing (commodity datacenter switches commonly do not have that much buffer for WAN flows). When BBR v3 estimates it hit a buffer limit during probing, its bandwidth probing then starts gently in case little space is still available in the buffer, and the accelerates, slowly at first and then rapidly if it can grow inflight without seeing congestion signals. In such cases, probing is bounded by inflight_hi + inflight_probe, where inflight_probe grows as: [0, 1, 2, 4, 8, 16,...]. This allows BBR to keep losses low and bounded if a bottleneck remains congested, while rapidly/scalably utilizing free bandwidth when it becomes available. o It has a slightly revised state machine, to achieve the goals above. BBR_BW_PROBE_UP: pushes up inflight to probe for bw/vol BBR_BW_PROBE_DOWN: drain excess inflight from the queue BBR_BW_PROBE_CRUISE: use pipe, w/ headroom in queue/pipe BBR_BW_PROBE_REFILL: try refill the pipe again to 100%, leaving queue empty o The estimated BDP: BBR v3 continues to maintain an estimate of the path's two-way propagation delay, by tracking a windowed min_rtt, and coordinating (on an as-ndeeded basis) to try to expose the two-way propagation delay by draining the bottleneck queue. BBR v3 continues to use its min_rtt and (currently-applicable) bandwidth estimate to estimate the current bandwidth-delay product. The estimated BDP still provides one important guideline for bounding inflight data. However, because any min-filtered RTT and max-filtered bw inherently tend to both overestimate, the estimated BDP is often too high; in this case loss or ECN marks can ensue, in which case BBR v3 adjusts inflight_hi and inflight_lo to adapt its sending rate and inflight down to match the available capacity of the path. o Space: Note that ICSK_CA_PRIV_SIZE increased. This is because BBR v3 requires more space. Note that much of the space is due to support for per-socket parameterization and debugging in this release for research and debugging. With that state removed, the full "struct bbr" is 140 bytes, or 144 with padding. This is an increase of 40 bytes over the existing ca_priv space. o Code: BBR v3 reuses many pieces from BBR v1. But it omits the following significant pieces: o "packet conservation" (bbr_set_cwnd_to_recover_or_restore(), bbr_can_grow_inflight()) o long-term bandwidth estimator ("policer mode") The code layout tries to keep BBR v3 code near the bottom of the file, so that v1-applicable code in the top does not accidentally refer to v3 code. o Docs: See the following docs for more details and diagrams decsribing the BBR v3 algorithm: https://datatracker.ietf.org/meeting/104/materials/slides-104-iccrg-an-update-on-bbr-00 https://datatracker.ietf.org/meeting/102/materials/slides-102-iccrg-an-update-on-bbr-work-at-google-00 o Internal notes: For this upstream rebase, Neal started from: git show fed518041ac6:net/ipv4/tcp_bbr.c > net/ipv4/tcp_bbr.c then removed dev instrumentation (dynamic get/set for parameters) and code that was only used by BBRv1 Effort: net-tcp_bbr Origin-9xx-SHA1: 2c84098e60bed6d67dde23cd7538c51dee273102 Change-Id: I125cf26ba2a7a686f2fa5e87f4c2afceb65f7a05 --- include/net/inet_connection_sock.h | 4 +- include/net/tcp.h | 2 +- include/uapi/linux/inet_diag.h | 23 + net/ipv4/Kconfig | 21 +- net/ipv4/tcp_bbr.c | 2217 +++++++++++++++++++++------- 5 files changed, 1742 insertions(+), 525 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c2b15f7e5516..a400a84088d3 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -135,8 +135,8 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; - u64 icsk_ca_priv[104 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv) +#define ICSK_CA_PRIV_SIZE (144) + u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 33c53e65bda4..169949f59a18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2217,7 +2217,7 @@ struct tcp_plb_state { u8 consec_cong_rounds:5, /* consecutive congested rounds */ unused:3; u32 pause_until; /* jiffies32 when PLB can resume rerouting */ -}; +} __attribute__ ((__packed__)); static inline void tcp_plb_init(const struct sock *sk, struct tcp_plb_state *plb) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 50655de04c9b..82f8bd8f0d16 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -229,6 +229,29 @@ struct tcp_bbr_info { __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* BBR algorithm version */ + __u32 bbr_inflight_lo; /* lower short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + +/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ +enum tcp_bbr_phase { + BBR_PHASE_INVALID = 0, + BBR_PHASE_STARTUP = 1, + BBR_PHASE_DRAIN = 2, + BBR_PHASE_PROBE_RTT = 3, + BBR_PHASE_PROBE_BW_UP = 4, + BBR_PHASE_PROBE_BW_DOWN = 5, + BBR_PHASE_PROBE_BW_CRUISE = 6, + BBR_PHASE_PROBE_BW_REFILL = 7, }; union tcp_cc_info { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 2dfb12230f08..2e14db3bee70 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -668,15 +668,18 @@ config TCP_CONG_BBR default n help - BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to - maximize network utilization and minimize queues. It builds an explicit - model of the bottleneck delivery rate and path round-trip propagation - delay. It tolerates packet loss and delay unrelated to congestion. It - can operate over LAN, WAN, cellular, wifi, or cable modem links. It can - coexist with flows that use loss-based congestion control, and can - operate with shallow buffers, deep buffers, bufferbloat, policers, or - AQM schemes that do not provide a delay signal. It requires the fq - ("Fair Queue") pacing packet scheduler. + BBR (Bottleneck Bandwidth and RTT) TCP congestion control is a + model-based congestion control algorithm that aims to maximize + network utilization, keep queues and retransmit rates low, and to be + able to coexist with Reno/CUBIC in common scenarios. It builds an + explicit model of the network path. It tolerates a targeted degree + of random packet loss and delay. It can operate over LAN, WAN, + cellular, wifi, or cable modem links, and can use shallow-threshold + ECN signals. It can coexist to some degree with flows that use + loss-based congestion control, and can operate with shallow buffers, + deep buffers, bufferbloat, policers, or AQM schemes that do not + provide a delay signal. It requires pacing, using either TCP internal + pacing or the fq ("Fair Queue") pacing packet scheduler. choice prompt "Default TCP congestion control" diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index cd31cd8cdc69..4fec37e8f900 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1,18 +1,19 @@ -/* Bottleneck Bandwidth and RTT (BBR) congestion control +/* BBR (Bottleneck Bandwidth and RTT) congestion control * - * BBR congestion control computes the sending rate based on the delivery - * rate (throughput) estimated from ACKs. In a nutshell: + * BBR is a model-based congestion control algorithm that aims for low queues, + * low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model of the + * network path, it uses measurements of bandwidth and RTT, as well as (if they + * occur) packet loss and/or shallow-threshold ECN signals. Note that although + * it can use ECN or loss signals explicitly, it does not require either; it + * can bound its in-flight data based on its estimate of the BDP. * - * On each ACK, update our model of the network path: - * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) - * min_rtt = windowed_min(rtt, 10 seconds) - * pacing_rate = pacing_gain * bottleneck_bandwidth - * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) - * - * The core algorithm does not react directly to packet losses or delays, - * although BBR may adjust the size of next send per ACK when loss is - * observed, or adjust the sending rate if it estimates there is a - * traffic policer, in order to keep the drop rate reasonable. + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. * * Here is a state transition diagram for BBR: * @@ -65,6 +66,13 @@ #include #include +#include +#include "tcp_dctcp.h" + +#define BBR_VERSION 3 + +#define bbr_param(sk,name) (bbr_ ## name) + /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. @@ -85,36 +93,41 @@ enum bbr_mode { BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + /* BBR congestion control block */ struct bbr { u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ u32 min_rtt_stamp; /* timestamp of min_rtt_us */ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ - struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ - u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 probe_rtt_min_us; /* min RTT in probe_rtt_win_ms win */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ u64 cycle_mstamp; /* time of this cycle phase start */ - u32 mode:3, /* current bbr_mode in state machine */ + u32 mode:2, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ - packet_conservation:1, /* use packet conservation? */ round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:13, - lt_is_sampling:1, /* taking long-term ("LT") samples now? */ - lt_rtt_cnt:7, /* round trips in long-term interval */ - lt_use_bw:1; /* use lt_bw as our bw estimate? */ - u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ - u32 lt_last_delivered; /* LT intvl start: tp->delivered */ - u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ - u32 lt_last_lost; /* LT intvl start: tp->lost */ + init_cwnd:7, /* initial cwnd */ + unused_1:10; u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ full_bw_reached:1, /* reached full bw in Startup? */ full_bw_cnt:2, /* number of rounds without large bw gains */ - cycle_idx:3, /* current index in pacing_gain cycle array */ + cycle_idx:2, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? */ - unused_b:5; + unused_2:6; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ @@ -124,19 +137,67 @@ struct bbr { u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ extra_acked_win_idx:1, /* current index in extra_acked array */ - unused_c:6; + /* BBR v3 state: */ + full_bw_now:1, /* recently reached full bw plateau? */ + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? */ + ecn_in_cycle:1, /* ECN in this cycle? */ + unused_3:1; + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* max recent measured bw sample */ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ + rounds_since_probe:8, /* packet-timed rounds since probed bw */ + loss_round_start:1, /* loss_round_delivered round trip? */ + loss_in_round:1, /* loss marked in this round trip? */ + ecn_in_round:1, /* ECN marked in this round trip? */ + ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ + loss_events_in_round:4,/* losses in STARTUP round */ + initialized:1; /* has bbr_init() been called? */ + u32 alpha_last_delivered; /* tp->delivered at alpha update */ + u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ + + u8 unused_4; /* to preserve alignment */ + struct tcp_plb_state plb; }; -#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ +struct bbr_context { + u32 sample_bw; +}; -/* Window length of bw filter (in rounds): */ -static const int bbr_bw_rtts = CYCLE_LEN + 2; /* Window length of min_rtt filter (in sec): */ static const u32 bbr_min_rtt_win_sec = 10; /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ static const u32 bbr_probe_rtt_mode_ms = 200; -/* Skip TSO below the following bandwidth (bits/sec): */ -static const int bbr_min_tso_rate = 1200000; +/* Window length of probe_rtt_min_us filter (in ms), and consequently the + * typical interval between PROBE_RTT mode entries. The default is 5000ms. + * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static const u32 bbr_probe_rtt_win_ms = 5000; +/* Proportion of cwnd to estimated BDP in PROBE_RTT, in units of BBR_UNIT: */ +static const u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. We cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static const u32 bbr_tso_rtt_shift = 9; /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. * In order to help drive the network toward lower queues and low latency while @@ -146,13 +207,15 @@ static const int bbr_min_tso_rate = 1200000; */ static const int bbr_pacing_margin_percent = 1; -/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain +/* We use a startup_pacing_gain of 4*ln(2) because it's the smallest value * that will allow a smoothly increasing pacing rate that will double each RTT * and send the same number of packets per RTT that an un-paced, slow-starting * Reno or CUBIC flow would: */ -static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; -/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain +static const int bbr_startup_pacing_gain = BBR_UNIT * 277 / 100 + 1; +/* The gain for deriving startup cwnd: */ +static const int bbr_startup_cwnd_gain = BBR_UNIT * 2; +/* The pacing gain in BBR_DRAIN is calculated to typically drain * the queue created in BBR_STARTUP in a single round: */ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; @@ -160,13 +223,17 @@ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; static const int bbr_cwnd_gain = BBR_UNIT * 2; /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ static const int bbr_pacing_gain[] = { - BBR_UNIT * 5 / 4, /* probe for more available bw */ - BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ - BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ - BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ + BBR_UNIT * 5 / 4, /* UP: probe for more available bw */ + BBR_UNIT * 91 / 100, /* DOWN: drain queue and/or yield bw */ + BBR_UNIT, /* CRUISE: try to use pipe w/ some headroom */ + BBR_UNIT, /* REFILL: refill pipe to estimated 100% */ +}; +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ }; -/* Randomize the starting gain cycling phase over N phases: */ -static const u32 bbr_cycle_rand = 7; /* Try to keep at least this many packets in flight, if things go smoothly. For * smooth functioning, a sliding window protocol ACKing every other packet @@ -174,24 +241,12 @@ static const u32 bbr_cycle_rand = 7; */ static const u32 bbr_cwnd_min_target = 4; -/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* To estimate if BBR_STARTUP or BBR_BW_PROBE_UP has filled pipe... */ /* If bw has increased significantly (1.25x), there may be more bw available: */ static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ static const u32 bbr_full_bw_cnt = 3; -/* "long-term" ("LT") bandwidth estimator parameters... */ -/* The minimum number of rounds in an LT bw sampling interval: */ -static const u32 bbr_lt_intvl_min_rtts = 4; -/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ -static const u32 bbr_lt_loss_thresh = 50; -/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ -static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; -/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ -static const u32 bbr_lt_bw_diff = 4000 / 8; -/* If we estimate we're policed, use lt_bw for this many round trips: */ -static const u32 bbr_lt_bw_max_rtts = 48; - /* Gain factor for adding extra_acked to target cwnd: */ static const int bbr_extra_acked_gain = BBR_UNIT; /* Window length of extra_acked window. */ @@ -201,8 +256,121 @@ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; /* Time period for clamping cwnd increment due to ack aggregation */ static const u32 bbr_extra_acked_max_us = 100 * 1000; +/* Flags to control BBR ECN-related behavior... */ + +/* Ensure ACKs only ACK packets with consistent ECN CE status? */ +static const bool bbr_precise_ece_ack = true; + +/* Max RTT (in usec) at which to use sender-side ECN logic. + * Disabled when 0 (ECN allowed at any RTT). + */ +static const u32 bbr_ecn_max_rtt_us = 5000; + +/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. + * No loss response when 0. + */ +static const u32 bbr_beta = BBR_UNIT * 30 / 100; + +/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE (1/16 = 6.25%) */ +static const u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; + +/* The initial value for ecn_alpha; 1.0 allows a flow to respond quickly + * to congestion if the bottleneck is congested when the flow starts up. + */ +static const u32 bbr_ecn_alpha_init = BBR_UNIT; + +/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. + * No ECN based bounding when 0. + */ +static const u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ + +/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. + * Scaled by BBR_SCALE. Disabled when 0. + */ +static const u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ + +/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN + * clears then make the first round's increment to inflight_hi the following + * fraction of inflight_hi. + */ +static const u32 bbr_ecn_reprobe_gain = BBR_UNIT * 1 / 2; + +/* Estimate bw probing has gone too far if loss rate exceeds this level. */ +static const u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ + +/* Slow down for a packet loss recovered by TLP? */ +static const bool bbr_loss_probe_recovery = true; + +/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, + * and loss rate is higher than bbr_loss_thresh. + * Disabled if 0. + */ +static const u32 bbr_full_loss_cnt = 6; + +/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh + * meets this count. + */ +static const u32 bbr_full_ecn_cnt = 2; + +/* Fraction of unutilized headroom to try to leave in path upon high loss. */ +static const u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; + +/* How much do we increase cwnd_gain when probing for bandwidth in + * BBR_BW_PROBE_UP? This specifies the increment in units of + * BBR_UNIT/4. The default is 1, meaning 0.25. + * The min value is 0 (meaning 0.0); max is 3 (meaning 0.75). + */ +static const u32 bbr_bw_probe_cwnd_gain = 1; + +/* Max number of packet-timed rounds to wait before probing for bandwidth. If + * we want to tolerate 1% random loss per round, and not have this cut our + * inflight too much, we must probe for bw periodically on roughly this scale. + * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. + * We aim to be fair with Reno/CUBIC up to a BDP of at least: + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + */ +static const u32 bbr_bw_probe_max_rounds = 63; + +/* Max amount of randomness to inject in round counting for Reno-coexistence. + */ +static const u32 bbr_bw_probe_rand_rounds = 2; + +/* Use BBR-native probe time scale starting at this many usec. + * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: + * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs + */ +static const u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ + +/* Use BBR-native probes spread over this many usec: */ +static const u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ + +/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ +static const bool bbr_fast_path = true; + +/* Use fast ack mode? */ +static const bool bbr_fast_ack_mode = true; + +static u32 bbr_max_bw(const struct sock *sk); +static u32 bbr_bw(const struct sock *sk); +static void bbr_exit_probe_rtt(struct sock *sk); +static void bbr_reset_congestion_signals(struct sock *sk); +static void bbr_run_loss_probe_recovery(struct sock *sk); + static void bbr_check_probe_rtt_done(struct sock *sk); +/* This connection can use ECN if both endpoints have signaled ECN support in + * the handshake and the per-route settings indicated this is a + * shallow-threshold ECN environment, meaning both: + * (a) ECN CE marks indicate low-latency/shallow-threshold congestion, and + * (b) TCP endpoints provide precise ACKs that only ACK data segments + * with consistent ECN CE status + */ +static bool bbr_can_use_ecn(const struct sock *sk) +{ + return (tcp_sk(sk)->ecn_flags & TCP_ECN_OK) && + (tcp_sk(sk)->ecn_flags & TCP_ECN_LOW); +} + /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { @@ -214,17 +382,17 @@ static bool bbr_full_bw_reached(const struct sock *sk) /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ static u32 bbr_max_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return minmax_get(&bbr->bw); + return max(bbr->bw_hi[0], bbr->bw_hi[1]); } /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ static u32 bbr_bw(const struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); + const struct bbr *bbr = inet_csk_ca(sk); - return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); + return min(bbr_max_bw(sk), bbr->bw_lo); } /* Return maximum extra acked in past k-2k round trips, @@ -241,15 +409,23 @@ static u16 bbr_extra_acked(const struct sock *sk) * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. */ -static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) { unsigned int mss = tcp_sk(sk)->mss_cache; rate *= mss; rate *= gain; rate >>= BBR_SCALE; - rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); - return rate >> BW_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); } /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ @@ -257,12 +433,13 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) { u64 rate = bw; - rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); rate = min_t(u64, rate, sk->sk_max_pacing_rate); return rate; } -/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +/* Initialize pacing rate to: startup_pacing_gain * init_cwnd / RTT. */ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -278,7 +455,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) } bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); - sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); + sk->sk_pacing_rate = + bbr_bw_to_pacing_rate(sk, bw, bbr_param(sk, startup_pacing_gain)); } /* Pace using current bw estimate and a gain factor. */ @@ -294,31 +472,38 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* override sysctl_tcp_min_tso_segs */ -__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) -{ - return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; -} - -/* Return the number of segments BBR would like in a TSO/GSO skb, given - * a particular max gso size as a constraint. +/* Return the number of segments BBR would like in a TSO/GSO skb, given a + * particular max gso size as a constraint. TODO: make this simpler and more + * consistent by switching bbr to just call tcp_tso_autosize(). */ static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, u32 gso_max_size) { - u32 segs; + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; u64 bytes; /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. + * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr_param(sk, tso_rtt_shift)) { + r = bbr->min_rtt_us >> bbr_param(sk, tso_rtt_shift); + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_LEGACY_MAX_SIZE >> r; + } + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + segs = max_t(u32, bytes / mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return segs; } /* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ -static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +__bpf_kfunc static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) { return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); } @@ -328,7 +513,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_LEGACY_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -348,7 +533,9 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - if (event == CA_EVENT_TX_START && tp->app_limited) { + if (event == CA_EVENT_TX_START) { + if (!tp->app_limited) + return; bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; @@ -359,6 +546,16 @@ __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); else if (bbr->mode == BBR_PROBE_RTT) bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_can_use_ecn(sk) && + bbr_param(sk, precise_ece_ack)) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + } else if (event == CA_EVENT_TLP_RECOVERY && + bbr_param(sk, loss_probe_recovery)) { + bbr_run_loss_probe_recovery(sk); } } @@ -381,10 +578,10 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * default. This should only happen when the connection is not using TCP * timestamps and has retransmitted all of the SYN/SYNACK/data packets * ACKed so far. In this case, an RTO can cut cwnd to 1, in which - * case we need to slow-start up toward something safe: TCP_INIT_CWND. + * case we need to slow-start up toward something safe: initial cwnd. */ if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ - return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ w = (u64)bw * bbr->min_rtt_us; @@ -401,23 +598,23 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) * - one skb in sending host Qdisc, * - one skb in sending host TSO/GSO engine * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * Don't worry, at low rates this won't bloat cwnd because + * in such cases tso_segs_goal is small. The minimum cwnd is 4 packets, * which allows 2 outstanding 2-packet sequences, to try to keep pipe * full even with ACK-every-other-packet delayed ACKs. */ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) { struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; - /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr_tso_segs_goal(sk); - - /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ - cwnd = (cwnd + 1) & ~1U; + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ - if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) cwnd += 2; return cwnd; @@ -472,10 +669,10 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) { u32 max_aggr_cwnd, aggr_cwnd = 0; - if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { + if (bbr_param(sk, extra_acked_gain)) { max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) / BW_UNIT; - aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) + aggr_cwnd = (bbr_param(sk, extra_acked_gain) * bbr_extra_acked(sk)) >> BBR_SCALE; aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); } @@ -483,66 +680,27 @@ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) return aggr_cwnd; } -/* An optimization in BBR to reduce losses: On the first round of recovery, we - * follow the packet conservation principle: send P packets per P packets acked. - * After that, we slow-start and send at most 2*P packets per P packets acked. - * After recovery finishes, or upon undo, we restore the cwnd we had when - * recovery started (capped by the target cwnd based on estimated BDP). - * - * TODO(ycheng/ncardwell): implement a rate-based approach. - */ -static bool bbr_set_cwnd_to_recover_or_restore( - struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; - u32 cwnd = tcp_snd_cwnd(tp); - - /* An ACK for P pkts should release at most 2*P packets. We do this - * in two steps. First, here we deduct the number of lost packets. - * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. - */ - if (rs->losses > 0) - cwnd = max_t(s32, cwnd - rs->losses, 1); - - if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { - /* Starting 1st round of Recovery, so do packet conservation. */ - bbr->packet_conservation = 1; - bbr->next_rtt_delivered = tp->delivered; /* start round now */ - /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ - cwnd = tcp_packets_in_flight(tp) + acked; - } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { - /* Exiting loss recovery; restore cwnd saved before recovery. */ - cwnd = max(cwnd, bbr->prior_cwnd); - bbr->packet_conservation = 0; - } - bbr->prev_ca_state = state; - - if (bbr->packet_conservation) { - *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); - return true; /* yes, using packet conservation */ - } - *new_cwnd = cwnd; - return false; + return max_t(u32, bbr_param(sk, cwnd_min_target), + bbr_bdp(sk, bbr_bw(sk), bbr_param(sk, probe_rtt_cwnd_gain))); } /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss * has drawn us down below target), or snap down to target if we're above it. */ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, - u32 acked, u32 bw, int gain) + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; + u32 target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ - if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) - goto done; - target_cwnd = bbr_bdp(sk, bw, gain); /* Increment the cwnd to account for excess ACKed data that seems @@ -551,74 +709,26 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, target_cwnd += bbr_ack_aggregation_cwnd(sk); target_cwnd = bbr_quantization_budget(sk, target_cwnd); - /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ - cwnd = min(cwnd + acked, target_cwnd); - else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) - cwnd = cwnd + acked; - cwnd = max(cwnd, bbr_cwnd_min_target); + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + cwnd = max_t(u32, cwnd, bbr_param(sk, cwnd_min_target)); done: - tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ + tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ - tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); -} - -/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ -static bool bbr_is_next_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - bool is_full_length = - tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > - bbr->min_rtt_us; - u32 inflight, bw; - - /* The pacing_gain of 1.0 paces at the estimated bw to try to fully - * use the pipe without increasing the queue. - */ - if (bbr->pacing_gain == BBR_UNIT) - return is_full_length; /* just use wall clock time */ - - inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); - bw = bbr_max_bw(sk); - - /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at - * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is - * small (e.g. on a LAN). We do not persist if packets are lost, since - * a path with small buffers may not hold that much. - */ - if (bbr->pacing_gain > BBR_UNIT) - return is_full_length && - (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); - - /* A pacing_gain < 1.0 tries to drain extra queue we added if bw - * probing didn't find more bw. If inflight falls to match BDP then we - * estimate queue is drained; persisting would underutilize the pipe. - */ - return is_full_length || - inflight <= bbr_inflight(sk, bw, BBR_UNIT); -} - -static void bbr_advance_cycle_phase(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); - bbr->cycle_mstamp = tp->delivered_mstamp; -} - -/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ -static void bbr_update_cycle_phase(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) - bbr_advance_cycle_phase(sk); + tcp_snd_cwnd_set(tp, min_t(u32, tcp_snd_cwnd(tp), + bbr_probe_rtt_cwnd(sk))); } static void bbr_reset_startup_mode(struct sock *sk) @@ -628,191 +738,49 @@ static void bbr_reset_startup_mode(struct sock *sk) bbr->mode = BBR_STARTUP; } -static void bbr_reset_probe_bw_mode(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->mode = BBR_PROBE_BW; - bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); - bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ -} - -static void bbr_reset_mode(struct sock *sk) -{ - if (!bbr_full_bw_reached(sk)) - bbr_reset_startup_mode(sk); - else - bbr_reset_probe_bw_mode(sk); -} - -/* Start a new long-term sampling interval. */ -static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); - bbr->lt_last_delivered = tp->delivered; - bbr->lt_last_lost = tp->lost; - bbr->lt_rtt_cnt = 0; -} - -/* Completely reset long-term bandwidth sampling. */ -static void bbr_reset_lt_bw_sampling(struct sock *sk) -{ - struct bbr *bbr = inet_csk_ca(sk); - - bbr->lt_bw = 0; - bbr->lt_use_bw = 0; - bbr->lt_is_sampling = false; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Long-term bw sampling interval is done. Estimate whether we're policed. */ -static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 diff; - - if (bbr->lt_bw) { /* do we have bw from a previous interval? */ - /* Is new bw close to the lt_bw from the previous interval? */ - diff = abs(bw - bbr->lt_bw); - if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || - (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= - bbr_lt_bw_diff)) { - /* All criteria are met; estimate we're policed. */ - bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ - bbr->lt_use_bw = 1; - bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ - bbr->lt_rtt_cnt = 0; - return; - } - } - bbr->lt_bw = bw; - bbr_reset_lt_bw_sampling_interval(sk); -} - -/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of - * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and - * explicitly models their policed rate, to reduce unnecessary losses. We - * estimate that we're policed if we see 2 consecutive sampling intervals with - * consistent throughput and high packet loss. If we think we're being policed, - * set lt_bw to the "long-term" average delivery rate from those 2 intervals. +/* See if we have reached next round trip. Upon start of the new round, + * returns packets delivered since previous round start plus this ACK. */ -static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 lost, delivered; - u64 bw; - u32 t; - - if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ - if (bbr->mode == BBR_PROBE_BW && bbr->round_start && - ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { - bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ - bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ - } - return; - } - - /* Wait for the first loss before sampling, to let the policer exhaust - * its tokens and estimate the steady-state rate allowed by the policer. - * Starting samples earlier includes bursts that over-estimate the bw. - */ - if (!bbr->lt_is_sampling) { - if (!rs->losses) - return; - bbr_reset_lt_bw_sampling_interval(sk); - bbr->lt_is_sampling = true; - } - - /* To avoid underestimates, reset sampling if we run out of data. */ - if (rs->is_app_limited) { - bbr_reset_lt_bw_sampling(sk); - return; - } - - if (bbr->round_start) - bbr->lt_rtt_cnt++; /* count round trips in this interval */ - if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) - return; /* sampling interval needs to be longer */ - if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { - bbr_reset_lt_bw_sampling(sk); /* interval is too long */ - return; - } - - /* End sampling interval when a packet is lost, so we estimate the - * policer tokens were exhausted. Stopping the sampling before the - * tokens are exhausted under-estimates the policed rate. - */ - if (!rs->losses) - return; - - /* Calculate packets lost and delivered in sampling interval. */ - lost = tp->lost - bbr->lt_last_lost; - delivered = tp->delivered - bbr->lt_last_delivered; - /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ - if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) - return; - - /* Find average delivery rate in this sampling interval. */ - t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; - if ((s32)t < 1) - return; /* interval is less than one ms, so wait */ - /* Check if can multiply without overflow */ - if (t >= ~0U / USEC_PER_MSEC) { - bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ - return; - } - t *= USEC_PER_MSEC; - bw = (u64)delivered * BW_UNIT; - do_div(bw, t); - bbr_lt_bw_interval_done(sk, bw); -} - -/* Estimate the bandwidth based on how fast packets are delivered */ -static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +static u32 bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; + u32 round_delivered = 0; bbr->round_start = 0; - if (rs->delivered < 0 || rs->interval_us <= 0) - return; /* Not a valid observation */ /* See if we've reached the next RTT */ - if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + round_delivered = tp->delivered - bbr->next_rtt_delivered; bbr->next_rtt_delivered = tp->delivered; - bbr->rtt_cnt++; bbr->round_start = 1; - bbr->packet_conservation = 0; } + return round_delivered; +} - bbr_lt_bw_sampling(sk, rs); +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + u64 bw = 0; /* Divide delivered by the interval to find a (lower bound) bottleneck * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. */ - bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); - - /* If this sample is application-limited, it is likely to have a very - * low delivered count that represents application behavior rather than - * the available network rate. Such a sample could drag down estimated - * bw, causing needless slow-down. Thus, to continue to send at the - * last measured network rate, we filter out app-limited samples unless - * they describe the path bw at least as well as our bw model. - * - * So the goal during app-limited phase is to proceed with the best - * network rate no matter how long. We automatically leave this - * phase when app writes faster than the network can deliver :) - */ - if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { - /* Incorporate new sample into our max bw filter. */ - minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); } + + ctx->sample_bw = bw; } /* Estimates the windowed max degree of ack aggregation. @@ -826,7 +794,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). * Max filter is an approximate sliding window of 5-10 (packet timed) round - * trips. + * trips for non-startup phase, and 1-2 round trips for startup. */ static void bbr_update_ack_aggregation(struct sock *sk, const struct rate_sample *rs) @@ -834,15 +802,19 @@ static void bbr_update_ack_aggregation(struct sock *sk, u32 epoch_us, expected_acked, extra_acked; struct bbr *bbr = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr_param(sk, extra_acked_win_rtts); - if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || + if (!bbr_param(sk, extra_acked_gain) || rs->acked_sacked <= 0 || rs->delivered < 0 || rs->interval_us <= 0) return; if (bbr->round_start) { bbr->extra_acked_win_rtts = min(0x1F, bbr->extra_acked_win_rtts + 1); - if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { + if (!bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? 0 : 1; @@ -876,49 +848,6 @@ static void bbr_update_ack_aggregation(struct sock *sk, bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } -/* Estimate when the pipe is full, using the change in delivery rate: BBR - * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by - * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited - * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the - * higher rwin, 3: we get higher delivery rate samples. Or transient - * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar - * design goal, but uses delay and inter-ACK spacing instead of bandwidth. - */ -static void bbr_check_full_bw_reached(struct sock *sk, - const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - u32 bw_thresh; - - if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) - return; - - bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; - if (bbr_max_bw(sk) >= bw_thresh) { - bbr->full_bw = bbr_max_bw(sk); - bbr->full_bw_cnt = 0; - return; - } - ++bbr->full_bw_cnt; - bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; -} - -/* If pipe is probably full, drain the queue and then enter steady-state. */ -static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) -{ - struct bbr *bbr = inet_csk_ca(sk); - - if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { - bbr->mode = BBR_DRAIN; /* drain queue we created */ - tcp_sk(sk)->snd_ssthresh = - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); - } /* fall through to check if in-flight is already small: */ - if (bbr->mode == BBR_DRAIN && - bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= - bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) - bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ -} - static void bbr_check_probe_rtt_done(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -928,9 +857,9 @@ static void bbr_check_probe_rtt_done(struct sock *sk) after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) return; - bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); - bbr_reset_mode(sk); + bbr_exit_probe_rtt(sk); } /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and @@ -956,23 +885,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bool filter_expired; + bool probe_rtt_expired, min_rtt_expired; + u32 expire; - /* Track min RTT seen in the min_rtt_win_sec filter window: */ - filter_expired = after(tcp_jiffies32, - bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr_param(sk, probe_rtt_win_ms)); + probe_rtt_expired = after(tcp_jiffies32, expire); if (rs->rtt_us >= 0 && - (rs->rtt_us < bbr->min_rtt_us || - (filter_expired && !rs->is_ack_delayed))) { - bbr->min_rtt_us = rs->rtt_us; - bbr->min_rtt_stamp = tcp_jiffies32; + (rs->rtt_us < bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr_param(sk, min_rtt_win_sec) * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; } - if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + if (bbr_param(sk, probe_rtt_mode_ms) > 0 && probe_rtt_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; } if (bbr->mode == BBR_PROBE_RTT) { @@ -981,9 +922,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; /* Maintain min packets in flight for max(200 ms, 1 round). */ if (!bbr->probe_rtt_done_stamp && - tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { bbr->probe_rtt_done_stamp = tcp_jiffies32 + - msecs_to_jiffies(bbr_probe_rtt_mode_ms); + msecs_to_jiffies(bbr_param(sk, probe_rtt_mode_ms)); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { @@ -1004,18 +945,20 @@ static void bbr_update_gains(struct sock *sk) switch (bbr->mode) { case BBR_STARTUP: - bbr->pacing_gain = bbr_high_gain; - bbr->cwnd_gain = bbr_high_gain; + bbr->pacing_gain = bbr_param(sk, startup_pacing_gain); + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); break; case BBR_DRAIN: - bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ - bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ + bbr->pacing_gain = bbr_param(sk, drain_gain); /* slow, to drain */ + bbr->cwnd_gain = bbr_param(sk, startup_cwnd_gain); /* keep cwnd */ break; case BBR_PROBE_BW: - bbr->pacing_gain = (bbr->lt_use_bw ? - BBR_UNIT : - bbr_pacing_gain[bbr->cycle_idx]); - bbr->cwnd_gain = bbr_cwnd_gain; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr_param(sk, cwnd_gain); + if (bbr_param(sk, bw_probe_cwnd_gain) && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr->cwnd_gain += + BBR_UNIT * bbr_param(sk, bw_probe_cwnd_gain) / 4; break; case BBR_PROBE_RTT: bbr->pacing_gain = BBR_UNIT; @@ -1027,140 +970,1380 @@ static void bbr_update_gains(struct sock *sk) } } -static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) { - bbr_update_bw(sk, rs); - bbr_update_ack_aggregation(sk, rs); - bbr_update_cycle_phase(sk, rs); - bbr_check_full_bw_reached(sk, rs); - bbr_check_drain(sk, rs); - bbr_update_min_rtt(sk, rs); - bbr_update_gains(sk); + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; } -__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs) +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr_take_max_bw_sample(struct sock *sk, u32 bw) { struct bbr *bbr = inet_csk_ca(sk); - u32 bw; - - bbr_update_model(sk, rs); - bw = bbr_bw(sk); - bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); } -__bpf_kfunc static void bbr_init(struct sock *sk) +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr_advance_max_bw_filter(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - bbr->prior_cwnd = 0; - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = tp->delivered; - bbr->prev_ca_state = TCP_CA_Open; - bbr->packet_conservation = 0; - - bbr->probe_rtt_done_stamp = 0; - bbr->probe_rtt_round_done = 0; - bbr->min_rtt_us = tcp_min_rtt(tp); - bbr->min_rtt_stamp = tcp_jiffies32; - - minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} - bbr->has_seen_rtt = 0; - bbr_init_pacing_rate_from_rtt(sk); +/* Reset the estimator for reaching full bandwidth based on bw plateau. */ +static void bbr_reset_full_bw(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); - bbr->round_start = 0; - bbr->idle_restart = 0; - bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; - bbr->cycle_mstamp = 0; - bbr->cycle_idx = 0; - bbr_reset_lt_bw_sampling(sk); - bbr_reset_startup_mode(sk); - - bbr->ack_epoch_mstamp = tp->tcp_mstamp; - bbr->ack_epoch_acked = 0; - bbr->extra_acked_win_rtts = 0; - bbr->extra_acked_win_idx = 0; - bbr->extra_acked[0] = 0; - bbr->extra_acked[1] = 0; - - cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + bbr->full_bw_now = 0; } -__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr_target_inflight(struct sock *sk) { - /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ - return 3; + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); + + return min(bdp, tcp_sk(sk)->snd_cwnd); } -/* In theory BBR does not need to undo the cwnd since it does not - * always reduce cwnd on losses (see bbr_main()). Keep it for now. - */ -__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) +static bool bbr_is_probing_bandwidth(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); - bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ - bbr->full_bw_cnt = 0; - bbr_reset_lt_bw_sampling(sk); - return tcp_snd_cwnd(tcp_sk(sk)); + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); } -/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; /* estimated BDP in packets, with quantization budget */ + + bbr->full_bw_reached = 1; + + bdp = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr->inflight_hi = max(bdp, bbr->inflight_latest); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ +static void bbr_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || + !bbr_param(sk, full_ecn_cnt) || !bbr_param(sk, ecn_thresh)) + return; + + if (ce_ratio >= bbr_param(sk, ecn_thresh)) + bbr->startup_ecn_rounds++; + else + bbr->startup_ecn_rounds = 0; + + if (bbr->startup_ecn_rounds >= bbr_param(sk, full_ecn_cnt)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } +} + +/* Updates ecn_alpha and returns ce_ratio. -1 if not available. */ +static int bbr_update_ecn_alpha(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + struct bbr *bbr = inet_csk_ca(sk); + s32 delivered, delivered_ce; + u64 alpha, ce_ratio; + u32 gain; + bool want_ecn_alpha; + + /* See if we should use ECN sender logic for this connection. */ + if (!bbr->ecn_eligible && bbr_can_use_ecn(sk) && + bbr_param(sk, ecn_factor) && + (bbr->min_rtt_us <= bbr_ecn_max_rtt_us || + !bbr_ecn_max_rtt_us)) + bbr->ecn_eligible = 1; + + /* Skip updating alpha only if not ECN-eligible and PLB is disabled. */ + want_ecn_alpha = (bbr->ecn_eligible || + (bbr_can_use_ecn(sk) && + READ_ONCE(net->ipv4.sysctl_tcp_plb_enabled))); + if (!want_ecn_alpha) + return -1; + + delivered = tp->delivered - bbr->alpha_last_delivered; + delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; + + if (delivered == 0 || /* avoid divide by zero */ + WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ + return -1; + + BUILD_BUG_ON(BBR_SCALE != TCP_PLB_SCALE); + ce_ratio = (u64)delivered_ce << BBR_SCALE; + do_div(ce_ratio, delivered); + + gain = bbr_param(sk, ecn_alpha_gain); + alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; + alpha += (gain * ce_ratio) >> BBR_SCALE; + bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); + + bbr->alpha_last_delivered = tp->delivered; + bbr->alpha_last_delivered_ce = tp->delivered_ce; + + bbr_check_ecn_too_high_in_startup(sk, ce_ratio); + return (int)ce_ratio; +} + +/* Protective Load Balancing (PLB). PLB rehashes outgoing data (to a new IPv6 + * flow label) if it encounters sustained congestion in the form of ECN marks. + */ +static void bbr_plb(struct sock *sk, const struct rate_sample *rs, int ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->round_start && ce_ratio >= 0) + tcp_plb_update_state(sk, &bbr->plb, ce_ratio); + + tcp_plb_check_rehash(sk, &bbr->plb); +} + +/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ +static void bbr_raise_inflight_hi_slope(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 growth_this_round, cnt; + + /* Calculate "slope": packets S/Acked per inflight_hi increment. */ + growth_this_round = 1 << bbr->bw_probe_up_rounds; + bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); + cnt = tcp_snd_cwnd(tp) / growth_this_round; + cnt = max(cnt, 1U); + bbr->bw_probe_up_cnt = cnt; +} + +/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ +static void bbr_probe_inflight_hi_upward(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 delta; + + if (!tp->is_cwnd_limited || tcp_snd_cwnd(tp) < bbr->inflight_hi) + return; /* not fully using inflight_hi, so don't grow it */ + + /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ + bbr->bw_probe_up_acks += rs->acked_sacked; + if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { + delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; + bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; + bbr->inflight_hi += delta; + bbr->try_fast_path = 0; /* Need to update cwnd */ + } + + if (bbr->round_start) + bbr_raise_inflight_hi_slope(sk); +} + +/* Does loss/ECN rate for this sample say inflight is "too high"? + * This is used by both the bbr_check_loss_too_high_in_startup() function, + * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which + * uses it to notice when loss/ECN rates suggest inflight is too high. + */ +static bool bbr_is_inflight_too_high(const struct sock *sk, + const struct rate_sample *rs) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh, ecn_thresh; + + if (rs->lost > 0 && rs->tx_in_flight) { + loss_thresh = (u64)rs->tx_in_flight * bbr_param(sk, loss_thresh) >> + BBR_SCALE; + if (rs->lost > loss_thresh) { + return true; + } + } + + if (rs->delivered_ce > 0 && rs->delivered > 0 && + bbr->ecn_eligible && bbr_param(sk, ecn_thresh)) { + ecn_thresh = (u64)rs->delivered * bbr_param(sk, ecn_thresh) >> + BBR_SCALE; + if (rs->delivered_ce > ecn_thresh) { + return true; + } + } + + return false; +} + +/* Calculate the tx_in_flight level that corresponded to excessive loss. + * We find "lost_prefix" segs of the skb where loss rate went too high, + * by solving for "lost_prefix" in the following equation: + * lost / inflight >= loss_thresh + * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh + * Then we take that equation, convert it to fixed point, and + * round up to the nearest packet. + */ +static u32 bbr_inflight_hi_from_lost_skb(const struct sock *sk, + const struct rate_sample *rs, + const struct sk_buff *skb) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 loss_thresh = bbr_param(sk, loss_thresh); + u32 pcount, divisor, inflight_hi; + s32 inflight_prev, lost_prev; + u64 loss_budget, lost_prefix; + + pcount = tcp_skb_pcount(skb); + + /* How much data was in flight before this skb? */ + inflight_prev = rs->tx_in_flight - pcount; + if (inflight_prev < 0) { + WARN_ONCE(tcp_skb_tx_in_flight_is_suspicious( + pcount, + TCP_SKB_CB(skb)->sacked, + rs->tx_in_flight), + "tx_in_flight: %u pcount: %u reneg: %u", + rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg); + return ~0U; + } + + /* How much inflight data was marked lost before this skb? */ + lost_prev = rs->lost - pcount; + if (WARN_ONCE(lost_prev < 0, + "cwnd: %u ca: %d out: %u lost: %u pif: %u " + "tx_in_flight: %u tx.lost: %u tp->lost: %u rs->lost: %d " + "lost_prev: %d pcount: %d seq: %u end_seq: %u reneg: %u", + tcp_snd_cwnd(tp), inet_csk(sk)->icsk_ca_state, + tp->packets_out, tp->lost_out, tcp_packets_in_flight(tp), + rs->tx_in_flight, TCP_SKB_CB(skb)->tx.lost, tp->lost, + rs->lost, lost_prev, pcount, + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + tp->is_sack_reneg)) + return ~0U; + + /* At what prefix of this lost skb did losss rate exceed loss_thresh? */ + loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; + loss_budget >>= BBR_SCALE; + if (lost_prev >= loss_budget) { + lost_prefix = 0; /* previous losses crossed loss_thresh */ + } else { + lost_prefix = loss_budget - lost_prev; + lost_prefix <<= BBR_SCALE; + divisor = BBR_UNIT - loss_thresh; + if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ + return ~0U; + do_div(lost_prefix, divisor); + } + + inflight_hi = inflight_prev + lost_prefix; + return inflight_hi; +} + +/* If loss/ECN rates during probing indicated we may have overfilled a + * buffer, return an operating point that tries to leave unutilized headroom in + * the path for other flows, for fairness convergence and lower RTTs and loss. + */ +static u32 bbr_inflight_with_headroom(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 headroom, headroom_fraction; + + if (bbr->inflight_hi == ~0U) + return ~0U; + + headroom_fraction = bbr_param(sk, inflight_headroom); + headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; + headroom = max(headroom, 1U); + return max_t(s32, bbr->inflight_hi - headroom, + bbr_param(sk, cwnd_min_target)); +} + +/* Bound cwnd to a sensible level, based on our current probing state + * machine phase and model of a good inflight level (inflight_lo, inflight_hi). + */ +static void bbr_bound_cwnd_for_inflight_model(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cap; + + /* tcp_rcv_synsent_state_process() currently calls tcp_ack() + * and thus cong_control() without first initializing us(!). + */ + if (!bbr->initialized) + return; + + cap = ~0U; + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { + /* Probe to see if more packets fit in the path. */ + cap = bbr->inflight_hi; + } else { + if (bbr->mode == BBR_PROBE_RTT || + (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) + cap = bbr_inflight_with_headroom(sk); + } + /* Adapt to any loss/ECN since our last bw probe. */ + cap = min(cap, bbr->inflight_lo); + + cap = max_t(u32, cap, bbr_param(sk, cwnd_min_target)); + tcp_snd_cwnd_set(tp, min(cap, tcp_snd_cwnd(tp))); +} + +/* How should we multiplicatively cut bw or inflight limits based on ECN? */ +u32 bbr_ecn_cut(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return BBR_UNIT - + ((bbr->ecn_alpha * bbr_param(sk, ecn_factor)) >> BBR_SCALE); +} + +/* Init lower bounds if have not inited yet. */ +static void bbr_init_lower_bounds(struct sock *sk, bool init_bw) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (init_bw && bbr->bw_lo == ~0U) + bbr->bw_lo = bbr_max_bw(sk); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tcp_snd_cwnd(tp); +} + +/* Reduce bw and inflight to (1 - beta). */ +static void bbr_loss_lower_bounds(struct sock *sk, u32 *bw, u32 *inflight) +{ + struct bbr* bbr = inet_csk_ca(sk); + u32 loss_cut = BBR_UNIT - bbr_param(sk, beta); + + *bw = max_t(u32, bbr->bw_latest, + (u64)bbr->bw_lo * loss_cut >> BBR_SCALE); + *inflight = max_t(u32, bbr->inflight_latest, + (u64)bbr->inflight_lo * loss_cut >> BBR_SCALE); +} + +/* Reduce inflight to (1 - alpha*ecn_factor). */ +static void bbr_ecn_lower_bounds(struct sock *sk, u32 *inflight) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_cut = bbr_ecn_cut(sk); + + *inflight = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. + * + * However, we do not cut our short-term estimates lower than the current rate + * and volume of delivered data from this round trip, since from the current + * delivery process we can estimate the measured capacity available now. + * + * Anything faster than that approach would knowingly risk high loss, which can + * cause low bw for Reno/CUBIC and high loss recovery latency for + * request/response flows using any congestion control. + */ +static void bbr_adapt_lower_bounds(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_inflight_lo = ~0U; + + /* We only use lower-bound estimates when not probing bw. + * When probing we need to push inflight higher to probe bw. + */ + if (bbr_is_probing_bandwidth(sk)) + return; + + /* ECN response. */ + if (bbr->ecn_in_round && bbr_param(sk, ecn_factor)) { + bbr_init_lower_bounds(sk, false); + bbr_ecn_lower_bounds(sk, &ecn_inflight_lo); + } + + /* Loss response. */ + if (bbr->loss_in_round) { + bbr_init_lower_bounds(sk, true); + bbr_loss_lower_bounds(sk, &bbr->bw_lo, &bbr->inflight_lo); + } + + /* Adjust to the lower of the levels implied by loss/ECN. */ + bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); + bbr->bw_lo = max(1U, bbr->bw_lo); +} + +/* Reset any short-term lower-bound adaptation to congestion, so that we can + * push our inflight up. + */ +static void bbr_reset_lower_bounds(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_lo = ~0U; + bbr->inflight_lo = ~0U; +} + +/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state + * machine phase where we adapt our lower bound based on congestion signals. + */ +static void bbr_reset_congestion_signals(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->loss_in_cycle = 0; + bbr->ecn_in_cycle = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +static void bbr_exit_loss_recovery(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ +} + +/* Update rate and volume of delivered data from latest round trip. */ +static void bbr_update_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_round_start = 0; + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + + bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); + bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); + + if (!before(rs->prior_delivered, bbr->loss_round_delivered)) { + bbr->loss_round_delivered = tp->delivered; + bbr->loss_round_start = 1; /* mark start of new round trip */ + } +} + +/* Once per round, reset filter for latest rate and volume of delivered data. */ +static void bbr_advance_latest_delivery_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* If ACK matches a TLP retransmit, persist the filter. If we detect + * that a TLP retransmit plugged a tail loss, we'll want to remember + * how much data the path delivered before the tail loss. + */ + if (bbr->loss_round_start && !rs->is_acking_tlp_retrans_seq) { + bbr->bw_latest = ctx->sample_bw; + bbr->inflight_latest = rs->delivered; + } +} + +/* Update (most of) our congestion signals: track the recent rate and volume of + * delivered data, presence of loss, and EWMA degree of ECN marking. + */ +static void bbr_update_congestion_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + bw = ctx->sample_bw; + + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) + bbr_take_max_bw_sample(sk, bw); + + bbr->loss_in_round |= (rs->losses > 0); + + if (!bbr->loss_round_start) + return; /* skip the per-round-trip updates */ + /* Now do per-round-trip updates. */ + bbr_adapt_lower_bounds(sk, rs); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; +} + +/* Bandwidth probing can cause loss. To help coexistence with loss-based + * congestion control we spread out our probing in a Reno-conscious way. Due to + * the shape of the Reno sawtooth, the time required between loss epochs for an + * idealized Reno flow is a number of round trips that is the BDP of that + * flow. We count packet-timed round trips directly, since measured RTT can + * vary widely, and Reno is driven by packet-timed round trips. + */ +static bool bbr_is_reno_coexistence_probe_time(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 rounds; + + /* Random loss can shave some small percentage off of our inflight + * in each round. To survive this, flows need robust periodic probes. + */ + rounds = min_t(u32, bbr_param(sk, bw_probe_max_rounds), bbr_target_inflight(sk)); + return bbr->rounds_since_probe >= rounds; +} + +/* How long do we want to wait before probing for bandwidth (and risking + * loss)? We randomize the wait, for better mixing and fairness convergence. + * + * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. + * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, + * (eg 4K video to a broadband user): + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + * + * We bound the BBR-native inter-bw-probe wall clock time to be: + * (a) higher than 2 sec: to try to avoid causing loss for a long enough time + * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must + * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs + * (b) lower than 3 sec: to ensure flows can start probing in a reasonable + * amount of time to discover unutilized bw on human-scale interactive + * time-scales (e.g. perhaps traffic from a web page download that we + * were competing with is now complete). + */ +static void bbr_pick_probe_wait(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Decide the random round-trip bound for wait until probe: */ + bbr->rounds_since_probe = + get_random_u32_below(bbr_param(sk, bw_probe_rand_rounds)); + /* Decide the random wall clock bound for wait until probe: */ + bbr->probe_wait_us = bbr_param(sk, bw_probe_base_us) + + get_random_u32_below(bbr_param(sk, bw_probe_rand_us)); +} + +static void bbr_set_cycle_idx(struct sock *sk, int cycle_idx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = cycle_idx; + /* New phase, so need to update cwnd and pacing rate. */ + bbr->try_fast_path = 0; +} + +/* Send at estimated bw to fill the pipe, but not queue. We need this phase + * before PROBE_UP, because as soon as we send faster than the available bw + * we will start building a queue, and if the buffer is shallow we can cause + * loss. If we do not fill the pipe before we cause this loss, our bw_hi and + * inflight_hi estimates will underestimate. + */ +static void bbr_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + bbr->bw_probe_up_rounds = bw_probe_up_rounds; + bbr->bw_probe_up_acks = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_REFILLING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); +} + +/* Now probe max deliverable data rate and volume. */ +static void bbr_start_bw_probe_up(struct sock *sk, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->ack_phase = BBR_ACKS_PROBE_STARTING; + bbr->next_rtt_delivered = tp->delivered; + bbr->cycle_mstamp = tp->tcp_mstamp; + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_UP); + bbr_raise_inflight_hi_slope(sk); +} + +/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall + * clock time at which to probe beyond an inflight that we think to be + * safe. This will knowingly risk packet loss, so we want to do this rarely, to + * keep packet loss rates low. Also start a round-trip counter, to probe faster + * if we estimate a Reno flow at our BDP would probe faster. + */ +static void bbr_start_bw_probe_down(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_congestion_signals(sk); + bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ + bbr_pick_probe_wait(sk); + bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + bbr_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); +} + +/* Cruise: maintain what we estimate to be a neutral, conservative + * operating point, without attempting to probe up for bandwidth or down for + * RTT, and only reducing inflight in response to loss/ECN signals. + */ +static void bbr_start_bw_probe_cruise(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->inflight_lo != ~0U) + bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); + + bbr_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); +} + +/* Loss and/or ECN rate is too high while probing. + * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. + */ +static void bbr_handle_inflight_too_high(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + const u32 beta = bbr_param(sk, beta); + + bbr->prev_probe_too_high = 1; + bbr->bw_probe_samples = 0; /* only react once per probe */ + /* If we are app-limited then we are not robustly + * probing the max volume of inflight data we think + * might be safe (analogous to how app-limited bw + * samples are not known to be robustly probing bw). + */ + if (!rs->is_app_limited) { + bbr->inflight_hi = max_t(u32, rs->tx_in_flight, + (u64)bbr_target_inflight(sk) * + (BBR_UNIT - beta) >> BBR_SCALE); + } + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_start_bw_probe_down(sk); +} + +/* If we're seeing bw and loss samples reflecting our bw probing, adapt + * using the signals we see. If loss or ECN mark rate gets too high, then adapt + * inflight_hi downward. If we're able to push inflight higher without such + * signals, push higher: adapt inflight_hi upward. + */ +static bool bbr_adapt_upper_bounds(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Track when we'll see bw/loss samples resulting from our bw probes. */ + if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) + bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; + if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { + /* End of samples from bw probing phase. */ + bbr->bw_probe_samples = 0; + bbr->ack_phase = BBR_ACKS_INIT; + /* At this point in the cycle, our current bw sample is also + * our best recent chance at finding the highest available bw + * for this flow. So now is the best time to forget the bw + * samples from the previous cycle, by advancing the window. + */ + if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) + bbr_advance_max_bw_filter(sk); + /* If we had an inflight_hi, then probed and pushed inflight all + * the way up to hit that inflight_hi without seeing any + * high loss/ECN in all the resulting ACKs from that probing, + * then probe up again, this time letting inflight persist at + * inflight_hi for a round trip, then accelerating beyond. + */ + if (bbr->mode == BBR_PROBE_BW && + bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { + bbr_start_bw_probe_refill(sk, 0); + return true; /* yes, decided state transition */ + } + } + if (bbr_is_inflight_too_high(sk, rs)) { + if (bbr->bw_probe_samples) /* sample is from bw probing? */ + bbr_handle_inflight_too_high(sk, rs); + } else { + /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ + + if (bbr->inflight_hi == ~0U) + return false; /* no excess queue signals yet */ + + /* To be resilient to random loss, we must raise bw/inflight_hi + * if we observe in any phase that a higher level is safe. + */ + if (rs->tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = rs->tx_in_flight; + } + + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr_probe_inflight_hi_upward(sk, rs); + } + + return false; +} + +/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ +static bool bbr_check_time_to_probe_bw(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 n; + + /* If we seem to be at an operating point where we are not seeing loss + * but we are seeing ECN marks, then when the ECN marks cease we reprobe + * quickly (in case cross-traffic has ceased and freed up bw). + */ + if (bbr_param(sk, ecn_reprobe_gain) && bbr->ecn_eligible && + bbr->ecn_in_cycle && !bbr->loss_in_cycle && + inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { + /* Calculate n so that when bbr_raise_inflight_hi_slope() + * computes growth_this_round as 2^n it will be roughly the + * desired volume of data (inflight_hi*ecn_reprobe_gain). + */ + n = ilog2((((u64)bbr->inflight_hi * + bbr_param(sk, ecn_reprobe_gain)) >> BBR_SCALE)); + bbr_start_bw_probe_refill(sk, n); + return true; + } + + if (bbr_has_elapsed_in_phase(sk, bbr->probe_wait_us) || + bbr_is_reno_coexistence_probe_time(sk)) { + bbr_start_bw_probe_refill(sk, 0); + return true; + } + return false; +} + +/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ +static bool bbr_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) +{ + /* Always need to pull inflight down to leave headroom in queue. */ + if (inflight > bbr_inflight_with_headroom(sk)) + return false; + + return inflight <= bbr_inflight(sk, bw, BBR_UNIT); +} + +/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_bw_probe_done = false; + u32 inflight, bw; + + if (!bbr_full_bw_reached(sk)) + return; + + /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ + if (bbr_adapt_upper_bounds(sk, rs, ctx)) + return; /* already decided state transition */ + + if (bbr->mode != BBR_PROBE_BW) + return; + + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); + bw = bbr_max_bw(sk); + + switch (bbr->cycle_idx) { + /* First we spend most of our time cruising with a pacing_gain of 1.0, + * which paces at the estimated bw, to try to fully use the pipe + * without building queue. If we encounter loss/ECN marks, we adapt + * by slowing down. + */ + case BBR_BW_PROBE_CRUISE: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + break; + + /* After cruising, when it's time to probe, we first "refill": we send + * at the estimated bw to fill the pipe, before probing higher and + * knowingly risking overflowing the bottleneck buffer (causing loss). + */ + case BBR_BW_PROBE_REFILL: + if (bbr->round_start) { + /* After one full round trip of sending in REFILL, we + * start to see bw samples reflecting our REFILL, which + * may be putting too much data in flight. + */ + bbr->bw_probe_samples = 1; + bbr_start_bw_probe_up(sk, ctx); + } + break; + + /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to + * probe for bw. If we have not seen loss/ECN, we try to raise inflight + * to at least pacing_gain*BDP; note that this may take more than + * min_rtt if min_rtt is small (e.g. on a LAN). + * + * We terminate PROBE_UP bandwidth probing upon any of the following: + * + * (1) We've pushed inflight up to hit the inflight_hi target set in the + * most recent previous bw probe phase. Thus we want to start + * draining the queue immediately because it's very likely the most + * recently sent packets will fill the queue and cause drops. + * (2) If inflight_hi has not limited bandwidth growth recently, and + * yet delivered bandwidth has not increased much recently + * (bbr->full_bw_now). + * (3) Loss filter says loss rate is "too high". + * (4) ECN filter says ECN mark rate is "too high". + * + * (1) (2) checked here, (3) (4) checked in bbr_is_inflight_too_high() + */ + case BBR_BW_PROBE_UP: + if (bbr->prev_probe_too_high && + inflight >= bbr->inflight_hi) { + bbr->stopped_risky_probe = 1; + is_bw_probe_done = true; + } else { + if (tp->is_cwnd_limited && + tcp_snd_cwnd(tp) >= bbr->inflight_hi) { + /* inflight_hi is limiting bw growth */ + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + } else if (bbr->full_bw_now) { + /* Plateau in estimated bw. Pipe looks full. */ + is_bw_probe_done = true; + } + } + if (is_bw_probe_done) { + bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ + bbr_start_bw_probe_down(sk); /* restart w/ down */ + } + break; + + /* After probing in PROBE_UP, we have usually accumulated some data in + * the bottleneck buffer (if bw probing didn't find more bw). We next + * enter PROBE_DOWN to try to drain any excess data from the queue. To + * do this, we use a pacing_gain < 1.0. We hold this pacing gain until + * our inflight is less then that target cruising point, which is the + * minimum of (a) the amount needed to leave headroom, and (b) the + * estimated BDP. Once inflight falls to match the target, we estimate + * the queue is drained; persisting would underutilize the pipe. + */ + case BBR_BW_PROBE_DOWN: + if (bbr_check_time_to_probe_bw(sk, rs)) + return; /* already decided state transition */ + if (bbr_check_time_to_cruise(sk, inflight, bw)) + bbr_start_bw_probe_cruise(sk); + break; + + default: + WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); + } +} + +/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ +static void bbr_exit_probe_rtt(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_lower_bounds(sk); + if (bbr_full_bw_reached(sk)) { + bbr->mode = BBR_PROBE_BW; + /* Raising inflight after PROBE_RTT may cause loss, so reset + * the PROBE_BW clock and schedule the next bandwidth probe for + * a friendly and randomized future point in time. + */ + bbr_start_bw_probe_down(sk); + /* Since we are exiting PROBE_RTT, we know inflight is + * below our estimated BDP, so it is reasonable to cruise. + */ + bbr_start_bw_probe_cruise(sk); + } else { + bbr->mode = BBR_STARTUP; + } +} + +/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until + * the end of the round in recovery to get a good estimate of how many packets + * have been lost, and how many we need to drain with a low pacing rate. + */ +static void bbr_check_loss_too_high_in_startup(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk)) + return; + + /* For STARTUP exit, check the loss rate at the end of each round trip + * of Recovery episodes in STARTUP. We check the loss rate at the end + * of the round trip to filter out noisy/low loss and have a better + * sense of inflight (extent of loss), so we can drain more accurately. + */ + if (rs->losses && bbr->loss_events_in_round < 0xf) + bbr->loss_events_in_round++; /* update saturating counter */ + if (bbr_param(sk, full_loss_cnt) && bbr->loss_round_start && + inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && + bbr->loss_events_in_round >= bbr_param(sk, full_loss_cnt) && + bbr_is_inflight_too_high(sk, rs)) { + bbr_handle_queue_too_high_in_startup(sk); + return; + } + if (bbr->loss_round_start) + bbr->loss_events_in_round = 0; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates bw probing filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh, full_cnt, thresh; + + if (bbr->full_bw_now || rs->is_app_limited) + return; + + thresh = bbr_param(sk, full_bw_thresh); + full_cnt = bbr_param(sk, full_bw_cnt); + bw_thresh = (u64)bbr->full_bw * thresh >> BBR_SCALE; + if (ctx->sample_bw >= bw_thresh) { + bbr_reset_full_bw(sk); + bbr->full_bw = ctx->sample_bw; + return; + } + if (!bbr->round_start) + return; + ++bbr->full_bw_cnt; + bbr->full_bw_now = bbr->full_bw_cnt >= full_cnt; + bbr->full_bw_reached |= bbr->full_bw_now; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + /* Set ssthresh to export purely for monitoring, to signal + * completion of initial STARTUP by setting to a non- + * TCP_INFINITE_SSTHRESH value (ssthresh is not used by BBR). + */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr_reset_congestion_signals(sk); + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) { + bbr->mode = BBR_PROBE_BW; + bbr_start_bw_probe_down(sk); + } +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + bbr_update_congestion_signals(sk, rs, ctx); + bbr_update_ack_aggregation(sk, rs); + bbr_check_loss_too_high_in_startup(sk, rs); + bbr_check_full_bw_reached(sk, rs, ctx); + bbr_check_drain(sk, rs, ctx); + bbr_update_cycle_phase(sk, rs, ctx); + bbr_update_min_rtt(sk, rs); +} + +/* Fast path for app-limited case. + * + * On each ack, we execute bbr state machine, which primarily consists of: + * 1) update model based on new rate sample, and + * 2) update control based on updated model or state change. + * + * There are certain workload/scenarios, e.g. app-limited case, where + * either we can skip updating model or we can skip update of both model + * as well as control. This provides signifcant softirq cpu savings for + * processing incoming acks. + * + * In case of app-limited, if there is no congestion (loss/ecn) and + * if observed bw sample is less than current estimated bw, then we can + * skip some of the computation in bbr state processing: + * + * - if there is no rtt/mode/phase change: In this case, since all the + * parameters of the network model are constant, we can skip model + * as well control update. + * + * - else we can skip rest of the model update. But we still need to + * update the control to account for the new rtt/mode/phase. + * + * Returns whether we can take fast path or not. + */ +static bool bbr_run_fast_path(struct sock *sk, bool *update_model, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 prev_min_rtt_us, prev_mode; + + if (bbr_param(sk, fast_path) && bbr->try_fast_path && + rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && + !bbr->loss_in_round && !bbr->ecn_in_round ) { + prev_mode = bbr->mode; + prev_min_rtt_us = bbr->min_rtt_us; + bbr_check_drain(sk, rs, ctx); + bbr_update_cycle_phase(sk, rs, ctx); + bbr_update_min_rtt(sk, rs); + + if (bbr->mode == prev_mode && + bbr->min_rtt_us == prev_min_rtt_us && + bbr->try_fast_path) { + return true; + } + + /* Skip model update, but control still needs to be updated */ + *update_model = false; + } + return false; +} + +__bpf_kfunc void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct bbr_context ctx = { 0 }; + bool update_model = true; + u32 bw, round_delivered; + int ce_ratio = -1; + + round_delivered = bbr_update_round_start(sk, rs, &ctx); + if (bbr->round_start) { + bbr->rounds_since_probe = + min_t(s32, bbr->rounds_since_probe + 1, 0xFF); + ce_ratio = bbr_update_ecn_alpha(sk); + } + bbr_plb(sk, rs, ce_ratio); + + bbr->ecn_in_round |= (bbr->ecn_eligible && rs->is_ece); + bbr_calculate_bw_sample(sk, rs, &ctx); + bbr_update_latest_delivery_signals(sk, rs, &ctx); + + if (bbr_run_fast_path(sk, &update_model, rs, &ctx)) + goto out; + + if (update_model) + bbr_update_model(sk, rs, &ctx); + + bbr_update_gains(sk); + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, + tcp_snd_cwnd(tp), &ctx); + bbr_bound_cwnd_for_inflight_model(sk); + +out: + bbr_advance_latest_delivery_signals(sk, rs, &ctx); + bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; + bbr->loss_in_cycle |= rs->lost > 0; + bbr->ecn_in_cycle |= rs->delivered_ce > 0; +} + +__bpf_kfunc static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->initialized = 1; + + bbr->init_cwnd = min(0x7FU, tcp_snd_cwnd(tp)); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = tp->delivered; + bbr->prev_ca_state = TCP_CA_Open; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + + bbr_reset_startup_mode(sk); + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); + + /* Start sampling ECN mark rate after first full flight is ACKed: */ + bbr->loss_round_delivered = tp->delivered + 1; + bbr->loss_round_start = 0; + bbr->undo_bw_lo = 0; + bbr->undo_inflight_lo = 0; + bbr->undo_inflight_hi = 0; + bbr->loss_events_in_round = 0; + bbr->startup_ecn_rounds = 0; + bbr_reset_congestion_signals(sk); + bbr->bw_lo = ~0U; + bbr->bw_hi[0] = 0; + bbr->bw_hi[1] = 0; + bbr->inflight_lo = ~0U; + bbr->inflight_hi = ~0U; + bbr_reset_full_bw(sk); + bbr->bw_probe_up_cnt = ~0U; + bbr->bw_probe_up_acks = 0; + bbr->bw_probe_up_rounds = 0; + bbr->probe_wait_us = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_INIT; + bbr->rounds_since_probe = 0; + bbr->bw_probe_samples = 0; + bbr->prev_probe_too_high = 0; + bbr->ecn_eligible = 0; + bbr->ecn_alpha = bbr_param(sk, ecn_alpha_init); + bbr->alpha_last_delivered = 0; + bbr->alpha_last_delivered_ce = 0; + bbr->plb.pause_until = 0; + + tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; +} + +/* BBR marks the current round trip as a loss round. */ +static void bbr_note_loss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + /* Capture "current" data over the full round trip of loss, to + * have a better chance of observing the full capacity of the path. + */ + if (!bbr->loss_in_round) /* first loss in this round trip? */ + bbr->loss_round_delivered = tp->delivered; /* set round trip */ + bbr->loss_in_round = 1; + bbr->loss_in_cycle = 1; +} + +/* Core TCP stack informs us that the given skb was just marked lost. */ +__bpf_kfunc static void bbr_skb_marked_lost(struct sock *sk, + const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + struct rate_sample rs = {}; + + bbr_note_loss(sk); + + if (!bbr->bw_probe_samples) + return; /* not an skb sent while probing for bandwidth */ + if (unlikely(!scb->tx.delivered_mstamp)) + return; /* skb was SACKed, reneged, marked lost; ignore it */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this lost skb, + * then see if the loss rate went too high, and if so at which packet. + */ + rs.tx_in_flight = scb->tx.in_flight; + rs.lost = tp->lost - scb->tx.lost; + rs.is_app_limited = scb->tx.is_app_limited; + if (bbr_is_inflight_too_high(sk, &rs)) { + rs.tx_in_flight = bbr_inflight_hi_from_lost_skb(sk, &rs, skb); + bbr_handle_inflight_too_high(sk, &rs); + } +} + +static void bbr_run_loss_probe_recovery(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct rate_sample rs = {0}; + + bbr_note_loss(sk); + + if (!bbr->bw_probe_samples) + return; /* not sent while probing for bandwidth */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this + * loss, then see if the loss rate went too high. + */ + rs.lost = 1; /* TLP probe repaired loss of a single segment */ + rs.tx_in_flight = bbr->inflight_latest + rs.lost; + rs.is_app_limited = tp->tlp_orig_data_app_limited; + if (bbr_is_inflight_too_high(sk, &rs)) + bbr_handle_inflight_too_high(sk, &rs); +} + +/* Revert short-term model if current loss recovery event was spurious. */ +__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_reset_full_bw(sk); /* spurious slow-down; reset full bw detector */ + bbr->loss_in_round = 0; + + /* Revert to cwnd and other state saved before loss episode. */ + bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); + bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); + bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); + bbr->try_fast_path = 0; /* take slow path to set proper cwnd, pacing */ + return bbr->prior_cwnd; +} + +/* Entering loss recovery, so save state for when we undo recovery. */ __bpf_kfunc static u32 bbr_ssthresh(struct sock *sk) { + struct bbr *bbr = inet_csk_ca(sk); + bbr_save_cwnd(sk); + /* For undo, save state that adapts based on loss signal. */ + bbr->undo_bw_lo = bbr->bw_lo; + bbr->undo_inflight_lo = bbr->inflight_lo; + bbr->undo_inflight_hi = bbr->inflight_hi; return tcp_sk(sk)->snd_ssthresh; } +static enum tcp_bbr_phase bbr_get_phase(struct bbr *bbr) +{ + switch (bbr->mode) { + case BBR_STARTUP: + return BBR_PHASE_STARTUP; + case BBR_DRAIN: + return BBR_PHASE_DRAIN; + case BBR_PROBE_BW: + break; + case BBR_PROBE_RTT: + return BBR_PHASE_PROBE_RTT; + default: + return BBR_PHASE_INVALID; + } + switch (bbr->cycle_idx) { + case BBR_BW_PROBE_UP: + return BBR_PHASE_PROBE_BW_UP; + case BBR_BW_PROBE_DOWN: + return BBR_PHASE_PROBE_BW_DOWN; + case BBR_BW_PROBE_CRUISE: + return BBR_PHASE_PROBE_BW_CRUISE; + case BBR_BW_PROBE_REFILL: + return BBR_PHASE_PROBE_BW_REFILL; + default: + return BBR_PHASE_INVALID; + } +} + static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, - union tcp_cc_info *info) + union tcp_cc_info *info) { if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || ext & (1 << (INET_DIAG_VEGASINFO - 1))) { - struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw = bbr_bw(sk); - - bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; - memset(&info->bbr, 0, sizeof(info->bbr)); - info->bbr.bbr_bw_lo = (u32)bw; - info->bbr.bbr_bw_hi = (u32)(bw >> 32); - info->bbr.bbr_min_rtt = bbr->min_rtt_us; - info->bbr.bbr_pacing_gain = bbr->pacing_gain; - info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); + u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); + u64 bw_lo = bbr->bw_lo == ~0U ? + ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + struct tcp_bbr_info *bbr_info = &info->bbr; + + memset(bbr_info, 0, sizeof(*bbr_info)); + bbr_info->bbr_bw_lo = (u32)bw; + bbr_info->bbr_bw_hi = (u32)(bw >> 32); + bbr_info->bbr_min_rtt = bbr->min_rtt_us; + bbr_info->bbr_pacing_gain = bbr->pacing_gain; + bbr_info->bbr_cwnd_gain = bbr->cwnd_gain; + bbr_info->bbr_bw_hi_lsb = (u32)bw_hi; + bbr_info->bbr_bw_hi_msb = (u32)(bw_hi >> 32); + bbr_info->bbr_bw_lo_lsb = (u32)bw_lo; + bbr_info->bbr_bw_lo_msb = (u32)(bw_lo >> 32); + bbr_info->bbr_mode = bbr->mode; + bbr_info->bbr_phase = (__u8)bbr_get_phase(bbr); + bbr_info->bbr_version = (__u8)BBR_VERSION; + bbr_info->bbr_inflight_lo = bbr->inflight_lo; + bbr_info->bbr_inflight_hi = bbr->inflight_hi; + bbr_info->bbr_extra_acked = bbr_extra_acked(sk); *attr = INET_DIAG_BBRINFO; - return sizeof(info->bbr); + return sizeof(*bbr_info); } return 0; } __bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state) { + struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (new_state == TCP_CA_Loss) { - struct rate_sample rs = { .losses = 1 }; bbr->prev_ca_state = TCP_CA_Loss; - bbr->full_bw = 0; - bbr->round_start = 1; /* treat RTO like end of a round */ - bbr_lt_bw_sampling(sk, &rs); + tcp_plb_update_state_upon_rto(sk, &bbr->plb); + /* The tcp_write_timeout() call to sk_rethink_txhash() likely + * repathed this flow, so re-learn the min network RTT on the + * new path: + */ + bbr_reset_full_bw(sk); + if (!bbr_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tcp_snd_cwnd(tp), bbr->prior_cwnd); + } + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + bbr_exit_loss_recovery(sk); } } + static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { - .flags = TCP_CONG_NON_RESTRICTED, + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, .name = "bbr", .owner = THIS_MODULE, .init = bbr_init, .cong_control = bbr_main, .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr_skb_marked_lost, .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, @@ -1175,10 +2358,11 @@ BTF_SET8_START(tcp_bbr_check_kfunc_ids) BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_skb_marked_lost) BTF_ID_FLAGS(func, bbr_undo_cwnd) BTF_ID_FLAGS(func, bbr_cwnd_event) BTF_ID_FLAGS(func, bbr_ssthresh) -BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) #endif #endif @@ -1213,5 +2397,12 @@ MODULE_AUTHOR("Van Jacobson "); MODULE_AUTHOR("Neal Cardwell "); MODULE_AUTHOR("Yuchung Cheng "); MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); +MODULE_AUTHOR("David Morley "); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); +MODULE_VERSION(__stringify(BBR_VERSION)); From 5ad789ec25187629f09d7636ebd05ae1391fe916 Mon Sep 17 00:00:00 2001 From: Adithya Abraham Philip Date: Fri, 11 Jun 2021 21:56:10 +0000 Subject: [PATCH 17/28] net-tcp_bbr: v3: ensure ECN-enabled BBR flows set ECT on retransmits Adds a new flag TCP_ECN_ECT_PERMANENT that is used by CCAs to indicate that retransmitted packets and pure ACKs must have the ECT bit set. This is necessary for BBR, which when using ECN expects ECT to be set even on retransmitted packets and ACKs. Previous to this addition of TCP_ECN_ECT_PERMANENT, CCAs which can use ECN but don't "need" it did not have a way to indicate that ECT should be set on retransmissions/ACKs. Signed-off-by: Adithya Abraham Philip Signed-off-by: Neal Cardwell Change-Id: I8b048eaab35e136fe6501ef6cd89fd9faa15e6d2 --- include/net/tcp.h | 1 + net/ipv4/tcp_bbr.c | 3 +++ net/ipv4/tcp_output.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 169949f59a18..c65e657a396a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -371,6 +371,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 #define TCP_ECN_LOW 16 +#define TCP_ECN_ECT_PERMANENT 32 enum tcp_tw_status { TCP_TW_SUCCESS = 0, diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 4fec37e8f900..f4f477a69917 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -2151,6 +2151,9 @@ __bpf_kfunc static void bbr_init(struct sock *sk) bbr->plb.pause_until = 0; tp->fast_ack_mode = bbr_fast_ack_mode ? 1 : 0; + + if (bbr_can_use_ecn(sk)) + tp->ecn_flags |= TCP_ECN_ECT_PERMANENT; } /* BBR marks the current round trip as a loss round. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a5d117ed62bf..e5fe7183439b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -379,7 +379,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } - } else if (!tcp_ca_needs_ecn(sk)) { + } else if (!(tp->ecn_flags & TCP_ECN_ECT_PERMANENT) && + !tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } From a1d32ad82d426f29c71dd837393b3d7ea8501b5e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 23 Jul 2023 23:25:34 -0400 Subject: [PATCH 18/28] tcp: export TCPI_OPT_ECN_LOW in tcp_info tcpi_options field Analogous to other important ECN information, export TCPI_OPT_ECN_LOW in tcp_info tcpi_options field. Signed-off-by: Neal Cardwell Change-Id: I08d8d8c7e8780e6e37df54038ee50301ac5a0320 --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 879eeb0a084b..77270053a5e3 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -170,6 +170,7 @@ enum tcp_fastopen_client_fail { #define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ +#define TCPI_OPT_ECN_LOW 64 /* Low-latency ECN configured at init */ /* * Sender's congestion state indicating normal or abnormal situations diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 326b2c4bacf6..497e442bd608 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3777,6 +3777,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; + if (tp->ecn_flags & TCP_ECN_LOW) + info->tcpi_options |= TCPI_OPT_ECN_LOW; if (tp->syn_data_acked) info->tcpi_options |= TCPI_OPT_SYN_DATA; From a7743a2757fae9b06613c201cce6416a95d5f345 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 23 Jan 2016 20:51:41 -0500 Subject: [PATCH 19/28] net-test: add .config for kernel circa v5.10, with many TCP CC modules enabled This commit provides a kernel config file for GCE. It builds most (all?) of the available congestion control modules and uses bbr2 as the default. Tested: On GCE. Effort: net-test Change-Id: Ibc4dfdc119c804f1ad2853b3ee2c1c503bca01a9 --- config.gce | 3807 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3807 insertions(+) create mode 100644 config.gce diff --git a/config.gce b/config.gce new file mode 100644 index 000000000000..1538acf6818b --- /dev/null +++ b/config.gce @@ -0,0 +1,3807 @@ +# +# Automatically generated file; DO NOT EDIT. +# Linux/x86 4.8.0 Kernel Configuration +# +CONFIG_64BIT=y +CONFIG_X86_64=y +CONFIG_X86=y +CONFIG_INSTRUCTION_DECODER=y +CONFIG_OUTPUT_FORMAT="elf64-x86-64" +CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_MMU=y +CONFIG_ARCH_MMAP_RND_BITS_MIN=28 +CONFIG_ARCH_MMAP_RND_BITS_MAX=32 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 +CONFIG_NEED_DMA_MAP_STATE=y +CONFIG_NEED_SG_DMA_LENGTH=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y +CONFIG_HAVE_SETUP_PER_CPU_AREA=y +CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y +CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y +CONFIG_ARCH_WANT_GENERAL_HUGETLB=y +CONFIG_ZONE_DMA32=y +CONFIG_AUDIT_ARCH=y +CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y +CONFIG_HAVE_INTEL_TXT=y +CONFIG_X86_64_SMP=y +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_DEBUG_RODATA=y +CONFIG_PGTABLE_LEVELS=4 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_IRQ_WORK=y +CONFIG_BUILDTIME_EXTABLE_SORT=y + +# +# General setup +# +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_CROSS_COMPILE="" +# CONFIG_COMPILE_TEST is not set +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_HAVE_KERNEL_GZIP=y +CONFIG_HAVE_KERNEL_BZIP2=y +CONFIG_HAVE_KERNEL_LZMA=y +CONFIG_HAVE_KERNEL_XZ=y +CONFIG_HAVE_KERNEL_LZO=y +CONFIG_HAVE_KERNEL_LZ4=y +# CONFIG_KERNEL_GZIP is not set +# CONFIG_KERNEL_BZIP2 is not set +CONFIG_KERNEL_LZMA=y +# CONFIG_KERNEL_XZ is not set +# CONFIG_KERNEL_LZO is not set +# CONFIG_KERNEL_LZ4 is not set +CONFIG_DEFAULT_HOSTNAME="(none)" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y +CONFIG_CROSS_MEMORY_ATTACH=y +CONFIG_FHANDLE=y +CONFIG_USELIB=y +CONFIG_AUDIT=y +CONFIG_HAVE_ARCH_AUDITSYSCALL=y +CONFIG_AUDITSYSCALL=y +CONFIG_AUDIT_WATCH=y +CONFIG_AUDIT_TREE=y + +# +# IRQ subsystem +# +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_IRQ_SHOW=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_IRQ_DOMAIN=y +CONFIG_IRQ_DOMAIN_HIERARCHY=y +CONFIG_GENERIC_MSI_IRQ=y +CONFIG_GENERIC_MSI_IRQ_DOMAIN=y +# CONFIG_IRQ_DOMAIN_DEBUG is not set +CONFIG_IRQ_FORCED_THREADING=y +CONFIG_SPARSE_IRQ=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_ARCH_CLOCKSOURCE_DATA=y +CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y +CONFIG_GENERIC_CMOS_UPDATE=y + +# +# Timers subsystem +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ_COMMON=y +# CONFIG_HZ_PERIODIC is not set +CONFIG_NO_HZ_IDLE=y +# CONFIG_NO_HZ_FULL is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y + +# +# CPU/Task time and stats accounting +# +CONFIG_TICK_CPU_ACCOUNTING=y +# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set +CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_RCU_EXPERT is not set +CONFIG_SRCU=y +# CONFIG_TASKS_RCU is not set +CONFIG_RCU_STALL_COMMON=y +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_RCU_EXPEDITE_BOOT is not set +CONFIG_BUILD_BIN2C=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 +CONFIG_NMI_LOG_BUF_SHIFT=13 +CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y +CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y +CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y +CONFIG_ARCH_SUPPORTS_INT128=y +# CONFIG_NUMA_BALANCING is not set +CONFIG_CGROUPS=y +CONFIG_PAGE_COUNTER=y +CONFIG_MEMCG=y +# CONFIG_MEMCG_SWAP is not set +CONFIG_BLK_CGROUP=y +CONFIG_DEBUG_BLK_CGROUP=y +CONFIG_CGROUP_WRITEBACK=y +CONFIG_CGROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +# CONFIG_RT_GROUP_SCHED is not set +# CONFIG_CGROUP_PIDS is not set +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y +# CONFIG_SCHED_AUTOGROUP is not set +# CONFIG_SYSFS_DEPRECATED is not set +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_RD_XZ=y +CONFIG_RD_LZO=y +CONFIG_RD_LZ4=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_HAVE_UID16=y +CONFIG_SYSCTL_EXCEPTION_TRACE=y +CONFIG_HAVE_PCSPKR_PLATFORM=y +CONFIG_BPF=y +CONFIG_EXPERT=y +CONFIG_UID16=y +CONFIG_MULTIUSER=y +CONFIG_SGETMASK_SYSCALL=y +CONFIG_SYSFS_SYSCALL=y +# CONFIG_SYSCTL_SYSCALL is not set +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y +CONFIG_KALLSYMS_BASE_RELATIVE=y +CONFIG_PRINTK=y +CONFIG_PRINTK_NMI=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_PCSPKR_PLATFORM=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_BPF_SYSCALL=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_ADVISE_SYSCALLS=y +# CONFIG_USERFAULTFD is not set +CONFIG_PCI_QUIRKS=y +CONFIG_MEMBARRIER=y +# CONFIG_EMBEDDED is not set +CONFIG_HAVE_PERF_EVENTS=y + +# +# Kernel Performance Events And Counters +# +CONFIG_PERF_EVENTS=y +# CONFIG_DEBUG_PERF_USE_VMALLOC is not set +CONFIG_VM_EVENT_COUNTERS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_SLAB_FREELIST_RANDOM is not set +# CONFIG_SYSTEM_DATA_VERIFICATION is not set +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +CONFIG_KEXEC_CORE=y +# CONFIG_OPROFILE is not set +CONFIG_HAVE_OPROFILE=y +CONFIG_OPROFILE_NMI_TIMER=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +# CONFIG_STATIC_KEYS_SELFTEST is not set +CONFIG_OPTPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +# CONFIG_UPROBES is not set +# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +CONFIG_ARCH_USE_BUILTIN_BSWAP=y +CONFIG_KRETPROBES=y +CONFIG_USER_RETURN_NOTIFIER=y +CONFIG_HAVE_IOREMAP_PROT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_OPTPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_NMI=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_DMA_CONTIGUOUS=y +CONFIG_GENERIC_SMP_IDLE_THREAD=y +CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y +CONFIG_HAVE_DMA_API_DEBUG=y +CONFIG_HAVE_HW_BREAKPOINT=y +CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y +CONFIG_HAVE_USER_RETURN_NOTIFIER=y +CONFIG_HAVE_PERF_EVENTS_NMI=y +CONFIG_HAVE_PERF_REGS=y +CONFIG_HAVE_PERF_USER_STACK_DUMP=y +CONFIG_HAVE_ARCH_JUMP_LABEL=y +CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y +CONFIG_HAVE_CMPXCHG_LOCAL=y +CONFIG_HAVE_CMPXCHG_DOUBLE=y +CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y +CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y +CONFIG_HAVE_ARCH_SECCOMP_FILTER=y +CONFIG_SECCOMP_FILTER=y +CONFIG_HAVE_GCC_PLUGINS=y +# CONFIG_GCC_PLUGINS is not set +CONFIG_HAVE_CC_STACKPROTECTOR=y +# CONFIG_CC_STACKPROTECTOR is not set +CONFIG_CC_STACKPROTECTOR_NONE=y +# CONFIG_CC_STACKPROTECTOR_REGULAR is not set +# CONFIG_CC_STACKPROTECTOR_STRONG is not set +CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y +CONFIG_HAVE_CONTEXT_TRACKING=y +CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y +CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y +CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y +CONFIG_HAVE_ARCH_HUGE_VMAP=y +CONFIG_HAVE_ARCH_SOFT_DIRTY=y +CONFIG_MODULES_USE_ELF_RELA=y +CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y +CONFIG_ARCH_HAS_ELF_RANDOMIZE=y +CONFIG_HAVE_ARCH_MMAP_RND_BITS=y +CONFIG_HAVE_EXIT_THREAD=y +CONFIG_ARCH_MMAP_RND_BITS=28 +CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 +CONFIG_HAVE_COPY_THREAD_TLS=y +CONFIG_HAVE_STACK_VALIDATION=y +# CONFIG_HAVE_ARCH_HASH is not set +# CONFIG_ISA_BUS_API is not set +CONFIG_OLD_SIGSUSPEND3=y +CONFIG_COMPAT_OLD_SIGACTION=y +# CONFIG_CPU_NO_EFFICIENT_FFS is not set + +# +# GCOV-based kernel profiling +# +# CONFIG_GCOV_KERNEL is not set +CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_MODULE_SIG is not set +# CONFIG_MODULE_COMPRESS is not set +# CONFIG_TRIM_UNUSED_KSYMS is not set +CONFIG_MODULES_TREE_LOOKUP=y +CONFIG_BLOCK=y +CONFIG_BLK_DEV_BSG=y +CONFIG_BLK_DEV_BSGLIB=y +# CONFIG_BLK_DEV_INTEGRITY is not set +# CONFIG_BLK_DEV_THROTTLING is not set +# CONFIG_BLK_CMDLINE_PARSER is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_AIX_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +# CONFIG_MAC_PARTITION is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_BSD_DISKLABEL is not set +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +CONFIG_EFI_PARTITION=y +# CONFIG_SYSV68_PARTITION is not set +# CONFIG_CMDLINE_PARTITION is not set +CONFIG_BLOCK_COMPAT=y + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_CFQ_GROUP_IOSCHED is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_PREEMPT_NOTIFIERS=y +CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +CONFIG_INLINE_READ_UNLOCK=y +CONFIG_INLINE_READ_UNLOCK_IRQ=y +CONFIG_INLINE_WRITE_UNLOCK=y +CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y +CONFIG_MUTEX_SPIN_ON_OWNER=y +CONFIG_RWSEM_SPIN_ON_OWNER=y +CONFIG_LOCK_SPIN_ON_OWNER=y +CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y +CONFIG_QUEUED_SPINLOCKS=y +CONFIG_ARCH_USE_QUEUED_RWLOCKS=y +CONFIG_QUEUED_RWLOCKS=y +CONFIG_FREEZER=y + +# +# Processor type and features +# +# CONFIG_ZONE_DMA is not set +CONFIG_SMP=y +CONFIG_X86_FEATURE_NAMES=y +CONFIG_X86_FAST_FEATURE_TESTS=y +CONFIG_X86_X2APIC=y +CONFIG_X86_MPPARSE=y +# CONFIG_GOLDFISH is not set +CONFIG_X86_EXTENDED_PLATFORM=y +# CONFIG_X86_NUMACHIP is not set +# CONFIG_X86_VSMP is not set +# CONFIG_X86_UV is not set +# CONFIG_X86_GOLDFISH is not set +# CONFIG_X86_INTEL_MID is not set +# CONFIG_X86_INTEL_LPSS is not set +# CONFIG_X86_AMD_PLATFORM_DEVICE is not set +# CONFIG_IOSF_MBI is not set +CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +# CONFIG_PARAVIRT_DEBUG is not set +# CONFIG_PARAVIRT_SPINLOCKS is not set +# CONFIG_XEN is not set +CONFIG_KVM_GUEST=y +# CONFIG_KVM_DEBUG_FS is not set +# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set +CONFIG_PARAVIRT_CLOCK=y +CONFIG_NO_BOOTMEM=y +# CONFIG_MK8 is not set +# CONFIG_MPSC is not set +# CONFIG_MCORE2 is not set +# CONFIG_MATOM is not set +CONFIG_GENERIC_CPU=y +CONFIG_X86_INTERNODE_CACHE_SHIFT=6 +CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_TSC=y +CONFIG_X86_CMPXCHG64=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=64 +CONFIG_X86_DEBUGCTLMSR=y +# CONFIG_PROCESSOR_SELECT is not set +CONFIG_CPU_SUP_INTEL=y +CONFIG_CPU_SUP_AMD=y +CONFIG_CPU_SUP_CENTAUR=y +CONFIG_HPET_TIMER=y +CONFIG_DMI=y +# CONFIG_GART_IOMMU is not set +# CONFIG_CALGARY_IOMMU is not set +CONFIG_SWIOTLB=y +CONFIG_IOMMU_HELPER=y +# CONFIG_MAXSMP is not set +CONFIG_NR_CPUS=96 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_X86_MCE=y +CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y +CONFIG_X86_MCE_THRESHOLD=y +CONFIG_X86_MCE_INJECT=m +CONFIG_X86_THERMAL_VECTOR=y + +# +# Performance monitoring +# +CONFIG_PERF_EVENTS_INTEL_UNCORE=y +CONFIG_PERF_EVENTS_INTEL_RAPL=y +CONFIG_PERF_EVENTS_INTEL_CSTATE=y +# CONFIG_PERF_EVENTS_AMD_POWER is not set +# CONFIG_VM86 is not set +CONFIG_X86_16BIT=y +CONFIG_X86_ESPFIX64=y +CONFIG_X86_VSYSCALL_EMULATION=y +# CONFIG_I8K is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_INTEL=y +CONFIG_MICROCODE_AMD=y +CONFIG_MICROCODE_OLD_INTERFACE=y +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m +CONFIG_ARCH_PHYS_ADDR_T_64BIT=y +CONFIG_ARCH_DMA_ADDR_T_64BIT=y +CONFIG_X86_DIRECT_GBPAGES=y +CONFIG_NUMA=y +CONFIG_AMD_NUMA=y +CONFIG_X86_64_ACPI_NUMA=y +CONFIG_NODES_SPAN_OTHER_NODES=y +# CONFIG_NUMA_EMU is not set +CONFIG_NODES_SHIFT=2 +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_PROC_KCORE_TEXT=y +CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y +CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_HAVE_MEMBLOCK=y +CONFIG_HAVE_MEMBLOCK_NODE_MAP=y +CONFIG_ARCH_DISCARD_MEMBLOCK=y +# CONFIG_MOVABLE_NODE is not set +# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set +# CONFIG_MEMORY_HOTPLUG is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y +CONFIG_COMPACTION=y +CONFIG_MIGRATION=y +CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y +CONFIG_PHYS_ADDR_T_64BIT=y +CONFIG_VIRT_TO_BUS=y +CONFIG_MMU_NOTIFIER=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y +# CONFIG_MEMORY_FAILURE is not set +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y +# CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set +CONFIG_TRANSPARENT_HUGE_PAGECACHE=y +# CONFIG_CLEANCACHE is not set +CONFIG_FRONTSWAP=y +# CONFIG_CMA is not set +CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ZSWAP=y +CONFIG_ZPOOL=y +# CONFIG_ZBUD is not set +# CONFIG_Z3FOLD is not set +# CONFIG_ZSMALLOC is not set +CONFIG_GENERIC_EARLY_IOREMAP=y +CONFIG_ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT=y +# CONFIG_IDLE_PAGE_TRACKING is not set +CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y +CONFIG_ARCH_HAS_PKEYS=y +# CONFIG_X86_PMEM_LEGACY is not set +# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set +CONFIG_X86_RESERVE_LOW=64 +CONFIG_MTRR=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_X86_PAT=y +CONFIG_ARCH_USES_PG_UNCACHED=y +CONFIG_ARCH_RANDOM=y +CONFIG_X86_SMAP=y +# CONFIG_X86_INTEL_MPX is not set +CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +# CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +# CONFIG_KEXEC_FILE is not set +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x1000000 +CONFIG_RELOCATABLE=y +# CONFIG_RANDOMIZE_BASE is not set +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set +# CONFIG_DEBUG_HOTPLUG_CPU0 is not set +# CONFIG_COMPAT_VDSO is not set +# CONFIG_LEGACY_VSYSCALL_NATIVE is not set +CONFIG_LEGACY_VSYSCALL_EMULATE=y +# CONFIG_LEGACY_VSYSCALL_NONE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="oops=panic panic=10 io_delay=0xed libata.allow_tpm=1 nmi_watchdog=panic tco_start=1 quiet svm.nested=0 acpi_enforce_resources=lax" +# CONFIG_CMDLINE_OVERRIDE is not set +CONFIG_MODIFY_LDT_SYSCALL=y +CONFIG_HAVE_LIVEPATCH=y +# CONFIG_LIVEPATCH is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_USE_PERCPU_NUMA_NODE_ID=y + +# +# Power management and ACPI options +# +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_SUSPEND_SKIP_SYNC is not set +# CONFIG_HIBERNATION is not set +CONFIG_PM_SLEEP=y +CONFIG_PM_SLEEP_SMP=y +# CONFIG_PM_AUTOSLEEP is not set +# CONFIG_PM_WAKELOCKS is not set +CONFIG_PM=y +CONFIG_PM_DEBUG=y +# CONFIG_PM_ADVANCED_DEBUG is not set +CONFIG_PM_SLEEP_DEBUG=y +CONFIG_PM_TRACE=y +CONFIG_PM_TRACE_RTC=y +# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set +CONFIG_ACPI=y +CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y +CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y +CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y +# CONFIG_ACPI_DEBUGGER is not set +CONFIG_ACPI_SLEEP=y +# CONFIG_ACPI_PROCFS_POWER is not set +CONFIG_ACPI_REV_OVERRIDE_POSSIBLE=y +# CONFIG_ACPI_EC_DEBUGFS is not set +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_CPU_FREQ_PSS=y +CONFIG_ACPI_PROCESSOR_CSTATE=y +CONFIG_ACPI_PROCESSOR_IDLE=y +CONFIG_ACPI_PROCESSOR=m +# CONFIG_ACPI_IPMI is not set +CONFIG_ACPI_HOTPLUG_CPU=y +# CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set +CONFIG_ACPI_THERMAL=m +CONFIG_ACPI_NUMA=y +# CONFIG_ACPI_CUSTOM_DSDT is not set +CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y +CONFIG_ACPI_TABLE_UPGRADE=y +# CONFIG_ACPI_DEBUG is not set +# CONFIG_ACPI_PCI_SLOT is not set +CONFIG_X86_PM_TIMER=y +CONFIG_ACPI_CONTAINER=y +CONFIG_ACPI_HOTPLUG_IOAPIC=y +# CONFIG_ACPI_SBS is not set +# CONFIG_ACPI_HED is not set +# CONFIG_ACPI_CUSTOM_METHOD is not set +# CONFIG_ACPI_BGRT is not set +# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set +# CONFIG_ACPI_NFIT is not set +CONFIG_HAVE_ACPI_APEI=y +CONFIG_HAVE_ACPI_APEI_NMI=y +# CONFIG_ACPI_APEI is not set +# CONFIG_DPTF_POWER is not set +# CONFIG_ACPI_EXTLOG is not set +# CONFIG_PMIC_OPREGION is not set +# CONFIG_ACPI_CONFIGFS is not set +# CONFIG_SFI is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ATTR_SET=y +CONFIG_CPU_FREQ_GOV_COMMON=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_FREQ_GOV_ONDEMAND=m +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +# CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set + +# +# CPU frequency scaling drivers +# +# CONFIG_X86_INTEL_PSTATE is not set +# CONFIG_X86_PCC_CPUFREQ is not set +CONFIG_X86_ACPI_CPUFREQ=m +CONFIG_X86_ACPI_CPUFREQ_CPB=y +CONFIG_X86_POWERNOW_K8=m +# CONFIG_X86_AMD_FREQ_SENSITIVITY is not set +CONFIG_X86_SPEEDSTEP_CENTRINO=m +# CONFIG_X86_P4_CLOCKMOD is not set + +# +# shared options +# +# CONFIG_X86_SPEEDSTEP_LIB is not set + +# +# CPU Idle +# +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y +# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set +CONFIG_INTEL_IDLE=y + +# +# Memory power savings +# +# CONFIG_I7300_IDLE is not set + +# +# Bus options (PCI etc.) +# +CONFIG_PCI=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_DOMAINS=y +# CONFIG_PCI_CNB20LE_QUIRK is not set +# CONFIG_PCIEPORTBUS is not set +CONFIG_PCI_BUS_ADDR_T_64BIT=y +CONFIG_PCI_MSI=y +CONFIG_PCI_MSI_IRQ_DOMAIN=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set +# CONFIG_PCI_STUB is not set +CONFIG_HT_IRQ=y +CONFIG_PCI_ATS=y +# CONFIG_PCI_IOV is not set +CONFIG_PCI_PRI=y +CONFIG_PCI_PASID=y +CONFIG_PCI_LABEL=y +CONFIG_HOTPLUG_PCI=y +# CONFIG_HOTPLUG_PCI_ACPI is not set +# CONFIG_HOTPLUG_PCI_CPCI is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# PCI host controller drivers +# +# CONFIG_PCIE_DW_PLAT is not set +# CONFIG_ISA_BUS is not set +# CONFIG_ISA_DMA_API is not set +CONFIG_AMD_NB=y +# CONFIG_PCCARD is not set +# CONFIG_RAPIDIO is not set +# CONFIG_X86_SYSFB is not set + +# +# Executable file formats / Emulations +# +CONFIG_BINFMT_ELF=y +CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_ELFCORE=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +CONFIG_BINFMT_SCRIPT=y +# CONFIG_HAVE_AOUT is not set +CONFIG_BINFMT_MISC=y +CONFIG_COREDUMP=y +CONFIG_IA32_EMULATION=y +# CONFIG_IA32_AOUT is not set +# CONFIG_X86_X32 is not set +CONFIG_COMPAT=y +CONFIG_COMPAT_FOR_U64_ALIGNMENT=y +CONFIG_SYSVIPC_COMPAT=y +CONFIG_KEYS_COMPAT=y +CONFIG_X86_DEV_DMA_OPS=y +CONFIG_PMC_ATOM=y +CONFIG_VMD=y +CONFIG_NET=y +CONFIG_NET_INGRESS=y +CONFIG_NET_EGRESS=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +# CONFIG_XFRM_USER is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +# CONFIG_IP_FIB_TRIE_STATS is not set +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_ROUTE_CLASSID=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IP_TUNNEL=y +CONFIG_NET_IPGRE=y +# CONFIG_NET_IPGRE_BROADCAST is not set +# CONFIG_IP_MROUTE is not set +CONFIG_SYN_COOKIES=y +# CONFIG_NET_UDP_TUNNEL is not set +# CONFIG_NET_FOU is not set +# CONFIG_NET_FOU_IP_TUNNELS is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +CONFIG_TCP_CONG_HSTCP=y +CONFIG_TCP_CONG_HYBLA=y +CONFIG_TCP_CONG_VEGAS=y +CONFIG_TCP_CONG_NV=y +CONFIG_TCP_CONG_SCALABLE=y +CONFIG_TCP_CONG_LP=y +CONFIG_TCP_CONG_VENO=y +CONFIG_TCP_CONG_YEAH=y +CONFIG_TCP_CONG_ILLINOIS=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TCP_CONG_CDG=y +CONFIG_TCP_CONG_BBR=y +CONFIG_TCP_CONG_BBR2=y +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_DCTCP is not set +CONFIG_DEFAULT_BBR2=y +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="bbr2" +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_IPV6_ILA is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +CONFIG_INET6_TUNNEL=y +# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET6_XFRM_MODE_TUNNEL is not set +# CONFIG_INET6_XFRM_MODE_BEET is not set +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +CONFIG_IPV6_SIT_6RD=y +CONFIG_IPV6_NDISC_NODETYPE=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_GRE=y +# CONFIG_IPV6_FOU is not set +# CONFIG_IPV6_FOU_TUNNEL is not set +CONFIG_IPV6_MULTIPLE_TABLES=y +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MROUTE is not set +# CONFIG_NETLABEL is not set +# CONFIG_NETWORK_SECMARK is not set +CONFIG_NET_PTP_CLASSIFY=y +# CONFIG_NETWORK_PHY_TIMESTAMPING is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y +# CONFIG_BRIDGE_NETFILTER is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_INGRESS=y +CONFIG_NETFILTER_NETLINK=m +# CONFIG_NETFILTER_NETLINK_ACCT is not set +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_LOG_COMMON=m +CONFIG_NF_CONNTRACK_MARK=y +# CONFIG_NF_CONNTRACK_ZONES is not set +CONFIG_NF_CONNTRACK_PROCFS=y +CONFIG_NF_CONNTRACK_EVENTS=y +# CONFIG_NF_CONNTRACK_TIMEOUT is not set +# CONFIG_NF_CONNTRACK_TIMESTAMP is not set +# CONFIG_NF_CT_PROTO_DCCP is not set +# CONFIG_NF_CT_PROTO_SCTP is not set +# CONFIG_NF_CT_PROTO_UDPLITE is not set +# CONFIG_NF_CONNTRACK_AMANDA is not set +CONFIG_NF_CONNTRACK_FTP=m +# CONFIG_NF_CONNTRACK_H323 is not set +# CONFIG_NF_CONNTRACK_IRC is not set +# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set +# CONFIG_NF_CONNTRACK_SNMP is not set +# CONFIG_NF_CONNTRACK_PPTP is not set +# CONFIG_NF_CONNTRACK_SANE is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_NF_CONNTRACK_TFTP is not set +CONFIG_NF_CT_NETLINK=m +# CONFIG_NF_CT_NETLINK_TIMEOUT is not set +# CONFIG_NETFILTER_NETLINK_GLUE_CT is not set +CONFIG_NF_NAT=m +CONFIG_NF_NAT_NEEDED=y +# CONFIG_NF_NAT_AMANDA is not set +CONFIG_NF_NAT_FTP=m +# CONFIG_NF_NAT_IRC is not set +# CONFIG_NF_NAT_SIP is not set +# CONFIG_NF_NAT_TFTP is not set +# CONFIG_NF_NAT_REDIRECT is not set +# CONFIG_NF_TABLES is not set +CONFIG_NETFILTER_XTABLES=y + +# +# Xtables combined modules +# +CONFIG_NETFILTER_XT_MARK=m +CONFIG_NETFILTER_XT_CONNMARK=m +CONFIG_NETFILTER_XT_SET=m + +# +# Xtables targets +# +# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set +# CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=y +CONFIG_NETFILTER_XT_TARGET_HL=m +# CONFIG_NETFILTER_XT_TARGET_HMARK is not set +# CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_NAT=m +# CONFIG_NETFILTER_XT_TARGET_NETMAP is not set +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NOTRACK=m +CONFIG_NETFILTER_XT_TARGET_RATEEST=m +# CONFIG_NETFILTER_XT_TARGET_REDIRECT is not set +# CONFIG_NETFILTER_XT_TARGET_TEE is not set +# CONFIG_NETFILTER_XT_TARGET_TPROXY is not set +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m + +# +# Xtables matches +# +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y +# CONFIG_NETFILTER_XT_MATCH_BPF is not set +# CONFIG_NETFILTER_XT_MATCH_CGROUP is not set +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +# CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +# CONFIG_NETFILTER_XT_MATCH_CPU is not set +CONFIG_NETFILTER_XT_MATCH_DCCP=m +# CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set +CONFIG_NETFILTER_XT_MATCH_DSCP=y +CONFIG_NETFILTER_XT_MATCH_ECN=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_HL=m +# CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +# CONFIG_NETFILTER_XT_MATCH_L2TP is not set +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +# CONFIG_NETFILTER_XT_MATCH_NFACCT is not set +# CONFIG_NETFILTER_XT_MATCH_OSF is not set +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +# CONFIG_NETFILTER_XT_MATCH_RECENT is not set +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_MAX=1024 +# CONFIG_IP_SET_BITMAP_IP is not set +# CONFIG_IP_SET_BITMAP_IPMAC is not set +# CONFIG_IP_SET_BITMAP_PORT is not set +CONFIG_IP_SET_HASH_IP=m +# CONFIG_IP_SET_HASH_IPMARK is not set +# CONFIG_IP_SET_HASH_IPPORT is not set +# CONFIG_IP_SET_HASH_IPPORTIP is not set +# CONFIG_IP_SET_HASH_IPPORTNET is not set +# CONFIG_IP_SET_HASH_MAC is not set +# CONFIG_IP_SET_HASH_NETPORTNET is not set +CONFIG_IP_SET_HASH_NET=m +# CONFIG_IP_SET_HASH_NETNET is not set +# CONFIG_IP_SET_HASH_NETPORT is not set +# CONFIG_IP_SET_HASH_NETIFACE is not set +# CONFIG_IP_SET_LIST_SET is not set +# CONFIG_IP_VS is not set + +# +# IP: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV4=m +CONFIG_NF_CONNTRACK_IPV4=m +# CONFIG_NF_DUP_IPV4 is not set +# CONFIG_NF_LOG_ARP is not set +CONFIG_NF_LOG_IPV4=m +CONFIG_NF_REJECT_IPV4=m +CONFIG_NF_NAT_IPV4=m +CONFIG_NF_NAT_MASQUERADE_IPV4=m +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +CONFIG_IP_NF_IPTABLES=y +# CONFIG_IP_NF_MATCH_AH is not set +CONFIG_IP_NF_MATCH_ECN=m +# CONFIG_IP_NF_MATCH_RPFILTER is not set +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +# CONFIG_IP_NF_TARGET_SYNPROXY is not set +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +# CONFIG_IP_NF_TARGET_NETMAP is not set +# CONFIG_IP_NF_TARGET_REDIRECT is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +# CONFIG_IP_NF_SECURITY is not set +# CONFIG_IP_NF_ARPTABLES is not set + +# +# IPv6: Netfilter Configuration +# +CONFIG_NF_DEFRAG_IPV6=m +CONFIG_NF_CONNTRACK_IPV6=m +# CONFIG_NF_DUP_IPV6 is not set +CONFIG_NF_REJECT_IPV6=m +CONFIG_NF_LOG_IPV6=m +# CONFIG_NF_NAT_IPV6 is not set +CONFIG_IP6_NF_IPTABLES=y +# CONFIG_IP6_NF_MATCH_AH is not set +# CONFIG_IP6_NF_MATCH_EUI64 is not set +CONFIG_IP6_NF_MATCH_FRAG=m +# CONFIG_IP6_NF_MATCH_OPTS is not set +CONFIG_IP6_NF_MATCH_HL=m +# CONFIG_IP6_NF_MATCH_IPV6HEADER is not set +# CONFIG_IP6_NF_MATCH_MH is not set +# CONFIG_IP6_NF_MATCH_RPFILTER is not set +# CONFIG_IP6_NF_MATCH_RT is not set +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +# CONFIG_IP6_NF_TARGET_SYNPROXY is not set +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=m +# CONFIG_IP6_NF_SECURITY is not set +# CONFIG_IP6_NF_NAT is not set +# CONFIG_BRIDGE_NF_EBTABLES is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_L2TP is not set +CONFIG_STP=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_IGMP_SNOOPING=y +# CONFIG_BRIDGE_VLAN_FILTERING is not set +CONFIG_HAVE_NET_DSA=y +CONFIG_VLAN_8021Q=m +# CONFIG_VLAN_8021Q_GVRP is not set +# CONFIG_VLAN_8021Q_MVRP is not set +# CONFIG_DECNET is not set +CONFIG_LLC=m +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_PHONET is not set +# CONFIG_6LOWPAN is not set +# CONFIG_IEEE802154 is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=y +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_PRIO=y +CONFIG_NET_SCH_MULTIQ=m +CONFIG_NET_SCH_RED=m +# CONFIG_NET_SCH_SFB is not set +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=y +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m +# CONFIG_NET_SCH_CHOKE is not set +# CONFIG_NET_SCH_QFQ is not set +CONFIG_NET_SCH_CODEL=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_SCH_FQ=y +# CONFIG_NET_SCH_HHF is not set +# CONFIG_NET_SCH_PIE is not set +CONFIG_NET_SCH_INGRESS=m +# CONFIG_NET_SCH_PLUG is not set + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=y +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=y +# CONFIG_CLS_U32_PERF is not set +# CONFIG_CLS_U32_MARK is not set +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +# CONFIG_NET_CLS_CGROUP is not set +# CONFIG_NET_CLS_BPF is not set +# CONFIG_NET_CLS_FLOWER is not set +# CONFIG_NET_CLS_MATCHALL is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +# CONFIG_NET_EMATCH_IPSET is not set +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +# CONFIG_GACT_PROB is not set +CONFIG_NET_ACT_MIRRED=y +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +# CONFIG_NET_ACT_CSUM is not set +# CONFIG_NET_ACT_VLAN is not set +# CONFIG_NET_ACT_BPF is not set +# CONFIG_NET_ACT_CONNMARK is not set +# CONFIG_NET_ACT_SKBMOD is not set +CONFIG_NET_ACT_IFE=m +# CONFIG_NET_ACT_TUNNEL_KEY is not set +CONFIG_NET_IFE_SKBMARK=m +CONFIG_NET_IFE_SKBPRIO=m +# CONFIG_NET_IFE_SKBTCINDEX is not set +# CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y +CONFIG_DCB=y +CONFIG_DNS_RESOLVER=m +# CONFIG_BATMAN_ADV is not set +CONFIG_OPENVSWITCH=m +CONFIG_OPENVSWITCH_GRE=m +# CONFIG_VSOCKETS is not set +CONFIG_NETLINK_DIAG=y +CONFIG_MPLS=y +CONFIG_NET_MPLS_GSO=m +# CONFIG_MPLS_ROUTING is not set +# CONFIG_HSR is not set +# CONFIG_NET_SWITCHDEV is not set +# CONFIG_NET_L3_MASTER_DEV is not set +# CONFIG_NET_NCSI is not set +CONFIG_RPS=y +CONFIG_RFS_ACCEL=y +CONFIG_XPS=y +# CONFIG_SOCK_CGROUP_DATA is not set +# CONFIG_CGROUP_NET_PRIO is not set +# CONFIG_CGROUP_NET_CLASSID is not set +CONFIG_NET_RX_BUSY_POLL=y +CONFIG_BQL=y +CONFIG_BPF_JIT=y +CONFIG_NET_FLOW_LIMIT=y + +# +# Network testing +# +CONFIG_NET_PKTGEN=m +# CONFIG_NET_TCPPROBE is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_AF_KCM is not set +# CONFIG_STREAM_PARSER is not set +CONFIG_FIB_RULES=y +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set +# CONFIG_CAIF is not set +# CONFIG_CEPH_LIB is not set +# CONFIG_NFC is not set +# CONFIG_LWTUNNEL is not set +CONFIG_DST_CACHE=y +CONFIG_NET_DEVLINK=y +CONFIG_MAY_USE_DEVLINK=y +CONFIG_HAVE_EBPF_JIT=y + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_DEVTMPFS_MOUNT is not set +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +CONFIG_FW_LOADER_USER_HELPER=y +# CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set +CONFIG_ALLOW_DEV_COREDUMP=y +# CONFIG_DEBUG_DRIVER is not set +CONFIG_DEBUG_DEVRES=y +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_GENERIC_CPU_DEVICES is not set +CONFIG_GENERIC_CPU_AUTOPROBE=y +# CONFIG_DMA_SHARED_BUFFER is not set + +# +# Bus devices +# +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y +# CONFIG_MTD is not set +# CONFIG_OF is not set +CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y +# CONFIG_PARPORT is not set +CONFIG_PNP=y +CONFIG_PNP_DEBUG_MESSAGES=y + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_NULL_BLK is not set +# CONFIG_BLK_DEV_PCIESSD_MTIP32XX is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_DRBD is not set +CONFIG_BLK_DEV_NBD=m +# CONFIG_BLK_DEV_SKD is not set +# CONFIG_BLK_DEV_SX8 is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=16384 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_VIRTIO_BLK is not set +# CONFIG_BLK_DEV_HD is not set +# CONFIG_BLK_DEV_RBD is not set +# CONFIG_BLK_DEV_RSXX is not set +# CONFIG_BLK_DEV_NVME is not set +# CONFIG_NVME_RDMA is not set +# CONFIG_NVME_TARGET is not set + +# +# Misc devices +# +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_AD525X_DPOT is not set +# CONFIG_DUMMY_IRQ is not set +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +# CONFIG_APDS9802ALS is not set +# CONFIG_ISL29003 is not set +# CONFIG_ISL29020 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_SENSORS_BH1770 is not set +# CONFIG_SENSORS_APDS990X is not set +# CONFIG_HMC6352 is not set +# CONFIG_DS1682 is not set +# CONFIG_BMP085_I2C is not set +# CONFIG_USB_SWITCH_FSA9480 is not set +# CONFIG_SRAM is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=m +# CONFIG_EEPROM_MAX6875 is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_CB710_CORE is not set + +# +# Texas Instruments shared transport line discipline +# +# CONFIG_TI_ST is not set +# CONFIG_SENSORS_LIS3_I2C is not set + +# +# Altera FPGA firmware download module +# +# CONFIG_ALTERA_STAPL is not set +# CONFIG_INTEL_MEI is not set +# CONFIG_INTEL_MEI_ME is not set +# CONFIG_INTEL_MEI_TXE is not set +# CONFIG_VMWARE_VMCI is not set + +# +# Intel MIC Bus Driver +# +# CONFIG_INTEL_MIC_BUS is not set + +# +# SCIF Bus Driver +# +# CONFIG_SCIF_BUS is not set + +# +# VOP Bus Driver +# +# CONFIG_VOP_BUS is not set + +# +# Intel MIC Host Driver +# + +# +# Intel MIC Card Driver +# + +# +# SCIF Driver +# + +# +# Intel MIC Coprocessor State Management (COSM) Drivers +# + +# +# VOP Driver +# +# CONFIG_GENWQE is not set +# CONFIG_ECHO is not set +# CONFIG_CXL_BASE is not set +# CONFIG_CXL_AFU_DRIVER_OPS is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +CONFIG_SCSI_MOD=y +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_NETLINK is not set +# CONFIG_SCSI_MQ_DEFAULT is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +# CONFIG_SCSI_SCAN_ASYNC is not set + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=m +# CONFIG_SCSI_FC_ATTRS is not set +CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +CONFIG_ISCSI_TCP=y +# CONFIG_ISCSI_BOOT_SYSFS is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_CXGB4_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_BE2ISCSI is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_MVUMI is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_SCSI_ESAS2R is not set +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=y +CONFIG_MEGARAID_MAILBOX=y +# CONFIG_MEGARAID_LEGACY is not set +CONFIG_MEGARAID_SAS=y +# CONFIG_SCSI_MPT3SAS is not set +# CONFIG_SCSI_MPT2SAS is not set +# CONFIG_SCSI_UFSHCD is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_VMWARE_PVSCSI is not set +# CONFIG_SCSI_SNIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_ISCI is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_AM53C974 is not set +# CONFIG_SCSI_WD719X is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set +CONFIG_SCSI_VIRTIO=y +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_VERBOSE_ERROR=y +CONFIG_ATA_ACPI=y +# CONFIG_SATA_ZPODD is not set +CONFIG_SATA_PMP=y + +# +# Controllers with non-SFF native interface +# +CONFIG_SATA_AHCI=y +# CONFIG_SATA_AHCI_PLATFORM is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_SATA_ACARD_AHCI is not set +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y + +# +# SFF controllers with custom DMA interface +# +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_SX4 is not set +CONFIG_ATA_BMDMA=y + +# +# SATA SFF controllers with BMDMA +# +CONFIG_ATA_PIIX=y +# CONFIG_SATA_DWC is not set +CONFIG_SATA_MV=m +CONFIG_SATA_NV=y +# CONFIG_SATA_PROMISE is not set +CONFIG_SATA_SIL=m +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_SVW is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set + +# +# PATA SFF controllers with BMDMA +# +# CONFIG_PATA_ALI is not set +CONFIG_PATA_AMD=y +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +CONFIG_PATA_ATP867X=m +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set +# CONFIG_PATA_NS87415 is not set +CONFIG_PATA_OLDPIIX=y +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RDC is not set +CONFIG_PATA_SCH=y +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_TOSHIBA is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + +# +# PIO-only SFF controllers +# +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_PLATFORM is not set +# CONFIG_PATA_RZ1000 is not set + +# +# Generic fallback / legacy drivers +# +# CONFIG_PATA_ACPI is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_LEGACY is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=y +CONFIG_MD_RAID10=m +# CONFIG_MD_RAID456 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set +# CONFIG_BCACHE is not set +CONFIG_BLK_DEV_DM_BUILTIN=y +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_MQ_DEFAULT is not set +# CONFIG_DM_DEBUG is not set +CONFIG_DM_BUFIO=y +# CONFIG_DM_DEBUG_BLOCK_STACK_TRACING is not set +CONFIG_DM_BIO_PRISON=y +CONFIG_DM_PERSISTENT_DATA=y +CONFIG_DM_CRYPT=y +# CONFIG_DM_SNAPSHOT is not set +CONFIG_DM_THIN_PROVISIONING=y +# CONFIG_DM_CACHE is not set +# CONFIG_DM_ERA is not set +CONFIG_DM_MIRROR=y +# CONFIG_DM_LOG_USERSPACE is not set +# CONFIG_DM_RAID is not set +CONFIG_DM_ZERO=y +CONFIG_DM_MULTIPATH=y +CONFIG_DM_MULTIPATH_QL=y +CONFIG_DM_MULTIPATH_ST=y +# CONFIG_DM_DELAY is not set +# CONFIG_DM_UEVENT is not set +# CONFIG_DM_FLAKEY is not set +CONFIG_DM_VERITY=y +# CONFIG_DM_VERITY_FEC is not set +# CONFIG_DM_SWITCH is not set +# CONFIG_DM_LOG_WRITES is not set +# CONFIG_TARGET_CORE is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +CONFIG_FIREWIRE=m +CONFIG_FIREWIRE_OHCI=m +# CONFIG_FIREWIRE_SBP2 is not set +# CONFIG_FIREWIRE_NET is not set +# CONFIG_FIREWIRE_NOSY is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_BONDING=m +# CONFIG_DUMMY is not set +# CONFIG_EQUALIZER is not set +# CONFIG_NET_FC is not set +CONFIG_IFB=m +# CONFIG_NET_TEAM is not set +CONFIG_MACVLAN=m +# CONFIG_MACVTAP is not set +# CONFIG_VXLAN is not set +# CONFIG_MACSEC is not set +CONFIG_NETCONSOLE=m +# CONFIG_NETCONSOLE_DYNAMIC is not set +CONFIG_NETPOLL=y +CONFIG_NET_POLL_CONTROLLER=y +CONFIG_TUN=y +# CONFIG_TUN_VNET_CROSS_LE is not set +CONFIG_VETH=y +CONFIG_VIRTIO_NET=y +# CONFIG_NLMON is not set +# CONFIG_ARCNET is not set + +# +# CAIF transport drivers +# + +# +# Distributed Switch Architecture drivers +# +CONFIG_ETHERNET=y +CONFIG_MDIO=m +CONFIG_NET_VENDOR_3COM=y +# CONFIG_VORTEX is not set +# CONFIG_TYPHOON is not set +CONFIG_NET_VENDOR_ADAPTEC=y +# CONFIG_ADAPTEC_STARFIRE is not set +CONFIG_NET_VENDOR_AGERE=y +# CONFIG_ET131X is not set +CONFIG_NET_VENDOR_ALTEON=y +# CONFIG_ACENIC is not set +# CONFIG_ALTERA_TSE is not set +CONFIG_NET_VENDOR_AMAZON=y +# CONFIG_ENA_ETHERNET is not set +CONFIG_NET_VENDOR_AMD=y +# CONFIG_AMD8111_ETH is not set +# CONFIG_PCNET32 is not set +CONFIG_NET_VENDOR_ARC=y +CONFIG_NET_VENDOR_ATHEROS=y +# CONFIG_ATL2 is not set +# CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_ALX is not set +# CONFIG_NET_VENDOR_AURORA is not set +CONFIG_NET_CADENCE=y +# CONFIG_MACB is not set +CONFIG_NET_VENDOR_BROADCOM=y +# CONFIG_B44 is not set +# CONFIG_BCMGENET is not set +CONFIG_BNX2=m +# CONFIG_CNIC is not set +CONFIG_TIGON3=m +CONFIG_BNX2X=m +# CONFIG_BNXT is not set +CONFIG_NET_VENDOR_BROCADE=y +# CONFIG_BNA is not set +CONFIG_NET_VENDOR_CAVIUM=y +# CONFIG_THUNDER_NIC_PF is not set +# CONFIG_THUNDER_NIC_VF is not set +# CONFIG_THUNDER_NIC_BGX is not set +# CONFIG_THUNDER_NIC_RGX is not set +# CONFIG_LIQUIDIO is not set +CONFIG_NET_VENDOR_CHELSIO=y +# CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3=m +# CONFIG_CHELSIO_T4 is not set +# CONFIG_CHELSIO_T4VF is not set +CONFIG_NET_VENDOR_CISCO=y +# CONFIG_ENIC is not set +# CONFIG_CX_ECAT is not set +# CONFIG_DNET is not set +CONFIG_NET_VENDOR_DEC=y +# CONFIG_NET_TULIP is not set +CONFIG_NET_VENDOR_DLINK=y +# CONFIG_DL2K is not set +# CONFIG_SUNDANCE is not set +CONFIG_NET_VENDOR_EMULEX=y +# CONFIG_BE2NET is not set +CONFIG_NET_VENDOR_EZCHIP=y +CONFIG_NET_VENDOR_EXAR=y +# CONFIG_S2IO is not set +# CONFIG_VXGE is not set +CONFIG_NET_VENDOR_HP=y +# CONFIG_HP100 is not set +CONFIG_NET_VENDOR_INTEL=y +# CONFIG_E100 is not set +CONFIG_E1000=y +CONFIG_E1000E=m +CONFIG_E1000E_HWTS=y +# CONFIG_IGB is not set +# CONFIG_IGBVF is not set +# CONFIG_IXGB is not set +# CONFIG_IXGBE is not set +# CONFIG_IXGBEVF is not set +# CONFIG_I40E is not set +# CONFIG_I40EVF is not set +# CONFIG_FM10K is not set +CONFIG_NET_VENDOR_I825XX=y +# CONFIG_JME is not set +CONFIG_NET_VENDOR_MARVELL=y +# CONFIG_MVMDIO is not set +# CONFIG_MVNETA_BM is not set +# CONFIG_SKGE is not set +CONFIG_SKY2=m +# CONFIG_SKY2_DEBUG is not set +CONFIG_NET_VENDOR_MELLANOX=y +CONFIG_MLX4_EN=m +# CONFIG_MLX4_EN_DCB is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y +# CONFIG_MLX5_CORE is not set +# CONFIG_MLXSW_CORE is not set +CONFIG_NET_VENDOR_MICREL=y +# CONFIG_KS8851_MLL is not set +# CONFIG_KSZ884X_PCI is not set +CONFIG_NET_VENDOR_MYRI=y +# CONFIG_MYRI10GE is not set +# CONFIG_FEALNX is not set +CONFIG_NET_VENDOR_NATSEMI=y +# CONFIG_NATSEMI is not set +# CONFIG_NS83820 is not set +CONFIG_NET_VENDOR_NETRONOME=y +# CONFIG_NFP_NETVF is not set +CONFIG_NET_VENDOR_8390=y +# CONFIG_NE2K_PCI is not set +CONFIG_NET_VENDOR_NVIDIA=y +# CONFIG_FORCEDETH is not set +CONFIG_NET_VENDOR_OKI=y +# CONFIG_ETHOC is not set +CONFIG_NET_PACKET_ENGINE=y +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +CONFIG_NET_VENDOR_QLOGIC=y +# CONFIG_QLA3XXX is not set +# CONFIG_QLCNIC is not set +# CONFIG_QLGE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_QED is not set +CONFIG_NET_VENDOR_QUALCOMM=y +# CONFIG_QCOM_EMAC is not set +CONFIG_NET_VENDOR_REALTEK=y +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_R8169 is not set +CONFIG_NET_VENDOR_RENESAS=y +CONFIG_NET_VENDOR_RDC=y +# CONFIG_R6040 is not set +CONFIG_NET_VENDOR_ROCKER=y +CONFIG_NET_VENDOR_SAMSUNG=y +# CONFIG_SXGBE_ETH is not set +CONFIG_NET_VENDOR_SEEQ=y +CONFIG_NET_VENDOR_SILAN=y +# CONFIG_SC92031 is not set +CONFIG_NET_VENDOR_SIS=y +# CONFIG_SIS900 is not set +# CONFIG_SIS190 is not set +# CONFIG_SFC is not set +CONFIG_NET_VENDOR_SMSC=y +# CONFIG_EPIC100 is not set +# CONFIG_SMSC911X is not set +# CONFIG_SMSC9420 is not set +CONFIG_NET_VENDOR_STMICRO=y +# CONFIG_STMMAC_ETH is not set +CONFIG_NET_VENDOR_SUN=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NIU is not set +CONFIG_NET_VENDOR_SYNOPSYS=y +CONFIG_NET_VENDOR_TEHUTI=y +# CONFIG_TEHUTI is not set +CONFIG_NET_VENDOR_TI=y +# CONFIG_TI_CPSW_ALE is not set +# CONFIG_TLAN is not set +CONFIG_NET_VENDOR_VIA=y +# CONFIG_VIA_RHINE is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_NET_VENDOR_WIZNET=y +# CONFIG_WIZNET_W5100 is not set +# CONFIG_WIZNET_W5300 is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_NET_SB1000 is not set +CONFIG_PHYLIB=y + +# +# MDIO bus device drivers +# +# CONFIG_MDIO_BCM_UNIMAC is not set +# CONFIG_MDIO_BITBANG is not set +# CONFIG_MDIO_OCTEON is not set +# CONFIG_MDIO_THUNDER is not set +# CONFIG_MDIO_XGENE is not set + +# +# MII PHY device drivers +# +# CONFIG_AMD_PHY is not set +# CONFIG_AQUANTIA_PHY is not set +# CONFIG_AT803X_PHY is not set +# CONFIG_BCM7XXX_PHY is not set +# CONFIG_BCM87XX_PHY is not set +# CONFIG_BROADCOM_PHY is not set +# CONFIG_CICADA_PHY is not set +# CONFIG_DAVICOM_PHY is not set +# CONFIG_DP83848_PHY is not set +# CONFIG_DP83867_PHY is not set +# CONFIG_FIXED_PHY is not set +# CONFIG_ICPLUS_PHY is not set +# CONFIG_INTEL_XWAY_PHY is not set +# CONFIG_LSI_ET1011C_PHY is not set +# CONFIG_LXT_PHY is not set +# CONFIG_MARVELL_PHY is not set +# CONFIG_MICREL_PHY is not set +# CONFIG_MICROCHIP_PHY is not set +# CONFIG_MICROSEMI_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_QSEMI_PHY is not set +# CONFIG_REALTEK_PHY is not set +# CONFIG_SMSC_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_TERANETICS_PHY is not set +# CONFIG_VITESSE_PHY is not set +# CONFIG_XILINX_GMII2RGMII is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +CONFIG_USB_NET_DRIVERS=y +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_RTL8152 is not set +# CONFIG_USB_LAN78XX is not set +# CONFIG_USB_USBNET is not set +# CONFIG_USB_IPHETH is not set +# CONFIG_WLAN is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# +# CONFIG_WAN is not set +# CONFIG_VMXNET3 is not set +# CONFIG_FUJITSU_ES is not set +# CONFIG_ISDN is not set +# CONFIG_NVM is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set +CONFIG_INPUT_SPARSEKMAP=m +# CONFIG_INPUT_MATRIXKMAP is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ADP5588 is not set +# CONFIG_KEYBOARD_ADP5589 is not set +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_QT1070 is not set +# CONFIG_KEYBOARD_QT2160 is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_KEYBOARD_GPIO_POLLED is not set +# CONFIG_KEYBOARD_TCA6416 is not set +# CONFIG_KEYBOARD_TCA8418 is not set +# CONFIG_KEYBOARD_MATRIX is not set +# CONFIG_KEYBOARD_LM8333 is not set +# CONFIG_KEYBOARD_MAX7359 is not set +# CONFIG_KEYBOARD_MCS is not set +# CONFIG_KEYBOARD_MPR121 is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_AD714X is not set +# CONFIG_INPUT_BMA150 is not set +# CONFIG_INPUT_E3X0_BUTTON is not set +CONFIG_INPUT_PCSPKR=m +# CONFIG_INPUT_MMA8450 is not set +# CONFIG_INPUT_MPU3050 is not set +# CONFIG_INPUT_GP2A is not set +# CONFIG_INPUT_GPIO_BEEPER is not set +# CONFIG_INPUT_GPIO_TILT_POLLED is not set +# CONFIG_INPUT_ATLAS_BTNS is not set +# CONFIG_INPUT_ATI_REMOTE2 is not set +# CONFIG_INPUT_KEYSPAN_REMOTE is not set +# CONFIG_INPUT_KXTJ9 is not set +# CONFIG_INPUT_POWERMATE is not set +# CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set +# CONFIG_INPUT_UINPUT is not set +# CONFIG_INPUT_PCF8574 is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set +# CONFIG_INPUT_ADXL34X is not set +# CONFIG_INPUT_CMA3000 is not set +# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set +# CONFIG_INPUT_DRV260X_HAPTICS is not set +# CONFIG_INPUT_DRV2665_HAPTICS is not set +# CONFIG_INPUT_DRV2667_HAPTICS is not set +# CONFIG_RMI4_CORE is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=y +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO_ALTERA_PS2 is not set +# CONFIG_SERIO_PS2MULT is not set +# CONFIG_SERIO_ARC_PS2 is not set +# CONFIG_USERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_TTY=y +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_VT_CONSOLE_SLEEP=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set +# CONFIG_N_GSM is not set +# CONFIG_TRACE_SINK is not set +CONFIG_DEVMEM=y +CONFIG_DEVKMEM=y + +# +# Serial drivers +# +CONFIG_SERIAL_EARLYCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y +CONFIG_SERIAL_8250_PNP=y +# CONFIG_SERIAL_8250_FINTEK is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DMA=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +# CONFIG_SERIAL_8250_FSL is not set +# CONFIG_SERIAL_8250_DW is not set +# CONFIG_SERIAL_8250_RT288X is not set +# CONFIG_SERIAL_8250_MID is not set +# CONFIG_SERIAL_8250_MOXA is not set + +# +# Non-8250 serial port support +# +# CONFIG_SERIAL_UARTLITE is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +# CONFIG_SERIAL_SCCNXP is not set +# CONFIG_SERIAL_SC16IS7XX is not set +# CONFIG_SERIAL_ALTERA_JTAGUART is not set +# CONFIG_SERIAL_ALTERA_UART is not set +# CONFIG_SERIAL_ARC is not set +# CONFIG_SERIAL_RP2 is not set +# CONFIG_SERIAL_FSL_LPUART is not set +# CONFIG_TTY_PRINTK is not set +# CONFIG_VIRTIO_CONSOLE is not set +CONFIG_IPMI_HANDLER=m +# CONFIG_IPMI_PANIC_EVENT is not set +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_SI=m +# CONFIG_IPMI_SSIF is not set +# CONFIG_IPMI_WATCHDOG is not set +CONFIG_IPMI_POWEROFF=m +# CONFIG_HW_RANDOM is not set +CONFIG_NVRAM=m +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_MWAVE is not set +# CONFIG_RAW_DRIVER is not set +CONFIG_HPET=y +CONFIG_HPET_MMAP=y +CONFIG_HPET_MMAP_DEFAULT=y +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_XILLYBUS is not set + +# +# I2C support +# +CONFIG_I2C=m +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_COMPAT=y +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_MUX=m + +# +# Multiplexer I2C Chip support +# +# CONFIG_I2C_MUX_GPIO is not set +CONFIG_I2C_MUX_PCA9541=m +CONFIG_I2C_MUX_PCA954x=m +# CONFIG_I2C_MUX_REG is not set +CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_SMBUS=m +CONFIG_I2C_ALGOBIT=m + +# +# I2C Hardware Bus support +# + +# +# PC SMBus host controller drivers +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +CONFIG_I2C_I801=m +# CONFIG_I2C_ISCH is not set +# CONFIG_I2C_ISMT is not set +CONFIG_I2C_PIIX4=m +CONFIG_I2C_NFORCE2=m +# CONFIG_I2C_NFORCE2_S4985 is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set + +# +# ACPI drivers +# +# CONFIG_I2C_SCMI is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_CBUS_GPIO is not set +# CONFIG_I2C_DESIGNWARE_PCI is not set +# CONFIG_I2C_GPIO is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_PXA_PCI is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_XILINX is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_DIOLAN_U2C is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_ROBOTFUZZ_OSIF is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_SLAVE is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_SPI is not set +# CONFIG_SPMI is not set +# CONFIG_HSI is not set + +# +# PPS support +# +CONFIG_PPS=y +# CONFIG_PPS_DEBUG is not set + +# +# PPS clients support +# +# CONFIG_PPS_CLIENT_KTIMER is not set +# CONFIG_PPS_CLIENT_LDISC is not set +# CONFIG_PPS_CLIENT_GPIO is not set + +# +# PPS generators support +# + +# +# PTP clock support +# +CONFIG_PTP_1588_CLOCK=y + +# +# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. +# +CONFIG_GPIOLIB=y +CONFIG_GPIO_DEVRES=y +CONFIG_GPIO_ACPI=y +# CONFIG_DEBUG_GPIO is not set +# CONFIG_GPIO_SYSFS is not set + +# +# Memory mapped GPIO drivers +# +# CONFIG_GPIO_AMDPT is not set +# CONFIG_GPIO_DWAPB is not set +# CONFIG_GPIO_GENERIC_PLATFORM is not set +# CONFIG_GPIO_ICH is not set +# CONFIG_GPIO_LYNXPOINT is not set +# CONFIG_GPIO_VX855 is not set +# CONFIG_GPIO_ZX is not set + +# +# Port-mapped I/O GPIO drivers +# +# CONFIG_GPIO_F7188X is not set +# CONFIG_GPIO_IT87 is not set +# CONFIG_GPIO_SCH is not set +# CONFIG_GPIO_SCH311X is not set + +# +# I2C GPIO expanders +# +# CONFIG_GPIO_ADP5588 is not set +# CONFIG_GPIO_MAX7300 is not set +# CONFIG_GPIO_MAX732X is not set +# CONFIG_GPIO_PCA953X is not set +# CONFIG_GPIO_PCF857X is not set +# CONFIG_GPIO_TPIC2810 is not set + +# +# MFD GPIO expanders +# + +# +# PCI GPIO expanders +# +# CONFIG_GPIO_AMD8111 is not set +# CONFIG_GPIO_BT8XX is not set +# CONFIG_GPIO_ML_IOH is not set +# CONFIG_GPIO_RDC321X is not set + +# +# SPI or I2C GPIO expanders +# + +# +# USB GPIO expanders +# +CONFIG_W1=m +CONFIG_W1_CON=y + +# +# 1-wire Bus Masters +# +# CONFIG_W1_MASTER_MATROX is not set +# CONFIG_W1_MASTER_DS2490 is not set +CONFIG_W1_MASTER_DS2482=m +# CONFIG_W1_MASTER_DS1WM is not set +# CONFIG_W1_MASTER_GPIO is not set + +# +# 1-wire Slaves +# +CONFIG_W1_SLAVE_THERM=m +# CONFIG_W1_SLAVE_SMEM is not set +# CONFIG_W1_SLAVE_DS2408 is not set +# CONFIG_W1_SLAVE_DS2413 is not set +# CONFIG_W1_SLAVE_DS2406 is not set +# CONFIG_W1_SLAVE_DS2423 is not set +# CONFIG_W1_SLAVE_DS2431 is not set +# CONFIG_W1_SLAVE_DS2433 is not set +# CONFIG_W1_SLAVE_DS2760 is not set +# CONFIG_W1_SLAVE_DS2780 is not set +# CONFIG_W1_SLAVE_DS2781 is not set +# CONFIG_W1_SLAVE_DS28E04 is not set +# CONFIG_W1_SLAVE_BQ27000 is not set +CONFIG_POWER_SUPPLY=y +# CONFIG_POWER_SUPPLY_DEBUG is not set +# CONFIG_PDA_POWER is not set +# CONFIG_TEST_POWER is not set +# CONFIG_BATTERY_DS2780 is not set +# CONFIG_BATTERY_DS2781 is not set +# CONFIG_BATTERY_DS2782 is not set +# CONFIG_BATTERY_SBS is not set +# CONFIG_BATTERY_BQ27XXX is not set +# CONFIG_BATTERY_MAX17040 is not set +# CONFIG_BATTERY_MAX17042 is not set +# CONFIG_CHARGER_MAX8903 is not set +# CONFIG_CHARGER_LP8727 is not set +# CONFIG_CHARGER_GPIO is not set +# CONFIG_CHARGER_BQ2415X is not set +# CONFIG_CHARGER_BQ24190 is not set +# CONFIG_CHARGER_BQ24735 is not set +# CONFIG_CHARGER_BQ25890 is not set +# CONFIG_CHARGER_SMB347 is not set +# CONFIG_BATTERY_GAUGE_LTC2941 is not set +# CONFIG_CHARGER_RT9455 is not set +# CONFIG_POWER_RESET is not set +# CONFIG_POWER_AVS is not set +CONFIG_HWMON=y +CONFIG_HWMON_VID=m +# CONFIG_HWMON_DEBUG_CHIP is not set + +# +# Native drivers +# +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7410 is not set +# CONFIG_SENSORS_ADT7411 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_ASC7621 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_K10TEMP is not set +# CONFIG_SENSORS_FAM15H_POWER is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS620 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_DELL_SMM is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_FTSTEUTATES is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_G760A is not set +# CONFIG_SENSORS_G762 is not set +# CONFIG_SENSORS_GPIO_FAN is not set +# CONFIG_SENSORS_HIH6130 is not set +# CONFIG_SENSORS_IBMAEM is not set +# CONFIG_SENSORS_IBMPEX is not set +# CONFIG_SENSORS_I5500 is not set +CONFIG_SENSORS_CORETEMP=m +CONFIG_SENSORS_IT87=m +# CONFIG_SENSORS_JC42 is not set +# CONFIG_SENSORS_POWR1220 is not set +# CONFIG_SENSORS_LINEAGE is not set +# CONFIG_SENSORS_LTC2945 is not set +# CONFIG_SENSORS_LTC2990 is not set +# CONFIG_SENSORS_LTC4151 is not set +# CONFIG_SENSORS_LTC4215 is not set +# CONFIG_SENSORS_LTC4222 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_LTC4260 is not set +# CONFIG_SENSORS_LTC4261 is not set +# CONFIG_SENSORS_MAX16065 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX1668 is not set +# CONFIG_SENSORS_MAX197 is not set +# CONFIG_SENSORS_MAX6639 is not set +# CONFIG_SENSORS_MAX6642 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_MAX6697 is not set +# CONFIG_SENSORS_MAX31790 is not set +# CONFIG_SENSORS_MCP3021 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM73 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LM95234 is not set +# CONFIG_SENSORS_LM95241 is not set +# CONFIG_SENSORS_LM95245 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_NTC_THERMISTOR is not set +# CONFIG_SENSORS_NCT6683 is not set +# CONFIG_SENSORS_NCT6775 is not set +# CONFIG_SENSORS_NCT7802 is not set +# CONFIG_SENSORS_NCT7904 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_PMBUS is not set +# CONFIG_SENSORS_SHT15 is not set +# CONFIG_SENSORS_SHT21 is not set +# CONFIG_SENSORS_SHT3x is not set +# CONFIG_SENSORS_SHTC1 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_EMC1403 is not set +# CONFIG_SENSORS_EMC2103 is not set +# CONFIG_SENSORS_EMC6W201 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SCH56XX_COMMON is not set +# CONFIG_SENSORS_SCH5627 is not set +# CONFIG_SENSORS_SCH5636 is not set +# CONFIG_SENSORS_SMM665 is not set +# CONFIG_SENSORS_ADC128D818 is not set +# CONFIG_SENSORS_ADS1015 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_AMC6821 is not set +# CONFIG_SENSORS_INA209 is not set +# CONFIG_SENSORS_INA2XX is not set +# CONFIG_SENSORS_INA3221 is not set +# CONFIG_SENSORS_TC74 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_TMP102 is not set +# CONFIG_SENSORS_TMP103 is not set +# CONFIG_SENSORS_TMP401 is not set +# CONFIG_SENSORS_TMP421 is not set +# CONFIG_SENSORS_VIA_CPUTEMP is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83795 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set + +# +# ACPI drivers +# +# CONFIG_SENSORS_ACPI_POWER is not set +# CONFIG_SENSORS_ATK0110 is not set +CONFIG_THERMAL=y +CONFIG_THERMAL_HWMON=y +CONFIG_THERMAL_WRITABLE_TRIPS=y +CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y +# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set +# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set +# CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR is not set +# CONFIG_THERMAL_GOV_FAIR_SHARE is not set +CONFIG_THERMAL_GOV_STEP_WISE=y +# CONFIG_THERMAL_GOV_BANG_BANG is not set +CONFIG_THERMAL_GOV_USER_SPACE=y +# CONFIG_THERMAL_GOV_POWER_ALLOCATOR is not set +# CONFIG_THERMAL_EMULATION is not set +# CONFIG_INTEL_POWERCLAMP is not set +CONFIG_X86_PKG_TEMP_THERMAL=m +# CONFIG_INTEL_SOC_DTS_THERMAL is not set + +# +# ACPI INT340X thermal drivers +# +# CONFIG_INT340X_THERMAL is not set +# CONFIG_INTEL_PCH_THERMAL is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y +# CONFIG_WATCHDOG_NOWAYOUT is not set +# CONFIG_WATCHDOG_SYSFS is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not set +# CONFIG_XILINX_WATCHDOG is not set +# CONFIG_ZIIRAVE_WATCHDOG is not set +# CONFIG_CADENCE_WATCHDOG is not set +# CONFIG_DW_WATCHDOG is not set +# CONFIG_MAX63XX_WATCHDOG is not set +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_F71808E_WDT is not set +CONFIG_SP5100_TCO=y +# CONFIG_SBC_FITPC2_WATCHDOG is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_IE6XX_WDT is not set +CONFIG_ITCO_WDT=m +# CONFIG_ITCO_VENDOR_SUPPORT is not set +CONFIG_IT8712F_WDT=m +# CONFIG_IT87_WDT is not set +# CONFIG_HP_WATCHDOG is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_PC87413_WDT is not set +CONFIG_NV_TCO=y +# CONFIG_60XX_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_SMSC_SCH311X_WDT is not set +# CONFIG_SMSC37B787_WDT is not set +# CONFIG_VIA_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set +# CONFIG_NI903X_WDT is not set +# CONFIG_MEN_A21_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set +CONFIG_BCMA_POSSIBLE=y + +# +# Broadcom specific AMBA +# +# CONFIG_BCMA is not set + +# +# Multifunction device drivers +# +CONFIG_MFD_CORE=y +# CONFIG_MFD_BCM590XX is not set +# CONFIG_MFD_AXP20X_I2C is not set +# CONFIG_MFD_CROS_EC is not set +# CONFIG_MFD_DA9062 is not set +# CONFIG_MFD_DA9063 is not set +# CONFIG_MFD_DA9150 is not set +# CONFIG_MFD_DLN2 is not set +# CONFIG_MFD_MC13XXX_I2C is not set +# CONFIG_HTC_PASIC3 is not set +CONFIG_LPC_ICH=y +# CONFIG_LPC_SCH is not set +# CONFIG_MFD_INTEL_LPSS_ACPI is not set +# CONFIG_MFD_INTEL_LPSS_PCI is not set +# CONFIG_MFD_JANZ_CMODIO is not set +# CONFIG_MFD_KEMPLD is not set +# CONFIG_MFD_88PM800 is not set +# CONFIG_MFD_88PM805 is not set +# CONFIG_MFD_MAX14577 is not set +# CONFIG_MFD_MAX77693 is not set +# CONFIG_MFD_MAX8907 is not set +# CONFIG_MFD_MT6397 is not set +# CONFIG_MFD_MENF21BMC is not set +# CONFIG_MFD_VIPERBOARD is not set +# CONFIG_MFD_RETU is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_MFD_RDC321X is not set +# CONFIG_MFD_RTSX_PCI is not set +# CONFIG_MFD_RT5033 is not set +# CONFIG_MFD_RTSX_USB is not set +# CONFIG_MFD_SI476X_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_SKY81452 is not set +# CONFIG_ABX500_CORE is not set +# CONFIG_MFD_SYSCON is not set +# CONFIG_MFD_TI_AM335X_TSCADC is not set +# CONFIG_MFD_LP3943 is not set +# CONFIG_TPS6105X is not set +# CONFIG_TPS65010 is not set +# CONFIG_TPS6507X is not set +# CONFIG_MFD_TPS65086 is not set +# CONFIG_MFD_TPS65217 is not set +# CONFIG_MFD_TPS65218 is not set +# CONFIG_MFD_TPS65912_I2C is not set +# CONFIG_MFD_WL1273_CORE is not set +# CONFIG_MFD_LM3533 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_VX855 is not set +# CONFIG_MFD_ARIZONA_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set + +# +# Graphics support +# +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +# CONFIG_AGP_INTEL is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 +# CONFIG_VGA_SWITCHEROO is not set +# CONFIG_DRM is not set + +# +# ACP (Audio CoProcessor) Configuration +# + +# +# Frame buffer Devices +# +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set +# CONFIG_VGASTATE is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_VGACON_SOFT_SCROLLBACK is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_DUMMY_CONSOLE_COLUMNS=80 +CONFIG_DUMMY_CONSOLE_ROWS=25 +# CONFIG_SOUND is not set + +# +# HID support +# +CONFIG_HID=y +# CONFIG_HID_BATTERY_STRENGTH is not set +CONFIG_HIDRAW=y +# CONFIG_UHID is not set +CONFIG_HID_GENERIC=y + +# +# Special HID drivers +# +# CONFIG_HID_A4TECH is not set +# CONFIG_HID_ACRUX is not set +# CONFIG_HID_APPLE is not set +# CONFIG_HID_APPLEIR is not set +# CONFIG_HID_AUREAL is not set +# CONFIG_HID_BELKIN is not set +# CONFIG_HID_BETOP_FF is not set +# CONFIG_HID_CHERRY is not set +# CONFIG_HID_CHICONY is not set +# CONFIG_HID_CMEDIA is not set +# CONFIG_HID_CP2112 is not set +# CONFIG_HID_CYPRESS is not set +# CONFIG_HID_DRAGONRISE is not set +# CONFIG_HID_EMS_FF is not set +# CONFIG_HID_ELECOM is not set +# CONFIG_HID_ELO is not set +# CONFIG_HID_EZKEY is not set +# CONFIG_HID_GEMBIRD is not set +# CONFIG_HID_GFRM is not set +# CONFIG_HID_HOLTEK is not set +# CONFIG_HID_KEYTOUCH is not set +# CONFIG_HID_KYE is not set +# CONFIG_HID_UCLOGIC is not set +# CONFIG_HID_WALTOP is not set +# CONFIG_HID_GYRATION is not set +# CONFIG_HID_ICADE is not set +# CONFIG_HID_TWINHAN is not set +# CONFIG_HID_KENSINGTON is not set +# CONFIG_HID_LCPOWER is not set +# CONFIG_HID_LENOVO is not set +# CONFIG_HID_LOGITECH is not set +# CONFIG_HID_MAGICMOUSE is not set +# CONFIG_HID_MICROSOFT is not set +# CONFIG_HID_MONTEREY is not set +# CONFIG_HID_MULTITOUCH is not set +# CONFIG_HID_NTRIG is not set +# CONFIG_HID_ORTEK is not set +# CONFIG_HID_PANTHERLORD is not set +# CONFIG_HID_PENMOUNT is not set +# CONFIG_HID_PETALYNX is not set +# CONFIG_HID_PICOLCD is not set +# CONFIG_HID_PLANTRONICS is not set +# CONFIG_HID_PRIMAX is not set +# CONFIG_HID_ROCCAT is not set +# CONFIG_HID_SAITEK is not set +# CONFIG_HID_SAMSUNG is not set +# CONFIG_HID_SPEEDLINK is not set +# CONFIG_HID_STEELSERIES is not set +# CONFIG_HID_SUNPLUS is not set +# CONFIG_HID_RMI is not set +# CONFIG_HID_GREENASIA is not set +# CONFIG_HID_SMARTJOYPLUS is not set +# CONFIG_HID_TIVO is not set +# CONFIG_HID_TOPSEED is not set +# CONFIG_HID_THRUSTMASTER is not set +# CONFIG_HID_WACOM is not set +# CONFIG_HID_XINMO is not set +# CONFIG_HID_ZEROPLUS is not set +# CONFIG_HID_ZYDACRON is not set +# CONFIG_HID_SENSOR_HUB is not set +# CONFIG_HID_ALPS is not set + +# +# USB HID support +# +CONFIG_USB_HID=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# I2C HID support +# +# CONFIG_I2C_HID is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_SUPPORT=y +CONFIG_USB_COMMON=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y + +# +# Miscellaneous USB options +# +CONFIG_USB_DEFAULT_PERSIST=y +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_OTG is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set +# CONFIG_USB_ULPI_BUS is not set +CONFIG_USB_MON=y +# CONFIG_USB_WUSB_CBAF is not set + +# +# USB Host Controller Drivers +# +# CONFIG_USB_C67X00_HCD is not set +# CONFIG_USB_XHCI_HCD is not set +CONFIG_USB_EHCI_HCD=m +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_PCI=m +# CONFIG_USB_EHCI_HCD_PLATFORM is not set +# CONFIG_USB_OXU210HP_HCD is not set +# CONFIG_USB_ISP116X_HCD is not set +# CONFIG_USB_ISP1362_HCD is not set +# CONFIG_USB_FOTG210_HCD is not set +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_OHCI_HCD_PCI=m +# CONFIG_USB_OHCI_HCD_PLATFORM is not set +CONFIG_USB_UHCI_HCD=m +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_HCD_TEST_MODE is not set + +# +# USB Device Class drivers +# +CONFIG_USB_ACM=m +# CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may +# + +# +# also be needed; see USB_STORAGE Help for more info +# +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_REALTEK is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_ONETOUCH is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set +# CONFIG_USB_STORAGE_ENE_UB6250 is not set +# CONFIG_USB_UAS is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +# CONFIG_USBIP_CORE is not set +# CONFIG_USB_MUSB_HDRC is not set +# CONFIG_USB_DWC3 is not set +# CONFIG_USB_DWC2 is not set +# CONFIG_USB_CHIPIDEA is not set +# CONFIG_USB_ISP1760 is not set + +# +# USB port drivers +# +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +# CONFIG_USB_SERIAL_SIMPLE is not set +# CONFIG_USB_SERIAL_AIRCABLE is not set +# CONFIG_USB_SERIAL_ARK3116 is not set +# CONFIG_USB_SERIAL_BELKIN is not set +# CONFIG_USB_SERIAL_CH341 is not set +# CONFIG_USB_SERIAL_WHITEHEAT is not set +# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set +# CONFIG_USB_SERIAL_CP210X is not set +# CONFIG_USB_SERIAL_CYPRESS_M8 is not set +# CONFIG_USB_SERIAL_EMPEG is not set +CONFIG_USB_SERIAL_FTDI_SIO=m +# CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set +# CONFIG_USB_SERIAL_IR is not set +# CONFIG_USB_SERIAL_EDGEPORT is not set +# CONFIG_USB_SERIAL_EDGEPORT_TI is not set +# CONFIG_USB_SERIAL_F81232 is not set +# CONFIG_USB_SERIAL_GARMIN is not set +# CONFIG_USB_SERIAL_IPW is not set +# CONFIG_USB_SERIAL_IUU is not set +# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set +CONFIG_USB_SERIAL_KEYSPAN=m +# CONFIG_USB_SERIAL_KEYSPAN_MPR is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA28XA is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA28XB is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA19QW is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA19QI is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA49WLC is not set +# CONFIG_USB_SERIAL_KLSI is not set +# CONFIG_USB_SERIAL_KOBIL_SCT is not set +# CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_METRO is not set +# CONFIG_USB_SERIAL_MOS7720 is not set +# CONFIG_USB_SERIAL_MOS7840 is not set +# CONFIG_USB_SERIAL_MXUPORT is not set +# CONFIG_USB_SERIAL_NAVMAN is not set +CONFIG_USB_SERIAL_PL2303=m +# CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QCAUX is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set +# CONFIG_USB_SERIAL_SPCP8X5 is not set +# CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set +# CONFIG_USB_SERIAL_TI is not set +# CONFIG_USB_SERIAL_CYBERJACK is not set +# CONFIG_USB_SERIAL_XIRCOM is not set +# CONFIG_USB_SERIAL_OPTION is not set +# CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set +# CONFIG_USB_SERIAL_XSENS_MT is not set +# CONFIG_USB_SERIAL_WISHBONE is not set +# CONFIG_USB_SERIAL_SSU100 is not set +# CONFIG_USB_SERIAL_QT2 is not set +# CONFIG_USB_SERIAL_DEBUG is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +CONFIG_USB_TEST=m +# CONFIG_USB_EHSET_TEST_FIXTURE is not set +# CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_YUREX is not set +CONFIG_USB_EZUSB_FX2=m +# CONFIG_USB_HSIC_USB3503 is not set +# CONFIG_USB_LINK_LAYER_TEST is not set +# CONFIG_UCSI is not set + +# +# USB Physical Layer drivers +# +# CONFIG_USB_PHY is not set +# CONFIG_NOP_USB_XCEIV is not set +# CONFIG_USB_GPIO_VBUS is not set +# CONFIG_USB_ISP1301 is not set +# CONFIG_USB_GADGET is not set +# CONFIG_UWB is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ON_DEMAND_PAGING=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y +# CONFIG_INFINIBAND_MTHCA is not set +# CONFIG_INFINIBAND_QIB is not set +# CONFIG_INFINIBAND_CXGB3 is not set +CONFIG_MLX4_INFINIBAND=m +# CONFIG_INFINIBAND_NES is not set +# CONFIG_INFINIBAND_OCRDMA is not set +# CONFIG_INFINIBAND_USNIC is not set +# CONFIG_INFINIBAND_IPOIB is not set +# CONFIG_INFINIBAND_SRP is not set +# CONFIG_INFINIBAND_ISER is not set +CONFIG_INFINIBAND_RDMAVT=m +CONFIG_INFINIBAND_HFI1=m +# CONFIG_HFI1_DEBUG_SDMA_ORDER is not set +CONFIG_HFI1_VERBS_31BIT_PSN=y +# CONFIG_SDMA_VERBOSITY is not set +CONFIG_EDAC_ATOMIC_SCRUB=y +CONFIG_EDAC_SUPPORT=y +# CONFIG_EDAC is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_MC146818_LIB=y +# CONFIG_RTC_CLASS is not set +CONFIG_DMADEVICES=y +# CONFIG_DMADEVICES_DEBUG is not set + +# +# DMA Devices +# +CONFIG_DMA_ACPI=y +# CONFIG_INTEL_IDMA64 is not set +# CONFIG_INTEL_IOATDMA is not set +# CONFIG_QCOM_HIDMA_MGMT is not set +# CONFIG_QCOM_HIDMA is not set +# CONFIG_DW_DMAC is not set +# CONFIG_DW_DMAC_PCI is not set + +# +# DMABUF options +# +# CONFIG_SYNC_FILE is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set +# CONFIG_VFIO is not set +CONFIG_IRQ_BYPASS_MANAGER=y +# CONFIG_VIRT_DRIVERS is not set +CONFIG_VIRTIO=y + +# +# Virtio drivers +# +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_PCI_LEGACY=y +# CONFIG_VIRTIO_BALLOON is not set +# CONFIG_VIRTIO_INPUT is not set +# CONFIG_VIRTIO_MMIO is not set + +# +# Microsoft Hyper-V guest support +# +# CONFIG_HYPERV is not set +# CONFIG_STAGING is not set +CONFIG_X86_PLATFORM_DEVICES=y +# CONFIG_ACERHDF is not set +# CONFIG_DELL_SMBIOS is not set +# CONFIG_DELL_SMO8800 is not set +# CONFIG_FUJITSU_TABLET is not set +# CONFIG_HP_ACCEL is not set +# CONFIG_HP_WIRELESS is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_INTEL_MENLOW is not set +# CONFIG_ASUS_WIRELESS is not set +# CONFIG_ACPI_WMI is not set +# CONFIG_TOPSTAR_LAPTOP is not set +# CONFIG_TOSHIBA_BT_RFKILL is not set +# CONFIG_TOSHIBA_HAPS is not set +# CONFIG_ACPI_CMPC is not set +CONFIG_INTEL_HID_EVENT=m +# CONFIG_INTEL_VBTN is not set +# CONFIG_INTEL_IPS is not set +# CONFIG_INTEL_PMC_CORE is not set +# CONFIG_IBM_RTL is not set +# CONFIG_SAMSUNG_Q10 is not set +# CONFIG_INTEL_RST is not set +# CONFIG_INTEL_SMARTCONNECT is not set +# CONFIG_PVPANIC is not set +# CONFIG_INTEL_PMC_IPC is not set +# CONFIG_SURFACE_PRO3_BUTTON is not set +CONFIG_INTEL_PUNIT_IPC=m +# CONFIG_CHROME_PLATFORMS is not set + +# +# Hardware Spinlock drivers +# + +# +# Clock Source drivers +# +CONFIG_CLKEVT_I8253=y +CONFIG_I8253_LOCK=y +CONFIG_CLKBLD_I8253=y +# CONFIG_ATMEL_PIT is not set +# CONFIG_SH_TIMER_CMT is not set +# CONFIG_SH_TIMER_MTU2 is not set +# CONFIG_SH_TIMER_TMU is not set +# CONFIG_EM_TIMER_STI is not set +# CONFIG_MAILBOX is not set +CONFIG_IOMMU_API=y +CONFIG_IOMMU_SUPPORT=y + +# +# Generic IOMMU Pagetable Support +# +CONFIG_IOMMU_IOVA=y +CONFIG_AMD_IOMMU=y +# CONFIG_AMD_IOMMU_V2 is not set +CONFIG_DMAR_TABLE=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_SVM is not set +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set +CONFIG_INTEL_IOMMU_FLOPPY_WA=y +CONFIG_IRQ_REMAP=y + +# +# Remoteproc drivers +# +# CONFIG_STE_MODEM_RPROC is not set + +# +# Rpmsg drivers +# + +# +# SOC (System On Chip) specific Drivers +# + +# +# Broadcom SoC drivers +# +# CONFIG_SUNXI_SRAM is not set +# CONFIG_SOC_TI is not set +# CONFIG_PM_DEVFREQ is not set +# CONFIG_EXTCON is not set +# CONFIG_MEMORY is not set +# CONFIG_IIO is not set +# CONFIG_NTB is not set +# CONFIG_VME_BUS is not set +# CONFIG_PWM is not set +CONFIG_ARM_GIC_MAX_NR=1 +# CONFIG_IPACK_BUS is not set +# CONFIG_RESET_CONTROLLER is not set +# CONFIG_FMC is not set + +# +# PHY Subsystem +# +CONFIG_GENERIC_PHY=y +# CONFIG_PHY_PXA_28NM_HSIC is not set +# CONFIG_PHY_PXA_28NM_USB2 is not set +# CONFIG_BCM_KONA_USB2_PHY is not set +# CONFIG_POWERCAP is not set +# CONFIG_MCB is not set + +# +# Performance monitor support +# +# CONFIG_RAS is not set +# CONFIG_THUNDERBOLT is not set + +# +# Android +# +# CONFIG_ANDROID is not set +# CONFIG_LIBNVDIMM is not set +# CONFIG_DEV_DAX is not set +# CONFIG_NVMEM is not set +# CONFIG_STM is not set +# CONFIG_INTEL_TH is not set + +# +# FPGA Configuration Support +# +# CONFIG_FPGA is not set + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +CONFIG_FIRMWARE_MEMMAP=y +CONFIG_DELL_RBU=m +CONFIG_DCDBAS=m +CONFIG_DMIID=y +# CONFIG_DMI_SYSFS is not set +CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y +CONFIG_ISCSI_IBFT_FIND=y +# CONFIG_ISCSI_IBFT is not set +# CONFIG_FW_CFG_SYSFS is not set +CONFIG_GOOGLE_FIRMWARE=y + +# +# Google Firmware Drivers +# +CONFIG_GOOGLE_SMI=y +CONFIG_GOOGLE_MEMCONSOLE=y + +# +# EFI (Extensible Firmware Interface) Support +# +CONFIG_EFI_VARS=y +CONFIG_EFI_ESRT=y +CONFIG_EFI_RUNTIME_MAP=y +# CONFIG_EFI_FAKE_MEMMAP is not set +CONFIG_EFI_RUNTIME_WRAPPERS=y +# CONFIG_EFI_BOOTLOADER_CONTROL is not set +# CONFIG_EFI_CAPSULE_LOADER is not set + +# +# File systems +# +CONFIG_DCACHE_WORD_ACCESS=y +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_USE_FOR_EXT2=y +# CONFIG_EXT4_FS_POSIX_ACL is not set +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_EXT4_ENCRYPTION is not set +# CONFIG_EXT4_DEBUG is not set +CONFIG_JBD2=y +CONFIG_JBD2_DEBUG=y +CONFIG_FS_MBCACHE=y +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +# CONFIG_F2FS_FS is not set +# CONFIG_FS_DAX is not set +CONFIG_FS_POSIX_ACL=y +CONFIG_EXPORTFS=y +# CONFIG_EXPORTFS_BLOCK_OPS is not set +CONFIG_FILE_LOCKING=y +CONFIG_MANDATORY_FILE_LOCKING=y +# CONFIG_FS_ENCRYPTION is not set +CONFIG_FSNOTIFY=y +CONFIG_DNOTIFY=y +CONFIG_INOTIFY_USER=y +CONFIG_FANOTIFY=y +# CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not set +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +# CONFIG_QUOTA_DEBUG is not set +CONFIG_QUOTA_TREE=y +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +CONFIG_QUOTACTL=y +CONFIG_QUOTACTL_COMPAT=y +CONFIG_AUTOFS4_FS=m +CONFIG_FUSE_FS=y +# CONFIG_CUSE is not set +# CONFIG_OVERLAY_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_FAT_DEFAULT_UTF8 is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_VMCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_PROC_CHILDREN=y +CONFIG_KERNFS=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS_XATTR=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_CONFIGFS_FS=y +CONFIG_EFIVAR_FS=m +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ORANGEFS_FS is not set +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set +# CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX6FS_FS is not set +CONFIG_ROMFS_FS=m +CONFIG_ROMFS_BACKED_BY_BLOCK=y +CONFIG_ROMFS_ON_BLOCK=y +# CONFIG_PSTORE is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V2=m +CONFIG_NFS_V3=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +# CONFIG_NFS_SWAP is not set +# CONFIG_NFS_V4_1 is not set +# CONFIG_NFS_USE_LEGACY_DNS is not set +CONFIG_NFS_USE_KERNEL_DNS=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_GRACE_PERIOD=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_NFS_ACL_SUPPORT=m +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=m +CONFIG_SUNRPC_GSS=m +# CONFIG_SUNRPC_DEBUG is not set +CONFIG_SUNRPC_XPRT_RDMA=m +# CONFIG_CEPH_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_MAC_ROMAN is not set +# CONFIG_NLS_MAC_CELTIC is not set +# CONFIG_NLS_MAC_CENTEURO is not set +# CONFIG_NLS_MAC_CROATIAN is not set +# CONFIG_NLS_MAC_CYRILLIC is not set +# CONFIG_NLS_MAC_GAELIC is not set +# CONFIG_NLS_MAC_GREEK is not set +# CONFIG_NLS_MAC_ICELAND is not set +# CONFIG_NLS_MAC_INUIT is not set +# CONFIG_NLS_MAC_ROMANIAN is not set +# CONFIG_NLS_MAC_TURKISH is not set +CONFIG_NLS_UTF8=y +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y + +# +# printk and dmesg options +# +CONFIG_PRINTK_TIME=y +CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_DYNAMIC_DEBUG is not set + +# +# Compile-time checks and compiler options +# +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_INFO_REDUCED is not set +# CONFIG_DEBUG_INFO_SPLIT is not set +# CONFIG_DEBUG_INFO_DWARF4 is not set +# CONFIG_GDB_SCRIPTS is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=2048 +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_READABLE_ASM is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_PAGE_OWNER is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_SECTION_MISMATCH is not set +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_ARCH_WANT_FRAME_POINTERS=y +CONFIG_FRAME_POINTER=y +# CONFIG_STACK_VALIDATION is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 +CONFIG_DEBUG_KERNEL=y + +# +# Memory Debugging +# +# CONFIG_PAGE_EXTENSION is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_PAGE_POISONING is not set +# CONFIG_DEBUG_PAGE_REF is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +CONFIG_HAVE_DEBUG_KMEMLEAK=y +# CONFIG_DEBUG_KMEMLEAK is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_VIRTUAL is not set +CONFIG_DEBUG_MEMORY_INIT=y +# CONFIG_DEBUG_PER_CPU_MAPS is not set +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +CONFIG_HAVE_ARCH_KMEMCHECK=y +CONFIG_HAVE_ARCH_KASAN=y +# CONFIG_KASAN is not set +CONFIG_ARCH_HAS_KCOV=y +# CONFIG_KCOV is not set +# CONFIG_DEBUG_SHIRQ is not set + +# +# Debug Lockups and Hangs +# +CONFIG_LOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +# CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_DETECT_HUNG_TASK is not set +# CONFIG_WQ_WATCHDOG is not set +# CONFIG_PANIC_ON_OOPS is not set +CONFIG_PANIC_ON_OOPS_VALUE=0 +CONFIG_PANIC_TIMEOUT=0 +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +# CONFIG_SCHED_STACK_END_CHECK is not set +# CONFIG_DEBUG_TIMEKEEPING is not set +CONFIG_TIMER_STATS=y + +# +# Lock Debugging (spinlocks, mutexes, etc...) +# +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_ATOMIC_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_LOCK_TORTURE_TEST is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_PI_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set + +# +# RCU Debugging +# +# CONFIG_PROVE_RCU is not set +# CONFIG_SPARSE_RCU_POINTER is not set +# CONFIG_TORTURE_TEST is not set +# CONFIG_RCU_PERF_TEST is not set +# CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +# CONFIG_RCU_TRACE is not set +# CONFIG_RCU_EQS_DEBUG is not set +CONFIG_DEBUG_WQ_FORCE_RR_CPU=y +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set +# CONFIG_NOTIFIER_ERROR_INJECTION is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +CONFIG_HAVE_FENTRY=y +CONFIG_HAVE_C_RECORDMCOUNT=y +CONFIG_TRACE_CLOCK=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_TRACING=y +CONFIG_GENERIC_TRACER=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +CONFIG_FUNCTION_TRACER=y +CONFIG_FUNCTION_GRAPH_TRACER=y +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_FTRACE_SYSCALLS=y +# CONFIG_TRACER_SNAPSHOT is not set +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_KPROBE_EVENT=y +# CONFIG_UPROBE_EVENT is not set +CONFIG_PROBE_EVENTS=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_DYNAMIC_FTRACE_WITH_REGS=y +# CONFIG_FUNCTION_PROFILER is not set +CONFIG_FTRACE_MCOUNT_RECORD=y +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_MMIOTRACE is not set +# CONFIG_HIST_TRIGGERS is not set +# CONFIG_TRACEPOINT_BENCHMARK is not set +# CONFIG_RING_BUFFER_BENCHMARK is not set +# CONFIG_RING_BUFFER_STARTUP_TEST is not set +# CONFIG_TRACE_ENUM_MAP_FILE is not set +CONFIG_TRACING_EVENTS_GPIO=y + +# +# Runtime Testing +# +CONFIG_LKDTM=y +# CONFIG_TEST_LIST_SORT is not set +# CONFIG_KPROBES_SANITY_TEST is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_RBTREE_TEST is not set +# CONFIG_INTERVAL_TREE_TEST is not set +# CONFIG_PERCPU_TEST is not set +# CONFIG_ATOMIC64_SELFTEST is not set +# CONFIG_TEST_HEXDUMP is not set +# CONFIG_TEST_STRING_HELPERS is not set +# CONFIG_TEST_KSTRTOX is not set +# CONFIG_TEST_PRINTF is not set +CONFIG_TEST_BITMAP=y +# CONFIG_TEST_UUID is not set +# CONFIG_TEST_RHASHTABLE is not set +# CONFIG_TEST_HASH is not set +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_TEST_LKM is not set +# CONFIG_TEST_USER_COPY is not set +# CONFIG_TEST_BPF is not set +# CONFIG_TEST_FIRMWARE is not set +# CONFIG_TEST_UDELAY is not set +# CONFIG_MEMTEST is not set +# CONFIG_TEST_STATIC_KEYS is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y +# CONFIG_UBSAN is not set +CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y +# CONFIG_STRICT_DEVMEM is not set +CONFIG_X86_VERBOSE_BOOTUP=y +CONFIG_EARLY_PRINTK=y +CONFIG_EARLY_PRINTK_DBGP=y +# CONFIG_EARLY_PRINTK_EFI is not set +# CONFIG_X86_PTDUMP_CORE is not set +# CONFIG_X86_PTDUMP is not set +# CONFIG_EFI_PGT_DUMP is not set +# CONFIG_DEBUG_RODATA_TEST is not set +# CONFIG_DEBUG_WX is not set +# CONFIG_DEBUG_SET_MODULE_RONX is not set +CONFIG_DEBUG_NX_TEST=m +CONFIG_DOUBLEFAULT=y +# CONFIG_DEBUG_TLBFLUSH is not set +# CONFIG_IOMMU_STRESS is not set +CONFIG_HAVE_MMIOTRACE_SUPPORT=y +# CONFIG_X86_DECODER_SELFTEST is not set +CONFIG_IO_DELAY_TYPE_0X80=0 +CONFIG_IO_DELAY_TYPE_0XED=1 +CONFIG_IO_DELAY_TYPE_UDELAY=2 +CONFIG_IO_DELAY_TYPE_NONE=3 +CONFIG_IO_DELAY_0X80=y +# CONFIG_IO_DELAY_0XED is not set +# CONFIG_IO_DELAY_UDELAY is not set +# CONFIG_IO_DELAY_NONE is not set +CONFIG_DEFAULT_IO_DELAY_TYPE=0 +CONFIG_DEBUG_BOOT_PARAMS=y +# CONFIG_CPA_DEBUG is not set +CONFIG_OPTIMIZE_INLINING=y +# CONFIG_DEBUG_ENTRY is not set +# CONFIG_DEBUG_NMI_SELFTEST is not set +CONFIG_X86_DEBUG_FPU=y +# CONFIG_PUNIT_ATOM_DEBUG is not set + +# +# Security options +# +CONFIG_KEYS=y +# CONFIG_PERSISTENT_KEYRINGS is not set +# CONFIG_BIG_KEYS is not set +# CONFIG_ENCRYPTED_KEYS is not set +# CONFIG_KEY_DH_OPERATIONS is not set +# CONFIG_SECURITY_DMESG_RESTRICT is not set +CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set +CONFIG_SECURITY_NETWORK=y +# CONFIG_SECURITY_PATH is not set +# CONFIG_INTEL_TXT is not set +CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y +CONFIG_HAVE_ARCH_HARDENED_USERCOPY=y +# CONFIG_HARDENED_USERCOPY is not set +# CONFIG_SECURITY_SELINUX is not set +# CONFIG_SECURITY_SMACK is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_SECURITY_APPARMOR is not set +# CONFIG_SECURITY_LOADPIN is not set +# CONFIG_SECURITY_YAMA is not set +CONFIG_INTEGRITY=y +# CONFIG_INTEGRITY_SIGNATURE is not set +CONFIG_INTEGRITY_AUDIT=y +# CONFIG_IMA is not set +# CONFIG_EVM is not set +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_RNG_DEFAULT=m +CONFIG_CRYPTO_AKCIPHER2=y +CONFIG_CRYPTO_KPP2=y +# CONFIG_CRYPTO_RSA is not set +# CONFIG_CRYPTO_DH is not set +# CONFIG_CRYPTO_ECDH is not set +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_USER is not set +CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y +# CONFIG_CRYPTO_GF128MUL is not set +CONFIG_CRYPTO_NULL=y +CONFIG_CRYPTO_NULL2=y +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_MCRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_CHACHA20POLY1305 is not set +# CONFIG_CRYPTO_SEQIV is not set +CONFIG_CRYPTO_ECHAINIV=m + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set +# CONFIG_CRYPTO_KEYWRAP is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_CMAC is not set +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set +CONFIG_CRYPTO_VMAC=y + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=y +# CONFIG_CRYPTO_CRC32C_INTEL is not set +# CONFIG_CRYPTO_CRC32 is not set +# CONFIG_CRYPTO_CRC32_PCLMUL is not set +# CONFIG_CRYPTO_CRCT10DIF is not set +# CONFIG_CRYPTO_GHASH is not set +# CONFIG_CRYPTO_POLY1305 is not set +# CONFIG_CRYPTO_POLY1305_X86_64 is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=y +# CONFIG_CRYPTO_SHA1_SSSE3 is not set +# CONFIG_CRYPTO_SHA256_SSSE3 is not set +# CONFIG_CRYPTO_SHA512_SSSE3 is not set +# CONFIG_CRYPTO_SHA1_MB is not set +# CONFIG_CRYPTO_SHA256_MB is not set +# CONFIG_CRYPTO_SHA512_MB is not set +CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_SHA512=y +# CONFIG_CRYPTO_SHA3 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set +# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=y +# CONFIG_CRYPTO_AES_X86_64 is not set +# CONFIG_CRYPTO_AES_NI_INTEL is not set +# CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_ARC4=y +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAMELLIA_X86_64 is not set +# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set +# CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_DES3_EDE_X86_64 is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SALSA20_X86_64 is not set +# CONFIG_CRYPTO_CHACHA20 is not set +# CONFIG_CRYPTO_CHACHA20_X86_64 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set +# CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set +# CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set +# CONFIG_CRYPTO_TWOFISH_X86_64 is not set +# CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set +# CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +CONFIG_CRYPTO_LZO=y +# CONFIG_CRYPTO_842 is not set +# CONFIG_CRYPTO_LZ4 is not set +# CONFIG_CRYPTO_LZ4HC is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_DRBG_MENU=m +CONFIG_CRYPTO_DRBG_HMAC=y +# CONFIG_CRYPTO_DRBG_HASH is not set +CONFIG_CRYPTO_DRBG=m +CONFIG_CRYPTO_JITTERENTROPY=m +# CONFIG_CRYPTO_USER_API_HASH is not set +# CONFIG_CRYPTO_USER_API_SKCIPHER is not set +# CONFIG_CRYPTO_USER_API_RNG is not set +# CONFIG_CRYPTO_USER_API_AEAD is not set +CONFIG_CRYPTO_HW=y +# CONFIG_CRYPTO_DEV_PADLOCK is not set +# CONFIG_CRYPTO_DEV_CCP is not set +# CONFIG_CRYPTO_DEV_QAT_DH895xCC is not set +# CONFIG_CRYPTO_DEV_QAT_C3XXX is not set +# CONFIG_CRYPTO_DEV_QAT_C62X is not set +# CONFIG_CRYPTO_DEV_QAT_DH895xCCVF is not set +# CONFIG_CRYPTO_DEV_QAT_C3XXXVF is not set +# CONFIG_CRYPTO_DEV_QAT_C62XVF is not set +# CONFIG_ASYMMETRIC_KEY_TYPE is not set + +# +# Certificates for signature checking +# +CONFIG_HAVE_KVM=y +CONFIG_HAVE_KVM_IRQCHIP=y +CONFIG_HAVE_KVM_IRQFD=y +CONFIG_HAVE_KVM_IRQ_ROUTING=y +CONFIG_HAVE_KVM_EVENTFD=y +CONFIG_KVM_MMIO=y +CONFIG_KVM_ASYNC_PF=y +CONFIG_HAVE_KVM_MSI=y +CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y +CONFIG_KVM_VFIO=y +CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT=y +CONFIG_KVM_COMPAT=y +CONFIG_HAVE_KVM_IRQ_BYPASS=y +CONFIG_VIRTUALIZATION=y +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y +# CONFIG_KVM_MMU_AUDIT is not set +# CONFIG_KVM_DEVICE_ASSIGNMENT is not set +# CONFIG_VHOST_NET is not set +# CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_HAVE_ARCH_BITREVERSE is not set +CONFIG_RATIONAL=y +CONFIG_GENERIC_STRNCPY_FROM_USER=y +CONFIG_GENERIC_STRNLEN_USER=y +CONFIG_GENERIC_NET_UTILS=y +CONFIG_GENERIC_FIND_FIRST_BIT=y +CONFIG_GENERIC_PCI_IOMAP=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_IO=y +CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y +CONFIG_ARCH_HAS_FAST_MULTIPLIER=y +# CONFIG_CRC_CCITT is not set +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC32_SELFTEST is not set +CONFIG_CRC32_SLICEBY8=y +# CONFIG_CRC32_SLICEBY4 is not set +# CONFIG_CRC32_SARWATE is not set +# CONFIG_CRC32_BIT is not set +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=y +# CONFIG_CRC8 is not set +# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set +# CONFIG_RANDOM32_SELFTEST is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_LZ4_DECOMPRESS=y +CONFIG_XZ_DEC=y +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y +CONFIG_XZ_DEC_BCJ=y +# CONFIG_XZ_DEC_TEST is not set +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y +CONFIG_DECOMPRESS_XZ=y +CONFIG_DECOMPRESS_LZO=y +CONFIG_DECOMPRESS_LZ4=y +CONFIG_GENERIC_ALLOCATOR=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_RADIX_TREE_MULTIORDER=y +CONFIG_ASSOCIATIVE_ARRAY=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT_MAP=y +CONFIG_HAS_DMA=y +CONFIG_CHECK_SIGNATURE=y +CONFIG_CPU_RMAP=y +CONFIG_DQL=y +CONFIG_GLOB=y +# CONFIG_GLOB_SELFTEST is not set +CONFIG_NLATTR=y +CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y +# CONFIG_CORDIC is not set +# CONFIG_DDR is not set +CONFIG_IRQ_POLL=y +CONFIG_OID_REGISTRY=m +CONFIG_UCS2_STRING=y +# CONFIG_SG_SPLIT is not set +CONFIG_SG_POOL=y +CONFIG_ARCH_HAS_SG_CHAIN=y +CONFIG_ARCH_HAS_PMEM_API=y +CONFIG_ARCH_HAS_MMIO_FLUSH=y From 04ed1b49454dd2ce5d19a877b34612039a069a69 Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 23 Jan 2016 20:54:46 -0500 Subject: [PATCH 20/28] net-test: adds a gce-install.sh script to build and install kernel on GCE machine This commit adds a script to build an upstream Linux kernel and install it and boot it on a Google Cloud (GCE) virtual machine. Usage: ./gce-install.sh -m e.g.: ./gce-install.sh -m 1.2.3.4 ssh 1.2.3.4 Tested: On GCE. Effort: net-test Change-Id: I149233b802202335af93183728050aadb52cca2c --- .gitignore | 3 ++ gce-install.sh | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100755 gce-install.sh diff --git a/.gitignore b/.gitignore index 9fd4c9533b3d..c19a4e5cb0b6 100644 --- a/.gitignore +++ b/.gitignore @@ -156,6 +156,9 @@ x509.genkey /allrandom.config /allyes.config +# tmp build/install directory for /gce-install.sh: +/gce/ + # Kconfig savedefconfig output /defconfig diff --git a/gce-install.sh b/gce-install.sh new file mode 100755 index 000000000000..1d84fe1d313e --- /dev/null +++ b/gce-install.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Build a Linux kernel, install it on a remote machine, and reboot the machine. +# This script is only known to work for Debian/Ubuntu-based GCE VMs. + +set -e + +usage() { + echo "gce-install.sh -m " +} + +MACHINE="" +VERBOSE="" + +while getopts "h?vm:p:z:" opt; do + case "${opt}" in + h|\?) + usage + exit 0 + ;; + v) + VERBOSE="set -x" + ;; + m) + MACHINE=${OPTARG} + ;; + esac +done + +if [ -z ${MACHINE} ]; then + usage + exit -1 +fi + +umask 022 + +${VERBOSE} + +BRANCH=`git rev-parse --abbrev-ref HEAD | sed s/-/+/g` +SHA1=`git rev-parse --short HEAD` +LOCALVERSION=+${BRANCH}+${SHA1}+GCE +GCE_PKG_DIR=${PWD}/gce/${LOCALVERSION}/pkg +GCE_INSTALL_DIR=${PWD}/gce/${LOCALVERSION}/install +GCE_BUILD_DIR=${PWD}/gce/${LOCALVERSION}/build +KERNEL_PKG=kernel-${LOCALVERSION}.tar.gz2 +MAKE_OPTS="-j`nproc` \ + LOCALVERSION=${LOCALVERSION} \ + EXTRAVERSION="" \ + INSTALL_PATH=${GCE_INSTALL_DIR}/boot \ + INSTALL_MOD_PATH=${GCE_INSTALL_DIR}" + +echo "cleaning..." +mkdir -p ${GCE_BUILD_DIR} +mkdir -p ${GCE_INSTALL_DIR}/boot +mkdir -p ${GCE_PKG_DIR} + +set +e +echo "copying config.gce to .config ..." +cp config.gce .config +echo "running make olddefconfig ..." +make olddefconfig > /tmp/make.olddefconfig +make ${MAKE_OPTS} prepare > /tmp/make.prepare +echo "making..." +make ${MAKE_OPTS} > /tmp/make.default +echo "making modules ..." +make ${MAKE_OPTS} modules > /tmp/make.modules +echo "making install ..." +make ${MAKE_OPTS} install > /tmp/make.install +echo "making modules_install ..." +make ${MAKE_OPTS} modules_install > /tmp/make.modules_install +set -e + +echo "making tarball ..." +(cd ${GCE_INSTALL_DIR}; tar -cvzf ${GCE_PKG_DIR}/${KERNEL_PKG} boot/* lib/modules/* --owner=0 --group=0 > /tmp/make.tarball) + +echo "running: scp $GCE_PKG_DIR/$KERNEL_PKG ${MACHINE}:~/" +scp ${GCE_PKG_DIR}/${KERNEL_PKG} ${MACHINE}:~/ + +ssh ${MACHINE} " +$VERBOSE +sudo rm -rf /boot/*GCE /lib/modules/*GCE +sudo tar --no-same-owner -xzvf ${KERNEL_PKG} -C / > /tmp/tar.out.txt +cd /boot +for v in \$(ls vmlinuz-* | sed s/vmlinuz-//g); do + sudo mkinitramfs -k -o initrd.img-\${v} \${v} +done +sudo update-grub +sudo reboot +" +umask 027 From e7db8639c6a6a71785028c0804bc8b2b5942f57c Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 29 Aug 2018 10:27:59 -0400 Subject: [PATCH 21/28] net-test: scripts for testing BBR with upstream Linux kernels - runs a small set of simple tests - sets up netem to emulate a configured network scenario - runs /usr/bin/netperf and /usr/bin/netserver to generate traffic - writes pcaps and ss logs - analyzes test results - generates graphs Usage: cd gtests/net/tcp/bbr/nsperf/ ./configure.sh ./run_tests.sh ./graph_tests.sh Thanks for Jason Xing for an included bug fix: 0e156e93538ef Effort: net-test Change-Id: I38662f554b3c905aa79947a2c52a2ecfe3943f8c --- gtests/net/tcp/bbr/nsperf/configure.sh | 32 ++ gtests/net/tcp/bbr/nsperf/graph_tests.sh | 305 ++++++++++++ gtests/net/tcp/bbr/nsperf/median.py | 43 ++ gtests/net/tcp/bbr/nsperf/nsperf.py | 549 +++++++++++++++++++++ gtests/net/tcp/bbr/nsperf/run_tests.sh | 188 +++++++ gtests/net/tcp/bbr/nsperf/ss_log_parser.py | 207 ++++++++ 6 files changed, 1324 insertions(+) create mode 100755 gtests/net/tcp/bbr/nsperf/configure.sh create mode 100755 gtests/net/tcp/bbr/nsperf/graph_tests.sh create mode 100755 gtests/net/tcp/bbr/nsperf/median.py create mode 100755 gtests/net/tcp/bbr/nsperf/nsperf.py create mode 100755 gtests/net/tcp/bbr/nsperf/run_tests.sh create mode 100755 gtests/net/tcp/bbr/nsperf/ss_log_parser.py diff --git a/gtests/net/tcp/bbr/nsperf/configure.sh b/gtests/net/tcp/bbr/nsperf/configure.sh new file mode 100755 index 000000000000..9d44c4c99783 --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/configure.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Install the software necessary to run and graph the tests in +# this directory. Tested on Ubuntu Linux. + +sudo apt update +sudo apt install -y python2 +sudo apt install -y netperf +sudo apt install -y gnuplot-nox + +# On Ubuntu 18.04.2 LTS, there are issues with the iproute2 binaries: +# (1) the 'tc' binary has a bug and cannot parse netem random loss rates +# (2) the 'ss' tool is missing recent socket stats +# In addition, all off-the-shelf iproute2 binaries lack support for features +# added in BBRv3. +# So to use this testing tool we build our own iproute2 tools +# from the latest iproute2 sources, with patches from the BBRv3 +# source tree: + +sudo apt install -y pkg-config make bison flex + +# The patches for the iproute2 package are in the root directory of this +# test branch: +PATCH_DIR=`pwd`/../../../../../ + +sudo bash -c "\ + mkdir -p /root/iproute2/; \ + cd /root/iproute2; \ + git clone git://git.kernel.org/pub/scm/network/iproute2/iproute2.git; \ + cd ./iproute2/ ; \ + git am ${PATCH_DIR}/*patch \ + ./configure ; \ + make" diff --git a/gtests/net/tcp/bbr/nsperf/graph_tests.sh b/gtests/net/tcp/bbr/nsperf/graph_tests.sh new file mode 100755 index 000000000000..71064b5707a8 --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/graph_tests.sh @@ -0,0 +1,305 @@ +#!/bin/bash +# For all test results, generate png graphs and an HTML page linking to them. +# +# By default, graphs all tests: +# ./graph_tests.sh +# But you can also graph a subset of tests by setting the "tests" +# environment variable: +# tests="coexist shallow" ./graph_tests.sh +# +# fancier usage: +# indir=out.180.sec/ outdir=graphs/ ./graph_tests.sh + +if [ "$indir" = "" ]; then + indir="out/" +fi + +if [ "$outdir" = "" ]; then + outdir="graphs/" +fi + +# By default graph all tests. +# To graph a subset of tests, set the environment variable: tests="foo bar". +if [ "$tests" = "" ]; then + tests="coexist random_loss shallow bufferbloat ecn_bulk" +fi + +format=png +if [ "$format" = "png" ]; then + PNG_SIZE="1024,768" + TERMINAL="set terminal pngcairo noenhanced size $PNG_SIZE" +else + TERMINAL="set terminal wxt noenhanced size 1024,768" +fi + +mkdir -p $outdir + +# Start HTML for a web page showing all png graphs we generate. +TITLE="bbr v2 alpha upstream core tests" +HTML_PATH="${outdir}/index.html" +echo > $HTML_PATH +echo "$TITLE $TITLE
\n" >> $HTML_PATH + +if [[ $tests == *"coexist"* ]]; then + ####### + # show acceptable coexistence w/ cubic: + # graph tput of 1 cubic, 1 BBR at a range of buffer depths: + # (bw=50M, rtt=30ms, buf={...}xBDP) + rm -f $outdir/coexist.* + for cc_combo in cubic:1,bbr1:1 cubic:1,bbr:1; do + for bdp_of_buf in 0.1 1 2 4 8 16; do + echo -n "$bdp_of_buf " >> $outdir/coexist.${cc_combo} + grep THROUGHPUT $indir/coexist/${cc_combo}/${bdp_of_buf}/netperf.out.1.txt | \ + cut -d= -f2 >> $outdir/coexist.${cc_combo} + done + done + + OUTPNG="$outdir/coexist_1xcubic_1xbbr_50M_30ms_varybuf.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top left\n\ + set ytics nomirror\n\ + set grid\n\ + set title 'cubic vs BBR throughput'\n\ + set xlabel 'buffer size (as a multiple of BDP)'\n\ + set ylabel 'throughput in Mbit/sec'\n\ + set yrange [0:50]\n\ + plot '$outdir/coexist.cubic:1,bbr1:1' u 1:2 t 'bbr1' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/coexist.cubic:1,bbr:1' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/coexist.gnuplot + + gnuplot -persist $outdir/coexist.gnuplot + echo -e "\n" >> $HTML_PATH +fi + + +if [[ $tests == *"random_loss"* ]]; then + ####### + # show high throughput with random loss up to design parameter: + # graph tput of cubic, bbr at a range of random loss rates + # (bw=1G, rtt=100ms, loss={...} + rm -f $outdir/random_loss.* + loss_rates="0.00001 0.0001 0.001 0.01 0.1 0.2 0.5 1 2 3 10 15 20" + for loss_rate in $loss_rates; do + for cc_name in cubic bbr1 bbr; do + cc="${cc_name}:1" + sumd="$indir/random_loss/${cc}/${loss_rate}/summary/" + mkdir -p $sumd + rm -f "${sumd}/*txt" + for rep in `seq 1 10`; do + d="$indir/random_loss/${cc}/${loss_rate}/rep-${rep}" + grep THROUGHPUT ${d}/netperf.out.0.txt | cut -d= -f2 >> ${sumd}/THROUGHPUT.samples.txt + done + infile="${sumd}/THROUGHPUT.samples.txt" ./median.py > \ + ${sumd}/THROUGHPUT.median.txt + echo -n "$loss_rate " >> $outdir/random_loss.${cc} + cat ${sumd}/THROUGHPUT.median.txt >> $outdir/random_loss.${cc} + done + done + + OUTPNG="$outdir/random_loss_1G_100ms_varyloss.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top right\n\ + set ytics nomirror\n\ + set grid\n\ + set logscale x\n\ + set title 'cubic, bbr1, and bbr throughput with random loss'\n\ + set xlabel 'random loss rate, in percent'\n\ + set ylabel 'throughput in Mbit/sec'\n\ + set yrange [0:1000]\n\ + set xrange [:20]\n\ + plot '$outdir/random_loss.cubic:1' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\ + '$outdir/random_loss.bbr1:1' u 1:2 t 'bbr1' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/random_loss.bbr:1' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/random_loss.gnuplot + + gnuplot -persist $outdir/random_loss.gnuplot + echo -e "\n" >> $HTML_PATH +fi + + +if [[ $tests == *"shallow"* ]]; then + ####### + # show reasonably low loss rates in shallow buffers: + # graph retransmit rate for range of flow counts + # (bw=1G, rtt=100ms, buf=1ms, num_flows={...}) + # BDP is 1G*100ms = 8256 packets + rm -f $outdir/shallow_buf.* + for num_flows in 1 10 30 60 100; do + for cc_name in cubic bbr1 bbr; do + echo -n "$num_flows " >> $outdir/shallow_buf.${cc_name} + d="$indir/shallow/${cc_name}:${num_flows}/${num_flows}" + infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py + cat ${d}/retrans.out.total.txt >> $outdir/shallow_buf.${cc_name} + done + done + + OUTPNG="$outdir/shallow_buf_1G_100ms_varynumflows.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top left\n\ + set ytics nomirror\n\ + set grid\n\ + set logscale x\n\ + set title 'cubic, bbr1, and bbr retransmit rate in shallow buffers'\n\ + set xlabel 'number of flows'\n\ + set ylabel 'retransmit rate (percent)'\n\ + set yrange [0:15]\n\ + set xrange [:]\n\ + plot '$outdir/shallow_buf.cubic' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\ + '$outdir/shallow_buf.bbr1' u 1:2 t 'bbr1' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/shallow_buf.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/shallow_buf.gnuplot + + gnuplot -persist $outdir/shallow_buf.gnuplot + echo -e "\n" >> $HTML_PATH +fi + + +if [[ $tests == *"bufferbloat"* ]]; then + ####### + # show low delay in deep buffers, even without ECN signal: + # graph p50 RTT for two flows using either cubic or bbr, + # at a range of buffer depths. + # (bw=50M, rtt=30ms, buf={...}xBDP) + rm -f $outdir/bufferbloat.* + for bdp_of_buf in 1 10 50 100; do + for cc_name in cubic bbr1 bbr; do + echo -n "$bdp_of_buf " >> $outdir/bufferbloat.${cc_name} + num_flows=2 + d="$indir/bufferbloat/${cc_name}:${num_flows}/${bdp_of_buf}" + infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py + cat ${d}/rtt_p50.out.total.txt >> $outdir/bufferbloat.${cc_name} + done + done + + OUTPNG="$outdir/bufferbloat_50M_30ms_varybuf.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top left\n\ + set ytics nomirror\n\ + set grid\n\ + set title 'cubic, bbr1, and bbr median RTT'\n\ + set xlabel 'buffer size (as a multiple of BDP)'\n\ + set ylabel 'median srtt sample (ms)'\n\ + set yrange [0:]\n\ + set xrange [1:100]\n\ + plot '$outdir/bufferbloat.cubic' u 1:2 t 'cubic' w lp lw 2 pt 7 lt rgb \"#d7191c\",\ + '$outdir/bufferbloat.bbr1' u 1:2 t 'bbr1' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/bufferbloat.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/bufferbloat.gnuplot + + gnuplot -persist $outdir/bufferbloat.gnuplot + echo -e "\n" >> $HTML_PATH +fi + +if [[ $tests == *"ecn_bulk"* ]]; then + rm -f $outdir/ecn_bulk.* + + ####### + # show ECN support can keep queues very low: + # graph p50 for range of flow counts. + # (bw=1G, rtt=1ms, num_flows={...}) + # For each CC and flow count, show the median of the p50 RTT from N trials. + for cc_name in dctcp bbr1 bbr; do + for num_flows in 1 4 10 40 100; do + sumd="$indir/ecn_bulk/${cc_name}/${num_flows}/summary/" + mkdir -p $sumd + rm -f "${sumd}/*txt" + for rep in `seq 1 10`; do + # Find median srtt for this rep, and add it to list + # of all samples. + d="$indir/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}" + infile=${d}/ss.log outdir=${d}/ ./ss_log_parser.py + cat ${d}/rtt_p50.out.total.txt >> ${sumd}/rtt_p50.out.samples.txt + done + infile="${sumd}/rtt_p50.out.samples.txt" ./median.py > \ + ${sumd}/rtt_p50.out.median.txt + echo -n "$num_flows " >> $outdir/ecn_bulk.${cc_name} + cat ${sumd}/rtt_p50.out.median.txt >> $outdir/ecn_bulk.${cc_name} + done + done + + OUTPNG="$outdir/ecn_bulk_1G_1ms_rtt_varynumflows.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top left\n\ + set ytics nomirror\n\ + set grid\n\ + set logscale x\n\ + set title 'dctcp, bbr1, and bbr median RTT'\n\ + set xlabel 'number of flows'\n\ + set ylabel 'median srtt sample (ms)'\n\ + set yrange [0:]\n\ + set xrange [1:100]\n\ + plot '$outdir/ecn_bulk.dctcp' u 1:2 t 'dctcp' w lp lw 2 pt 7 lt rgb \"#d7191c\",\ + '$outdir/ecn_bulk.bbr1' u 1:2 t 'bbr1' w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/ecn_bulk.bbr' u 1:2 t 'bbr' w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/ecn_bulk_rtt.gnuplot + + gnuplot -persist $outdir/ecn_bulk_rtt.gnuplot + echo -e "\n" >> $HTML_PATH + + + ####### + # show ECN support can keep queues very low: + # graph median of retrans rates across N trials: + for cc_name in dctcp bbr1 bbr; do + for num_flows in 1 4 10 40 100; do + sumd="$indir/ecn_bulk/${cc_name}/${num_flows}/summary/" + mkdir -p $sumd + rm -f "${sumd}/*txt" + for rep in `seq 1 10`; do + # Find overall retrans rate for this rep, and add it to list + # of all samples. + d="$indir/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}" + cat ${d}/retrans.out.total.txt >> ${sumd}/retrans.out.samples.txt + done + infile="${sumd}/retrans.out.samples.txt" ./median.py > \ + ${sumd}/retrans.out.median.txt + echo -n "$num_flows " >> $outdir/ecn_bulk.retrans.${cc_name} + cat ${sumd}/retrans.out.median.txt >> $outdir/ecn_bulk.retrans.${cc_name} + done + done + + OUTPNG="$outdir/ecn_bulk_1G_1ms_retrans_varynumflows.png" + OUTPUT="\n\ +set output '$OUTPNG'" + + echo -e "set y2tics\n\ + $TERMINAL $OUTPUT\n\ + set key top left\n\ + set grid\n\ + set logscale x\n\ + set title 'dctcp, bbr1, and bbr retransmit rate'\n\ + set xlabel 'number of flows'\n\ + set ylabel 'retransmit rate (percent)'\n\ + set yrange [:]\n\ + set xrange [1:100]\n\ + plot '$outdir/ecn_bulk.retrans.dctcp' u 1:2 t 'dctcp' axis x1y1 w lp lw 2 pt 7 lt rgb \"#d7191c\",\ + '$outdir/ecn_bulk.retrans.bbr1' u 1:2 t 'bbr1' axis x1y1 w lp lw 2 pt 7 lt rgb \"#abd9e9\",\ + '$outdir/ecn_bulk.retrans.bbr' u 1:2 t 'bbr' axis x1y1 w lp lw 2 pt 7 lt rgb \"#2c7bb6\"\ + \n" > $outdir/ecn_bulk_retrans.gnuplot + + gnuplot -persist $outdir/ecn_bulk_retrans.gnuplot + echo -e "\n" >> $HTML_PATH + +fi + +echo "done graphing all tests: $tests" diff --git a/gtests/net/tcp/bbr/nsperf/median.py b/gtests/net/tcp/bbr/nsperf/median.py new file mode 100755 index 000000000000..301eab92cd29 --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/median.py @@ -0,0 +1,43 @@ +#!/usr/bin/python2 +# +# Read a file with one float per line, and print the median of all numbers. +# Usage: +# infile=numbers.txt ./median.py + +import os +import sys + +def read_file(): + """Read a file with one float per line, and return as a list of floats.""" + + nums = [] + + path = os.environ['infile'] + f = open(path) + + # Read a line, or EOF. + line = f.readline() + while True: + if not line: + return nums + num_str = line.strip() + num = float(num_str) + nums.append(num) + line = f.readline() + +def median(nums): + """Return median of all numbers.""" + + sorted_nums = sorted(nums) + n = len(sorted_nums) + m = n - 1 + return (sorted_nums[n/2] + sorted_nums[m/2]) / 2.0 + +def main(): + """Main function to run everything.""" + nums = read_file() + print('%s' % median(nums)) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/gtests/net/tcp/bbr/nsperf/nsperf.py b/gtests/net/tcp/bbr/nsperf/nsperf.py new file mode 100755 index 000000000000..3704a2eb4e14 --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/nsperf.py @@ -0,0 +1,549 @@ +#!/usr/bin/python2 +# +# Use netem, network namespaces, and veth virtual NICs +# to run a multi-flow TCP test on a single Linux machine. +# +# There is one network namespace for each emulated host. +# The emulated hosts are as follows: +# +# srv: server (sender) +# srt: server router +# mid: middle host to emulate delays and bandwidth constraints +# crt: client router +# cli: client (receiver) +# +# Most hosts have both a left ("l") and right ("r") virtual NIC. +# The server has only an "r" NIC and the client has only an "l" NIC. +# +# The topology is as follows: +# +# +-------+ +-------+ +-------+ +-------+ +-------+ +# | srv | | srt | | mid | | crt | | cli | +# | r +-+ l r +-+ l r +-+ l r +-+ l | +# +-------+ +-------+ +-------+ +-------+ +-------+ +# +# Authors: +# Neal Cardwell +# Soheil Hassas Yeganeh +# Kevin (Yudong) Yang +# Arjun Roy + +import os +import os.path +import socket +import sys +import threading +import time + +HOSTS = ['cli', 'crt', 'mid', 'srt', 'srv'] +IP_MODE = socket.AF_INET6 +SS_INTERVAL_SECONDS = 0.1 # gather 'ss' stats each X seconds +FIRST_PORT = 10000 # first TCP port to use + +# The latest version of 'ip', 'ss', and 'tc' binaries from iproute2, built by +# the ./configure.sh script: +IP_PATH = '/root/iproute2/iproute2/ip/ip' +SS_PATH = '/root/iproute2/iproute2/misc/ss' +TC_PATH = '/root/iproute2/iproute2/tc/tc' + +def netperf(): + if os.path.isfile('./netperf'): + return './netperf' + else: + return '/usr/bin/netperf' + +def netserver(): + if os.path.isfile('./netserver'): + return './netserver' + else: + return '/usr/bin/netserver' + +def log_dir(): + return '/tmp/' + +def run(cmd, verbose=True): + if verbose: + print('running: |%s|' % (cmd)) + sys.stdout.flush() + status = os.system(cmd) + if status != 0: + sys.stderr.write('error %d executing: %s' % (status, cmd)) + +def check_dependencies(): + """Check prerequisites for running this tool.""" + if (not os.path.isfile(SS_PATH) or + not os.path.isfile(TC_PATH)): + sys.stderr.write('nsperf.py: ' + 'please run ./configure.sh to install dependencies\n') + sys.exit(1) + +def cleanup(): + """Delete all veth pairs and all network namespaces.""" + for host in HOSTS: + run('( ip netns exec %(host)s ip link del dev %(host)s.l; ' + ' ip netns exec %(host)s ip link del dev %(host)s.r; ' + ' ip netns del %(host)s ) 2> /dev/null' % {'host' : host}) + +def setup_logging(): + """Set up all logging.""" + # Zero out /var/log/kern-debug.log so that we only get our test logs. + run('logrotate -f /etc/logrotate.conf') + +def setup_namespaces(): + """Set up all network namespaces.""" + for host in HOSTS: + run('ip netns add %(host)s' % {'host' : host}) + +def setup_loopback(): + """Set up loopback devices for all namespaces.""" + for host in HOSTS: + run('ip netns exec %(host)s ifconfig lo up' % {'host' : host}) + +def setup_veth(): + """Set up all veth interfaces.""" + c = '' + c += 'ip link add srv.r type veth peer name srt.l\n' + c += 'ip link add srt.r type veth peer name mid.l\n' + c += 'ip link add mid.r type veth peer name crt.l\n' + c += 'ip link add crt.r type veth peer name cli.l\n' + + c += 'ip link set dev srv.r netns srv\n' + c += 'ip link set dev srt.r netns srt\n' + c += 'ip link set dev srt.l netns srt\n' + c += 'ip link set dev mid.r netns mid\n' + c += 'ip link set dev mid.l netns mid\n' + c += 'ip link set dev crt.l netns crt\n' + c += 'ip link set dev crt.r netns crt\n' + c += 'ip link set dev cli.l netns cli\n' + + c += 'ip netns exec srv ip link set srv.r up\n' + c += 'ip netns exec srt ip link set srt.r up\n' + c += 'ip netns exec srt ip link set srt.l up\n' + c += 'ip netns exec mid ip link set mid.r up\n' + c += 'ip netns exec mid ip link set mid.l up\n' + c += 'ip netns exec crt ip link set crt.r up\n' + c += 'ip netns exec crt ip link set crt.l up\n' + c += 'ip netns exec cli ip link set cli.l up\n' + + # Disable TSO, GSO, GRO, or else netem limit is interpreted per + # multi-MSS skb, not per packet on the emulated wire. + c += 'ip netns exec srt ethtool -K srt.r tso off gso off gro off\n' + c += 'ip netns exec mid ethtool -K mid.l tso off gso off gro off\n' + c += 'ip netns exec mid ethtool -K mid.r tso off gso off gro off\n' + c += 'ip netns exec crt ethtool -K crt.l tso off gso off gro off\n' + + # server + c += 'ip netns exec srv ip addr add 192.168.0.1/24 dev srv.r\n' + + # server router + c += 'ip netns exec srt ip addr add 192.168.0.100/24 dev srt.l\n' + c += 'ip netns exec srt ip addr add 192.168.1.1/24 dev srt.r\n' + + # mid + c += 'ip netns exec mid ip addr add 192.168.1.100/24 dev mid.l\n' + c += 'ip netns exec mid ip addr add 192.168.2.1/24 dev mid.r\n' + + # client router + c += 'ip netns exec crt ip addr add 192.168.2.100/24 dev crt.l\n' + c += 'ip netns exec crt ip addr add 192.168.3.1/24 dev crt.r\n' + + # client + c += 'ip netns exec cli ip addr add 192.168.3.100/24 dev cli.l\n' + + run(c) + +def setup_routes(params): + """Set up all routes.""" + c = '' + + # server + c += 'h=srv\n' + c += 'ip=' + IP_PATH + '\n' + c += 'tc=' + TC_PATH + '\n' + c += 'ip netns exec $h $tc qdisc add dev $h.r root fq\n' + c += 'ip netns exec $h $ip route add default via 192.168.0.100 dev $h.r' + if params['ecn_low']: + c += ' features ecn_low' + c += '\n' + c += 'echo $h =================== ; ip netns exec $h $ip route show\n' + + # server router + c += 'h=srt\n' + c += 'ip netns exec $h ip route add default via 192.168.1.100 dev $h.r\n' + c += 'echo $h =================== ; ip netns exec $h $ip route show\n' + + # mid + c += 'h=mid\n' + c += 'ip netns exec $h ip route add 192.168.3.0/24 via 192.168.2.100\n' + c += 'ip netns exec $h ip route add default via 192.168.1.1 dev $h.l\n' + c += 'echo $h =================== ; ip netns exec $h $ip route show\n' + + # client router + c += 'h=crt\n' + c += 'ip netns exec $h ip route add default via 192.168.2.1 dev $h.l\n' + c += 'echo $h =================== ; ip netns exec $h $ip route show\n' + + # cli + c += 'h=cli\n' + c += 'ip netns exec $h ip route add default via 192.168.3.1 dev $h.l\n' + c += 'echo $h =================== ; ip netns exec $h $ip route show\n' + + run(c) + +def setup_forwarding(): + """Enable forwarding in each namespace.""" + for host in HOSTS: + run('ip netns exec %(host)s sysctl -q -w ' + 'net.ipv4.ip_forward=1 ' + 'net.ipv6.conf.all.forwarding=1' % {'host' : host}) + +def netem_limit(rate, delay, buf): + """Get netem limit in packets. + + Needs to hold the packets in emulated pipe and emulated buffer. + """ + bdp_bits = (rate * 1000000.0) * (delay / 1000.0) + bdp_bytes = bdp_bits / 8.0 + bdp = int(bdp_bytes / 1500.0) + limit = bdp + buf + return limit + +# Parse string like 'cubic:1,bbr:2' and return an array like: +# ['cubic', 'bbr', 'bbr'] +def parse_cc_param(param_string): + cc_list = [] + groups = param_string.split(',') + for group in groups: + (cc_name, count) = group.split(':') + count = int(count) + for i in range(0, count): + cc_list.append(cc_name) + return cc_list + +def get_params(): + # Invocations of this tool should set the following parameters as + # environment variables. + params = { + 'bw': -1, # input bottleneck bw in Mbit/sec; required + 'rtt': -1, # RTT in ms; required + 'buf': -1, # input bottleneck buffer in packets; required + 'loss': 0, # input bottleneck loss rate in percent; optional + 'policer': 0, # input bottleneck policer rate, Mbit/sec; optional + 'cc': '', # congestion control algorithm: required + 'interval': 0, # interval between flow starts, in secs; optional + 'dur': -1, # length of test in secs: required + 'outdir': '', # output directory for results + 'qdisc': '', # qdisc at downstream bottleneck (empty for FIFO) + 'cmd': '', # command to run (e.g. set sysctl values) + 'pcap': 0, # bytes per packet to capture; 0 for no tracing + 'ecn_low': 0, # set ip route features ecn_low on server? + 'mem': 0, # set netperf memory buffer sizes to this value + } + + for key in params.keys(): + print('parsing key %s' % key) + if key in os.environ: + print('looking at env var with key %s, val %s' % (key, os.environ[key])) + else: + print('no env var with key %s' % (key)) + if key not in os.environ: + if params[key] != 0: + sys.stderr.write('missing %s in environment variables\n' % key) + sys.exit(1) + elif key == 'cc': + params[key] = parse_cc_param(os.environ[key]) + elif type(params[key]) == str: + params[key] = os.environ[key] + else: + params[key] = float(os.environ[key]) + + print(params) + params['netperf'] = netperf() + params['receiver_ip'] = '192.168.3.100' + return params + +# Put bandwidth rate limiting using HTB, tied to user-specified +# queuing discipline at that bottleneck, on traffic coming in the cli.l device. +def setup_htb_and_qdisc(d): + """Set up HTB for rate limiting, and user-specified qdisc for the queue.""" + + c = '' + + # First load the necessary modules. + c += ('rmmod ifb\n' + 'modprobe ifb numifbs=10\n' + 'modprobe act_mirred\n') + + # Clear old queuing disciplines (qdisc) on the interfaces + d['ext'] = 'cli.l' + d['ext_ingress'] = 'cli.ifb0' + d['host'] = 'cli' + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc del dev %(ext)s root\n') % d + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc del dev %(ext)s ingress\n') % d + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc del dev %(ext_ingress)s root\n') % d + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc del dev %(ext_ingress)s ingress\n') % d + + # Create ingress ifb0 on client interface. + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(ext)s handle ffff: ingress\n') % d + c += ('ip netns exec %(host)s ' + 'ip link add %(ext_ingress)s type ifb\n') % d + c += ('ip netns exec %(host)s ' + 'ip link set dev %(ext_ingress)s up\n') % d + c += ('ip netns exec %(host)s ' + 'ifconfig %(ext_ingress)s txqueuelen 128000\n') % d + c += ('ip netns exec %(host)s ' + 'ifconfig %(ext_ingress)s\n') % d + + # Forward all ingress traffic to the IFB device. + c += ('ip netns exec %(host)s ' + '%(tc)s filter add dev %(ext)s parent ffff: protocol all u32 ' + 'match u32 0 0 action mirred egress redirect ' + 'dev %(ext_ingress)s\n') % d + + # Create an egress filter on the IFB device. + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(ext_ingress)s root handle 1: ' + 'htb default 11\n') % d + + # Add root class HTB with rate limiting. + c += ('ip netns exec %(host)s ' + '%(tc)s class add dev %(ext_ingress)s parent 1: classid 1:11 ' + ' htb rate %(IRATE)sMbit ceil %(IRATE)sMbit\n') % d + + # Add qdisc for downstream bottleneck. + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(ext_ingress)s parent 1:11 handle 20: ' + '%(QDISC)s\n') % d + + c += ('ip netns exec %(host)s %(tc)s -stat qdisc show\n') % d + + return c + +def setup_netem(params): + """Set up netem on the crt (client router) host.""" + + d = {} + + # Parameters for data direction. + d['IRATE'] = params['bw'] # Mbit/sec + d['IDELAY'] = params['rtt'] / 2 # ms + d['IBUF'] = params['buf'] # packets + d['ILOSS'] = params['loss'] + d['IREO'] = 0 # TODO: not implemented yet + d['ILIMIT'] = netem_limit(rate=d['IRATE'], delay=d['IDELAY'], buf=d['IBUF']) + d['POLICER'] = params['policer'] # Mbit/sec + d['QDISC'] = params['qdisc'] + + # Parameters for ACK direction. + d['ORATE'] = 1000 # Mbit/sec; TODO: not implemented yet + d['ODELAY'] = params['rtt'] / 2 # ms + d['OBUF'] = 1000 # packets; TODO: not implemented yet + d['OLOSS'] = 0 # TODO: not implemented yet + d['OREO'] = 0 # TODO: not implemented yet + d['OLIMIT'] = netem_limit(rate=d['ORATE'], delay=d['ODELAY'], buf=d['OBUF']) + + d['tc'] = TC_PATH + + c = '' + + # TODO: fix the policer mechanism to actually work... + if params['policer'] > 0: + d['host'] = 'mid' + c = ('ip netns exec %(host)s ' + '%(tc)s filter list dev %(host)s.r\n'% + d) + run(c) + + c = ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(host)s.l ingress\n' % + d) + run(c) + + c = ('ip netns exec %(host)s ' + '%(tc)s filter add dev %(host)s.l ' + 'parent 1: protocol ip prio 10 u32 ' + 'match ip src 192.168.0.1/32 flowid 1:2 ' + 'action police rate %(POLICER)sMbit burst 100k drop\n' % + d) + run(c) + c = '' + + if d['QDISC'] == '': + # If the user doesn't need a fancy qdisc, and FIFO will do, + # then use netem for rate limiting and buffering, + # since netem seems more accurate than HTB. + d['INETEM_RATE'] = 'rate %(IRATE)sMbit' % d + else: + d['INETEM_RATE'] = '' + d['ILIMIT'] = '%d' % (2*1000*1000*1000) # buffer is in user's qdisc + + # Inbound from sender -> receiver. Downstream rate limiting is on cli.l. + d['host'] = 'crt' + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(host)s.r root netem ' + 'limit %(ILIMIT)s delay %(IDELAY)sms %(IREO)sms ' + 'loss random %(ILOSS)s%% %(INETEM_RATE)s\n') % d + + # Outbound from receiver -> sender. + d['host'] = 'crt' + c += ('ip netns exec %(host)s ' + '%(tc)s qdisc add dev %(host)s.l root netem ' + 'limit %(OLIMIT)s delay %(ODELAY)sms %(OREO)sms ' + 'loss random %(OLOSS)s%% ' + 'rate %(ORATE)sMbit\n') % d + + c += ('ip netns exec %(host)s %(tc)s -stat qdisc show\n') % d + + if (d['QDISC'] != ''): + c += setup_htb_and_qdisc(d) + + run(c) + +def ss_log_thread(params): + """Repeatedly run ss command and append log to file.""" + dur = params['dur'] + outdir = params['outdir'] + ss_log_path = os.path.join(outdir, 'ss.log') + receiver_ip = params['receiver_ip'] + num_conns = len(params['cc']) + + t0 = time.time() + t = t0 + port_cnt = num_conns + f = open(ss_log_path, 'w') + f.truncate() + f.close() + if IP_MODE == socket.AF_INET6: + ss_ip = '[%s]' + else: + ss_ip = '%s' + ss_ip %= receiver_ip + ss_cmd = ('ip netns exec srv ' + '%s -tinmo "dport >= :%d and dport < :%d and dst %s" >> %s' % ( + SS_PATH, + FIRST_PORT, FIRST_PORT + port_cnt, ss_ip, ss_log_path)) + + while t < t0 + dur: + f = open(ss_log_path, 'a') + f.write('# %f\n' % (time.time(),)) + f.close() + run(ss_cmd, verbose=False) + t += SS_INTERVAL_SECONDS + to_sleep = t - time.time() + if to_sleep > 0: + time.sleep(to_sleep) + +def launch_ss(params): + t = threading.Thread(target=ss_log_thread, args=(params,)) + t.start() + return t + +def run_test(params): + """Run one test case.""" + print('command: %s' % (sys.argv)) + run('uname -a; date; uptime') + + # Configure sender namespaces. + run('ip netns exec srv bash -c "%s"' % params['cmd']) + + # Configure receiver namespace. + run('ip netns exec cli bash -c "%s"' % params['cmd']) + + # Set up receiver process. + run('pkill -f netserver') + run('ip netns exec cli %s -N' % (netserver())) + + # Set up output directory. + outdir = params['outdir'] + run('mkdir -p %s' % outdir) + + # Set up sender-side packet capture. + if params['pcap'] > 0: + snaplen = params['pcap'] + path = os.path.join(outdir, 'out.pcap') + run('ip netns exec srv tcpdump -i srv.r -s %(snaplen)d -w %(path)s &' % + {'path': path, 'snaplen': snaplen}) + time.sleep(1) # wait for tcpdump to come up + + # Set up periodic sender-side 'ss' stat capture. + ss_thread = launch_ss(params) + + if sys.argv[1] == 'stream': + num_conns = len(params['cc']) + print('num_conns = %d' % (num_conns)) + t0 = time.time() + t = t0 + for i in range(0, num_conns): + conn_params = params.copy() + if i != num_conns - 1: + conn_params['bg'] = '&' # all but the last in the background + else: + conn_params['bg'] = '' + conn_params['cc'] = params['cc'][i] + conn_params['port'] = FIRST_PORT + i + conn_params['outfile'] = '%s/netperf.out.%d.txt' % (outdir, i) + if params['mem']: + conn_params['memflags'] = ( + '-s %(mem)s,%(mem)s -S %(mem)s,%(mem)s ' % + params) + else: + conn_params['memflags'] = '' + run('ip netns exec srv %(netperf)s ' + '-l %(dur)d -H %(receiver_ip)s -- -k THROUGHPUT %(memflags)s' + '-K %(cc)s -P %(port)s ' + '> %(outfile)s ' + '%(bg)s' % conn_params) + t += params['interval'] + to_sleep = t - time.time() + if to_sleep > 0: + time.sleep(to_sleep) + elif sys.argv[1] == 'rr': + params['request_size'] = (10 + 20 + 40 + 80 + 160) * 1448 + params['test'] = sys.argv[2] + conn_params['port'] = FIRST_PORT + run('ip netns exec srv %(netperf)s ' + ' -P 0 -t %(test)s -H %(receiver_ip)s -- ' + '-K %(cc)s -P %(port)s ' + '-r %(request_size)d,1 ' + '-o P50_LATENCY,P90_LATENCY,P99_LATENCY,MAX_LATENCY,' + 'TRANSACTION_RATE,' + 'LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS' % params) + else: + sys.stderr.write('unknown test type argument: %s\n' % sys.argv[1]) + sys.exit(1) + + ss_thread.join() + run('killall tcpdump') + + run('ls -l /tmp/*.gz') + run('cp -af /var/log/kern-debug.log ' + outdir) + run('rm -f ' + outdir + '/*.gz') + run('ls -l /tmp/*.gz') + run('gzip ' + outdir + '/kern-debug.log') + run('gzip ' + outdir + '/out.pcap') + run('ls -l /tmp/*gz') + +def main(): + """Main function to run everything.""" + check_dependencies() + params = get_params() + cleanup() + setup_logging() + setup_namespaces() + setup_loopback() + setup_veth() + setup_routes(params) + setup_forwarding() + setup_netem(params) + run_test(params) + cleanup() + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/gtests/net/tcp/bbr/nsperf/run_tests.sh b/gtests/net/tcp/bbr/nsperf/run_tests.sh new file mode 100755 index 000000000000..159a3c34dfcd --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/run_tests.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# +# Run a set of tests with bbr1, bbr, cubic, dctcp. +# By default, runs all tests: +# ./run_tests.sh +# But you can also run a subset of tests by setting the "tests" +# environment variable: +# tests="coexist shallow" ./run_tests.sh +# + +set -x + +# By default run all tests. +# To run a subset of tests, set the environment variable: tests="foo bar". +if [ "$tests" = "" ]; then + tests="coexist random_loss shallow bufferbloat ecn_bulk" +fi + +# Make sure send and receive buffers can grow quite large. A BDP of 10Gbit/sec +# * 100ms is 125MBytes, so to tolerate high loss rates and lots of SACKed data, +# we allow autotuning to use 512MByte socket send and receive buffers: +MEM=$((512 * 1024 * 1024)) +set +e +sysctl -w net.core.rmem_max=$MEM net.ipv4.tcp_rmem="4096 131072 $MEM" +sysctl -w net.core.wmem_max=$MEM net.ipv4.tcp_wmem="4096 16384 $MEM" +set -e + +function get_buf_pkts() { + buf_pkts=`echo | awk -v bw=$bw -v rtt=$rtt -v bdp_of_buf=$bdp_of_buf '{bdp_pkts = int(bw*1000*1000*rtt/1000.0 / (1514 * 8) * bdp_of_buf); print bdp_pkts;}'` +} + +if [[ $tests == *"coexist"* ]]; then + # show acceptable coexistence w/ cubic: + # graph tput of 1 cubic, 1 bbr at a range of buffer depths: + # (bw=50M, rtt=30ms, buf={...}xBDP) + # [run for a very long time, 10minutes, to find convergence...] + for cc_combo in cubic:1,bbr1:1 cubic:1,bbr:1; do + for bdp_of_buf in 0.1 1 2 4 8 16; do + cmd="" + cc=$cc_combo # mix of CCs in this experiment + interval=2 # interval between flow starts, in secs + bw=50 # Mbit/sec + rtt=30 # ms + qdisc='' # use netem FIFO + loss=0 # loss in percent + dur=180 # test duration in secs + outdir="out/coexist/${cc}/$bdp_of_buf/" + # Create output directory: + mkdir -p $outdir + get_buf_pkts + set +e + cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \ + dur=$dur cmd=$cmd outdir=$outdir interval=$interval \ + ./nsperf.py stream | tee ${outdir}/nsperf.out.txt + set -e + done + done +fi + +if [[ $tests == *"random_loss"* ]]; then + # show high throughput with random loss up to design parameter: + # graph tput of cubic, bbr at a range of random loss rates + # (bw=1G, rtt=100ms, loss={....} + for rep in `seq 1 10`; do + for cc_name in cubic bbr1 bbr; do + loss_rates="0.00001 0.0001 0.001 0.01 0.1 0.2 0.5 1 2 3 10 15 20" + for loss_rate in $loss_rates; do + cmd="" + cc=${cc_name}:1 # 1 flow + interval=0 # interval between flow starts, in secs + bw=1000 # Mbit/sec + rtt=100 # ms + bdp_of_buf=1 # buffer = 100% of BDP, or 100ms + qdisc='' # use netem FIFO + loss=$loss_rate # loss in percent + mem=$MEM # bytes of netperf sock snd/rcv memory + dur=60 # test duration in secs + outdir="out/random_loss/${cc}/${loss}/rep-${rep}/" + # Create output directory: + mkdir -p $outdir + get_buf_pkts + set +e + cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \ + mem=$mem dur=$dur cmd=$cmd outdir=$outdir interval=$interval \ + ./nsperf.py stream | tee ${outdir}/nsperf.out.txt + set -e + done + done + done +fi + +if [[ $tests == *"shallow"* ]]; then + # show reasonably low loss rates in shallow buffers: + # graph retransmit rate for range of flow counts + # (bw=1G, rtt=100ms, buf=1ms, num_flows={...}) + # BDP is 1G*100ms = 8256 packets + for cc_name in cubic bbr1 bbr; do + for num_flows in 1 10 30 60 100; do + cmd="" + cc=${cc_name}:${num_flows} + interval=.139 # interval between flow starts, in secs + bw=1000 # Mbit/sec + rtt=100 # ms + bdp_of_buf=0.02 # buffer = 2% of BDP, or 2ms + qdisc='' # use netem FIFO + loss=0 # loss in percent + dur=300 # test duration in secs + outdir="out/shallow/${cc}/${num_flows}/" + # Create output directory: + mkdir -p $outdir + get_buf_pkts + set +e + cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \ + dur=$dur cmd=$cmd outdir=$outdir interval=$interval \ + ./nsperf.py stream | tee ${outdir}/nsperf.out.txt + set -e + done + done +fi + +if [[ $tests == *"bufferbloat"* ]]; then + # show low delay in deep buffers, even without ECN signal: + # graph p50 RTT for two flows using either cubic or bbr, + # at a range of buffer depths. + # (bw=50M, rtt=30ms, buf={...}xBDP) + for cc_name in cubic bbr1 bbr; do + for bdp_of_buf in 1 10 50 100; do + cmd="" + cc=${cc_name}:2 # 2 flows + interval=2 # interval between flow starts, in secs + bw=50 # Mbit/sec + rtt=30 # ms + qdisc='' # use netem FIFO + loss=0 # loss in percent + dur=120 # test duration in secs + outdir="out/bufferbloat/${cc}/${bdp_of_buf}/" + # Create output directory: + mkdir -p $outdir + get_buf_pkts + set +e + cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts qdisc=$qdisc loss=$loss \ + dur=$dur cmd=$cmd outdir=$outdir interval=$interval \ + ./nsperf.py stream | tee ${outdir}/nsperf.out.txt + set -e + done + done +fi + + +if [[ $tests == *"ecn_bulk"* ]]; then + # show ECN support can keep queues very low: + # graph p50 and p95 RTT (and retx, tput, fairness) for range of flow counts + # (bw=1G, rtt=1ms, num_flows={...}) + for rep in `seq 1 10`; do + for cc_name in bbr bbr1 dctcp; do + for num_flows in 1 4 10 40 100; do + # Inside the child/test namespaces, enable ECN for + # both active and passive connections: + cmd='sysctl net.ipv4.tcp_ecn=1' + cc=${cc_name}:${num_flows} + interval=.005 # interval between flow starts, in secs + bw=1000 # Mbit/sec + rtt=1 # ms + buf_pkts=0 # not using netem buffer + # Enable "features ecn_low" on default route of server: + ecn_low=1 + # We set the limit to 1000 packets, or 12ms at 1Gbit/sec. + # We configure the target to be far higher, to disable + # Codel-based drops. + qdisc='codel ce_threshold 242us limit 1000 target 100ms' + loss=0 # loss in percent + dur=10 # test duration in secs + outdir="out/ecn_bulk/${cc_name}/${num_flows}/rep-${rep}/" + # Create output directory: + mkdir -p $outdir + get_buf_pkts + set +e + cc=$cc bw=$bw rtt=$rtt buf=$buf_pkts ecn_low=$ecn_low \ + qdisc=$qdisc loss=$loss \ + dur=$dur cmd=$cmd outdir=$outdir interval=$interval \ + ./nsperf.py stream | tee ${outdir}/nsperf.out.txt + set -e + done + done + done +fi + +echo "done running all tests: $tests" diff --git a/gtests/net/tcp/bbr/nsperf/ss_log_parser.py b/gtests/net/tcp/bbr/nsperf/ss_log_parser.py new file mode 100755 index 000000000000..eeeccd7b6d4d --- /dev/null +++ b/gtests/net/tcp/bbr/nsperf/ss_log_parser.py @@ -0,0 +1,207 @@ +#!/usr/bin/python2 +# +# Parse ss.log textual output written by ss_log_thread() in nsperf.py. +# Usage: +# infile=foo/ss.log outdir=out/ ss_log_parser.py +# +# Author: +# Neal Cardwell +# Based on code by: +# Kevin (Yudong) Yang +# Soheil Hassas Yeganeh + +import os +import socket +import sys +import time + +DEBUG = False # enable debugging output? + +def debug(s): + if DEBUG: + print('DEBUG: %s' % s) + +def median(nums): + """Return median of all numbers.""" + + if len(nums) == 0: + return 0 + sorted_nums = sorted(nums) + n = len(sorted_nums) + m = n - 1 + return (sorted_nums[n/2] + sorted_nums[m/2]) / 2.0 + +def read_file(): + """Read the ss.log file and parse into a dictionary.""" + all_data = {} # data for all time: