/* MPTCP round-robin scheduler module. Highly inspired by tcp_cong.c */

#include <linux/module.h>
#include <net/mptcp.h>

static unsigned char num_segments __read_mostly = 1;
module_param(num_segments, byte, 0644);
MODULE_PARM_DESC(num_segments, "The number of consecutive segments that are part of a burst");

static bool cwnd_limited __read_mostly = true;
module_param(cwnd_limited, bool, 0644);
MODULE_PARM_DESC(cwnd_limited, "if set to 1, the scheduler tries to fill the congestion-window on all subflows");
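/* Both parameters use mode 0644, so they can also be changed at run time via
 * /sys/module/mptcp_rr/parameters/ (assuming the module is built as
 * "mptcp_rr"), or be set at load time, e.g.:
 *	modprobe mptcp_rr num_segments=3 cwnd_limited=0
 */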

struct rrsched_priv {
	unsigned char quota;
};

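/* Per-subflow scheduler state, kept in the mptcp_sched[] scratch area of the
 * subflow's MPTCP control block (rr_register() checks that it fits).
 * @quota counts how many segments of the current num_segments burst have
 * already been scheduled on this subflow.
 */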
static struct rrsched_priv *rrsched_get_priv(const struct tcp_sock *tp)
{
	return (struct rrsched_priv *)&tp->mptcp->mptcp_sched[0];
}

/* Is the subflow sk available to send the skb? */
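/* @zero_wnd_test: if true, also refuse the subflow when the next byte to send
 *		   does not fit into the announced receive window.
 * @cwnd_test:	   if true, also refuse the subflow when its congestion window
 *		   is already filled by in-flight plus queued data.
 */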
static bool mptcp_rr_is_available(const struct sock *sk, const struct sk_buff *skb,
				  bool zero_wnd_test, bool cwnd_test)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int space, in_flight;

	/* Set of states for which we are allowed to send data */
	if (!mptcp_sk_can_send(sk))
		return false;

	/* We do not send data on this subflow unless it is
	 * fully established, i.e. the 4th ack has been received.
	 */
	if (tp->mptcp->pre_established)
		return false;

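	/* The subflow has been flagged "potentially failed" (pf) - skip it. */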
	if (tp->pf)
		return false;

	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
		/* If SACK is disabled, and we got a loss, TCP does not exit
		 * the loss-state until something above high_seq has been acked.
		 * (see tcp_try_undo_recovery)
		 *
		 * high_seq is the snd_nxt at the moment of the RTO. As soon
		 * as we have an RTO, we won't push data on the subflow.
		 * Thus, snd_una can never go beyond high_seq.
		 */
		if (!tcp_is_reno(tp))
			return false;
		else if (tp->snd_una != tp->high_seq)
			return false;
	}

	if (!tp->mptcp->fully_established) {
		/* Make sure that we send in-order data */
		if (skb && tp->mptcp->second_packet &&
		    tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
			return false;
	}

	if (!cwnd_test)
		goto zero_wnd_test;

	in_flight = tcp_packets_in_flight(tp);
	/* Not even a single spot in the cwnd */
	if (in_flight >= tp->snd_cwnd)
		return false;

	/* Now, check if what is queued in the subflow's send-queue
	 * already fills the cwnd.
	 */
	space = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	if (tp->write_seq - tp->snd_nxt > space)
		return false;

zero_wnd_test:
	if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
		return false;

	return true;
}

/* Are we not allowed to reinject this skb on tp? */
static int mptcp_rr_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
	/* If the skb has already been enqueued in this sk, try to find
	 * another one.
	 */
	return skb &&
		/* Has the skb already been enqueued into this subsocket? */
		mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}

/* We just look for any subflow that is available */
static struct sock *rr_get_available_subflow(struct sock *meta_sk,
					     struct sk_buff *skb,
					     bool zero_wnd_test)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *sk, *bestsk = NULL, *backupsk = NULL;

	/* Answer the DATA_FIN on the same subflow it was received on! */
	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
	    skb && mptcp_is_data_fin(skb)) {
		mptcp_for_each_sk(mpcb, sk) {
			if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
			    mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
				return sk;
		}
	}

	/* First, find the best subflow */
	mptcp_for_each_sk(mpcb, sk) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (!mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
			continue;

		if (mptcp_rr_dont_reinject_skb(tp, skb)) {
			backupsk = sk;
			continue;
		}

		bestsk = sk;
	}

	if (bestsk) {
		sk = bestsk;
	} else if (backupsk) {
		/* It has been sent on all subflows once - let's give it a
		 * chance again by restarting its pathmask.
		 */
		if (skb)
			TCP_SKB_CB(skb)->path_mask = 0;
		sk = backupsk;
	}

	return sk;
}

/* Returns the next segment to be sent from the mptcp meta-queue.
 * (chooses the reinject queue if any segment is waiting in it, otherwise
 * chooses the normal write queue).
 * Sets *@reinject to 1 if the returned segment comes from the
 * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk.
 * (The generic scheduler interface also allows -1 for a meta-level
 * retransmission that optimizes the receive-buffer; this round-robin
 * variant never uses it.)
 */
static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int *reinject)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sk_buff *skb = NULL;

	*reinject = 0;

	/* If we are in fallback-mode, just take from the meta-send-queue */
	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
		return tcp_send_head(meta_sk);

	skb = skb_peek(&mpcb->reinject_queue);

	if (skb)
		*reinject = 1;
	else
		skb = tcp_send_head(meta_sk);
	return skb;
}

static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk,
					     int *reinject,
					     struct sock **subsk,
					     unsigned int *limit)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *sk_it, *choose_sk = NULL;
	struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject);
	unsigned char split = num_segments;
	unsigned char iter = 0, full_subs = 0;

	/* *limit is an out-parameter; reset it here so that early returns
	 * leave it in a defined state.
	 */
	*limit = 0;

	if (!skb)
		return NULL;

	if (*reinject) {
		*subsk = rr_get_available_subflow(meta_sk, skb, false);
		if (!*subsk)
			return NULL;

		return skb;
	}

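	/* Round-robin selection: prefer a subflow that has started but not yet
	 * completed its burst (0 < quota < num_segments). Otherwise take an
	 * idle subflow (quota == 0). If every available subflow has already
	 * sent a full burst, reset all quotas and start a new round.
	 */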
retry:

	/* First, we look for a subflow that is currently in use */
	mptcp_for_each_sk(mpcb, sk_it) {
		struct tcp_sock *tp_it = tcp_sk(sk_it);
		struct rrsched_priv *rsp = rrsched_get_priv(tp_it);

		if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
			continue;

		iter++;

		/* Is this subflow currently being used? */
		if (rsp->quota > 0 && rsp->quota < num_segments) {
			split = num_segments - rsp->quota;
			choose_sk = sk_it;
			goto found;
		}

		/* Or, it's totally unused */
		if (!rsp->quota) {
			split = num_segments;
			choose_sk = sk_it;
		}

		/* Or, it must then be fully used */
		if (rsp->quota >= num_segments)
			full_subs++;
	}

	/* All considered subflows have a full quota, and we considered at
	 * least one.
	 */
	if (iter && iter == full_subs) {
		/* So, we restart this round by resetting the quotas to 0 and
		 * retrying to find a subflow.
		 */
		mptcp_for_each_sk(mpcb, sk_it) {
			struct tcp_sock *tp_it = tcp_sk(sk_it);
			struct rrsched_priv *rsp = rrsched_get_priv(tp_it);

			if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
				continue;

			rsp->quota = 0;
		}

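		/* After the reset every available subflow has quota 0, so the
		 * retry below will pick one of them - at most one extra pass.
		 */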
		goto retry;
	}

found:
	if (choose_sk) {
		unsigned int mss_now;
		struct tcp_sock *choose_tp = tcp_sk(choose_sk);
		struct rrsched_priv *rsp = rrsched_get_priv(choose_tp);

		if (!mptcp_rr_is_available(choose_sk, skb, false, true))
			return NULL;

		*subsk = choose_sk;
		mss_now = tcp_current_mss(*subsk);
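		/* Let the caller send at most 'split' MSS-sized segments of
		 * this skb on the chosen subflow.
		 */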
		*limit = split * mss_now;

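		/* A queued skb may span several MSS-sized segments; charge the
		 * quota with the number of segments it will be split into.
		 */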
		if (skb->len > mss_now)
			rsp->quota += DIV_ROUND_UP(skb->len, mss_now);
		else
			rsp->quota++;

		return skb;
	}

	return NULL;
}

static struct mptcp_sched_ops mptcp_sched_rr = {
	.get_subflow = rr_get_available_subflow,
	.next_segment = mptcp_rr_next_segment,
	.name = "roundrobin",
	.owner = THIS_MODULE,
};
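
/* Once loaded, the scheduler can typically be selected system-wide with
 *	sysctl -w net.mptcp.mptcp_scheduler=roundrobin
 * where "roundrobin" must match the .name field above (knob name as used by
 * the out-of-tree MPTCP implementation this file belongs to).
 */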

static int __init rr_register(void)
{
	BUILD_BUG_ON(sizeof(struct rrsched_priv) > MPTCP_SCHED_SIZE);

	return mptcp_register_scheduler(&mptcp_sched_rr);
}

static void __exit rr_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_rr);
}

module_init(rr_register);
module_exit(rr_unregister);

MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ROUNDROBIN MPTCP");
MODULE_VERSION("0.89");