blob: c17c306849a3aa4a39b3821210c52490953c9fe9 [file] [log] [blame]
/*
* MPTCP implementation - OPPORTUNISTIC LINKED INCREASES CONGESTION CONTROL:
*
* Algorithm design:
* Ramin Khalili <ramin.khalili@epfl.ch>
* Nicolas Gast <nicolas.gast@epfl.ch>
* Jean-Yves Le Boudec <jean-yves.leboudec@epfl.ch>
*
* Implementation:
* Ramin Khalili <ramin.khalili@epfl.ch>
*
* Ported to the official MPTCP-kernel:
* Christoph Paasch <christoph.paasch@uclouvain.be>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <net/tcp.h>
#include <net/mptcp.h>
#include <linux/module.h>
static int scale = 10;
struct mptcp_olia {
u32 mptcp_loss1;
u32 mptcp_loss2;
u32 mptcp_loss3;
int epsilon_num;
u32 epsilon_den;
int mptcp_snd_cwnd_cnt;
};
static inline int mptcp_olia_sk_can_send(const struct sock *sk)
{
return mptcp_sk_can_send(sk) && tcp_sk(sk)->srtt_us;
}
static inline u64 mptcp_olia_scale(u64 val, int scale)
{
return (u64) val << scale;
}
/* take care of artificially inflate (see RFC5681)
* of cwnd during fast-retransmit phase
*/
static u32 mptcp_get_crt_cwnd(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_state == TCP_CA_Recovery)
return tcp_sk(sk)->snd_ssthresh;
else
return tcp_sk(sk)->snd_cwnd;
}
/* return the dominator of the first term of the increasing term */
static u64 mptcp_get_rate(const struct mptcp_cb *mpcb , u32 path_rtt)
{
struct sock *sk;
u64 rate = 1; /* We have to avoid a zero-rate because it is used as a divisor */
mptcp_for_each_sk(mpcb, sk) {
struct tcp_sock *tp = tcp_sk(sk);
u64 scaled_num;
u32 tmp_cwnd;
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
scaled_num = mptcp_olia_scale(tmp_cwnd, scale) * path_rtt;
rate += div_u64(scaled_num , tp->srtt_us);
}
rate *= rate;
return rate;
}
/* find the maximum cwnd, used to find set M */
static u32 mptcp_get_max_cwnd(const struct mptcp_cb *mpcb)
{
struct sock *sk;
u32 best_cwnd = 0;
mptcp_for_each_sk(mpcb, sk) {
u32 tmp_cwnd;
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd > best_cwnd)
best_cwnd = tmp_cwnd;
}
return best_cwnd;
}
static void mptcp_get_epsilon(const struct mptcp_cb *mpcb)
{
struct mptcp_olia *ca;
struct tcp_sock *tp;
struct sock *sk;
u64 tmp_int, tmp_rtt, best_int = 0, best_rtt = 1;
u32 max_cwnd, tmp_cwnd;
u8 M = 0, B_not_M = 0;
/* TODO - integrate this in the following loop - we just want to iterate once */
max_cwnd = mptcp_get_max_cwnd(mpcb);
/* find the best path */
mptcp_for_each_sk(mpcb, sk) {
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
/* TODO - check here and rename variables */
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
if ((u64)tmp_int * best_rtt >= (u64)best_int * tmp_rtt) {
best_rtt = tmp_rtt;
best_int = tmp_int;
}
}
/* TODO - integrate this here in mptcp_get_max_cwnd and in the previous loop */
/* find the size of M and B_not_M */
mptcp_for_each_sk(mpcb, sk) {
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd == max_cwnd) {
M++;
} else {
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
if ((u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt)
B_not_M++;
}
}
/* check if the path is in M or B_not_M and set the value of epsilon accordingly */
mptcp_for_each_sk(mpcb, sk) {
tp = tcp_sk(sk);
ca = inet_csk_ca(sk);
if (!mptcp_olia_sk_can_send(sk))
continue;
if (B_not_M == 0) {
ca->epsilon_num = 0;
ca->epsilon_den = 1;
} else {
tmp_rtt = (u64)tp->srtt_us * tp->srtt_us;
tmp_int = max(ca->mptcp_loss3 - ca->mptcp_loss2,
ca->mptcp_loss2 - ca->mptcp_loss1);
tmp_cwnd = mptcp_get_crt_cwnd(sk);
if (tmp_cwnd < max_cwnd &&
(u64)tmp_int * best_rtt == (u64)best_int * tmp_rtt) {
ca->epsilon_num = 1;
ca->epsilon_den = mpcb->cnt_established * B_not_M;
} else if (tmp_cwnd == max_cwnd) {
ca->epsilon_num = -1;
ca->epsilon_den = mpcb->cnt_established * M;
} else {
ca->epsilon_num = 0;
ca->epsilon_den = 1;
}
}
}
}
/* setting the initial values */
static void mptcp_olia_init(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_olia *ca = inet_csk_ca(sk);
if (mptcp(tp)) {
ca->mptcp_loss1 = tp->snd_una;
ca->mptcp_loss2 = tp->snd_una;
ca->mptcp_loss3 = tp->snd_una;
ca->mptcp_snd_cwnd_cnt = 0;
ca->epsilon_num = 0;
ca->epsilon_den = 1;
}
}
/* updating inter-loss distance and ssthresh */
static void mptcp_olia_set_state(struct sock *sk, u8 new_state)
{
if (!mptcp(tcp_sk(sk)))
return;
if (new_state == TCP_CA_Loss ||
new_state == TCP_CA_Recovery || new_state == TCP_CA_CWR) {
struct mptcp_olia *ca = inet_csk_ca(sk);
if (ca->mptcp_loss3 != ca->mptcp_loss2 &&
!inet_csk(sk)->icsk_retransmits) {
ca->mptcp_loss1 = ca->mptcp_loss2;
ca->mptcp_loss2 = ca->mptcp_loss3;
}
}
}
/* main algorithm */
static void mptcp_olia_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_olia *ca = inet_csk_ca(sk);
const struct mptcp_cb *mpcb = tp->mpcb;
u64 inc_num, inc_den, rate, cwnd_scaled;
if (!mptcp(tp)) {
tcp_reno_cong_avoid(sk, ack, acked);
return;
}
ca->mptcp_loss3 = tp->snd_una;
if (!tcp_is_cwnd_limited(sk))
return;
/* slow start if it is in the safe area */
if (tcp_in_slow_start(tp)) {
tcp_slow_start(tp, acked);
return;
}
mptcp_get_epsilon(mpcb);
rate = mptcp_get_rate(mpcb, tp->srtt_us);
cwnd_scaled = mptcp_olia_scale(tp->snd_cwnd, scale);
inc_den = ca->epsilon_den * tp->snd_cwnd * rate ? : 1;
/* calculate the increasing term, scaling is used to reduce the rounding effect */
if (ca->epsilon_num == -1) {
if (ca->epsilon_den * cwnd_scaled * cwnd_scaled < rate) {
inc_num = rate - ca->epsilon_den *
cwnd_scaled * cwnd_scaled;
ca->mptcp_snd_cwnd_cnt -= div64_u64(
mptcp_olia_scale(inc_num , scale) , inc_den);
} else {
inc_num = ca->epsilon_den *
cwnd_scaled * cwnd_scaled - rate;
ca->mptcp_snd_cwnd_cnt += div64_u64(
mptcp_olia_scale(inc_num , scale) , inc_den);
}
} else {
inc_num = ca->epsilon_num * rate +
ca->epsilon_den * cwnd_scaled * cwnd_scaled;
ca->mptcp_snd_cwnd_cnt += div64_u64(
mptcp_olia_scale(inc_num , scale) , inc_den);
}
if (ca->mptcp_snd_cwnd_cnt >= (1 << scale) - 1) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
ca->mptcp_snd_cwnd_cnt = 0;
} else if (ca->mptcp_snd_cwnd_cnt <= 0 - (1 << scale) + 1) {
tp->snd_cwnd = max((int) 1 , (int) tp->snd_cwnd - 1);
ca->mptcp_snd_cwnd_cnt = 0;
}
}
static struct tcp_congestion_ops mptcp_olia = {
.init = mptcp_olia_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = mptcp_olia_cong_avoid,
.undo_cwnd = tcp_reno_undo_cwnd,
.set_state = mptcp_olia_set_state,
.owner = THIS_MODULE,
.name = "olia",
};
static int __init mptcp_olia_register(void)
{
BUILD_BUG_ON(sizeof(struct mptcp_olia) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&mptcp_olia);
}
static void __exit mptcp_olia_unregister(void)
{
tcp_unregister_congestion_control(&mptcp_olia);
}
module_init(mptcp_olia_register);
module_exit(mptcp_olia_unregister);
MODULE_AUTHOR("Ramin Khalili, Nicolas Gast, Jean-Yves Le Boudec");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPTCP COUPLED CONGESTION CONTROL");
MODULE_VERSION("0.1");