blob: 56a066ed4ef09646940cf235cf7e41f418d086d1 [file] [log] [blame]
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/mptcp_v6.h>
#include <net/addrconf.h>
#endif
enum {
MPTCP_EVENT_ADD = 1,
MPTCP_EVENT_DEL,
MPTCP_EVENT_MOD,
};
#define MPTCP_SUBFLOW_RETRY_DELAY 1000
/* Max number of local or remote addresses we can store.
* When changing, see the bitfield below in fullmesh_rem4/6.
*/
#define MPTCP_MAX_ADDR 8
struct fullmesh_rem4 {
u8 rem4_id;
u8 bitfield;
u8 retry_bitfield;
__be16 port;
struct in_addr addr;
};
struct fullmesh_rem6 {
u8 rem6_id;
u8 bitfield;
u8 retry_bitfield;
__be16 port;
struct in6_addr addr;
};
struct mptcp_loc_addr {
struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR];
u8 loc4_bits;
u8 next_v4_index;
struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR];
u8 loc6_bits;
u8 next_v6_index;
struct rcu_head rcu;
};
struct mptcp_addr_event {
struct list_head list;
unsigned short family;
u8 code:7,
low_prio:1;
int if_idx;
union inet_addr addr;
};
struct fullmesh_priv {
/* Worker struct for subflow establishment */
struct work_struct subflow_work;
/* Delayed worker, when the routing-tables are not yet ready. */
struct delayed_work subflow_retry_work;
/* Remote addresses */
struct fullmesh_rem4 remaddr4[MPTCP_MAX_ADDR];
struct fullmesh_rem6 remaddr6[MPTCP_MAX_ADDR];
struct mptcp_cb *mpcb;
u16 remove_addrs; /* Addresses to remove */
u8 announced_addrs_v4; /* IPv4 Addresses we did announce */
u8 announced_addrs_v6; /* IPv6 Addresses we did announce */
u8 add_addr; /* Are we sending an add_addr? */
u8 rem4_bits;
u8 rem6_bits;
/* Are we established the additional subflows for primary pair? */
u8 first_pair:1;
};
struct mptcp_fm_ns {
struct mptcp_loc_addr __rcu *local;
spinlock_t local_lock; /* Protecting the above pointer */
struct list_head events;
struct delayed_work address_worker;
struct net *net;
};
static int num_subflows __read_mostly = 1;
module_param(num_subflows, int, 0644);
MODULE_PARM_DESC(num_subflows, "choose the number of subflows per pair of IP addresses of MPTCP connection");
static int create_on_err __read_mostly;
module_param(create_on_err, int, 0644);
MODULE_PARM_DESC(create_on_err, "recreate the subflow upon a timeout");
static struct mptcp_pm_ops full_mesh __read_mostly;
static void full_mesh_create_subflows(struct sock *meta_sk);
static struct mptcp_fm_ns *fm_get_ns(const struct net *net)
{
return (struct mptcp_fm_ns *)net->mptcp.path_managers[MPTCP_PM_FULLMESH];
}
static struct fullmesh_priv *fullmesh_get_priv(const struct mptcp_cb *mpcb)
{
return (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
}
/* Find the first free index in the bitfield */
static int __mptcp_find_free_index(u8 bitfield, u8 base)
{
int i;
/* There are anyways no free bits... */
if (bitfield == 0xff)
goto exit;
i = ffs(~(bitfield >> base)) - 1;
if (i < 0)
goto exit;
/* No free bits when starting at base, try from 0 on */
if (i + base >= sizeof(bitfield) * 8)
return __mptcp_find_free_index(bitfield, 0);
return i + base;
exit:
return -1;
}
static int mptcp_find_free_index(u8 bitfield)
{
return __mptcp_find_free_index(bitfield, 0);
}
static void mptcp_addv4_raddr(struct mptcp_cb *mpcb,
const struct in_addr *addr,
__be16 port, u8 id)
{
int i;
struct fullmesh_rem4 *rem4;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
rem4 = &fmp->remaddr4[i];
/* Address is already in the list --- continue */
if (rem4->rem4_id == id &&
rem4->addr.s_addr == addr->s_addr && rem4->port == port)
return;
/* This may be the case, when the peer is behind a NAT. He is
* trying to JOIN, thus sending the JOIN with a certain ID.
* However the src_addr of the IP-packet has been changed. We
* update the addr in the list, because this is the address as
* OUR BOX sees it.
*/
if (rem4->rem4_id == id && rem4->addr.s_addr != addr->s_addr) {
/* update the address */
mptcp_debug("%s: updating old addr:%pI4 to addr %pI4 with id:%d\n",
__func__, &rem4->addr.s_addr,
&addr->s_addr, id);
rem4->addr.s_addr = addr->s_addr;
rem4->port = port;
mpcb->list_rcvd = 1;
return;
}
}
i = mptcp_find_free_index(fmp->rem4_bits);
/* Do we have already the maximum number of local/remote addresses? */
if (i < 0) {
mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI4\n",
__func__, MPTCP_MAX_ADDR, &addr->s_addr);
return;
}
rem4 = &fmp->remaddr4[i];
/* Address is not known yet, store it */
rem4->addr.s_addr = addr->s_addr;
rem4->port = port;
rem4->bitfield = 0;
rem4->retry_bitfield = 0;
rem4->rem4_id = id;
mpcb->list_rcvd = 1;
fmp->rem4_bits |= (1 << i);
return;
}
static void mptcp_addv6_raddr(struct mptcp_cb *mpcb,
const struct in6_addr *addr,
__be16 port, u8 id)
{
int i;
struct fullmesh_rem6 *rem6;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
rem6 = &fmp->remaddr6[i];
/* Address is already in the list --- continue */
if (rem6->rem6_id == id &&
ipv6_addr_equal(&rem6->addr, addr) && rem6->port == port)
return;
/* This may be the case, when the peer is behind a NAT. He is
* trying to JOIN, thus sending the JOIN with a certain ID.
* However the src_addr of the IP-packet has been changed. We
* update the addr in the list, because this is the address as
* OUR BOX sees it.
*/
if (rem6->rem6_id == id) {
/* update the address */
mptcp_debug("%s: updating old addr: %pI6 to addr %pI6 with id:%d\n",
__func__, &rem6->addr, addr, id);
rem6->addr = *addr;
rem6->port = port;
mpcb->list_rcvd = 1;
return;
}
}
i = mptcp_find_free_index(fmp->rem6_bits);
/* Do we have already the maximum number of local/remote addresses? */
if (i < 0) {
mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI6\n",
__func__, MPTCP_MAX_ADDR, addr);
return;
}
rem6 = &fmp->remaddr6[i];
/* Address is not known yet, store it */
rem6->addr = *addr;
rem6->port = port;
rem6->bitfield = 0;
rem6->retry_bitfield = 0;
rem6->rem6_id = id;
mpcb->list_rcvd = 1;
fmp->rem6_bits |= (1 << i);
return;
}
static void mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id)
{
int i;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
if (fmp->remaddr4[i].rem4_id == id) {
/* remove address from bitfield */
fmp->rem4_bits &= ~(1 << i);
break;
}
}
}
static void mptcp_v6_rem_raddress(const struct mptcp_cb *mpcb, u8 id)
{
int i;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
if (fmp->remaddr6[i].rem6_id == id) {
/* remove address from bitfield */
fmp->rem6_bits &= ~(1 << i);
break;
}
}
}
/* Sets the bitfield of the remote-address field */
static void mptcp_v4_set_init_addr_bit(const struct mptcp_cb *mpcb,
const struct in_addr *addr, u8 index)
{
int i;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
if (fmp->remaddr4[i].addr.s_addr == addr->s_addr) {
fmp->remaddr4[i].bitfield |= (1 << index);
return;
}
}
}
/* Sets the bitfield of the remote-address field */
static void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
const struct in6_addr *addr, u8 index)
{
int i;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
if (ipv6_addr_equal(&fmp->remaddr6[i].addr, addr)) {
fmp->remaddr6[i].bitfield |= (1 << index);
return;
}
}
}
static void mptcp_set_init_addr_bit(struct mptcp_cb *mpcb,
const union inet_addr *addr,
sa_family_t family, u8 id)
{
if (family == AF_INET)
mptcp_v4_set_init_addr_bit(mpcb, &addr->in, id);
else
mptcp_v6_set_init_addr_bit(mpcb, &addr->in6, id);
}
static void mptcp_v4_subflows(struct sock *meta_sk,
const struct mptcp_loc4 *loc,
struct mptcp_rem4 *rem)
{
int i;
for (i = 1; i < num_subflows; i++)
mptcp_init4_subsockets(meta_sk, loc, rem);
}
#if IS_ENABLED(CONFIG_IPV6)
static void mptcp_v6_subflows(struct sock *meta_sk,
const struct mptcp_loc6 *loc,
struct mptcp_rem6 *rem)
{
int i;
for (i = 1; i < num_subflows; i++)
mptcp_init6_subsockets(meta_sk, loc, rem);
}
#endif
static void retry_subflow_worker(struct work_struct *work)
{
struct delayed_work *delayed_work = container_of(work,
struct delayed_work,
work);
struct fullmesh_priv *fmp = container_of(delayed_work,
struct fullmesh_priv,
subflow_retry_work);
struct mptcp_cb *mpcb = fmp->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
struct mptcp_loc_addr *mptcp_local;
struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
int iter = 0, i;
/* We need a local (stable) copy of the address-list. Really, it is not
* such a big deal, if the address-list is not 100% up-to-date.
*/
rcu_read_lock_bh();
mptcp_local = rcu_dereference_bh(fm_ns->local);
mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
rcu_read_unlock_bh();
if (!mptcp_local)
return;
next_subflow:
if (iter) {
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
cond_resched();
}
mutex_lock(&mpcb->mpcb_mutex);
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
iter++;
if (sock_flag(meta_sk, SOCK_DEAD))
goto exit;
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
struct fullmesh_rem4 *rem = &fmp->remaddr4[i];
/* Do we need to retry establishing a subflow ? */
if (rem->retry_bitfield) {
int i = mptcp_find_free_index(~rem->retry_bitfield);
struct mptcp_rem4 rem4;
rem->bitfield |= (1 << i);
rem->retry_bitfield &= ~(1 << i);
rem4.addr = rem->addr;
rem4.port = rem->port;
rem4.rem4_id = rem->rem4_id;
mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i], &rem4);
mptcp_v4_subflows(meta_sk,
&mptcp_local->locaddr4[i],
&rem4);
goto next_subflow;
}
}
#if IS_ENABLED(CONFIG_IPV6)
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
struct fullmesh_rem6 *rem = &fmp->remaddr6[i];
/* Do we need to retry establishing a subflow ? */
if (rem->retry_bitfield) {
int i = mptcp_find_free_index(~rem->retry_bitfield);
struct mptcp_rem6 rem6;
rem->bitfield |= (1 << i);
rem->retry_bitfield &= ~(1 << i);
rem6.addr = rem->addr;
rem6.port = rem->port;
rem6.rem6_id = rem->rem6_id;
mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i], &rem6);
mptcp_v6_subflows(meta_sk,
&mptcp_local->locaddr6[i],
&rem6);
goto next_subflow;
}
}
#endif
exit:
kfree(mptcp_local);
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
sock_put(meta_sk);
}
/**
* Create all new subflows, by doing calls to mptcp_initX_subsockets
*
* This function uses a goto next_subflow, to allow releasing the lock between
* new subflows and giving other processes a chance to do some work on the
* socket and potentially finishing the communication.
**/
static void create_subflow_worker(struct work_struct *work)
{
struct fullmesh_priv *fmp = container_of(work, struct fullmesh_priv,
subflow_work);
struct mptcp_cb *mpcb = fmp->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
struct mptcp_loc_addr *mptcp_local;
const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
int iter = 0, retry = 0;
int i;
/* We need a local (stable) copy of the address-list. Really, it is not
* such a big deal, if the address-list is not 100% up-to-date.
*/
rcu_read_lock_bh();
mptcp_local = rcu_dereference_bh(fm_ns->local);
mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
rcu_read_unlock_bh();
if (!mptcp_local)
return;
next_subflow:
if (iter) {
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
cond_resched();
}
mutex_lock(&mpcb->mpcb_mutex);
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
if (sock_flag(meta_sk, SOCK_DEAD))
goto exit;
if (mpcb->master_sk &&
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
goto exit;
/* Create the additional subflows for the first pair */
if (fmp->first_pair == 0 && mpcb->master_sk) {
struct mptcp_loc4 loc;
struct mptcp_rem4 rem;
loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
loc.loc4_id = 0;
loc.low_prio = 0;
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem4_id = 0; /* Default 0 */
mptcp_v4_subflows(meta_sk, &loc, &rem);
fmp->first_pair = 1;
}
iter++;
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
struct fullmesh_rem4 *rem;
u8 remaining_bits;
rem = &fmp->remaddr4[i];
remaining_bits = ~(rem->bitfield) & mptcp_local->loc4_bits;
/* Are there still combinations to handle? */
if (remaining_bits) {
int i = mptcp_find_free_index(~remaining_bits);
struct mptcp_rem4 rem4;
rem->bitfield |= (1 << i);
rem4.addr = rem->addr;
rem4.port = rem->port;
rem4.rem4_id = rem->rem4_id;
/* If a route is not yet available then retry once */
if (mptcp_init4_subsockets(meta_sk, &mptcp_local->locaddr4[i],
&rem4) == -ENETUNREACH)
retry = rem->retry_bitfield |= (1 << i);
else
mptcp_v4_subflows(meta_sk,
&mptcp_local->locaddr4[i],
&rem4);
goto next_subflow;
}
}
#if IS_ENABLED(CONFIG_IPV6)
if (fmp->first_pair == 0 && mpcb->master_sk) {
struct mptcp_loc6 loc;
struct mptcp_rem6 rem;
loc.addr = inet6_sk(meta_sk)->saddr;
loc.loc6_id = 0;
loc.low_prio = 0;
loc.if_idx = mpcb->master_sk->sk_bound_dev_if;
rem.addr = meta_sk->sk_v6_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem6_id = 0; /* Default 0 */
mptcp_v6_subflows(meta_sk, &loc, &rem);
fmp->first_pair = 1;
}
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
struct fullmesh_rem6 *rem;
u8 remaining_bits;
rem = &fmp->remaddr6[i];
remaining_bits = ~(rem->bitfield) & mptcp_local->loc6_bits;
/* Are there still combinations to handle? */
if (remaining_bits) {
int i = mptcp_find_free_index(~remaining_bits);
struct mptcp_rem6 rem6;
rem->bitfield |= (1 << i);
rem6.addr = rem->addr;
rem6.port = rem->port;
rem6.rem6_id = rem->rem6_id;
/* If a route is not yet available then retry once */
if (mptcp_init6_subsockets(meta_sk, &mptcp_local->locaddr6[i],
&rem6) == -ENETUNREACH)
retry = rem->retry_bitfield |= (1 << i);
else
mptcp_v6_subflows(meta_sk,
&mptcp_local->locaddr6[i],
&rem6);
goto next_subflow;
}
}
#endif
if (retry && !delayed_work_pending(&fmp->subflow_retry_work)) {
sock_hold(meta_sk);
queue_delayed_work(mptcp_wq, &fmp->subflow_retry_work,
msecs_to_jiffies(MPTCP_SUBFLOW_RETRY_DELAY));
}
exit:
kfree(mptcp_local);
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
sock_put(meta_sk);
}
static void announce_remove_addr(u8 addr_id, struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
struct sock *sk = mptcp_select_ack_sock(meta_sk);
fmp->remove_addrs |= (1 << addr_id);
mpcb->addr_signal = 1;
if (sk)
tcp_send_ack(sk);
}
static void update_addr_bitfields(struct sock *meta_sk,
const struct mptcp_loc_addr *mptcp_local)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
int i;
/* The bits in announced_addrs_* always match with loc*_bits. So, a
* simple & operation unsets the correct bits, because these go from
* announced to non-announced
*/
fmp->announced_addrs_v4 &= mptcp_local->loc4_bits;
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
fmp->remaddr4[i].bitfield &= mptcp_local->loc4_bits;
fmp->remaddr4[i].retry_bitfield &= mptcp_local->loc4_bits;
}
fmp->announced_addrs_v6 &= mptcp_local->loc6_bits;
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
fmp->remaddr6[i].bitfield &= mptcp_local->loc6_bits;
fmp->remaddr6[i].retry_bitfield &= mptcp_local->loc6_bits;
}
}
static int mptcp_find_address(const struct mptcp_loc_addr *mptcp_local,
sa_family_t family, const union inet_addr *addr,
int if_idx)
{
int i;
u8 loc_bits;
bool found = false;
if (family == AF_INET)
loc_bits = mptcp_local->loc4_bits;
else
loc_bits = mptcp_local->loc6_bits;
mptcp_for_each_bit_set(loc_bits, i) {
if (family == AF_INET &&
(!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) &&
mptcp_local->locaddr4[i].addr.s_addr == addr->in.s_addr) {
found = true;
break;
}
if (family == AF_INET6 &&
(!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) &&
ipv6_addr_equal(&mptcp_local->locaddr6[i].addr,
&addr->in6)) {
found = true;
break;
}
}
if (!found)
return -1;
return i;
}
static int mptcp_find_address_transp(const struct mptcp_loc_addr *mptcp_local,
sa_family_t family, int if_idx)
{
bool found = false;
u8 loc_bits;
int i;
if (family == AF_INET)
loc_bits = mptcp_local->loc4_bits;
else
loc_bits = mptcp_local->loc6_bits;
mptcp_for_each_bit_set(loc_bits, i) {
if (family == AF_INET &&
(!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx)) {
found = true;
break;
}
if (family == AF_INET6 &&
(!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx)) {
found = true;
break;
}
}
if (!found)
return -1;
return i;
}
static void mptcp_address_worker(struct work_struct *work)
{
const struct delayed_work *delayed_work = container_of(work,
struct delayed_work,
work);
struct mptcp_fm_ns *fm_ns = container_of(delayed_work,
struct mptcp_fm_ns,
address_worker);
struct net *net = fm_ns->net;
struct mptcp_addr_event *event = NULL;
struct mptcp_loc_addr *mptcp_local, *old;
int i, id = -1; /* id is used in the socket-code on a delete-event */
bool success; /* Used to indicate if we succeeded handling the event */
next_event:
success = false;
kfree(event);
/* First, let's dequeue an event from our event-list */
rcu_read_lock_bh();
spin_lock(&fm_ns->local_lock);
event = list_first_entry_or_null(&fm_ns->events,
struct mptcp_addr_event, list);
if (!event) {
spin_unlock(&fm_ns->local_lock);
rcu_read_unlock_bh();
return;
}
list_del(&event->list);
mptcp_local = rcu_dereference_bh(fm_ns->local);
if (event->code == MPTCP_EVENT_DEL) {
id = mptcp_find_address(mptcp_local, event->family,
&event->addr, event->if_idx);
/* Not in the list - so we don't care */
if (id < 0) {
mptcp_debug("%s could not find id\n", __func__);
goto duno;
}
old = mptcp_local;
mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
GFP_ATOMIC);
if (!mptcp_local)
goto duno;
if (event->family == AF_INET)
mptcp_local->loc4_bits &= ~(1 << id);
else
mptcp_local->loc6_bits &= ~(1 << id);
rcu_assign_pointer(fm_ns->local, mptcp_local);
kfree_rcu(old, rcu);
} else {
int i = mptcp_find_address(mptcp_local, event->family,
&event->addr, event->if_idx);
int j = i;
if (j < 0) {
/* Not in the list, so we have to find an empty slot */
if (event->family == AF_INET)
i = __mptcp_find_free_index(mptcp_local->loc4_bits,
mptcp_local->next_v4_index);
if (event->family == AF_INET6)
i = __mptcp_find_free_index(mptcp_local->loc6_bits,
mptcp_local->next_v6_index);
if (i < 0) {
mptcp_debug("%s no more space\n", __func__);
goto duno;
}
/* It might have been a MOD-event. */
event->code = MPTCP_EVENT_ADD;
} else {
/* Let's check if anything changes */
if (event->family == AF_INET &&
event->low_prio == mptcp_local->locaddr4[i].low_prio)
goto duno;
if (event->family == AF_INET6 &&
event->low_prio == mptcp_local->locaddr6[i].low_prio)
goto duno;
}
old = mptcp_local;
mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
GFP_ATOMIC);
if (!mptcp_local)
goto duno;
if (event->family == AF_INET) {
mptcp_local->locaddr4[i].addr.s_addr = event->addr.in.s_addr;
mptcp_local->locaddr4[i].loc4_id = i + 1;
mptcp_local->locaddr4[i].low_prio = event->low_prio;
mptcp_local->locaddr4[i].if_idx = event->if_idx;
mptcp_debug("%s updated IP %pI4 on ifidx %u prio %u id %u\n",
__func__, &event->addr.in.s_addr,
event->if_idx, event->low_prio, i + 1);
} else {
mptcp_local->locaddr6[i].addr = event->addr.in6;
mptcp_local->locaddr6[i].loc6_id = i + MPTCP_MAX_ADDR;
mptcp_local->locaddr6[i].low_prio = event->low_prio;
mptcp_local->locaddr6[i].if_idx = event->if_idx;
mptcp_debug("%s updated IP %pI6 on ifidx %u prio %u id %u\n",
__func__, &event->addr.in6,
event->if_idx, event->low_prio, i + MPTCP_MAX_ADDR);
}
if (j < 0) {
if (event->family == AF_INET) {
mptcp_local->loc4_bits |= (1 << i);
mptcp_local->next_v4_index = i + 1;
} else {
mptcp_local->loc6_bits |= (1 << i);
mptcp_local->next_v6_index = i + 1;
}
}
rcu_assign_pointer(fm_ns->local, mptcp_local);
kfree_rcu(old, rcu);
}
success = true;
duno:
spin_unlock(&fm_ns->local_lock);
rcu_read_unlock_bh();
if (!success)
goto next_event;
/* Now we iterate over the MPTCP-sockets and apply the event. */
for (i = 0; i < MPTCP_HASH_SIZE; i++) {
const struct hlist_nulls_node *node;
struct tcp_sock *meta_tp;
rcu_read_lock_bh();
hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[i],
tk_table) {
struct sock *meta_sk = (struct sock *)meta_tp, *sk;
bool meta_v4 = meta_sk->sk_family == AF_INET;
struct mptcp_cb *mpcb;
if (sock_net(meta_sk) != net)
continue;
if (meta_v4) {
/* skip IPv6 events if meta is IPv4 */
if (event->family == AF_INET6)
continue;
} else if (event->family == AF_INET && meta_sk->sk_ipv6only) {
/* skip IPv4 events if IPV6_V6ONLY is set */
continue;
}
if (unlikely(!refcount_inc_not_zero(&meta_sk->sk_refcnt)))
continue;
bh_lock_sock(meta_sk);
mpcb = meta_tp->mpcb;
if (!mpcb)
goto next;
if (!mptcp(meta_tp) || !is_meta_sk(meta_sk) ||
mpcb->infinite_mapping_snd ||
mpcb->infinite_mapping_rcv ||
mpcb->send_infinite_mapping)
goto next;
/* May be that the pm has changed in-between */
if (mpcb->pm_ops != &full_mesh)
goto next;
if (sock_owned_by_user(meta_sk)) {
if (!test_and_set_bit(MPTCP_PATH_MANAGER_DEFERRED,
&meta_sk->sk_tsq_flags))
sock_hold(meta_sk);
goto next;
}
if (event->code == MPTCP_EVENT_ADD) {
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
fmp->add_addr++;
mpcb->addr_signal = 1;
sk = mptcp_select_ack_sock(meta_sk);
if (sk)
tcp_send_ack(sk);
full_mesh_create_subflows(meta_sk);
}
if (event->code == MPTCP_EVENT_DEL) {
struct sock *sk, *tmpsk;
struct mptcp_loc_addr *mptcp_local;
bool found = false;
mptcp_local = rcu_dereference_bh(fm_ns->local);
/* In any case, we need to update our bitfields */
if (id >= 0)
update_addr_bitfields(meta_sk, mptcp_local);
/* Look for the socket and remove him */
mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
if ((event->family == AF_INET6 &&
(sk->sk_family == AF_INET ||
mptcp_v6_is_v4_mapped(sk))) ||
(event->family == AF_INET &&
(sk->sk_family == AF_INET6 &&
!mptcp_v6_is_v4_mapped(sk))))
continue;
if (event->family == AF_INET &&
(sk->sk_family == AF_INET ||
mptcp_v6_is_v4_mapped(sk)) &&
inet_sk(sk)->inet_saddr != event->addr.in.s_addr)
continue;
if (event->family == AF_INET6 &&
sk->sk_family == AF_INET6 &&
!ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6))
continue;
/* Reinject, so that pf = 1 and so we
* won't select this one as the
* ack-sock.
*/
mptcp_reinject_data(sk, 0);
/* We announce the removal of this id */
announce_remove_addr(tcp_sk(sk)->mptcp->loc_id, meta_sk);
mptcp_sub_force_close(sk);
found = true;
}
if (found)
goto next;
/* The id may have been given by the event,
* matching on a local address. And it may not
* have matched on one of the above sockets,
* because the client never created a subflow.
* So, we have to finally remove it here.
*/
if (id >= 0) {
u8 loc_id = id
+ (event->family == AF_INET ? 1 : MPTCP_MAX_ADDR);
announce_remove_addr(loc_id, meta_sk);
}
}
if (event->code == MPTCP_EVENT_MOD) {
struct sock *sk;
mptcp_for_each_sk(mpcb, sk) {
struct tcp_sock *tp = tcp_sk(sk);
if (event->family == AF_INET &&
(sk->sk_family == AF_INET ||
mptcp_v6_is_v4_mapped(sk)) &&
inet_sk(sk)->inet_saddr == event->addr.in.s_addr) {
if (event->low_prio != tp->mptcp->low_prio) {
tp->mptcp->send_mp_prio = 1;
tp->mptcp->low_prio = event->low_prio;
tcp_send_ack(sk);
}
}
if (event->family == AF_INET6 &&
sk->sk_family == AF_INET6 &&
!ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) {
if (event->low_prio != tp->mptcp->low_prio) {
tp->mptcp->send_mp_prio = 1;
tp->mptcp->low_prio = event->low_prio;
tcp_send_ack(sk);
}
}
}
}
next:
bh_unlock_sock(meta_sk);
sock_put(meta_sk);
}
rcu_read_unlock_bh();
}
goto next_event;
}
static struct mptcp_addr_event *lookup_similar_event(const struct net *net,
const struct mptcp_addr_event *event)
{
struct mptcp_addr_event *eventq;
struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
list_for_each_entry(eventq, &fm_ns->events, list) {
if (eventq->family != event->family)
continue;
if (eventq->if_idx != event->if_idx)
continue;
if (event->family == AF_INET) {
if (eventq->addr.in.s_addr == event->addr.in.s_addr)
return eventq;
} else {
if (ipv6_addr_equal(&eventq->addr.in6, &event->addr.in6))
return eventq;
}
}
return NULL;
}
/* We already hold the net-namespace MPTCP-lock */
static void add_pm_event(struct net *net, const struct mptcp_addr_event *event)
{
struct mptcp_addr_event *eventq = lookup_similar_event(net, event);
struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
if (eventq) {
switch (event->code) {
case MPTCP_EVENT_DEL:
mptcp_debug("%s del old_code %u\n", __func__, eventq->code);
list_del(&eventq->list);
kfree(eventq);
break;
case MPTCP_EVENT_ADD:
mptcp_debug("%s add old_code %u\n", __func__, eventq->code);
eventq->low_prio = event->low_prio;
eventq->code = MPTCP_EVENT_ADD;
return;
case MPTCP_EVENT_MOD:
mptcp_debug("%s mod old_code %u\n", __func__, eventq->code);
eventq->low_prio = event->low_prio;
eventq->code = MPTCP_EVENT_MOD;
return;
}
}
/* OK, we have to add the new address to the wait queue */
eventq = kmemdup(event, sizeof(struct mptcp_addr_event), GFP_ATOMIC);
if (!eventq)
return;
list_add_tail(&eventq->list, &fm_ns->events);
/* Create work-queue */
if (!delayed_work_pending(&fm_ns->address_worker))
queue_delayed_work(mptcp_wq, &fm_ns->address_worker,
msecs_to_jiffies(500));
}
static void addr4_event_handler(const struct in_ifaddr *ifa, unsigned long event,
struct net *net)
{
const struct net_device *netdev = ifa->ifa_dev->dev;
struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
struct mptcp_addr_event mpevent;
if (ifa->ifa_scope > RT_SCOPE_LINK ||
ipv4_is_loopback(ifa->ifa_local))
return;
spin_lock_bh(&fm_ns->local_lock);
mpevent.family = AF_INET;
mpevent.addr.in.s_addr = ifa->ifa_local;
mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
mpevent.if_idx = netdev->ifindex;
if (event == NETDEV_DOWN || !netif_running(netdev) ||
(netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP))
mpevent.code = MPTCP_EVENT_DEL;
else if (event == NETDEV_UP)
mpevent.code = MPTCP_EVENT_ADD;
else if (event == NETDEV_CHANGE)
mpevent.code = MPTCP_EVENT_MOD;
mptcp_debug("%s created event for %pI4, code %u prio %u idx %u\n", __func__,
&ifa->ifa_local, mpevent.code, mpevent.low_prio, mpevent.if_idx);
add_pm_event(net, &mpevent);
spin_unlock_bh(&fm_ns->local_lock);
return;
}
/* React on IPv4-addr add/rem-events */
static int mptcp_pm_inetaddr_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
const struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net *net = dev_net(ifa->ifa_dev->dev);
if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
event == NETDEV_CHANGE))
return NOTIFY_DONE;
addr4_event_handler(ifa, event, net);
return NOTIFY_DONE;
}
static struct notifier_block mptcp_pm_inetaddr_notifier = {
.notifier_call = mptcp_pm_inetaddr_event,
};
#if IS_ENABLED(CONFIG_IPV6)
static int inet6_addr_event(struct notifier_block *this,
unsigned long event, void *ptr);
static void addr6_event_handler(const struct inet6_ifaddr *ifa, unsigned long event,
struct net *net)
{
const struct net_device *netdev = ifa->idev->dev;
int addr_type = ipv6_addr_type(&ifa->addr);
struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
struct mptcp_addr_event mpevent;
if (ifa->scope > RT_SCOPE_LINK ||
addr_type == IPV6_ADDR_ANY ||
(addr_type & IPV6_ADDR_LOOPBACK) ||
(addr_type & IPV6_ADDR_LINKLOCAL))
return;
spin_lock_bh(&fm_ns->local_lock);
mpevent.family = AF_INET6;
mpevent.addr.in6 = ifa->addr;
mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
mpevent.if_idx = netdev->ifindex;
if (event == NETDEV_DOWN || !netif_running(netdev) ||
(netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP))
mpevent.code = MPTCP_EVENT_DEL;
else if (event == NETDEV_UP)
mpevent.code = MPTCP_EVENT_ADD;
else if (event == NETDEV_CHANGE)
mpevent.code = MPTCP_EVENT_MOD;
mptcp_debug("%s created event for %pI6, code %u prio %u idx %u\n", __func__,
&ifa->addr, mpevent.code, mpevent.low_prio, mpevent.if_idx);
add_pm_event(net, &mpevent);
spin_unlock_bh(&fm_ns->local_lock);
return;
}
/* React on IPv6-addr add/rem-events */
static int inet6_addr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct inet6_ifaddr *ifa6 = (struct inet6_ifaddr *)ptr;
struct net *net = dev_net(ifa6->idev->dev);
if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
event == NETDEV_CHANGE))
return NOTIFY_DONE;
addr6_event_handler(ifa6, event, net);
return NOTIFY_DONE;
}
static struct notifier_block inet6_addr_notifier = {
.notifier_call = inet6_addr_event,
};
#endif
/* React on ifup/down-events */
static int netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct in_device *in_dev;
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_dev *in6_dev;
#endif
if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
event == NETDEV_CHANGE))
return NOTIFY_DONE;
rcu_read_lock();
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
for_ifa(in_dev) {
mptcp_pm_inetaddr_event(NULL, event, ifa);
} endfor_ifa(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
struct inet6_ifaddr *ifa6;
list_for_each_entry(ifa6, &in6_dev->addr_list, if_list)
inet6_addr_event(NULL, event, ifa6);
}
#endif
rcu_read_unlock();
return NOTIFY_DONE;
}
static struct notifier_block mptcp_pm_netdev_notifier = {
.notifier_call = netdev_event,
};
static void full_mesh_add_raddr(struct mptcp_cb *mpcb,
const union inet_addr *addr,
sa_family_t family, __be16 port, u8 id)
{
if (family == AF_INET)
mptcp_addv4_raddr(mpcb, &addr->in, port, id);
else
mptcp_addv6_raddr(mpcb, &addr->in6, port, id);
}
static void full_mesh_new_session(const struct sock *meta_sk)
{
struct mptcp_loc_addr *mptcp_local;
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
struct tcp_sock *master_tp = tcp_sk(mpcb->master_sk);
int i, index, if_idx = 0;
union inet_addr saddr, daddr;
sa_family_t family = AF_INET;
bool meta_v4 = meta_sk->sk_family == AF_INET;
/* Init local variables necessary for the rest */
if (meta_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(meta_sk)) {
saddr.ip = inet_sk(meta_sk)->inet_saddr;
daddr.ip = inet_sk(meta_sk)->inet_daddr;
if_idx = mpcb->master_sk->sk_bound_dev_if;
family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
} else {
saddr.in6 = inet6_sk(meta_sk)->saddr;
daddr.in6 = meta_sk->sk_v6_daddr;
if_idx = mpcb->master_sk->sk_bound_dev_if;
family = AF_INET6;
#endif
}
if (inet_sk(meta_sk)->transparent)
if_idx = inet_sk(meta_sk)->rx_dst_ifindex;
rcu_read_lock_bh();
mptcp_local = rcu_dereference(fm_ns->local);
if (inet_sk(meta_sk)->transparent)
index = mptcp_find_address_transp(mptcp_local, family, if_idx);
else
index = mptcp_find_address(mptcp_local, family, &saddr, if_idx);
if (index < 0)
goto fallback;
if (family == AF_INET)
master_tp->mptcp->low_prio = mptcp_local->locaddr4[index].low_prio;
else
master_tp->mptcp->low_prio = mptcp_local->locaddr6[index].low_prio;
master_tp->mptcp->send_mp_prio = master_tp->mptcp->low_prio;
full_mesh_add_raddr(mpcb, &daddr, family, 0, 0);
mptcp_set_init_addr_bit(mpcb, &daddr, family, index);
/* Initialize workqueue-struct */
INIT_WORK(&fmp->subflow_work, create_subflow_worker);
INIT_DELAYED_WORK(&fmp->subflow_retry_work, retry_subflow_worker);
fmp->mpcb = mpcb;
if (!meta_v4 && meta_sk->sk_ipv6only)
goto skip_ipv4;
/* Look for the address among the local addresses */
mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
__be32 ifa_address = mptcp_local->locaddr4[i].addr.s_addr;
/* We do not need to announce the initial subflow's address again */
if (family == AF_INET &&
(!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) &&
saddr.ip == ifa_address)
continue;
fmp->add_addr++;
mpcb->addr_signal = 1;
}
skip_ipv4:
#if IS_ENABLED(CONFIG_IPV6)
/* skip IPv6 addresses if meta-socket is IPv4 */
if (meta_v4)
goto skip_ipv6;
mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
const struct in6_addr *ifa6 = &mptcp_local->locaddr6[i].addr;
/* We do not need to announce the initial subflow's address again */
if (family == AF_INET6 &&
(!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) &&
ipv6_addr_equal(&saddr.in6, ifa6))
continue;
fmp->add_addr++;
mpcb->addr_signal = 1;
}
skip_ipv6:
#endif
rcu_read_unlock_bh();
if (family == AF_INET)
fmp->announced_addrs_v4 |= (1 << index);
else
fmp->announced_addrs_v6 |= (1 << index);
for (i = fmp->add_addr; i && fmp->add_addr; i--)
tcp_send_ack(mpcb->master_sk);
if (master_tp->mptcp->send_mp_prio)
tcp_send_ack(mpcb->master_sk);
return;
fallback:
rcu_read_unlock_bh();
mptcp_fallback_default(mpcb);
return;
}
static void full_mesh_create_subflows(struct sock *meta_sk)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
mpcb->send_infinite_mapping ||
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
return;
if (mpcb->master_sk &&
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
return;
if (!work_pending(&fmp->subflow_work)) {
sock_hold(meta_sk);
queue_work(mptcp_wq, &fmp->subflow_work);
}
}
static void full_mesh_subflow_error(struct sock *meta_sk, struct sock *sk)
{
const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
if (!create_on_err)
return;
if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
mpcb->send_infinite_mapping ||
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
return;
if (sk->sk_err != ETIMEDOUT)
return;
full_mesh_create_subflows(meta_sk);
}
/* Called upon release_sock, if the socket was owned by the user during
* a path-management event.
*/
static void full_mesh_release_sock(struct sock *meta_sk)
{
struct mptcp_loc_addr *mptcp_local;
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
struct sock *sk, *tmpsk;
bool meta_v4 = meta_sk->sk_family == AF_INET;
int i;
rcu_read_lock_bh();
mptcp_local = rcu_dereference(fm_ns->local);
if (!meta_v4 && meta_sk->sk_ipv6only)
goto skip_ipv4;
/* First, detect modifications or additions */
mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
struct in_addr ifa = mptcp_local->locaddr4[i].addr;
bool found = false;
mptcp_for_each_sk(mpcb, sk) {
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_family == AF_INET6 &&
!mptcp_v6_is_v4_mapped(sk))
continue;
if (inet_sk(sk)->inet_saddr != ifa.s_addr)
continue;
found = true;
if (mptcp_local->locaddr4[i].low_prio != tp->mptcp->low_prio) {
tp->mptcp->send_mp_prio = 1;
tp->mptcp->low_prio = mptcp_local->locaddr4[i].low_prio;
tcp_send_ack(sk);
}
}
if (!found) {
fmp->add_addr++;
mpcb->addr_signal = 1;
sk = mptcp_select_ack_sock(meta_sk);
if (sk)
tcp_send_ack(sk);
full_mesh_create_subflows(meta_sk);
}
}
skip_ipv4:
#if IS_ENABLED(CONFIG_IPV6)
/* skip IPv6 addresses if meta-socket is IPv4 */
if (meta_v4)
goto removal;
mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
struct in6_addr ifa = mptcp_local->locaddr6[i].addr;
bool found = false;
mptcp_for_each_sk(mpcb, sk) {
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_family == AF_INET ||
mptcp_v6_is_v4_mapped(sk))
continue;
if (!ipv6_addr_equal(&inet6_sk(sk)->saddr, &ifa))
continue;
found = true;
if (mptcp_local->locaddr6[i].low_prio != tp->mptcp->low_prio) {
tp->mptcp->send_mp_prio = 1;
tp->mptcp->low_prio = mptcp_local->locaddr6[i].low_prio;
tcp_send_ack(sk);
}
}
if (!found) {
fmp->add_addr++;
mpcb->addr_signal = 1;
sk = mptcp_select_ack_sock(meta_sk);
if (sk)
tcp_send_ack(sk);
full_mesh_create_subflows(meta_sk);
}
}
removal:
#endif
/* Now, detect address-removals */
mptcp_for_each_sk_safe(mpcb, sk, tmpsk) {
bool shall_remove = true;
if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
if (inet_sk(sk)->inet_saddr == mptcp_local->locaddr4[i].addr.s_addr) {
shall_remove = false;
break;
}
}
} else {
mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
if (ipv6_addr_equal(&inet6_sk(sk)->saddr, &mptcp_local->locaddr6[i].addr)) {
shall_remove = false;
break;
}
}
}
if (shall_remove) {
/* Reinject, so that pf = 1 and so we
* won't select this one as the
* ack-sock.
*/
mptcp_reinject_data(sk, 0);
announce_remove_addr(tcp_sk(sk)->mptcp->loc_id,
meta_sk);
mptcp_sub_force_close(sk);
}
}
/* Just call it optimistically. It actually cannot do any harm */
update_addr_bitfields(meta_sk, mptcp_local);
rcu_read_unlock_bh();
}
static int full_mesh_get_local_id(sa_family_t family, union inet_addr *addr,
struct net *net, bool *low_prio)
{
struct mptcp_loc_addr *mptcp_local;
const struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
int index, id = -1;
/* Handle the backup-flows */
rcu_read_lock_bh();
mptcp_local = rcu_dereference(fm_ns->local);
index = mptcp_find_address(mptcp_local, family, addr, 0);
if (index != -1) {
if (family == AF_INET) {
id = mptcp_local->locaddr4[index].loc4_id;
*low_prio = mptcp_local->locaddr4[index].low_prio;
} else {
id = mptcp_local->locaddr6[index].loc6_id;
*low_prio = mptcp_local->locaddr6[index].low_prio;
}
}
rcu_read_unlock_bh();
return id;
}
static void full_mesh_addr_signal(struct sock *sk, unsigned *size,
struct tcp_out_options *opts,
struct sk_buff *skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct mptcp_cb *mpcb = tp->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
struct mptcp_loc_addr *mptcp_local;
struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk));
int remove_addr_len;
u8 unannouncedv4 = 0, unannouncedv6 = 0;
bool meta_v4 = meta_sk->sk_family == AF_INET;
mpcb->addr_signal = 0;
if (likely(!fmp->add_addr))
goto remove_addr;
rcu_read_lock_bh();
mptcp_local = rcu_dereference(fm_ns->local);
if (!meta_v4 && meta_sk->sk_ipv6only)
goto skip_ipv4;
/* IPv4 */
unannouncedv4 = (~fmp->announced_addrs_v4) & mptcp_local->loc4_bits;
if (unannouncedv4 &&
((mpcb->mptcp_ver == MPTCP_VERSION_0 &&
MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) ||
(mpcb->mptcp_ver >= MPTCP_VERSION_1 &&
MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1))) {
int ind = mptcp_find_free_index(~unannouncedv4);
opts->options |= OPTION_MPTCP;
opts->mptcp_options |= OPTION_ADD_ADDR;
opts->add_addr4.addr_id = mptcp_local->locaddr4[ind].loc4_id;
opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr;
opts->add_addr_v4 = 1;
if (mpcb->mptcp_ver >= MPTCP_VERSION_1) {
u8 mptcp_hash_mac[20];
u8 no_key[8];
*(u64 *)no_key = 0;
mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key,
(u8 *)no_key,
(u32 *)mptcp_hash_mac, 2,
1, (u8 *)&mptcp_local->locaddr4[ind].loc4_id,
4, (u8 *)&opts->add_addr4.addr.s_addr);
opts->add_addr4.trunc_mac = *(u64 *)mptcp_hash_mac;
}
if (skb) {
fmp->announced_addrs_v4 |= (1 << ind);
fmp->add_addr--;
}
if (mpcb->mptcp_ver < MPTCP_VERSION_1)
*size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN;
if (mpcb->mptcp_ver >= MPTCP_VERSION_1)
*size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1;
goto skip_ipv6;
}
if (meta_v4)
goto skip_ipv6;
skip_ipv4:
/* IPv6 */
unannouncedv6 = (~fmp->announced_addrs_v6) & mptcp_local->loc6_bits;
if (unannouncedv6 &&
((mpcb->mptcp_ver == MPTCP_VERSION_0 &&
MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) ||
(mpcb->mptcp_ver >= MPTCP_VERSION_1 &&
MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1))) {
int ind = mptcp_find_free_index(~unannouncedv6);
opts->options |= OPTION_MPTCP;
opts->mptcp_options |= OPTION_ADD_ADDR;
opts->add_addr6.addr_id = mptcp_local->locaddr6[ind].loc6_id;
opts->add_addr6.addr = mptcp_local->locaddr6[ind].addr;
opts->add_addr_v6 = 1;
if (mpcb->mptcp_ver >= MPTCP_VERSION_1) {
u8 mptcp_hash_mac[20];
u8 no_key[8];
*(u64 *)no_key = 0;
mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key,
(u8 *)no_key,
(u32 *)mptcp_hash_mac, 2,
1, (u8 *)&mptcp_local->locaddr6[ind].loc6_id,
16, (u8 *)&opts->add_addr6.addr.s6_addr);
opts->add_addr6.trunc_mac = *(u64 *)mptcp_hash_mac;
}
if (skb) {
fmp->announced_addrs_v6 |= (1 << ind);
fmp->add_addr--;
}
if (mpcb->mptcp_ver < MPTCP_VERSION_1)
*size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN;
if (mpcb->mptcp_ver >= MPTCP_VERSION_1)
*size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1;
}
skip_ipv6:
rcu_read_unlock_bh();
if (!unannouncedv4 && !unannouncedv6 && skb)
fmp->add_addr--;
remove_addr:
if (likely(!fmp->remove_addrs))
goto exit;
remove_addr_len = mptcp_sub_len_remove_addr_align(fmp->remove_addrs);
if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len)
goto exit;
opts->options |= OPTION_MPTCP;
opts->mptcp_options |= OPTION_REMOVE_ADDR;
opts->remove_addrs = fmp->remove_addrs;
*size += remove_addr_len;
if (skb)
fmp->remove_addrs = 0;
exit:
mpcb->addr_signal = !!(fmp->add_addr || fmp->remove_addrs);
}
static void full_mesh_rem_raddr(struct mptcp_cb *mpcb, u8 rem_id)
{
mptcp_v4_rem_raddress(mpcb, rem_id);
mptcp_v6_rem_raddress(mpcb, rem_id);
}
static void full_mesh_delete_subflow(struct sock *sk)
{
struct fullmesh_priv *fmp = fullmesh_get_priv(tcp_sk(sk)->mpcb);
struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk));
struct mptcp_loc_addr *mptcp_local;
int index, i;
if (!create_on_err)
return;
rcu_read_lock_bh();
mptcp_local = rcu_dereference_bh(fm_ns->local);
if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
union inet_addr saddr;
saddr.ip = inet_sk(sk)->inet_saddr;
index = mptcp_find_address(mptcp_local, AF_INET, &saddr,
sk->sk_bound_dev_if);
if (index < 0)
goto out;
mptcp_for_each_bit_set(fmp->rem4_bits, i) {
struct fullmesh_rem4 *rem4 = &fmp->remaddr4[i];
if (rem4->addr.s_addr != sk->sk_daddr)
continue;
if (rem4->port && rem4->port != inet_sk(sk)->inet_dport)
continue;
rem4->bitfield &= ~(1 << index);
}
#if IS_ENABLED(CONFIG_IPV6)
} else {
union inet_addr saddr;
saddr.in6 = inet6_sk(sk)->saddr;
index = mptcp_find_address(mptcp_local, AF_INET6, &saddr,
sk->sk_bound_dev_if);
if (index < 0)
goto out;
mptcp_for_each_bit_set(fmp->rem6_bits, i) {
struct fullmesh_rem6 *rem6 = &fmp->remaddr6[i];
if (!ipv6_addr_equal(&rem6->addr, &sk->sk_v6_daddr))
continue;
if (rem6->port && rem6->port != inet_sk(sk)->inet_dport)
continue;
rem6->bitfield &= ~(1 << index);
}
#endif
}
out:
rcu_read_unlock_bh();
}
/* Output /proc/net/mptcp_fullmesh */
static int mptcp_fm_seq_show(struct seq_file *seq, void *v)
{
const struct net *net = seq->private;
struct mptcp_loc_addr *mptcp_local;
const struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
int i;
seq_printf(seq, "Index, Address-ID, Backup, IP-address, if-idx\n");
rcu_read_lock_bh();
mptcp_local = rcu_dereference(fm_ns->local);
seq_printf(seq, "IPv4, next v4-index: %u\n", mptcp_local->next_v4_index);
mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
struct mptcp_loc4 *loc4 = &mptcp_local->locaddr4[i];
seq_printf(seq, "%u, %u, %u, %pI4 %u\n", i, loc4->loc4_id,
loc4->low_prio, &loc4->addr, loc4->if_idx);
}
seq_printf(seq, "IPv6, next v6-index: %u\n", mptcp_local->next_v6_index);
mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
struct mptcp_loc6 *loc6 = &mptcp_local->locaddr6[i];
seq_printf(seq, "%u, %u, %u, %pI6 %u\n", i, loc6->loc6_id,
loc6->low_prio, &loc6->addr, loc6->if_idx);
}
rcu_read_unlock_bh();
return 0;
}
static int mptcp_fm_seq_open(struct inode *inode, struct file *file)
{
return single_open_net(inode, file, mptcp_fm_seq_show);
}
static const struct file_operations mptcp_fm_seq_fops = {
.owner = THIS_MODULE,
.open = mptcp_fm_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release_net,
};
static int mptcp_fm_init_net(struct net *net)
{
struct mptcp_loc_addr *mptcp_local;
struct mptcp_fm_ns *fm_ns;
int err = 0;
fm_ns = kzalloc(sizeof(*fm_ns), GFP_KERNEL);
if (!fm_ns)
return -ENOBUFS;
mptcp_local = kzalloc(sizeof(*mptcp_local), GFP_KERNEL);
if (!mptcp_local) {
err = -ENOBUFS;
goto err_mptcp_local;
}
if (!proc_create("mptcp_fullmesh", S_IRUGO, net->proc_net,
&mptcp_fm_seq_fops)) {
err = -ENOMEM;
goto err_seq_fops;
}
mptcp_local->next_v4_index = 1;
rcu_assign_pointer(fm_ns->local, mptcp_local);
INIT_DELAYED_WORK(&fm_ns->address_worker, mptcp_address_worker);
INIT_LIST_HEAD(&fm_ns->events);
spin_lock_init(&fm_ns->local_lock);
fm_ns->net = net;
net->mptcp.path_managers[MPTCP_PM_FULLMESH] = fm_ns;
return 0;
err_seq_fops:
kfree(mptcp_local);
err_mptcp_local:
kfree(fm_ns);
return err;
}
static void mptcp_fm_exit_net(struct net *net)
{
struct mptcp_addr_event *eventq, *tmp;
struct mptcp_fm_ns *fm_ns;
struct mptcp_loc_addr *mptcp_local;
fm_ns = fm_get_ns(net);
cancel_delayed_work_sync(&fm_ns->address_worker);
rcu_read_lock_bh();
mptcp_local = rcu_dereference_bh(fm_ns->local);
kfree_rcu(mptcp_local, rcu);
spin_lock(&fm_ns->local_lock);
list_for_each_entry_safe(eventq, tmp, &fm_ns->events, list) {
list_del(&eventq->list);
kfree(eventq);
}
spin_unlock(&fm_ns->local_lock);
rcu_read_unlock_bh();
remove_proc_entry("mptcp_fullmesh", net->proc_net);
kfree(fm_ns);
}
static struct pernet_operations full_mesh_net_ops = {
.init = mptcp_fm_init_net,
.exit = mptcp_fm_exit_net,
};
static struct mptcp_pm_ops full_mesh __read_mostly = {
.new_session = full_mesh_new_session,
.release_sock = full_mesh_release_sock,
.fully_established = full_mesh_create_subflows,
.new_remote_address = full_mesh_create_subflows,
.subflow_error = full_mesh_subflow_error,
.get_local_id = full_mesh_get_local_id,
.addr_signal = full_mesh_addr_signal,
.add_raddr = full_mesh_add_raddr,
.rem_raddr = full_mesh_rem_raddr,
.delete_subflow = full_mesh_delete_subflow,
.name = "fullmesh",
.owner = THIS_MODULE,
};
/* General initialization of MPTCP_PM */
static int __init full_mesh_register(void)
{
int ret;
BUILD_BUG_ON(sizeof(struct fullmesh_priv) > MPTCP_PM_SIZE);
ret = register_pernet_subsys(&full_mesh_net_ops);
if (ret)
goto out;
ret = register_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
if (ret)
goto err_reg_inetaddr;
ret = register_netdevice_notifier(&mptcp_pm_netdev_notifier);
if (ret)
goto err_reg_netdev;
#if IS_ENABLED(CONFIG_IPV6)
ret = register_inet6addr_notifier(&inet6_addr_notifier);
if (ret)
goto err_reg_inet6addr;
#endif
ret = mptcp_register_path_manager(&full_mesh);
if (ret)
goto err_reg_pm;
out:
return ret;
err_reg_pm:
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&inet6_addr_notifier);
err_reg_inet6addr:
#endif
unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
err_reg_netdev:
unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
err_reg_inetaddr:
unregister_pernet_subsys(&full_mesh_net_ops);
goto out;
}
static void full_mesh_unregister(void)
{
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&inet6_addr_notifier);
#endif
unregister_netdevice_notifier(&mptcp_pm_netdev_notifier);
unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier);
unregister_pernet_subsys(&full_mesh_net_ops);
mptcp_unregister_path_manager(&full_mesh);
}
module_init(full_mesh_register);
module_exit(full_mesh_unregister);
MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Full-Mesh MPTCP");
MODULE_VERSION("0.88");