blob: 93d3f86ae657f43379874837e9c39a67c16af3d9 [file] [log] [blame]
#include <linux/module.h>
#include <net/mptcp.h>
#include <net/mptcp_v4.h>
#include <linux/route.h>
#include <linux/inet.h>
#include <linux/mroute.h>
#include <linux/spinlock_types.h>
#include <net/inet_ecn.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/compat.h>
#include <linux/slab.h>
#define MPTCP_GW_MAX_LISTS 10
#define MPTCP_GW_LIST_MAX_LEN 6
#define MPTCP_GW_SYSCTL_MAX_LEN (15 * MPTCP_GW_LIST_MAX_LEN * \
MPTCP_GW_MAX_LISTS)
struct mptcp_gw_list {
struct in_addr list[MPTCP_GW_MAX_LISTS][MPTCP_GW_LIST_MAX_LEN];
u8 len[MPTCP_GW_MAX_LISTS];
};
struct binder_priv {
/* Worker struct for subflow establishment */
struct work_struct subflow_work;
struct mptcp_cb *mpcb;
/* Prevent multiple sub-sockets concurrently iterating over sockets */
spinlock_t *flow_lock;
};
static struct mptcp_gw_list *mptcp_gws;
static rwlock_t mptcp_gws_lock;
static int mptcp_binder_ndiffports __read_mostly = 1;
static char sysctl_mptcp_binder_gateways[MPTCP_GW_SYSCTL_MAX_LEN] __read_mostly;
static int mptcp_get_avail_list_ipv4(struct sock *sk)
{
int i, j, list_taken, opt_ret, opt_len;
unsigned char *opt_ptr, *opt_end_ptr, opt[MAX_IPOPTLEN];
for (i = 0; i < MPTCP_GW_MAX_LISTS; ++i) {
if (mptcp_gws->len[i] == 0)
goto error;
mptcp_debug("mptcp_get_avail_list_ipv4: List %i\n", i);
list_taken = 0;
/* Loop through all sub-sockets in this connection */
mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk) {
mptcp_debug("mptcp_get_avail_list_ipv4: Next sock\n");
/* Reset length and options buffer, then retrieve
* from socket
*/
opt_len = MAX_IPOPTLEN;
memset(opt, 0, MAX_IPOPTLEN);
opt_ret = ip_getsockopt(sk, IPPROTO_IP,
IP_OPTIONS, opt, &opt_len);
if (opt_ret < 0) {
mptcp_debug(KERN_ERR "%s: MPTCP subsocket getsockopt() IP_OPTIONS failed, error %d\n",
__func__, opt_ret);
goto error;
}
/* If socket has no options, it has no stake in this list */
if (opt_len <= 0)
continue;
/* Iterate options buffer */
for (opt_ptr = &opt[0]; opt_ptr < &opt[opt_len]; opt_ptr++) {
if (*opt_ptr == IPOPT_LSRR) {
mptcp_debug("mptcp_get_avail_list_ipv4: LSRR options found\n");
goto sock_lsrr;
}
}
continue;
sock_lsrr:
/* Pointer to the 2nd to last address */
opt_end_ptr = opt_ptr+(*(opt_ptr+1))-4;
/* Addresses start 3 bytes after type offset */
opt_ptr += 3;
j = 0;
/* Different length lists cannot be the same */
if ((opt_end_ptr-opt_ptr)/4 != mptcp_gws->len[i])
continue;
/* Iterate if we are still inside options list
* and sysctl list
*/
while (opt_ptr < opt_end_ptr && j < mptcp_gws->len[i]) {
/* If there is a different address, this list must
* not be set on this socket
*/
if (memcmp(&mptcp_gws->list[i][j], opt_ptr, 4))
break;
/* Jump 4 bytes to next address */
opt_ptr += 4;
j++;
}
/* Reached the end without a differing address, lists
* are therefore identical.
*/
if (j == mptcp_gws->len[i]) {
mptcp_debug("mptcp_get_avail_list_ipv4: List already used\n");
list_taken = 1;
break;
}
}
/* Free list found if not taken by a socket */
if (!list_taken) {
mptcp_debug("mptcp_get_avail_list_ipv4: List free\n");
break;
}
}
if (i >= MPTCP_GW_MAX_LISTS)
goto error;
return i;
error:
return -1;
}
/* The list of addresses is parsed each time a new connection is opened,
* to make sure it's up to date. In case of error, all the lists are
* marked as unavailable and the subflow's fingerprint is set to 0.
*/
static void mptcp_v4_add_lsrr(struct sock *sk, struct in_addr addr)
{
int i, j, ret;
unsigned char opt[MAX_IPOPTLEN] = {0};
struct tcp_sock *tp = tcp_sk(sk);
struct binder_priv *fmp = (struct binder_priv *)&tp->mpcb->mptcp_pm[0];
/* Read lock: multiple sockets can read LSRR addresses at the same
* time, but writes are done in mutual exclusion.
* Spin lock: must search for free list for one socket at a time, or
* multiple sockets could take the same list.
*/
read_lock(&mptcp_gws_lock);
spin_lock(fmp->flow_lock);
i = mptcp_get_avail_list_ipv4(sk);
/* Execution enters here only if a free path is found.
*/
if (i >= 0) {
opt[0] = IPOPT_NOP;
opt[1] = IPOPT_LSRR;
opt[2] = sizeof(mptcp_gws->list[i][0].s_addr) *
(mptcp_gws->len[i] + 1) + 3;
opt[3] = IPOPT_MINOFF;
for (j = 0; j < mptcp_gws->len[i]; ++j)
memcpy(opt + 4 +
(j * sizeof(mptcp_gws->list[i][0].s_addr)),
&mptcp_gws->list[i][j].s_addr,
sizeof(mptcp_gws->list[i][0].s_addr));
/* Final destination must be part of IP_OPTIONS parameter. */
memcpy(opt + 4 + (j * sizeof(addr.s_addr)), &addr.s_addr,
sizeof(addr.s_addr));
/* setsockopt must be inside the lock, otherwise another
* subflow could fail to see that we have taken a list.
*/
ret = ip_setsockopt(sk, IPPROTO_IP, IP_OPTIONS, opt,
4 + sizeof(mptcp_gws->list[i][0].s_addr)
* (mptcp_gws->len[i] + 1));
if (ret < 0) {
mptcp_debug(KERN_ERR "%s: MPTCP subsock setsockopt() IP_OPTIONS failed, error %d\n",
__func__, ret);
}
}
spin_unlock(fmp->flow_lock);
read_unlock(&mptcp_gws_lock);
return;
}
/* Parses gateways string for a list of paths to different
* gateways, and stores them for use with the Loose Source Routing (LSRR)
* socket option. Each list must have "," separated addresses, and the lists
* themselves must be separated by "-". Returns -1 in case one or more of the
* addresses is not a valid ipv4/6 address.
*/
static int mptcp_parse_gateway_ipv4(char *gateways)
{
int i, j, k, ret;
char *tmp_string = NULL;
struct in_addr tmp_addr;
tmp_string = kzalloc(16, GFP_KERNEL);
if (tmp_string == NULL)
return -ENOMEM;
write_lock(&mptcp_gws_lock);
memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));
/* A TMP string is used since inet_pton needs a null terminated string
* but we do not want to modify the sysctl for obvious reasons.
* i will iterate over the SYSCTL string, j will iterate over the
* temporary string where each IP is copied into, k will iterate over
* the IPs in each list.
*/
for (i = j = k = 0;
i < MPTCP_GW_SYSCTL_MAX_LEN && k < MPTCP_GW_MAX_LISTS;
++i) {
if (gateways[i] == '-' || gateways[i] == ',' || gateways[i] == '\0') {
/* If the temp IP is empty and the current list is
* empty, we are done.
*/
if (j == 0 && mptcp_gws->len[k] == 0)
break;
/* Terminate the temp IP string, then if it is
* non-empty parse the IP and copy it.
*/
tmp_string[j] = '\0';
if (j > 0) {
mptcp_debug("mptcp_parse_gateway_list tmp: %s i: %d\n", tmp_string, i);
ret = in4_pton(tmp_string, strlen(tmp_string),
(u8 *)&tmp_addr.s_addr, '\0',
NULL);
if (ret) {
mptcp_debug("mptcp_parse_gateway_list ret: %d s_addr: %pI4\n",
ret,
&tmp_addr.s_addr);
memcpy(&mptcp_gws->list[k][mptcp_gws->len[k]].s_addr,
&tmp_addr.s_addr,
sizeof(tmp_addr.s_addr));
mptcp_gws->len[k]++;
j = 0;
tmp_string[j] = '\0';
/* Since we can't impose a limit to
* what the user can input, make sure
* there are not too many IPs in the
* SYSCTL string.
*/
if (mptcp_gws->len[k] > MPTCP_GW_LIST_MAX_LEN) {
mptcp_debug("mptcp_parse_gateway_list too many members in list %i: max %i\n",
k,
MPTCP_GW_LIST_MAX_LEN);
goto error;
}
} else {
goto error;
}
}
if (gateways[i] == '-' || gateways[i] == '\0')
++k;
} else {
tmp_string[j] = gateways[i];
++j;
}
}
/* Number of flows is number of gateway lists plus master flow */
mptcp_binder_ndiffports = k+1;
write_unlock(&mptcp_gws_lock);
kfree(tmp_string);
return 0;
error:
memset(mptcp_gws, 0, sizeof(struct mptcp_gw_list));
memset(gateways, 0, sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN);
write_unlock(&mptcp_gws_lock);
kfree(tmp_string);
return -1;
}
/**
* Create all new subflows, by doing calls to mptcp_initX_subsockets
*
* This function uses a goto next_subflow, to allow releasing the lock between
* new subflows and giving other processes a chance to do some work on the
* socket and potentially finishing the communication.
**/
static void create_subflow_worker(struct work_struct *work)
{
const struct binder_priv *pm_priv = container_of(work,
struct binder_priv,
subflow_work);
struct mptcp_cb *mpcb = pm_priv->mpcb;
struct sock *meta_sk = mpcb->meta_sk;
int iter = 0;
next_subflow:
if (iter) {
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
cond_resched();
}
mutex_lock(&mpcb->mpcb_mutex);
lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);
iter++;
if (sock_flag(meta_sk, SOCK_DEAD))
goto exit;
if (mpcb->master_sk &&
!tcp_sk(mpcb->master_sk)->mptcp->fully_established)
goto exit;
if (mptcp_binder_ndiffports > iter &&
mptcp_binder_ndiffports > mpcb->cnt_subflows) {
struct mptcp_loc4 loc;
struct mptcp_rem4 rem;
loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
loc.loc4_id = 0;
loc.low_prio = 0;
rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
rem.port = inet_sk(meta_sk)->inet_dport;
rem.rem4_id = 0; /* Default 0 */
mptcp_init4_subsockets(meta_sk, &loc, &rem);
goto next_subflow;
}
exit:
release_sock(meta_sk);
mutex_unlock(&mpcb->mpcb_mutex);
sock_put(meta_sk);
}
static void binder_new_session(const struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct binder_priv *fmp = (struct binder_priv *)&mpcb->mptcp_pm[0];
static DEFINE_SPINLOCK(flow_lock);
#if IS_ENABLED(CONFIG_IPV6)
if (meta_sk->sk_family == AF_INET6 &&
!mptcp_v6_is_v4_mapped(meta_sk)) {
mptcp_fallback_default(mpcb);
return;
}
#endif
/* Initialize workqueue-struct */
INIT_WORK(&fmp->subflow_work, create_subflow_worker);
fmp->mpcb = mpcb;
fmp->flow_lock = &flow_lock;
}
static void binder_create_subflows(struct sock *meta_sk)
{
struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
struct binder_priv *pm_priv = (struct binder_priv *)&mpcb->mptcp_pm[0];
if (mpcb->infinite_mapping_snd || mpcb->infinite_mapping_rcv ||
mpcb->send_infinite_mapping ||
mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
return;
if (!work_pending(&pm_priv->subflow_work)) {
sock_hold(meta_sk);
queue_work(mptcp_wq, &pm_priv->subflow_work);
}
}
static int binder_get_local_id(sa_family_t family, union inet_addr *addr,
struct net *net, bool *low_prio)
{
return 0;
}
/* Callback functions, executed when syctl mptcp.mptcp_gateways is updated.
* Inspired from proc_tcp_congestion_control().
*/
static int proc_mptcp_gateways(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret;
struct ctl_table tbl = {
.maxlen = MPTCP_GW_SYSCTL_MAX_LEN,
};
if (write) {
tbl.data = kzalloc(MPTCP_GW_SYSCTL_MAX_LEN, GFP_KERNEL);
if (tbl.data == NULL)
return -1;
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (ret == 0) {
ret = mptcp_parse_gateway_ipv4(tbl.data);
memcpy(ctl->data, tbl.data, MPTCP_GW_SYSCTL_MAX_LEN);
}
kfree(tbl.data);
} else {
ret = proc_dostring(ctl, write, buffer, lenp, ppos);
}
return ret;
}
static struct mptcp_pm_ops binder __read_mostly = {
.new_session = binder_new_session,
.fully_established = binder_create_subflows,
.get_local_id = binder_get_local_id,
.init_subsocket_v4 = mptcp_v4_add_lsrr,
.name = "binder",
.owner = THIS_MODULE,
};
static struct ctl_table binder_table[] = {
{
.procname = "mptcp_binder_gateways",
.data = &sysctl_mptcp_binder_gateways,
.maxlen = sizeof(char) * MPTCP_GW_SYSCTL_MAX_LEN,
.mode = 0644,
.proc_handler = &proc_mptcp_gateways
},
{ }
};
struct ctl_table_header *mptcp_sysctl_binder;
/* General initialization of MPTCP_PM */
static int __init binder_register(void)
{
mptcp_gws = kzalloc(sizeof(*mptcp_gws), GFP_KERNEL);
if (!mptcp_gws)
return -ENOMEM;
rwlock_init(&mptcp_gws_lock);
BUILD_BUG_ON(sizeof(struct binder_priv) > MPTCP_PM_SIZE);
mptcp_sysctl_binder = register_net_sysctl(&init_net, "net/mptcp",
binder_table);
if (!mptcp_sysctl_binder)
goto sysctl_fail;
if (mptcp_register_path_manager(&binder))
goto pm_failed;
return 0;
pm_failed:
unregister_net_sysctl_table(mptcp_sysctl_binder);
sysctl_fail:
kfree(mptcp_gws);
return -1;
}
static void binder_unregister(void)
{
mptcp_unregister_path_manager(&binder);
unregister_net_sysctl_table(mptcp_sysctl_binder);
kfree(mptcp_gws);
}
module_init(binder_register);
module_exit(binder_unregister);
MODULE_AUTHOR("Luca Boccassi, Duncan Eastoe, Christoph Paasch (ndiffports)");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("BINDER MPTCP");
MODULE_VERSION("0.1");