Merge branch 'lvs-next-2.6' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index ec6eb49..0f434a2 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -242,4 +242,164 @@
int syncid;
};
+/*
+ *
+ * IPVS Generic Netlink interface definitions
+ *
+ */
+
+/* Generic Netlink family info */
+
+#define IPVS_GENL_NAME "IPVS"
+#define IPVS_GENL_VERSION 0x1
+
+struct ip_vs_flags {
+ __be32 flags;
+ __be32 mask;
+};
+
+/* Generic Netlink command attributes */
+enum {
+ IPVS_CMD_UNSPEC = 0,
+
+ IPVS_CMD_NEW_SERVICE, /* add service */
+ IPVS_CMD_SET_SERVICE, /* modify service */
+ IPVS_CMD_DEL_SERVICE, /* delete service */
+ IPVS_CMD_GET_SERVICE, /* get service info */
+
+ IPVS_CMD_NEW_DEST, /* add destination */
+ IPVS_CMD_SET_DEST, /* modify destination */
+ IPVS_CMD_DEL_DEST, /* delete destination */
+ IPVS_CMD_GET_DEST, /* get destination info */
+
+ IPVS_CMD_NEW_DAEMON, /* start sync daemon */
+ IPVS_CMD_DEL_DAEMON, /* stop sync daemon */
+ IPVS_CMD_GET_DAEMON, /* get sync daemon status */
+
+ IPVS_CMD_SET_CONFIG, /* set config settings */
+ IPVS_CMD_GET_CONFIG, /* get config settings */
+
+ IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */
+ IPVS_CMD_GET_INFO, /* get general IPVS info */
+
+ IPVS_CMD_ZERO, /* zero all counters and stats */
+ IPVS_CMD_FLUSH, /* flush services and dests */
+
+ __IPVS_CMD_MAX,
+};
+
+#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1)
+
+/* Attributes used in the first level of commands */
+enum {
+ IPVS_CMD_ATTR_UNSPEC = 0,
+ IPVS_CMD_ATTR_SERVICE, /* nested service attribute */
+ IPVS_CMD_ATTR_DEST, /* nested destination attribute */
+ IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */
+ IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */
+ IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */
+ IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */
+ __IPVS_CMD_ATTR_MAX,
+};
+
+#define IPVS_CMD_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe a service
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_SERVICE
+ */
+enum {
+ IPVS_SVC_ATTR_UNSPEC = 0,
+ IPVS_SVC_ATTR_AF, /* address family */
+ IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */
+ IPVS_SVC_ATTR_ADDR, /* virtual service address */
+ IPVS_SVC_ATTR_PORT, /* virtual service port */
+ IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */
+
+ IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */
+ IPVS_SVC_ATTR_FLAGS, /* virtual service flags */
+ IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */
+ IPVS_SVC_ATTR_NETMASK, /* persistent netmask */
+
+ IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */
+ __IPVS_SVC_ATTR_MAX,
+};
+
+#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe a destination (real server)
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_DEST
+ */
+enum {
+ IPVS_DEST_ATTR_UNSPEC = 0,
+ IPVS_DEST_ATTR_ADDR, /* real server address */
+ IPVS_DEST_ATTR_PORT, /* real server port */
+
+ IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */
+ IPVS_DEST_ATTR_WEIGHT, /* destination weight */
+
+ IPVS_DEST_ATTR_U_THRESH, /* upper threshold */
+ IPVS_DEST_ATTR_L_THRESH, /* lower threshold */
+
+ IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */
+ IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */
+ IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */
+
+ IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */
+ __IPVS_DEST_ATTR_MAX,
+};
+
+#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1)
+
+/*
+ * Attributes describing a sync daemon
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_DAEMON
+ */
+enum {
+ IPVS_DAEMON_ATTR_UNSPEC = 0,
+ IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */
+ IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */
+ IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */
+ __IPVS_DAEMON_ATTR_MAX,
+};
+
+#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe service or destination entry statistics
+ *
+ * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
+ */
+enum {
+ IPVS_STATS_ATTR_UNSPEC = 0,
+ IPVS_STATS_ATTR_CONNS, /* connections scheduled */
+ IPVS_STATS_ATTR_INPKTS, /* incoming packets */
+ IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */
+ IPVS_STATS_ATTR_INBYTES, /* incoming bytes */
+ IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */
+
+ IPVS_STATS_ATTR_CPS, /* current connection rate */
+ IPVS_STATS_ATTR_INPPS, /* current in packet rate */
+ IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */
+ IPVS_STATS_ATTR_INBPS, /* current in byte rate */
+ IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */
+ __IPVS_STATS_ATTR_MAX,
+};
+
+#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1)
+
+/* Attributes used in response to IPVS_CMD_GET_INFO command */
+enum {
+ IPVS_INFO_ATTR_UNSPEC = 0,
+ IPVS_INFO_ATTR_VERSION, /* IPVS version number */
+ IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */
+ __IPVS_INFO_ATTR_MAX,
+};
+
+#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1)
+
#endif /* _IP_VS_H */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7312c3d..a25ad24 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -683,6 +683,8 @@
/*
* IPVS rate estimator prototypes (from ip_vs_est.c)
*/
+extern int ip_vs_estimator_init(void);
+extern void ip_vs_estimator_cleanup(void);
extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 09d0c3f..2e48a7e 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -71,14 +71,20 @@
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
+config IP_VS_PROTO_AH_ESP
+ bool
+ depends on UNDEFINED
+
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
+ select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
+ select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index 30e85de..73a46fe 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -6,8 +6,7 @@
ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index a7879eafc..9fbf0a6 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1070,10 +1070,12 @@
{
int ret;
+ ip_vs_estimator_init();
+
ret = ip_vs_control_init();
if (ret < 0) {
IP_VS_ERR("can't setup control.\n");
- goto cleanup_nothing;
+ goto cleanup_estimator;
}
ip_vs_protocol_init();
@@ -1106,7 +1108,8 @@
cleanup_protocol:
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
- cleanup_nothing:
+ cleanup_estimator:
+ ip_vs_estimator_cleanup();
return ret;
}
@@ -1117,6 +1120,7 @@
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
+ ip_vs_estimator_cleanup();
IP_VS_INFO("ipvs unloaded.\n");
}
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6379705..ede101e 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
#include <net/ip.h>
#include <net/route.h>
#include <net/sock.h>
+#include <net/genetlink.h>
#include <asm/uaccess.h>
@@ -868,7 +869,8 @@
svc->num_dests++;
/* call the update_service function of its scheduler */
- svc->scheduler->update_service(svc);
+ if (svc->scheduler->update_service)
+ svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
return 0;
@@ -898,7 +900,8 @@
svc->num_dests++;
/* call the update_service function of its scheduler */
- svc->scheduler->update_service(svc);
+ if (svc->scheduler->update_service)
+ svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
@@ -948,7 +951,8 @@
IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
/* call the update_service, because server weight may be changed */
- svc->scheduler->update_service(svc);
+ if (svc->scheduler->update_service)
+ svc->scheduler->update_service(svc);
write_unlock_bh(&__ip_vs_svc_lock);
@@ -1011,12 +1015,12 @@
*/
list_del(&dest->n_list);
svc->num_dests--;
- if (svcupd) {
- /*
- * Call the update_service function of its scheduler
- */
- svc->scheduler->update_service(svc);
- }
+
+ /*
+ * Call the update_service function of its scheduler
+ */
+ if (svcupd && svc->scheduler->update_service)
+ svc->scheduler->update_service(svc);
}
@@ -2320,6 +2324,872 @@
.owner = THIS_MODULE,
};
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = IPVS_GENL_NAME,
+ .version = IPVS_GENL_VERSION,
+ .maxattr = IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+ [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
+ [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+ [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
+ [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_IFNAME_MAXLEN },
+ [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+ [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
+ .len = IP_VS_SCHEDNAME_MAXLEN },
+ [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
+ .len = sizeof(struct ip_vs_flags) },
+ [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
+ [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+ [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
+ .len = sizeof(union nf_inet_addr) },
+ [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
+ [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+ struct ip_vs_stats *stats)
+{
+ struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ spin_lock_bh(&stats->lock);
+
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+ NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+ NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+ spin_unlock_bh(&stats->lock);
+
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ spin_unlock_bh(&stats->lock);
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+ struct ip_vs_service *svc)
+{
+ struct nlattr *nl_service;
+ struct ip_vs_flags flags = { .flags = svc->flags,
+ .mask = ~0 };
+
+ nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+ if (!nl_service)
+ return -EMSGSIZE;
+
+ NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+ if (svc->fwmark) {
+ NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+ } else {
+ NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+ NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+ NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+ }
+
+ NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+ NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+ NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+ NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nl_service);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_service);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+ struct ip_vs_service *svc,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_SERVICE);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_service(skb, svc) < 0)
+ goto nla_put_failure;
+
+ return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ int idx = 0, i;
+ int start = cb->args[0];
+ struct ip_vs_service *svc;
+
+ mutex_lock(&__ip_vs_mutex);
+ for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+ list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+ if (++idx <= start)
+ continue;
+ if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+ }
+
+ for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+ list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+ if (++idx <= start)
+ continue;
+ if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+ }
+
+nla_put_failure:
+ mutex_unlock(&__ip_vs_mutex);
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+ struct nlattr *nla, int full_entry)
+{
+ struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+ struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+ /* Parse mandatory identifying service fields first */
+ if (nla == NULL ||
+ nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+ return -EINVAL;
+
+ nla_af = attrs[IPVS_SVC_ATTR_AF];
+ nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
+ nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
+ nla_port = attrs[IPVS_SVC_ATTR_PORT];
+ nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
+
+ if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+ return -EINVAL;
+
+ /* For now, only support IPv4 */
+ if (nla_get_u16(nla_af) != AF_INET)
+ return -EAFNOSUPPORT;
+
+ if (nla_fwmark) {
+ usvc->protocol = IPPROTO_TCP;
+ usvc->fwmark = nla_get_u32(nla_fwmark);
+ } else {
+ usvc->protocol = nla_get_u16(nla_protocol);
+ nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+ usvc->port = nla_get_u16(nla_port);
+ usvc->fwmark = 0;
+ }
+
+ /* If a full entry was requested, check for the additional fields */
+ if (full_entry) {
+ struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+ *nla_netmask;
+ struct ip_vs_flags flags;
+ struct ip_vs_service *svc;
+
+ nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+ nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+ nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+ nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+ if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+ return -EINVAL;
+
+ nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+ /* prefill flags from service if it already exists */
+ if (usvc->fwmark)
+ svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+ else
+ svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+ usvc->port);
+ if (svc) {
+ usvc->flags = svc->flags;
+ ip_vs_service_put(svc);
+ } else
+ usvc->flags = 0;
+
+ /* set new flags from userland */
+ usvc->flags = (usvc->flags & ~flags.mask) |
+ (flags.flags & flags.mask);
+
+ strlcpy(usvc->sched_name, nla_data(nla_sched),
+ sizeof(usvc->sched_name));
+ usvc->timeout = nla_get_u32(nla_timeout);
+ usvc->netmask = nla_get_u32(nla_netmask);
+ }
+
+ return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+ struct ip_vs_service_user usvc;
+ int ret;
+
+ ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (usvc.fwmark)
+ return __ip_vs_svc_fwm_get(usvc.fwmark);
+ else
+ return __ip_vs_service_get(usvc.protocol, usvc.addr,
+ usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+ struct nlattr *nl_dest;
+
+ nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+ if (!nl_dest)
+ return -EMSGSIZE;
+
+ NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+ NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+ atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+ atomic_read(&dest->activeconns));
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+ atomic_read(&dest->inactconns));
+ NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+ atomic_read(&dest->persistconns));
+
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nl_dest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_dest);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_DEST);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_dest(skb, dest) < 0)
+ goto nla_put_failure;
+
+ return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ int idx = 0;
+ int start = cb->args[0];
+ struct ip_vs_service *svc;
+ struct ip_vs_dest *dest;
+ struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+ mutex_lock(&__ip_vs_mutex);
+
+ /* Try to find the service for which to dump destinations */
+ if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+ IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+ goto out_err;
+
+ svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+ if (IS_ERR(svc) || svc == NULL)
+ goto out_err;
+
+ /* Dump the destinations */
+ list_for_each_entry(dest, &svc->destinations, n_list) {
+ if (++idx <= start)
+ continue;
+ if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+ idx--;
+ goto nla_put_failure;
+ }
+ }
+
+nla_put_failure:
+ cb->args[0] = idx;
+ ip_vs_service_put(svc);
+
+out_err:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+ struct nlattr *nla, int full_entry)
+{
+ struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+ struct nlattr *nla_addr, *nla_port;
+
+ /* Parse mandatory identifying destination fields first */
+ if (nla == NULL ||
+ nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+ return -EINVAL;
+
+ nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
+ nla_port = attrs[IPVS_DEST_ATTR_PORT];
+
+ if (!(nla_addr && nla_port))
+ return -EINVAL;
+
+ nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+ udest->port = nla_get_u16(nla_port);
+
+ /* If a full entry was requested, check for the additional fields */
+ if (full_entry) {
+ struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+ *nla_l_thresh;
+
+ nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
+ nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
+ nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
+ nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
+
+ if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+ return -EINVAL;
+
+ udest->conn_flags = nla_get_u32(nla_fwd)
+ & IP_VS_CONN_F_FWD_MASK;
+ udest->weight = nla_get_u32(nla_weight);
+ udest->u_threshold = nla_get_u32(nla_u_thresh);
+ udest->l_threshold = nla_get_u32(nla_l_thresh);
+ }
+
+ return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+ const char *mcast_ifn, __be32 syncid)
+{
+ struct nlattr *nl_daemon;
+
+ nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+ if (!nl_daemon)
+ return -EMSGSIZE;
+
+ NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+ NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+ NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+ nla_nest_end(skb, nl_daemon);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_daemon);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+ const char *mcast_ifn, __be32 syncid,
+ struct netlink_callback *cb)
+{
+ void *hdr;
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ &ip_vs_genl_family, NLM_F_MULTI,
+ IPVS_CMD_NEW_DAEMON);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+ goto nla_put_failure;
+
+ return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ mutex_lock(&__ip_vs_mutex);
+ if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+ ip_vs_master_mcast_ifn,
+ ip_vs_master_syncid, cb) < 0)
+ goto nla_put_failure;
+
+ cb->args[0] = 1;
+ }
+
+ if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+ ip_vs_backup_mcast_ifn,
+ ip_vs_backup_syncid, cb) < 0)
+ goto nla_put_failure;
+
+ cb->args[1] = 1;
+ }
+
+nla_put_failure:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+ if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+ attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+ attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+ return -EINVAL;
+
+ return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+ nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+ if (!attrs[IPVS_DAEMON_ATTR_STATE])
+ return -EINVAL;
+
+ return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+ struct ip_vs_timeout_user t;
+
+ __ip_vs_get_timeouts(&t);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+ t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+ t.tcp_fin_timeout =
+ nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+ if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+ t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+ return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ip_vs_service *svc = NULL;
+ struct ip_vs_service_user usvc;
+ struct ip_vs_dest_user udest;
+ int ret = 0, cmd;
+ int need_full_svc = 0, need_full_dest = 0;
+
+ cmd = info->genlhdr->cmd;
+
+ mutex_lock(&__ip_vs_mutex);
+
+ if (cmd == IPVS_CMD_FLUSH) {
+ ret = ip_vs_flush();
+ goto out;
+ } else if (cmd == IPVS_CMD_SET_CONFIG) {
+ ret = ip_vs_genl_set_config(info->attrs);
+ goto out;
+ } else if (cmd == IPVS_CMD_NEW_DAEMON ||
+ cmd == IPVS_CMD_DEL_DAEMON) {
+
+ struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+ if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+ nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+ info->attrs[IPVS_CMD_ATTR_DAEMON],
+ ip_vs_daemon_policy)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (cmd == IPVS_CMD_NEW_DAEMON)
+ ret = ip_vs_genl_new_daemon(daemon_attrs);
+ else
+ ret = ip_vs_genl_del_daemon(daemon_attrs);
+ goto out;
+ } else if (cmd == IPVS_CMD_ZERO &&
+ !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+ ret = ip_vs_zero_all();
+ goto out;
+ }
+
+ /* All following commands require a service argument, so check if we
+ * received a valid one. We need a full service specification when
+ * adding / editing a service. Only identifying members otherwise. */
+ if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+ need_full_svc = 1;
+
+ ret = ip_vs_genl_parse_service(&usvc,
+ info->attrs[IPVS_CMD_ATTR_SERVICE],
+ need_full_svc);
+ if (ret)
+ goto out;
+
+ /* Lookup the exact service by <protocol, addr, port> or fwmark */
+ if (usvc.fwmark == 0)
+ svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+ else
+ svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+ /* Unless we're adding a new service, the service must already exist */
+ if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+ ret = -ESRCH;
+ goto out;
+ }
+
+ /* Destination commands require a valid destination argument. For
+ * adding / editing a destination, we need a full destination
+ * specification. */
+ if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+ cmd == IPVS_CMD_DEL_DEST) {
+ if (cmd != IPVS_CMD_DEL_DEST)
+ need_full_dest = 1;
+
+ ret = ip_vs_genl_parse_dest(&udest,
+ info->attrs[IPVS_CMD_ATTR_DEST],
+ need_full_dest);
+ if (ret)
+ goto out;
+ }
+
+ switch (cmd) {
+ case IPVS_CMD_NEW_SERVICE:
+ if (svc == NULL)
+ ret = ip_vs_add_service(&usvc, &svc);
+ else
+ ret = -EEXIST;
+ break;
+ case IPVS_CMD_SET_SERVICE:
+ ret = ip_vs_edit_service(svc, &usvc);
+ break;
+ case IPVS_CMD_DEL_SERVICE:
+ ret = ip_vs_del_service(svc);
+ break;
+ case IPVS_CMD_NEW_DEST:
+ ret = ip_vs_add_dest(svc, &udest);
+ break;
+ case IPVS_CMD_SET_DEST:
+ ret = ip_vs_edit_dest(svc, &udest);
+ break;
+ case IPVS_CMD_DEL_DEST:
+ ret = ip_vs_del_dest(svc, &udest);
+ break;
+ case IPVS_CMD_ZERO:
+ ret = ip_vs_zero_service(svc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+out:
+ if (svc)
+ ip_vs_service_put(svc);
+ mutex_unlock(&__ip_vs_mutex);
+
+ return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *reply;
+ int ret, cmd, reply_cmd;
+
+ cmd = info->genlhdr->cmd;
+
+ if (cmd == IPVS_CMD_GET_SERVICE)
+ reply_cmd = IPVS_CMD_NEW_SERVICE;
+ else if (cmd == IPVS_CMD_GET_INFO)
+ reply_cmd = IPVS_CMD_SET_INFO;
+ else if (cmd == IPVS_CMD_GET_CONFIG)
+ reply_cmd = IPVS_CMD_SET_CONFIG;
+ else {
+ IP_VS_ERR("unknown Generic Netlink command\n");
+ return -EINVAL;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&__ip_vs_mutex);
+
+ reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+ if (reply == NULL)
+ goto nla_put_failure;
+
+ switch (cmd) {
+ case IPVS_CMD_GET_SERVICE:
+ {
+ struct ip_vs_service *svc;
+
+ svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+ if (IS_ERR(svc)) {
+ ret = PTR_ERR(svc);
+ goto out_err;
+ } else if (svc) {
+ ret = ip_vs_genl_fill_service(msg, svc);
+ ip_vs_service_put(svc);
+ if (ret)
+ goto nla_put_failure;
+ } else {
+ ret = -ESRCH;
+ goto out_err;
+ }
+
+ break;
+ }
+
+ case IPVS_CMD_GET_CONFIG:
+ {
+ struct ip_vs_timeout_user t;
+
+ __ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+ NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+ t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+ break;
+ }
+
+ case IPVS_CMD_GET_INFO:
+ NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+ NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+ IP_VS_CONN_TAB_SIZE);
+ break;
+ }
+
+ genlmsg_end(msg, reply);
+ ret = genlmsg_unicast(msg, info->snd_pid);
+ goto out;
+
+nla_put_failure:
+ IP_VS_ERR("not enough space in Netlink message\n");
+ ret = -EMSGSIZE;
+
+out_err:
+ nlmsg_free(msg);
+out:
+ mutex_unlock(&__ip_vs_mutex);
+
+ return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+ {
+ .cmd = IPVS_CMD_NEW_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_SET_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_SERVICE,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ .dumpit = ip_vs_genl_dump_services,
+ .policy = ip_vs_cmd_policy,
+ },
+ {
+ .cmd = IPVS_CMD_NEW_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_SET_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_DEST,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .dumpit = ip_vs_genl_dump_dests,
+ },
+ {
+ .cmd = IPVS_CMD_NEW_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_DEL_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_DAEMON,
+ .flags = GENL_ADMIN_PERM,
+ .dumpit = ip_vs_genl_dump_daemons,
+ },
+ {
+ .cmd = IPVS_CMD_SET_CONFIG,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_CONFIG,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_GET_INFO,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_get_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_ZERO,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ip_vs_cmd_policy,
+ .doit = ip_vs_genl_set_cmd,
+ },
+ {
+ .cmd = IPVS_CMD_FLUSH,
+ .flags = GENL_ADMIN_PERM,
+ .doit = ip_vs_genl_set_cmd,
+ },
+};
+
+static int __init ip_vs_genl_register(void)
+{
+ int ret, i;
+
+ ret = genl_register_family(&ip_vs_genl_family);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+ ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+ if (ret)
+ goto err_out;
+ }
+ return 0;
+
+err_out:
+ genl_unregister_family(&ip_vs_genl_family);
+ return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+ genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
int __init ip_vs_control_init(void)
{
@@ -2334,6 +3204,13 @@
return ret;
}
+ ret = ip_vs_genl_register();
+ if (ret) {
+ IP_VS_ERR("cannot register Generic Netlink interface.\n");
+ nf_unregister_sockopt(&ip_vs_sockopts);
+ return ret;
+ }
+
proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
@@ -2368,6 +3245,7 @@
unregister_sysctl_table(sysctl_header);
proc_net_remove(&init_net, "ip_vs_stats");
proc_net_remove(&init_net, "ip_vs");
+ ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
LeaveFunction(2);
}
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 5a20f93..4fb620e 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -124,8 +124,6 @@
est->outbps = stats->outbps<<5;
spin_lock_bh(&est_lock);
- if (list_empty(&est_list))
- mod_timer(&est_timer, jiffies + 2 * HZ);
list_add(&est->list, &est_list);
spin_unlock_bh(&est_lock);
}
@@ -136,11 +134,6 @@
spin_lock_bh(&est_lock);
list_del(&est->list);
- while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
- spin_unlock_bh(&est_lock);
- cpu_relax();
- spin_lock_bh(&est_lock);
- }
spin_unlock_bh(&est_lock);
}
@@ -160,3 +153,14 @@
est->inbps = 0;
est->outbps = 0;
}
+
+int __init ip_vs_estimator_init(void)
+{
+ mod_timer(&est_timer, jiffies + 2 * HZ);
+ return 0;
+}
+
+void ip_vs_estimator_cleanup(void)
+{
+ del_timer_sync(&est_timer);
+}
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 7a6a319..d2a43aa 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -96,7 +96,6 @@
* IPVS lblc hash table
*/
struct ip_vs_lblc_table {
- rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
@@ -123,31 +122,6 @@
static struct ctl_table_header * sysctl_header;
-/*
- * new/free a ip_vs_lblc_entry, which is a mapping of a destionation
- * IP address to a server.
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
-{
- struct ip_vs_lblc_entry *en;
-
- en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
- if (en == NULL) {
- IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
- return NULL;
- }
-
- INIT_LIST_HEAD(&en->list);
- en->addr = daddr;
-
- atomic_inc(&dest->refcnt);
- en->dest = dest;
-
- return en;
-}
-
-
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -173,87 +147,97 @@
* Hash an entry in the ip_vs_lblc_table.
* returns bool success.
*/
-static int
+static void
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
- unsigned hash;
+ unsigned hash = ip_vs_lblc_hashkey(en->addr);
- if (!list_empty(&en->list)) {
- IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
- "called from %p\n", __builtin_return_address(0));
- return 0;
- }
-
- /*
- * Hash by destination IP address
- */
- hash = ip_vs_lblc_hashkey(en->addr);
-
- write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
- write_unlock(&tbl->lock);
-
- return 1;
}
/*
- * Get ip_vs_lblc_entry associated with supplied parameters.
+ * Get ip_vs_lblc_entry associated with supplied parameters. Called under read
+ * lock
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
{
- unsigned hash;
+ unsigned hash = ip_vs_lblc_hashkey(addr);
struct ip_vs_lblc_entry *en;
- hash = ip_vs_lblc_hashkey(addr);
-
- read_lock(&tbl->lock);
-
- list_for_each_entry(en, &tbl->bucket[hash], list) {
- if (en->addr == addr) {
- /* HIT */
- read_unlock(&tbl->lock);
+ list_for_each_entry(en, &tbl->bucket[hash], list)
+ if (en->addr == addr)
return en;
- }
- }
-
- read_unlock(&tbl->lock);
return NULL;
}
/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblc_entry *en;
+
+ en = ip_vs_lblc_get(tbl, daddr);
+ if (!en) {
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en) {
+ IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+ return NULL;
+ }
+
+ en->addr = daddr;
+ en->lastuse = jiffies;
+
+ atomic_inc(&dest->refcnt);
+ en->dest = dest;
+
+ ip_vs_lblc_hash(tbl, en);
+ } else if (en->dest != dest) {
+ atomic_dec(&en->dest->refcnt);
+ atomic_inc(&dest->refcnt);
+ en->dest = dest;
+ }
+
+ return en;
+}
+
+
+/*
* Flush all the entries of the specified table.
*/
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
{
- int i;
struct ip_vs_lblc_entry *en, *nxt;
+ int i;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
- write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
}
}
-static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
- struct ip_vs_lblc_entry *en, *nxt;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse + sysctl_ip_vs_lblc_expiration))
@@ -262,7 +246,7 @@
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -281,17 +265,16 @@
*/
static void ip_vs_lblc_check_expire(unsigned long data)
{
- struct ip_vs_lblc_table *tbl;
+ struct ip_vs_service *svc = (struct ip_vs_service *) data;
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblc_entry *en, *nxt;
- tbl = (struct ip_vs_lblc_table *)data;
-
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
- ip_vs_lblc_full_check(tbl);
+ ip_vs_lblc_full_check(svc);
tbl->counter = 1;
goto out;
}
@@ -308,7 +291,7 @@
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
@@ -317,7 +300,7 @@
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -336,15 +319,14 @@
/*
* Allocate the ip_vs_lblc_table for this service
*/
- tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
+ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
- "current service\n",
- sizeof(struct ip_vs_lblc_table));
+ "current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
@@ -352,7 +334,6 @@
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
- rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
@@ -361,9 +342,8 @@
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
- (unsigned long)tbl);
- tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
- add_timer(&tbl->periodic_timer);
+ (unsigned long)svc);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
@@ -380,22 +360,16 @@
ip_vs_lblc_flush(tbl);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree(tbl);
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
- sizeof(struct ip_vs_lblc_table));
+ sizeof(*tbl));
return 0;
}
-static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline struct ip_vs_dest *
-__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
struct ip_vs_dest *dest, *least;
int loh, doh;
@@ -484,46 +458,54 @@
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
- struct ip_vs_lblc_table *tbl;
- struct ip_vs_lblc_entry *en;
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_dest *dest = NULL;
+ struct ip_vs_lblc_entry *en;
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
- tbl = (struct ip_vs_lblc_table *)svc->sched_data;
+ /* First look in our cache */
+ read_lock(&svc->sched_lock);
en = ip_vs_lblc_get(tbl, iph->daddr);
- if (en == NULL) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- en = ip_vs_lblc_new(iph->daddr, dest);
- if (en == NULL) {
- return NULL;
- }
- ip_vs_lblc_hash(tbl, en);
- } else {
- dest = en->dest;
- if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
- || atomic_read(&dest->weight) <= 0
- || is_overloaded(dest, svc)) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- atomic_dec(&en->dest->refcnt);
- atomic_inc(&dest->refcnt);
- en->dest = dest;
- }
- }
- en->lastuse = jiffies;
+ if (en) {
+ /* We only hold a read lock, but this is atomic */
+ en->lastuse = jiffies;
+ /*
+ * If the destination is not available, i.e. it's in the trash,
+ * we must ignore it, as it may be removed from under our feet,
+ * if someone drops our reference count. Our caller only makes
+ * sure that destinations, that are not in the trash, are not
+ * moved to the trash, while we are scheduling. But anyone can
+ * free up entries from the trash at any time.
+ */
+
+ if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
+ dest = en->dest;
+ }
+ read_unlock(&svc->sched_lock);
+
+ /* If the destination has a weight and is not overloaded, use it */
+ if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+ goto out;
+
+ /* No cache entry or it is invalid, time to schedule */
+ dest = __ip_vs_lblc_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ write_lock(&svc->sched_lock);
+ ip_vs_lblc_new(tbl, iph->daddr, dest);
+ write_unlock(&svc->sched_lock);
+
+out:
IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
- NIPQUAD(en->addr),
+ NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));
@@ -542,7 +524,6 @@
.n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
.init_service = ip_vs_lblc_init_svc,
.done_service = ip_vs_lblc_done_svc,
- .update_service = ip_vs_lblc_update_svc,
.schedule = ip_vs_lblc_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index c234e73..375a1ff 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -106,7 +106,7 @@
return NULL;
}
- e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC);
+ e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL) {
IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
return NULL;
@@ -116,11 +116,9 @@
e->dest = dest;
/* link it to the list */
- write_lock(&set->lock);
e->next = set->list;
set->list = e;
atomic_inc(&set->size);
- write_unlock(&set->lock);
set->lastmod = jiffies;
return e;
@@ -131,7 +129,6 @@
{
struct ip_vs_dest_list *e, **ep;
- write_lock(&set->lock);
for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
if (e->dest == dest) {
/* HIT */
@@ -144,7 +141,6 @@
}
ep = &e->next;
}
- write_unlock(&set->lock);
}
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
@@ -174,7 +170,6 @@
if (set == NULL)
return NULL;
- read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
least = e->dest;
@@ -188,7 +183,6 @@
goto nextstage;
}
}
- read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted least load */
@@ -207,7 +201,6 @@
loh = doh;
}
}
- read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
@@ -229,7 +222,6 @@
if (set == NULL)
return NULL;
- read_lock(&set->lock);
/* select the first destination server, whose weight > 0 */
for (e=set->list; e!=NULL; e=e->next) {
most = e->dest;
@@ -239,7 +231,6 @@
goto nextstage;
}
}
- read_unlock(&set->lock);
return NULL;
/* find the destination with the weighted most load */
@@ -256,7 +247,6 @@
moh = doh;
}
}
- read_unlock(&set->lock);
IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
"activeconns %d refcnt %d weight %d overhead %d\n",
@@ -284,7 +274,6 @@
* IPVS lblcr hash table
*/
struct ip_vs_lblcr_table {
- rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
@@ -311,32 +300,6 @@
static struct ctl_table_header * sysctl_header;
-/*
- * new/free a ip_vs_lblcr_entry, which is a mapping of a destination
- * IP address to a server.
- */
-static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr)
-{
- struct ip_vs_lblcr_entry *en;
-
- en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
- if (en == NULL) {
- IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
- return NULL;
- }
-
- INIT_LIST_HEAD(&en->list);
- en->addr = daddr;
-
- /* initilize its dest set */
- atomic_set(&(en->set.size), 0);
- en->set.list = NULL;
- rwlock_init(&en->set.lock);
-
- return en;
-}
-
-
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
@@ -358,59 +321,72 @@
* Hash an entry in the ip_vs_lblcr_table.
* returns bool success.
*/
-static int
+static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
- unsigned hash;
+ unsigned hash = ip_vs_lblcr_hashkey(en->addr);
- if (!list_empty(&en->list)) {
- IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, "
- "called from %p\n", __builtin_return_address(0));
- return 0;
- }
-
- /*
- * Hash by destination IP address
- */
- hash = ip_vs_lblcr_hashkey(en->addr);
-
- write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
- write_unlock(&tbl->lock);
-
- return 1;
}
/*
- * Get ip_vs_lblcr_entry associated with supplied parameters.
+ * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
+ * read lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr)
{
- unsigned hash;
+ unsigned hash = ip_vs_lblcr_hashkey(addr);
struct ip_vs_lblcr_entry *en;
- hash = ip_vs_lblcr_hashkey(addr);
-
- read_lock(&tbl->lock);
-
- list_for_each_entry(en, &tbl->bucket[hash], list) {
- if (en->addr == addr) {
- /* HIT */
- read_unlock(&tbl->lock);
+ list_for_each_entry(en, &tbl->bucket[hash], list)
+ if (en->addr == addr)
return en;
- }
- }
-
- read_unlock(&tbl->lock);
return NULL;
}
/*
+ * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
+ * IP address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblcr_entry *
+ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr,
+ struct ip_vs_dest *dest)
+{
+ struct ip_vs_lblcr_entry *en;
+
+ en = ip_vs_lblcr_get(tbl, daddr);
+ if (!en) {
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en) {
+ IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
+ return NULL;
+ }
+
+ en->addr = daddr;
+ en->lastuse = jiffies;
+
+ /* initilize its dest set */
+ atomic_set(&(en->set.size), 0);
+ en->set.list = NULL;
+ rwlock_init(&en->set.lock);
+
+ ip_vs_lblcr_hash(tbl, en);
+ }
+
+ write_lock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest);
+ write_unlock(&en->set.lock);
+
+ return en;
+}
+
+
+/*
* Flush all the entries of the specified table.
*/
static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
@@ -418,19 +394,18 @@
int i;
struct ip_vs_lblcr_entry *en, *nxt;
+ /* No locking required, only called during cleanup. */
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
- atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
}
}
-static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl)
+static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
@@ -438,7 +413,7 @@
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
now))
@@ -447,7 +422,7 @@
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -466,17 +441,16 @@
*/
static void ip_vs_lblcr_check_expire(unsigned long data)
{
- struct ip_vs_lblcr_table *tbl;
+ struct ip_vs_service *svc = (struct ip_vs_service *) data;
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
- tbl = (struct ip_vs_lblcr_table *)data;
-
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
- ip_vs_lblcr_full_check(tbl);
+ ip_vs_lblcr_full_check(svc);
tbl->counter = 1;
goto out;
}
@@ -493,7 +467,7 @@
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&tbl->lock);
+ write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
continue;
@@ -502,7 +476,7 @@
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&tbl->lock);
+ write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -520,15 +494,14 @@
/*
* Allocate the ip_vs_lblcr_table for this service
*/
- tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
+ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
- "current service\n",
- sizeof(struct ip_vs_lblcr_table));
+ "current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
@@ -536,7 +509,6 @@
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
- rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
@@ -545,9 +517,8 @@
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
- (unsigned long)tbl);
- tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
- add_timer(&tbl->periodic_timer);
+ (unsigned long)svc);
+ mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
@@ -564,22 +535,16 @@
ip_vs_lblcr_flush(tbl);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree(tbl);
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
- sizeof(struct ip_vs_lblcr_table));
+ sizeof(*tbl));
return 0;
}
-static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline struct ip_vs_dest *
-__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
struct ip_vs_dest *dest, *least;
int loh, doh;
@@ -669,50 +634,78 @@
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
- struct ip_vs_lblcr_table *tbl;
- struct ip_vs_lblcr_entry *en;
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
+ struct ip_vs_dest *dest = NULL;
+ struct ip_vs_lblcr_entry *en;
IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
- tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
+ /* First look in our cache */
+ read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(tbl, iph->daddr);
- if (en == NULL) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- en = ip_vs_lblcr_new(iph->daddr);
- if (en == NULL) {
- return NULL;
- }
- ip_vs_dest_set_insert(&en->set, dest);
- ip_vs_lblcr_hash(tbl, en);
- } else {
+ if (en) {
+ /* We only hold a read lock, but this is atomic */
+ en->lastuse = jiffies;
+
+ /* Get the least loaded destination */
+ read_lock(&en->set.lock);
dest = ip_vs_dest_set_min(&en->set);
- if (!dest || is_overloaded(dest, svc)) {
- dest = __ip_vs_wlc_schedule(svc, iph);
- if (dest == NULL) {
- IP_VS_DBG(1, "no destination available\n");
- return NULL;
- }
- ip_vs_dest_set_insert(&en->set, dest);
- }
+ read_unlock(&en->set.lock);
+
+ /* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
- jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
+ time_after(jiffies, en->set.lastmod +
+ sysctl_ip_vs_lblcr_expiration)) {
struct ip_vs_dest *m;
+
+ write_lock(&en->set.lock);
m = ip_vs_dest_set_max(&en->set);
if (m)
ip_vs_dest_set_erase(&en->set, m);
+ write_unlock(&en->set.lock);
}
- }
- en->lastuse = jiffies;
+ /* If the destination is not overloaded, use it */
+ if (dest && !is_overloaded(dest, svc)) {
+ read_unlock(&svc->sched_lock);
+ goto out;
+ }
+
+ /* The cache entry is invalid, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ read_unlock(&svc->sched_lock);
+ return NULL;
+ }
+
+ /* Update our cache entry */
+ write_lock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest);
+ write_unlock(&en->set.lock);
+ }
+ read_unlock(&svc->sched_lock);
+
+ if (dest)
+ goto out;
+
+ /* No cache entry, time to schedule */
+ dest = __ip_vs_lblcr_schedule(svc, iph);
+ if (!dest) {
+ IP_VS_DBG(1, "no destination available\n");
+ return NULL;
+ }
+
+ /* If we fail to create a cache entry, we'll just use the valid dest */
+ write_lock(&svc->sched_lock);
+ ip_vs_lblcr_new(tbl, iph->daddr, dest);
+ write_unlock(&svc->sched_lock);
+
+out:
IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
"--> server %u.%u.%u.%u:%d\n",
- NIPQUAD(en->addr),
+ NIPQUAD(iph->daddr),
NIPQUAD(dest->addr),
ntohs(dest->port));
@@ -731,7 +724,6 @@
.n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
.init_service = ip_vs_lblcr_init_svc,
.done_service = ip_vs_lblcr_done_svc,
- .update_service = ip_vs_lblcr_update_svc,
.schedule = ip_vs_lblcr_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index ebcdbf7..2c3de1b 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -20,24 +20,6 @@
#include <net/ip_vs.h>
-static int ip_vs_lc_init_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int ip_vs_lc_done_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int ip_vs_lc_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline unsigned int
ip_vs_lc_dest_overhead(struct ip_vs_dest *dest)
{
@@ -99,9 +81,6 @@
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
- .init_service = ip_vs_lc_init_svc,
- .done_service = ip_vs_lc_done_svc,
- .update_service = ip_vs_lc_update_svc,
.schedule = ip_vs_lc_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index 92f3a67..5330d5a 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -37,27 +37,6 @@
#include <net/ip_vs.h>
-static int
-ip_vs_nq_init_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_nq_done_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_nq_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline unsigned int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
@@ -137,9 +116,6 @@
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
- .init_service = ip_vs_nq_init_svc,
- .done_service = ip_vs_nq_done_svc,
- .update_service = ip_vs_nq_update_svc,
.schedule = ip_vs_nq_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
similarity index 63%
rename from net/ipv4/ipvs/ip_vs_proto_ah.c
rename to net/ipv4/ipvs/ip_vs_proto_ah_esp.c
index 73e0ea8..3f9ebd7 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c
@@ -1,5 +1,5 @@
/*
- * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
+ * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS
*
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
@@ -39,11 +39,11 @@
static struct ip_vs_conn *
-ah_conn_in_get(const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
- const struct iphdr *iph,
- unsigned int proto_off,
- int inverse)
+ah_esp_conn_in_get(const struct sk_buff *skb,
+ struct ip_vs_protocol *pp,
+ const struct iphdr *iph,
+ unsigned int proto_off,
+ int inverse)
{
struct ip_vs_conn *cp;
@@ -79,8 +79,8 @@
static struct ip_vs_conn *
-ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
+ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ const struct iphdr *iph, unsigned int proto_off, int inverse)
{
struct ip_vs_conn *cp;
@@ -112,12 +112,12 @@
static int
-ah_conn_schedule(struct sk_buff *skb,
- struct ip_vs_protocol *pp,
- int *verdict, struct ip_vs_conn **cpp)
+ah_esp_conn_schedule(struct sk_buff *skb,
+ struct ip_vs_protocol *pp,
+ int *verdict, struct ip_vs_conn **cpp)
{
/*
- * AH is only related traffic. Pass the packet to IP stack.
+ * AH/ESP is only related traffic. Pass the packet to IP stack.
*/
*verdict = NF_ACCEPT;
return 0;
@@ -125,8 +125,8 @@
static void
-ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
- int offset, const char *msg)
+ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
+ int offset, const char *msg)
{
char buf[256];
struct iphdr _iph, *ih;
@@ -143,28 +143,29 @@
}
-static void ah_init(struct ip_vs_protocol *pp)
+static void ah_esp_init(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
-static void ah_exit(struct ip_vs_protocol *pp)
+static void ah_esp_exit(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
+#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_init,
- .exit = ah_exit,
- .conn_schedule = ah_conn_schedule,
- .conn_in_get = ah_conn_in_get,
- .conn_out_get = ah_conn_out_get,
+ .init = ah_esp_init,
+ .exit = ah_esp_exit,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
.csum_check = NULL,
@@ -172,7 +173,31 @@
.register_app = NULL,
.unregister_app = NULL,
.app_conn_bind = NULL,
- .debug_packet = ah_debug_packet,
+ .debug_packet = ah_esp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
.set_state_timeout = NULL,
};
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_ESP
+struct ip_vs_protocol ip_vs_protocol_esp = {
+ .name = "ESP",
+ .protocol = IPPROTO_ESP,
+ .num_states = 1,
+ .dont_defrag = 1,
+ .init = ah_esp_init,
+ .exit = ah_esp_exit,
+ .conn_schedule = ah_esp_conn_schedule,
+ .conn_in_get = ah_esp_conn_in_get,
+ .conn_out_get = ah_esp_conn_out_get,
+ .snat_handler = NULL,
+ .dnat_handler = NULL,
+ .csum_check = NULL,
+ .state_transition = NULL,
+ .register_app = NULL,
+ .unregister_app = NULL,
+ .app_conn_bind = NULL,
+ .debug_packet = ah_esp_debug_packet,
+ .timeout_change = NULL, /* ISAKMP */
+};
+#endif
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
deleted file mode 100644
index 21d70c8..0000000
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
- *
- * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
- * Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation;
- *
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-
-#include <net/ip_vs.h>
-
-
-/* TODO:
-
-struct isakmp_hdr {
- __u8 icookie[8];
- __u8 rcookie[8];
- __u8 np;
- __u8 version;
- __u8 xchgtype;
- __u8 flags;
- __u32 msgid;
- __u32 length;
-};
-
-*/
-
-#define PORT_ISAKMP 500
-
-
-static struct ip_vs_conn *
-esp_conn_in_get(const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
- const struct iphdr *iph,
- unsigned int proto_off,
- int inverse)
-{
- struct ip_vs_conn *cp;
-
- if (likely(!inverse)) {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->saddr,
- htons(PORT_ISAKMP),
- iph->daddr,
- htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->daddr,
- htons(PORT_ISAKMP),
- iph->saddr,
- htons(PORT_ISAKMP));
- }
-
- if (!cp) {
- /*
- * We are not sure if the packet is from our
- * service, so our conn_schedule hook should return NF_ACCEPT
- */
- IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet "
- "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
- inverse ? "ICMP+" : "",
- pp->name,
- NIPQUAD(iph->saddr),
- NIPQUAD(iph->daddr));
- }
-
- return cp;
-}
-
-
-static struct ip_vs_conn *
-esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- const struct iphdr *iph, unsigned int proto_off, int inverse)
-{
- struct ip_vs_conn *cp;
-
- if (likely(!inverse)) {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->saddr,
- htons(PORT_ISAKMP),
- iph->daddr,
- htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->daddr,
- htons(PORT_ISAKMP),
- iph->saddr,
- htons(PORT_ISAKMP));
- }
-
- if (!cp) {
- IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet "
- "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n",
- inverse ? "ICMP+" : "",
- pp->name,
- NIPQUAD(iph->saddr),
- NIPQUAD(iph->daddr));
- }
-
- return cp;
-}
-
-
-static int
-esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
- int *verdict, struct ip_vs_conn **cpp)
-{
- /*
- * ESP is only related traffic. Pass the packet to IP stack.
- */
- *verdict = NF_ACCEPT;
- return 0;
-}
-
-
-static void
-esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
- int offset, const char *msg)
-{
- char buf[256];
- struct iphdr _iph, *ih;
-
- ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
- if (ih == NULL)
- sprintf(buf, "%s TRUNCATED", pp->name);
- else
- sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u",
- pp->name, NIPQUAD(ih->saddr),
- NIPQUAD(ih->daddr));
-
- printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf);
-}
-
-
-static void esp_init(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
-static void esp_exit(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
-struct ip_vs_protocol ip_vs_protocol_esp = {
- .name = "ESP",
- .protocol = IPPROTO_ESP,
- .num_states = 1,
- .dont_defrag = 1,
- .init = esp_init,
- .exit = esp_exit,
- .conn_schedule = esp_conn_schedule,
- .conn_in_get = esp_conn_in_get,
- .conn_out_get = esp_conn_out_get,
- .snat_handler = NULL,
- .dnat_handler = NULL,
- .csum_check = NULL,
- .state_transition = NULL,
- .register_app = NULL,
- .unregister_app = NULL,
- .app_conn_bind = NULL,
- .debug_packet = esp_debug_packet,
- .timeout_change = NULL, /* ISAKMP */
-};
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 358110d..f749291 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -32,12 +32,6 @@
}
-static int ip_vs_rr_done_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
{
svc->sched_data = &svc->destinations;
@@ -96,7 +90,6 @@
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc,
- .done_service = ip_vs_rr_done_svc,
.update_service = ip_vs_rr_update_svc,
.schedule = ip_vs_rr_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index 77663d8..53f73be 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -41,27 +41,6 @@
#include <net/ip_vs.h>
-static int
-ip_vs_sed_init_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_sed_done_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_sed_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline unsigned int
ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
{
@@ -139,9 +118,6 @@
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
- .init_service = ip_vs_sed_init_svc,
- .done_service = ip_vs_sed_done_svc,
- .update_service = ip_vs_sed_update_svc,
.schedule = ip_vs_sed_schedule,
};
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 9b0ef86..df7ad8d 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -25,27 +25,6 @@
#include <net/ip_vs.h>
-static int
-ip_vs_wlc_init_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_wlc_done_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
-static int
-ip_vs_wlc_update_svc(struct ip_vs_service *svc)
-{
- return 0;
-}
-
-
static inline unsigned int
ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest)
{
@@ -127,9 +106,6 @@
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
- .init_service = ip_vs_wlc_init_svc,
- .done_service = ip_vs_wlc_done_svc,
- .update_service = ip_vs_wlc_update_svc,
.schedule = ip_vs_wlc_schedule,
};