[NET] Generalise TCP's struct open_request minisock infrastructure
Kept this first changeset minimal, without changing existing names, to
ease peer review.
Basically, tcp_openreq_alloc now receives the or_calltable, which in turn
has two new members:
->slab, which replaces tcp_openreq_cachep
->obj_size, which gives the size of the openreq descendant for
a specific protocol
The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.
I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.
Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)
The next changeset will rename some of the structs, fields and functions
mentioned above: struct or_calltable is far too unclear and is better named
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 8438c68..d5b7c90 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -81,6 +81,7 @@
#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/types.h>
+#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/igmp.h>
#include <net/flow.h>
@@ -107,6 +108,26 @@
#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
+struct inet_request_sock {
+ struct open_request req;
+ u32 loc_addr;
+ u32 rmt_addr;
+ u16 rmt_port;
+ u16 snd_wscale : 4,
+ rcv_wscale : 4,
+ tstamp_ok : 1,
+ sack_ok : 1,
+ wscale_ok : 1,
+ ecn_ok : 1,
+ acked : 1;
+ struct ip_options *opt;
+};
+
+static inline struct inet_request_sock *inet_rsk(const struct open_request *sk)
+{
+ return (struct inet_request_sock *)sk;
+}
+
struct ipv6_pinfo;
struct inet_sock {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ab0d0ef..98acdbf 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,19 @@
#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
+struct tcp6_request_sock {
+ struct tcp_request_sock req;
+ struct in6_addr loc_addr;
+ struct in6_addr rmt_addr;
+ struct sk_buff *pktopts;
+ int iif;
+};
+
+static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk)
+{
+ return (struct tcp6_request_sock *)sk;
+}
+
/**
* struct ipv6_pinfo - ipv6 private area
*
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14a55e3..86771b3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,17 @@
__u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
};
+struct tcp_request_sock {
+ struct inet_request_sock req;
+ __u32 rcv_isn;
+ __u32 snt_isn;
+};
+
+static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req)
+{
+ return (struct tcp_request_sock *)req;
+}
+
struct tcp_sock {
/* inet_sock has to be the first member of tcp_sock */
struct inet_sock inet;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
new file mode 100644
index 0000000..9502f55
--- /dev/null
+++ b/include/net/request_sock.h
@@ -0,0 +1,77 @@
+/*
+ * NET Generic infrastructure for Network protocols.
+ *
+ * Definitions for request_sock
+ *
+ * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * From code originally in include/net/tcp.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _REQUEST_SOCK_H
+#define _REQUEST_SOCK_H
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <net/sock.h>
+
+struct open_request;
+struct sk_buff;
+struct dst_entry;
+struct proto;
+
+struct or_calltable {
+ int family;
+ kmem_cache_t *slab;
+ int obj_size;
+ int (*rtx_syn_ack)(struct sock *sk,
+ struct open_request *req,
+ struct dst_entry *dst);
+ void (*send_ack)(struct sk_buff *skb,
+ struct open_request *req);
+ void (*send_reset)(struct sk_buff *skb);
+ void (*destructor)(struct open_request *req);
+};
+
+/* struct open_request - mini sock to represent a connection request
+ */
+struct open_request {
+ struct open_request *dl_next; /* Must be first member! */
+ u16 mss;
+ u8 retrans;
+ u8 __pad;
+ /* The following two fields can be easily recomputed I think -AK */
+ u32 window_clamp; /* window clamp at creation time */
+ u32 rcv_wnd; /* rcv_wnd offered first time */
+ u32 ts_recent;
+ unsigned long expires;
+ struct or_calltable *class;
+ struct sock *sk;
+};
+
+static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class)
+{
+ struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC);
+
+ if (req != NULL)
+ req->class = class;
+
+ return req;
+}
+
+static inline void tcp_openreq_fastfree(struct open_request *req)
+{
+ kmem_cache_free(req->class->slab, req);
+}
+
+static inline void tcp_openreq_free(struct open_request *req)
+{
+ req->class->destructor(req);
+ tcp_openreq_fastfree(req);
+}
+
+#endif /* _REQUEST_SOCK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index a9ef3a6..6919276 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -484,6 +484,8 @@
extern int sk_wait_data(struct sock *sk, long *timeo);
+struct or_calltable;
+
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
* transport -> network interface is defined by struct inet_proto
@@ -547,6 +549,8 @@
kmem_cache_t *slab;
unsigned int obj_size;
+ struct or_calltable *rsk_prot;
+
struct module *owner;
char name[32];
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e71f8ba..d438ba5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
#include <linux/cache.h>
#include <linux/percpu.h>
#include <net/checksum.h>
+#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
@@ -613,74 +614,6 @@
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
-struct open_request;
-
-struct or_calltable {
- int family;
- int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*);
- void (*send_ack) (struct sk_buff *skb, struct open_request *req);
- void (*destructor) (struct open_request *req);
- void (*send_reset) (struct sk_buff *skb);
-};
-
-struct tcp_v4_open_req {
- __u32 loc_addr;
- __u32 rmt_addr;
- struct ip_options *opt;
-};
-
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-struct tcp_v6_open_req {
- struct in6_addr loc_addr;
- struct in6_addr rmt_addr;
- struct sk_buff *pktopts;
- int iif;
-};
-#endif
-
-/* this structure is too big */
-struct open_request {
- struct open_request *dl_next; /* Must be first member! */
- __u32 rcv_isn;
- __u32 snt_isn;
- __u16 rmt_port;
- __u16 mss;
- __u8 retrans;
- __u8 __pad;
- __u16 snd_wscale : 4,
- rcv_wscale : 4,
- tstamp_ok : 1,
- sack_ok : 1,
- wscale_ok : 1,
- ecn_ok : 1,
- acked : 1;
- /* The following two fields can be easily recomputed I think -AK */
- __u32 window_clamp; /* window clamp at creation time */
- __u32 rcv_wnd; /* rcv_wnd offered first time */
- __u32 ts_recent;
- unsigned long expires;
- struct or_calltable *class;
- struct sock *sk;
- union {
- struct tcp_v4_open_req v4_req;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- struct tcp_v6_open_req v6_req;
-#endif
- } af;
-};
-
-/* SLAB cache for open requests. */
-extern kmem_cache_t *tcp_openreq_cachep;
-
-#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC)
-#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req)
-
-static inline void tcp_openreq_free(struct open_request *req)
-{
- req->class->destructor(req);
- tcp_openreq_fastfree(req);
-}
-
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
#else
@@ -1832,17 +1765,19 @@
struct tcp_options_received *rx_opt,
struct sk_buff *skb)
{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
- req->rcv_isn = TCP_SKB_CB(skb)->seq;
+ tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
- req->tstamp_ok = rx_opt->tstamp_ok;
- req->sack_ok = rx_opt->sack_ok;
- req->snd_wscale = rx_opt->snd_wscale;
- req->wscale_ok = rx_opt->wscale_ok;
- req->acked = 0;
- req->ecn_ok = 0;
- req->rmt_port = skb->h.th->source;
+ ireq->tstamp_ok = rx_opt->tstamp_ok;
+ ireq->sack_ok = rx_opt->sack_ok;
+ ireq->snd_wscale = rx_opt->snd_wscale;
+ ireq->wscale_ok = rx_opt->wscale_ok;
+ ireq->acked = 0;
+ ireq->ecn_ok = 0;
+ ireq->rmt_port = skb->h.th->source;
}
extern void tcp_enter_memory_pressure(void);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index dc14563..94ad970 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -2,6 +2,7 @@
#define _NET_TCP_ECN_H_ 1
#include <net/inet_ecn.h>
+#include <net/request_sock.h>
#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -40,7 +41,7 @@
static __inline__ void
TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th)
{
- if (req->ecn_ok)
+ if (inet_rsk(req)->ecn_ok)
th->ece = 1;
}
@@ -113,14 +114,14 @@
static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
struct open_request *req)
{
- tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0;
+ tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
}
static __inline__ void
TCP_ECN_create_request(struct open_request *req, struct tcphdr *th)
{
if (sysctl_tcp_ecn && th->ece && th->cwr)
- req->ecn_ok = 1;
+ inet_rsk(req)->ecn_ok = 1;
}
#endif