net: reorder struct sock fields Right now, fields in struct sock are not optimally ordered, because each path (RX softirq, TX completion, RX user, TX user) has to touch fields that are contained in many different cache lines. The really critical thing is to shrink number of cache lines that are used at RX softirq time : CPU handling softirqs for a device can receive many frames per second for many sockets. If load is too big, we can drop frames at NIC level. RPS or multiqueue cards can help, but better reduce latency if possible. This patch starts with UDP protocol, then additional patches will try to reduce latencies of other ones as well. At RX softirq time, fields of interest for UDP protocol are : (not counting ones in inet struct for the lookup) Read/Written: sk_refcnt (atomic increment/decrement) sk_rmem_alloc & sk_backlog.len (to check if there is room in queues) sk_receive_queue sk_backlog (if socket locked by user program) sk_rxhash sk_forward_alloc sk_drops Read only: sk_rcvbuf (sk_rcvqueues_full()) sk_filter sk_wq sk_policy[0] sk_flags Additional notes : - sk_backlog has one hole on 64bit arches. We can fill it to save 8 bytes. - sk_backlog is used only if RX sofirq handler finds the socket while locked by user. - sk_rxhash is written only once per flow. - sk_drops is written only if queues are full Final layout : [1] One section grouping all read/write fields, but placing rxhash and sk_backlog at the end of this section. [2] One section grouping all read fields in RX handler (sk_filter, sk_rcv_buf, sk_wq) [3] Section used by other paths I'll post a patch on its own to put sk_refcnt at the end of struct sock_common so that it shares same cache line than section [1] New offsets on 64bit arch : sizeof(struct sock)=0x268 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x48 offsetof(struct sock, sk_receive_queue)=0x68 offsetof(struct sock, sk_backlog)=0x80 offsetof(struct sock, sk_rmem_alloc)=0x80 offsetof(struct sock, sk_forward_alloc)=0x98 offsetof(struct sock, sk_rxhash)=0x9c offsetof(struct sock, sk_rcvbuf)=0xa4 offsetof(struct sock, sk_drops) =0xa0 offsetof(struct sock, sk_filter)=0xa8 offsetof(struct sock, sk_wq)=0xb0 offsetof(struct sock, sk_policy)=0xd0 offsetof(struct sock, sk_flags) =0xe0 Instead of : sizeof(struct sock)=0x270 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x50 offsetof(struct sock, sk_receive_queue)=0xc0 offsetof(struct sock, sk_backlog)=0x70 offsetof(struct sock, sk_rmem_alloc)=0xac offsetof(struct sock, sk_forward_alloc)=0x10c offsetof(struct sock, sk_rxhash)=0x128 offsetof(struct sock, sk_rcvbuf)=0x4c offsetof(struct sock, sk_drops) =0x16c offsetof(struct sock, sk_filter)=0x198 offsetof(struct sock, sk_wq)=0x88 offsetof(struct sock, sk_policy)=0x98 offsetof(struct sock, sk_flags) =0x130 Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>

commit: b178bb3dfc30d9555bdd2401e95af98e23e83e10 [log] [tgz]
author: Eric Dumazet <eric.dumazet@gmail.com> Tue Nov 16 05:56:04 2010 +0000
committer: David S. Miller <davem@davemloft.net> Tue Nov 16 11:17:43 2010 -0800
tree: ec52d739015de589ceca1d31c5802305d94c9f6d
parent: c31504dc0d1dc853dcee509d9999169a9097a717 [diff] [blame]
diff --git a/include/net/sock.h b/include/net/sock.h
index eb0c1f5..5557dfb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h

@@ -241,6 +241,47 @@
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
 #define sk_net			__sk_common.skc_net
+	socket_lock_t		sk_lock;
+	struct sk_buff_head	sk_receive_queue;
+	/*
+	 * The backlog queue is special, it is always used with
+	 * the per-socket spinlock held and requires low latency
+	 * access. Therefore we special case it's implementation.
+	 * Note : rmem_alloc is in this structure to fill a hole
+	 * on 64bit arches, not because its logically part of
+	 * backlog.
+	 */
+	struct {
+		atomic_t	rmem_alloc;
+		int		len;
+		struct sk_buff	*head;
+		struct sk_buff	*tail;
+	} sk_backlog;
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+	int			sk_forward_alloc;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
+	atomic_t		sk_drops;
+	int			sk_rcvbuf;
+
+	struct sk_filter __rcu	*sk_filter;
+	struct socket_wq	*sk_wq;
+
+#ifdef CONFIG_NET_DMA
+	struct sk_buff_head	sk_async_wait_queue;
+#endif
+
+#ifdef CONFIG_XFRM
+	struct xfrm_policy	*sk_policy[2];
+#endif
+	unsigned long 		sk_flags;
+	struct dst_entry	*sk_dst_cache;
+	spinlock_t		sk_dst_lock;
+	atomic_t		sk_wmem_alloc;
+	atomic_t		sk_omem_alloc;
+	int			sk_sndbuf;
+	struct sk_buff_head	sk_write_queue;
 	kmemcheck_bitfield_begin(flags);
 	unsigned int		sk_shutdown  : 2,
 				sk_no_check  : 2,
@@ -248,52 +289,19 @@
 				sk_protocol  : 8,
 				sk_type      : 16;
 	kmemcheck_bitfield_end(flags);
-	int			sk_rcvbuf;
-	socket_lock_t		sk_lock;
-	/*
-	 * The backlog queue is special, it is always used with
-	 * the per-socket spinlock held and requires low latency
-	 * access. Therefore we special case it's implementation.
-	 */
-	struct {
-		struct sk_buff *head;
-		struct sk_buff *tail;
-		int len;
-	} sk_backlog;
-	struct socket_wq	*sk_wq;
-	struct dst_entry	*sk_dst_cache;
-#ifdef CONFIG_XFRM
-	struct xfrm_policy	*sk_policy[2];
-#endif
-	spinlock_t		sk_dst_lock;
-	atomic_t		sk_rmem_alloc;
-	atomic_t		sk_wmem_alloc;
-	atomic_t		sk_omem_alloc;
-	int			sk_sndbuf;
-	struct sk_buff_head	sk_receive_queue;
-	struct sk_buff_head	sk_write_queue;
-#ifdef CONFIG_NET_DMA
-	struct sk_buff_head	sk_async_wait_queue;
-#endif
 	int			sk_wmem_queued;
-	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
 	int			sk_route_caps;
 	int			sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
-#ifdef CONFIG_RPS
-	__u32			sk_rxhash;
-#endif
-	unsigned long 		sk_flags;
 	unsigned long	        sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
 	int			sk_err,
 				sk_err_soft;
-	atomic_t		sk_drops;
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
@@ -301,7 +309,6 @@
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
 	long			sk_sndtimeo;
-	struct sk_filter __rcu	*sk_filter;
 	void			*sk_protinfo;
 	struct timer_list	sk_timer;
 	ktime_t			sk_stamp;
commit	b178bb3dfc30d9555bdd2401e95af98e23e83e10	[log] [tgz]
author	Eric Dumazet <eric.dumazet@gmail.com>	Tue Nov 16 05:56:04 2010 +0000
committer	David S. Miller <davem@davemloft.net>	Tue Nov 16 11:17:43 2010 -0800
tree	ec52d739015de589ceca1d31c5802305d94c9f6d
parent	c31504dc0d1dc853dcee509d9999169a9097a717 [diff] [blame]