[SK_BUFF]: Convert skb->tail to sk_buff_data_t

So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes
on 64bit architectures, allowing us to combine the 4 bytes hole left by the
layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4
64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN...
:-)

Many calculations that previously required that skb->{transport,network,
mac}_header be first converted to a pointer now can be done directly, being
meaningful as offsets or pointers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 9a5850f..e46f4cb 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -829,7 +829,9 @@
 					!= NULL) ) {
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
 					skb_reserve(skb, 2); /* 16byte align */
-					memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
+					memcpy(skb_put(skb, rxlen),
+					       skb_tail_pointer(rxptr->rx_skb_ptr),
+					       rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
 				} else
 					skb_put(skb, rxlen);
@@ -1175,7 +1177,10 @@
 
 	if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) {
 		rxptr->rx_skb_ptr = skb;
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		db->rx_avail_cnt++;
@@ -1339,7 +1344,10 @@
 		if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL )
 			break;
 		rxptr->rx_skb_ptr = skb; /* FIXME (?) */
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		rxptr = rxptr->next_rx_desc;