[PATCH] bonding: suppress duplicate packets
Originally submitted by Kenzo Iwami; his original description is:
The current bonding driver receives duplicate packets when broadcast/
multicast packets are sent by other devices or packets are flooded by the
switch. This patch adds new flags to the priv_flags field of the net_device
structure so that the bonding driver can discard duplicate packets in
dev.c:skb_bond().
Modified by Jay Vosburgh to change a define name, update some
comments, rearrange the new skb_bond() for clarity, clear all bonding
priv_flags on slave release, and update the driver version.
Signed-off-by: Kenzo Iwami <k-iwami@cj.jp.nec.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index bcf9f17..623c87a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1040,6 +1040,10 @@
if ((bond->params.mode == BOND_MODE_TLB) ||
(bond->params.mode == BOND_MODE_ALB)) {
bond_alb_handle_active_change(bond, new_active);
+ if (old_active)
+ bond_set_slave_inactive_flags(old_active);
+ if (new_active)
+ bond_set_slave_active_flags(new_active);
} else {
bond->curr_active_slave = new_active;
}
@@ -1443,15 +1447,16 @@
switch (bond->params.mode) {
case BOND_MODE_ACTIVEBACKUP:
- /* if we're in active-backup mode, we need one and only one active
- * interface. The backup interfaces will have their NOARP flag set
- * because we need them to be completely deaf and not to respond to
- * any ARP request on the network to avoid fooling a switch. Thus,
- * since we guarantee that curr_active_slave always point to the last
- * usable interface, we just have to verify this interface's flag.
+ /* if we're in active-backup mode, we need one and
+ * only one active interface. The backup interfaces
+ * will have their SLAVE_INACTIVE flag set because we
+ * need them to drop all packets. Thus, since we
+ * guarantee that curr_active_slave always points to
+ * the last usable interface, we just have to verify
+ * this interface's flag.
*/
if (((!bond->curr_active_slave) ||
- (bond->curr_active_slave->dev->flags & IFF_NOARP)) &&
+ (bond->curr_active_slave->dev->priv_flags & IFF_SLAVE_INACTIVE)) &&
(new_slave->link != BOND_LINK_DOWN)) {
dprintk("This is the first active slave\n");
/* first slave or no active slave yet, and this link
@@ -1492,6 +1497,8 @@
* is OK, so make this interface the active one
*/
bond_change_active_slave(bond, new_slave);
+ } else {
+ bond_set_slave_inactive_flags(new_slave);
}
break;
default:
@@ -1724,13 +1731,8 @@
addr.sa_family = slave_dev->type;
dev_set_mac_address(slave_dev, &addr);
- /* restore the original state of the
- * IFF_NOARP flag that might have been
- * set by bond_set_slave_inactive_flags()
- */
- if ((slave->original_flags & IFF_NOARP) == 0) {
- slave_dev->flags &= ~IFF_NOARP;
- }
+ slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
+ IFF_SLAVE_INACTIVE);
kfree(slave);
@@ -1816,12 +1818,8 @@
addr.sa_family = slave_dev->type;
dev_set_mac_address(slave_dev, &addr);
- /* restore the original state of the IFF_NOARP flag that might have
- * been set by bond_set_slave_inactive_flags()
- */
- if ((slave->original_flags & IFF_NOARP) == 0) {
- slave_dev->flags &= ~IFF_NOARP;
- }
+ slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB |
+ IFF_SLAVE_INACTIVE);
kfree(slave);
@@ -4061,14 +4059,17 @@
bond_dev->hard_start_xmit = bond_xmit_broadcast;
break;
case BOND_MODE_8023AD:
+ bond_set_master_3ad_flags(bond);
bond_dev->hard_start_xmit = bond_3ad_xmit_xor;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
else
bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
break;
- case BOND_MODE_TLB:
case BOND_MODE_ALB:
+ bond_set_master_alb_flags(bond);
+ /* FALLTHRU */
+ case BOND_MODE_TLB:
bond_dev->hard_start_xmit = bond_alb_xmit;
bond_dev->set_mac_address = bond_alb_set_mac_address;
break;
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 041bcc5..5a9bd95 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -424,6 +424,12 @@
ret = -EINVAL;
goto out;
} else {
+ if (bond->params.mode == BOND_MODE_8023AD)
+ bond_unset_master_3ad_flags(bond);
+
+ if (bond->params.mode == BOND_MODE_ALB)
+ bond_unset_master_alb_flags(bond);
+
bond->params.mode = new_value;
bond_set_mode_ops(bond, bond->params.mode);
printk(KERN_INFO DRV_NAME ": %s: setting mode to %s (%d).\n",
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 3dd78d0..ce9dc9b 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -22,8 +22,8 @@
#include "bond_3ad.h"
#include "bond_alb.h"
-#define DRV_VERSION "3.0.1"
-#define DRV_RELDATE "January 9, 2006"
+#define DRV_VERSION "3.0.2"
+#define DRV_RELDATE "February 21, 2006"
#define DRV_NAME "bonding"
#define DRV_DESCRIPTION "Ethernet Channel Bonding Driver"
@@ -230,14 +230,37 @@
static inline void bond_set_slave_inactive_flags(struct slave *slave)
{
- slave->state = BOND_STATE_BACKUP;
- slave->dev->flags |= IFF_NOARP;
+ struct bonding *bond = slave->dev->master->priv;
+ if (bond->params.mode != BOND_MODE_TLB &&
+ bond->params.mode != BOND_MODE_ALB)
+ slave->state = BOND_STATE_BACKUP;
+ slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
}
static inline void bond_set_slave_active_flags(struct slave *slave)
{
slave->state = BOND_STATE_ACTIVE;
- slave->dev->flags &= ~IFF_NOARP;
+ slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE;
+}
+
+static inline void bond_set_master_3ad_flags(struct bonding *bond)
+{
+ bond->dev->priv_flags |= IFF_MASTER_8023AD;
+}
+
+static inline void bond_unset_master_3ad_flags(struct bonding *bond)
+{
+ bond->dev->priv_flags &= ~IFF_MASTER_8023AD;
+}
+
+static inline void bond_set_master_alb_flags(struct bonding *bond)
+{
+ bond->dev->priv_flags |= IFF_MASTER_ALB;
+}
+
+static inline void bond_unset_master_alb_flags(struct bonding *bond)
+{
+ bond->dev->priv_flags &= ~IFF_MASTER_ALB;
}
struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
diff --git a/include/linux/if.h b/include/linux/if.h
index ce627d9..12c6f6d 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -52,6 +52,9 @@
/* Private (from user) interface flags (netdevice->priv_flags). */
#define IFF_802_1Q_VLAN 0x1 /* 802.1Q VLAN device. */
#define IFF_EBRIDGE 0x2 /* Ethernet bridging device. */
+#define IFF_SLAVE_INACTIVE 0x4 /* bonding slave not the curr. active */
+#define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */
+#define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */
#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 7a92c1c..ab08f35 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -61,6 +61,7 @@
#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
#define ETH_P_IPX 0x8137 /* IPX over DIX */
#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */
#define ETH_P_WCCP 0x883E /* Web-cache coordination protocol
* defined in draft-wilson-wrec-wccp-v2-00.txt */
#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */
diff --git a/net/core/dev.c b/net/core/dev.c
index 225e38f..ef56c035 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1446,8 +1446,29 @@
{
struct net_device *dev = skb->dev;
- if (dev->master)
+ if (dev->master) {
+ /*
+ * On bonding slaves other than the currently active
+ * slave, suppress duplicates except for 802.3ad
+ * ETH_P_SLOW and alb non-mcast/bcast.
+ */
+ if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+ if (dev->master->priv_flags & IFF_MASTER_ALB) {
+ if (skb->pkt_type != PACKET_BROADCAST &&
+ skb->pkt_type != PACKET_MULTICAST)
+ goto keep;
+ }
+
+ if (dev->master->priv_flags & IFF_MASTER_8023AD &&
+ skb->protocol == __constant_htons(ETH_P_SLOW))
+ goto keep;
+
+ kfree_skb(skb);
+ return NULL;
+ }
+keep:
skb->dev = dev->master;
+ }
return dev;
}
@@ -1591,6 +1612,9 @@
orig_dev = skb_bond(skb);
+ if (!orig_dev)
+ return NET_RX_DROP;
+
__get_cpu_var(netdev_rx_stat).total++;
skb->h.raw = skb->nh.raw = skb->data;