@@ -1120,10 +1120,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 			write_unlock_bh(&bond->curr_slave_lock);
 			read_unlock(&bond->lock);
 
-			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
+			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
 			if (should_notify_peers)
-				netdev_bonding_change(bond->dev,
-						      NETDEV_NOTIFY_PEERS);
+				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+							 bond->dev);
 
 			read_lock(&bond->lock);
 			write_lock_bh(&bond->curr_slave_lock);
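
For context on the conversions throughout this patch: netdev_bonding_change() was a thin pass-through to the netdev notifier chain, so switching callers to call_netdevice_notifiers() is meant to be behavior-preserving; only the argument order differs. A sketch of what the retired wrapper amounted to in net/core/dev.c (reconstructed from memory for illustration, not part of this diff):

void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	/* Pass straight through; note the swapped argument order
	 * relative to call_netdevice_notifiers(event, dev). */
	call_netdevice_notifiers(event, dev);
}
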
@@ -1558,8 +1558,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			res = netdev_bonding_change(bond_dev,
-						    NETDEV_PRE_TYPE_CHANGE);
+			res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
+						       bond_dev);
 			res = notifier_to_errno(res);
 			if (res) {
 				pr_err("%s: refused to change device type\n",
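
NETDEV_PRE_TYPE_CHANGE exists so that any registered netdevice notifier can veto the type change; notifier_to_errno() then turns a refusal into a negative errno, which is the res that bond_enslave() checks above. A hypothetical listener (illustrative only; the names and the -EBUSY choice are invented) could veto like this:

#include <linux/netdevice.h>
#include <linux/notifier.h>

/* Hypothetical notifier demonstrating the veto path that makes
 * notifier_to_errno(res) return nonzero in bond_enslave(). */
static int demo_netdev_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	if (event == NETDEV_PRE_TYPE_CHANGE)
		return notifier_from_errno(-EBUSY);	/* refuse */
	return NOTIFY_DONE;
}

static struct notifier_block demo_notifier = {
	.notifier_call = demo_netdev_event,
};

Registered via register_netdevice_notifier(&demo_notifier), such a block would make the enslave above fail with "refused to change device type".
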
@@ -1579,8 +1579,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 			}
 
-			netdev_bonding_change(bond_dev,
-					      NETDEV_POST_TYPE_CHANGE);
+			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
+						 bond_dev);
 		}
 	} else if (bond_dev->type != slave_dev->type) {
 		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
@@ -1941,7 +1941,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	block_netpoll_tx();
-	netdev_bonding_change(bond_dev, NETDEV_RELEASE);
+	call_netdevice_notifiers(NETDEV_RELEASE, bond_dev);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -2584,7 +2584,7 @@ re_arm:
 			read_unlock(&bond->lock);
 			return;
 		}
-		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
 		rtnl_unlock();
 	}
 }
@@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 					    arp_work.work);
 	struct slave *slave, *oldcurrent;
 	int do_failover = 0;
-	int delta_in_ticks;
+	int delta_in_ticks, extra_ticks;
 	int i;
 
 	read_lock(&bond->lock);
 
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
+	extra_ticks = delta_in_ticks / 2;
 
 	if (bond->slave_cnt == 0)
 		goto re_arm;
@@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 		if (slave->link != BOND_LINK_UP) {
 			if (time_in_range(jiffies,
 				trans_start - delta_in_ticks,
-				trans_start + delta_in_ticks) &&
+				trans_start + delta_in_ticks + extra_ticks) &&
 			    time_in_range(jiffies,
 				slave->dev->last_rx - delta_in_ticks,
-				slave->dev->last_rx + delta_in_ticks)) {
+				slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
 
 				slave->link  = BOND_LINK_UP;
 				bond_set_active_slave(slave);
@@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 			 */
 			if (!time_in_range(jiffies,
 				trans_start - delta_in_ticks,
-				trans_start + 2 * delta_in_ticks) ||
+				trans_start + 2 * delta_in_ticks + extra_ticks) ||
 			    !time_in_range(jiffies,
 				slave->dev->last_rx - delta_in_ticks,
-				slave->dev->last_rx + 2 * delta_in_ticks)) {
+				slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
 
 				slave->link  = BOND_LINK_DOWN;
 				bond_set_backup_slave(slave);
@@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 	struct slave *slave;
 	int i, commit = 0;
 	unsigned long trans_start;
+	int extra_ticks;
+
+	/* All the time comparisons below need some extra time. Otherwise, on
+	 * fast networks the ARP probe/reply may arrive within the same jiffy
+	 * as it was sent. Then, the next time the ARP monitor is run, one
+	 * arp_interval will already have passed in the comparisons.
+	 */
+	extra_ticks = delta_in_ticks / 2;
 
 	bond_for_each_slave(bond, slave, i) {
 		slave->new_link = BOND_LINK_NOCHANGE;
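
The comment added above is the heart of this part of the patch: time_in_range(now, lo, hi) is inclusive on both ends, but if the ARP reply lands in the same jiffy as the probe, the next monitor pass runs a full arp_interval later and jiffies can sit just beyond last_rx + delta_in_ticks, so a perfectly healthy link fails the check and flaps. The half-interval of slack widens only the upper bound. A minimal illustrative helper (the name is invented; not part of the patch):

#include <linux/jiffies.h>
#include <linux/types.h>

/* Pre-patch window: [last_rx - delta, last_rx + delta].
 * Post-patch: the upper bound gains delta / 2 of slack, so a probe
 * answered within one jiffy still passes when rechecked one
 * arp_interval later. */
static inline bool slave_rx_on_time(unsigned long last_rx,
				    unsigned long delta)
{
	return time_in_range(jiffies, last_rx - delta,
			     last_rx + delta + delta / 2);
}
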
@@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 		if (slave->link != BOND_LINK_UP) {
 			if (time_in_range(jiffies,
 			    slave_last_rx(bond, slave) - delta_in_ticks,
-			    slave_last_rx(bond, slave) + delta_in_ticks)) {
+			    slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
 
 				slave->new_link = BOND_LINK_UP;
 				commit++;
@@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 		 */
 		if (time_in_range(jiffies,
 				  slave->jiffies - delta_in_ticks,
-				  slave->jiffies + 2 * delta_in_ticks))
+				  slave->jiffies + 2 * delta_in_ticks + extra_ticks))
 			continue;
 
 		/*
@@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 		    !bond->current_arp_slave &&
 		    !time_in_range(jiffies,
 			    slave_last_rx(bond, slave) - delta_in_ticks,
-			    slave_last_rx(bond, slave) + 3 * delta_in_ticks)) {
+			    slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
 
 			slave->new_link = BOND_LINK_DOWN;
 			commit++;
@@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
 		if (bond_is_active_slave(slave) &&
 		    (!time_in_range(jiffies,
 			    trans_start - delta_in_ticks,
-			    trans_start + 2 * delta_in_ticks) ||
+			    trans_start + 2 * delta_in_ticks + extra_ticks) ||
 		     !time_in_range(jiffies,
 			    slave_last_rx(bond, slave) - delta_in_ticks,
-			    slave_last_rx(bond, slave) + 2 * delta_in_ticks))) {
+			    slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
 
 			slave->new_link = BOND_LINK_DOWN;
 			commit++;
@@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
 			if ((!bond->curr_active_slave &&
 			     time_in_range(jiffies,
 					   trans_start - delta_in_ticks,
-					   trans_start + delta_in_ticks)) ||
+					   trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
 			    bond->curr_active_slave != slave) {
 				slave->link = BOND_LINK_UP;
 				if (bond->current_arp_slave) {
@@ -3203,7 +3212,7 @@ re_arm:
 			read_unlock(&bond->lock);
 			return;
 		}
-		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
 		rtnl_unlock();
 	}
 }
@@ -3351,57 +3360,94 @@ static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
+/*
+ * Hash for the output device based upon layer 2 data
+ */
+static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
+{
+	struct ethhdr *data = (struct ethhdr *)skb->data;
+
+	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
+		return (data->h_dest[5] ^ data->h_source[5]) % count;
+
+	return 0;
+}
+
 /*
  * Hash for the output device based upon layer 2 and layer 3 data. If
- * the packet is not IP mimic bond_xmit_hash_policy_l2()
+ * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
  */
 static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph = ip_hdr(skb);
-
-	if (skb->protocol == htons(ETH_P_IP)) {
+	struct iphdr *iph;
+	struct ipv6hdr *ipv6h;
+	u32 v6hash;
+	__be32 *s, *d;
+
+	if (skb->protocol == htons(ETH_P_IP) &&
+	    skb_network_header_len(skb) >= sizeof(*iph)) {
+		iph = ip_hdr(skb);
 		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
 			(data->h_dest[5] ^ data->h_source[5])) % count;
+	} else if (skb->protocol == htons(ETH_P_IPV6) &&
+		   skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+		ipv6h = ipv6_hdr(skb);
+		s = &ipv6h->saddr.s6_addr32[0];
+		d = &ipv6h->daddr.s6_addr32[0];
+		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
+		v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
+		return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
 	}
 
-	return (data->h_dest[5] ^ data->h_source[5]) % count;
+	return bond_xmit_hash_policy_l2(skb, count);
 }
 
 /*
  * Hash for the output device based upon layer 3 and layer 4 data. If
  * the packet is a frag or not TCP or UDP, just use layer 3 data. If it is
- * altogether not IP, mimic bond_xmit_hash_policy_l2()
+ * altogether not IP, fall back on bond_xmit_hash_policy_l2()
  */
 static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
 {
-	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph = ip_hdr(skb);
-	__be16 *layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
-	int layer4_xor = 0;
-
-	if (skb->protocol == htons(ETH_P_IP)) {
+	u32 layer4_xor = 0;
+	struct iphdr *iph;
+	struct ipv6hdr *ipv6h;
+	__be32 *s, *d;
+	__be16 *layer4hdr;
+
+	if (skb->protocol == htons(ETH_P_IP) &&
+	    skb_network_header_len(skb) >= sizeof(*iph)) {
+		iph = ip_hdr(skb);
 		if (!ip_is_fragment(iph) &&
 		    (iph->protocol == IPPROTO_TCP ||
-		     iph->protocol == IPPROTO_UDP)) {
-			layer4_xor = ntohs((*layer4hdr ^ *(layer4hdr + 1)));
+		     iph->protocol == IPPROTO_UDP) &&
+		    (skb_headlen(skb) - skb_network_offset(skb) >=
+		     iph->ihl * sizeof(u32) + sizeof(*layer4hdr) * 2)) {
+			layer4hdr = (__be16 *)((u32 *)iph + iph->ihl);
+			layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
 		}
 		return (layer4_xor ^
 			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
-
+	} else if (skb->protocol == htons(ETH_P_IPV6) &&
+		   skb_network_header_len(skb) >= sizeof(*ipv6h)) {
+		ipv6h = ipv6_hdr(skb);
+		if ((ipv6h->nexthdr == IPPROTO_TCP ||
+		     ipv6h->nexthdr == IPPROTO_UDP) &&
+		    (skb_headlen(skb) - skb_network_offset(skb) >=
+		     sizeof(*ipv6h) + sizeof(*layer4hdr) * 2)) {
+			layer4hdr = (__be16 *)(ipv6h + 1);
+			layer4_xor = ntohs(*layer4hdr ^ *(layer4hdr + 1));
+		}
+		s = &ipv6h->saddr.s6_addr32[0];
+		d = &ipv6h->daddr.s6_addr32[0];
+		layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
+		layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
+			      (layer4_xor >> 8);
+		return layer4_xor % count;
 	}
 
-	return (data->h_dest[5] ^ data->h_source[5]) % count;
-}
-
-/*
- * Hash for the output device based upon layer 2 data
- */
-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
-{
-	struct ethhdr *data = (struct ethhdr *)skb->data;
-
-	return (data->h_dest[5] ^ data->h_source[5]) % count;
+	return bond_xmit_hash_policy_l2(skb, count);
 }
 
 /*-------------------------- Device entry points ----------------------------*/
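
Taken together, the hashing changes do two things: every header dereference is now guarded by an skb_headlen()/skb_network_header_len() check before touching packet data, and the L23/L34 policies learn IPv6, folding the low three 32-bit words of each address (the leading word is mostly routing prefix and adds little entropy) down to a byte's worth of spread before the modulus. A hypothetical userspace model of the new IPv6 layer3+4 computation (host-byte-order simplification; all names invented, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Standalone model of the IPv6 branch of bond_xmit_hash_policy_l34():
 * XOR the two ports, fold in address words 1..3 of source and
 * destination, then fold the 32-bit result down before the modulus. */
static int l34_hash_v6(const uint32_t s[4], const uint32_t d[4],
		       uint16_t sport, uint16_t dport, int count)
{
	uint32_t h = (uint32_t)(sport ^ dport);

	h ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
	h ^= (h >> 24) ^ (h >> 16) ^ (h >> 8);
	return h % count;
}

int main(void)
{
	/* 2001:db8:0:1::10 -> 2001:db8:0:2::20, TCP 33000 -> 443,
	 * spread across 4 slaves. */
	uint32_t s[4] = { 0x20010db8, 0x00000001, 0x0, 0x00000010 };
	uint32_t d[4] = { 0x20010db8, 0x00000002, 0x0, 0x00000020 };

	printf("slave index: %d\n", l34_hash_v6(s, d, 33000, 443, 4));
	return 0;
}

Since XOR commutes with byte swapping, hashing the ports in host order here agrees with the kernel's ntohs(*layer4hdr ^ *(layer4hdr + 1)) up to the byte order of the inputs.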