|
@@ -46,6 +46,7 @@
|
|
|
#include <linux/if_bridge.h>
|
|
|
#include <linux/socket.h>
|
|
|
#include <linux/route.h>
|
|
|
+#include <linux/gcd.h>
|
|
|
#include <net/netevent.h>
|
|
|
#include <net/neighbour.h>
|
|
|
#include <net/arp.h>
|
|
@@ -2203,6 +2204,9 @@ struct mlxsw_sp_nexthop {
|
|
|
struct mlxsw_sp_nexthop_key key;
|
|
|
unsigned char gw_addr[sizeof(struct in6_addr)];
|
|
|
int ifindex;
|
|
|
+ int nh_weight;
|
|
|
+ int norm_nh_weight;
|
|
|
+ int num_adj_entries;
|
|
|
struct mlxsw_sp_rif *rif;
|
|
|
u8 should_offload:1, /* set indicates this neigh is connected and
|
|
|
* should be put to KVD linear area of this group.
|
|
@@ -2232,6 +2236,7 @@ struct mlxsw_sp_nexthop_group {
|
|
|
u32 adj_index;
|
|
|
u16 ecmp_size;
|
|
|
u16 count;
|
|
|
+ int sum_norm_weight;
|
|
|
struct mlxsw_sp_nexthop nexthops[0];
|
|
|
#define nh_rif nexthops[0].rif
|
|
|
};
|
|
@@ -2299,7 +2304,7 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
|
|
|
}
|
|
|
|
|
|
int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
|
|
|
- u32 *p_adj_hash_index)
|
|
|
+ u32 *p_adj_size, u32 *p_adj_hash_index)
|
|
|
{
|
|
|
struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
|
|
|
u32 adj_hash_index = 0;
|
|
@@ -2309,6 +2314,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
|
|
|
return -EINVAL;
|
|
|
|
|
|
*p_adj_index = nh_grp->adj_index;
|
|
|
+ *p_adj_size = nh_grp->ecmp_size;
|
|
|
|
|
|
for (i = 0; i < nh_grp->count; i++) {
|
|
|
struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
|
|
@@ -2316,7 +2322,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
|
|
|
if (nh_iter == nh)
|
|
|
break;
|
|
|
if (nh_iter->offloaded)
|
|
|
- adj_hash_index++;
|
|
|
+ adj_hash_index += nh_iter->num_adj_entries;
|
|
|
}
|
|
|
|
|
|
*p_adj_hash_index = adj_hash_index;
|
|
@@ -2599,8 +2605,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
|
|
|
- struct mlxsw_sp_nexthop *nh)
|
|
|
+static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
|
|
|
+ struct mlxsw_sp_nexthop *nh)
|
|
|
{
|
|
|
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
|
|
|
char ratr_pl[MLXSW_REG_RATR_LEN];
|
|
@@ -2617,9 +2623,25 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
|
|
|
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
|
|
|
}
|
|
|
|
|
|
-static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
- u32 adj_index,
|
|
|
- struct mlxsw_sp_nexthop *nh)
|
|
|
+/* Write @nh into each of the adjacency entries assigned to it.
+ *
+ * The entries are consecutive, starting at @adj_index, and there are
+ * nh->num_adj_entries of them. Every entry is written through
+ * __mlxsw_sp_nexthop_update(). Writing stops at the first failure and
+ * that error is returned; 0 is returned when every write succeeded or
+ * when there were no entries to write.
+ */
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+			    struct mlxsw_sp_nexthop *nh)
+{
+	int err = 0;
+	int offset;
+
+	for (offset = 0; !err && offset < nh->num_adj_entries; offset++)
+		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + offset,
+						nh);
+
+	return err;
+}
|
|
|
+
|
|
|
+static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
+ u32 adj_index,
|
|
|
+ struct mlxsw_sp_nexthop *nh)
|
|
|
{
|
|
|
const struct mlxsw_sp_ipip_ops *ipip_ops;
|
|
|
|
|
@@ -2627,6 +2649,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
|
|
|
}
|
|
|
|
|
|
+/* IP-in-IP counterpart of mlxsw_sp_nexthop_update(): write @nh into the
+ * nh->num_adj_entries consecutive adjacency entries that start at
+ * @adj_index, one call to __mlxsw_sp_nexthop_ipip_update() per entry.
+ *
+ * Returns the first error encountered (later entries are then not
+ * written), or 0 if all writes succeeded.
+ */
+static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+					u32 adj_index,
+					struct mlxsw_sp_nexthop *nh)
+{
+	int offset = 0;
+
+	while (offset < nh->num_adj_entries) {
+		int err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp,
+							 adj_index + offset,
+							 nh);
+
+		if (err)
+			return err;
+		offset++;
+	}
+
+	return 0;
+}
|
|
|
+
|
|
|
static int
|
|
|
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
struct mlxsw_sp_nexthop_group *nh_grp,
|
|
@@ -2661,7 +2701,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
|
|
|
nh->update = 0;
|
|
|
nh->offloaded = 1;
|
|
|
}
|
|
|
- adj_index++;
|
|
|
+ adj_index += nh->num_adj_entries;
|
|
|
}
|
|
|
return 0;
|
|
|
}
|
|
@@ -2706,17 +2746,118 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/* Round *@p_adj_grp_size up, in place, to a valid adjacency group size.
+ *
+ * Valid sizes for an adjacency group are 1-64, 512, 1024, 2048 and 4096.
+ * A value of 64 or less is left as it is; anything larger becomes the
+ * smallest of 512/1024/2048/4096 that can hold it. Requests above 4096
+ * are capped at 4096.
+ */
+static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
+{
+	u16 size = *p_adj_grp_size;
+
+	/* The 1-64 range needs no rounding. */
+	if (size <= 64)
+		return;
+
+	if (size > 2048)
+		size = 4096;
+	else if (size > 1024)
+		size = 2048;
+	else if (size > 512)
+		size = 1024;
+	else
+		size = 512;
+
+	*p_adj_grp_size = size;
+}
|
|
|
+
|
|
|
+/* Pick the largest of the big group sizes (512, 1024, 2048, 4096) that
+ * fits in @alloc_size entries and store it in *@p_adj_grp_size.
+ *
+ * When @alloc_size is below 512 none of them fits and *@p_adj_grp_size
+ * is deliberately left untouched.
+ */
+static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
+					     unsigned int alloc_size)
+{
+	/* Too small for any of the big sizes: keep the current value. */
+	if (alloc_size < 512)
+		return;
+
+	if (alloc_size < 1024)
+		*p_adj_grp_size = 512;
+	else if (alloc_size < 2048)
+		*p_adj_grp_size = 1024;
+	else if (alloc_size < 4096)
+		*p_adj_grp_size = 2048;
+	else
+		*p_adj_grp_size = 4096;
+}
|
|
|
+
|
|
|
+/* Adjust *@p_adj_grp_size to a group size that is both valid and makes
+ * good use of what an allocation of that size would provide.
+ *
+ * Returns 0 on success, or the error reported by
+ * mlxsw_sp_kvdl_alloc_size_query(). On error *@p_adj_grp_size has
+ * already been rounded up.
+ */
+static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
+				     u16 *p_adj_grp_size)
+{
+	unsigned int kvdl_size;
+	int err;
+
+	/* Step 1: move the request up to the next supported group size
+	 * and ask how many entries such an allocation would yield.
+	 */
+	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
+	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
+					     &kvdl_size);
+
+	/* Step 2: the allocation may hand back more entries than were
+	 * asked for, so grow the group to use as many of them as a
+	 * valid size permits.
+	 */
+	if (!err)
+		mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, kvdl_size);
+
+	return err;
+}
|
|
|
+
|
|
|
+/* Reduce the weights of the offloadable nexthops in @nh_grp by their
+ * greatest common divisor.
+ *
+ * For every nexthop with should_offload set, norm_nh_weight becomes
+ * nh_weight divided by the GCD of all such nexthops' weights, and the
+ * sum of the normalized weights is stored in nh_grp->sum_norm_weight.
+ * Nexthops without should_offload are skipped entirely; if there are
+ * none to offload the sum is 0.
+ */
+static void
+mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	int divisor = 0, total = 0;
+	int idx;
+
+	/* First pass: GCD of the weights that take part in offload. */
+	for (idx = 0; idx < nh_grp->count; idx++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[idx];
+
+		if (nh->should_offload) {
+			if (divisor <= 0)
+				divisor = nh->nh_weight;
+			else
+				divisor = gcd(nh->nh_weight, divisor);
+		}
+	}
+
+	/* Second pass: scale each weight down and accumulate the sum. */
+	for (idx = 0; idx < nh_grp->count; idx++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[idx];
+
+		if (nh->should_offload) {
+			nh->norm_nh_weight = nh->nh_weight / divisor;
+			total += nh->norm_nh_weight;
+		}
+	}
+
+	nh_grp->sum_norm_weight = total;
+}
|
|
|
+
|
|
|
+/* Split the nh_grp->ecmp_size adjacency entries of @nh_grp between its
+ * offloadable nexthops in proportion to their normalized weights.
+ *
+ * The running sum of normalized weights is mapped to a rounded boundary
+ * inside the group, and each nexthop receives the entries between the
+ * previous boundary and its own. Nexthops without should_offload are
+ * skipped and their num_adj_entries is not modified.
+ *
+ * nh_grp->sum_norm_weight is used as a divisor, so it must be non-zero.
+ */
+static void
+mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	int prev_bound = 0, cum_weight = 0;
+	u16 grp_size = nh_grp->ecmp_size;
+	int weight_sum = nh_grp->sum_norm_weight;
+	int idx;
+
+	for (idx = 0; idx < nh_grp->count; idx++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[idx];
+		int bound;
+
+		if (nh->should_offload) {
+			cum_weight += nh->norm_nh_weight;
+			bound = DIV_ROUND_CLOSEST(grp_size * cum_weight,
+						  weight_sum);
+			nh->num_adj_entries = bound - prev_bound;
+			prev_bound = bound;
+		}
+	}
+}
|
|
|
+
|
|
|
static void
|
|
|
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
|
|
|
struct mlxsw_sp_nexthop_group *nh_grp)
|
|
|
{
|
|
|
+ u16 ecmp_size, old_ecmp_size;
|
|
|
struct mlxsw_sp_nexthop *nh;
|
|
|
bool offload_change = false;
|
|
|
u32 adj_index;
|
|
|
- u16 ecmp_size = 0;
|
|
|
bool old_adj_index_valid;
|
|
|
u32 old_adj_index;
|
|
|
- u16 old_ecmp_size;
|
|
|
int i;
|
|
|
int err;
|
|
|
|
|
@@ -2733,8 +2874,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
|
|
|
if (nh->should_offload)
|
|
|
nh->update = 1;
|
|
|
}
|
|
|
- if (nh->should_offload)
|
|
|
- ecmp_size++;
|
|
|
}
|
|
|
if (!offload_change) {
|
|
|
/* Nothing was added or removed, so no need to reallocate. Just
|
|
@@ -2747,12 +2886,19 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
|
|
|
}
|
|
|
return;
|
|
|
}
|
|
|
- if (!ecmp_size)
|
|
|
+ mlxsw_sp_nexthop_group_normalize(nh_grp);
|
|
|
+ if (!nh_grp->sum_norm_weight)
|
|
|
/* No neigh of this group is connected so we just set
|
|
|
* the trap and let everthing flow through kernel.
|
|
|
*/
|
|
|
goto set_trap;
|
|
|
|
|
|
+ ecmp_size = nh_grp->sum_norm_weight;
|
|
|
+ err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
|
|
|
+ if (err)
|
|
|
+ /* No valid allocation size available. */
|
|
|
+ goto set_trap;
|
|
|
+
|
|
|
err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
|
|
|
if (err) {
|
|
|
/* We ran out of KVD linear space, just set the
|
|
@@ -2767,6 +2913,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
|
|
|
nh_grp->adj_index_valid = 1;
|
|
|
nh_grp->adj_index = adj_index;
|
|
|
nh_grp->ecmp_size = ecmp_size;
|
|
|
+ mlxsw_sp_nexthop_group_rebalance(nh_grp);
|
|
|
err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
|
|
|
if (err) {
|
|
|
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
|
|
@@ -3044,6 +3191,11 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
|
|
|
|
|
|
nh->nh_grp = nh_grp;
|
|
|
nh->key.fib_nh = fib_nh;
|
|
|
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
|
|
+ nh->nh_weight = fib_nh->nh_weight;
|
|
|
+#else
|
|
|
+ nh->nh_weight = 1;
|
|
|
+#endif
|
|
|
memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
|
|
|
err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
|
|
|
if (err)
|
|
@@ -4303,6 +4455,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
|
|
|
struct net_device *dev = rt->dst.dev;
|
|
|
|
|
|
nh->nh_grp = nh_grp;
|
|
|
+ nh->nh_weight = 1;
|
|
|
memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
|
|
|
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
|
|
|
|