// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/events.h"

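/* True if the lag device is currently bonded in multipath mode. */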
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2
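/* Multipath offload can be used only when both ports are present and the
 * lag device is ready, any currently active lag is already in multipath
 * mode, no more than two ports are bonded, and the eswitches of both
 * devices satisfy the multipath prerequisites.
 */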
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2);

	if (idx0 < 0 || idx1 < 0 || !mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev,
					 ldev->pf[idx1].dev);
}

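/* Returns true when @dev belongs to a lag that is running in multipath mode. */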
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set tx port affinity for a multipath lag
 * @ldev: lag device
 * @port: affinity to program:
 *	  0 - normal affinity (both ports enabled).
 *	  1 - affinity to port 1.
 *	  2 - affinity to port 2.
 **/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2);
	struct lag_tracker tracker = {};

	if (idx0 < 0 || idx1 < 0 || !__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = true;
		tracker.netdev_state[idx1].tx_enabled = true;
		tracker.netdev_state[idx0].link_up = true;
		tracker.netdev_state[idx1].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = true;
		tracker.netdev_state[idx0].link_up = true;
		tracker.netdev_state[idx1].tx_enabled = false;
		tracker.netdev_state[idx1].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = false;
		tracker.netdev_state[idx0].link_up = false;
		tracker.netdev_state[idx1].tx_enabled = true;
		tracker.netdev_state[idx1].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[idx0].dev,
			       "Invalid affinity port %d", port);
		return;
	}

	if (tracker.netdev_state[idx0].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[idx1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

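/* Flush callback passed to register_fib_notifier(): wait for all queued
 * FIB work items to complete.
 */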
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

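/* Cache the fib_info, priority and destination of the route currently
 * tracked for multipath offload.
 */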
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

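/* Work item carrying a FIB notification from atomic notifier context to
 * the lag multipath workqueue.
 */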
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

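/* Walk the nexthops of @fi, starting after @current_dev (or from the first
 * nexthop when @current_dev is NULL), and return the first nexthop netdev
 * that belongs to this lag, or NULL if none is found.
 */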
static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

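/* Handle route add/replace/delete events: decide whether the route can be
 * tracked for multipath offload, activate the lag in multipath mode when
 * the first multipath route is seen, and program port affinity accordingly.
 */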
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;
	int i, dev_idx = 0;

	if (idx < 0)
		return;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[idx].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			mlx5_ldev_for_each(i, 0, ldev) {
				dev_idx++;
				if (ldev->pf[i].netdev == nh_dev0)
					break;
			}
			mlx5_lag_set_port_affinity(ldev, dev_idx);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

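/* Handle nexthop add/delete for the tracked route: shift affinity to the
 * peer port when a nexthop is removed, and restore normal affinity once
 * the route has two nexthops again.
 */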
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int i, dev_idx = 0;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		mlx5_ldev_for_each(i, 0, ldev) {
			if (ldev->pf[i].netdev == fib_nh->fib_nh_dev)
				break;
			dev_idx++;
		}

		if (dev_idx >= 0) {
			dev_idx = (dev_idx + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, dev_idx);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

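/* Workqueue handler: dispatch the queued FIB event under RTNL and drop the
 * fib_info reference taken in the notifier callback.
 */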
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

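/* Allocate and initialize a work item for a FIB event. Called from atomic
 * notifier context, hence GFP_ATOMIC.
 */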
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

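/* FIB notifier callback (atomic context): filter out events we do not
 * track, take a reference on the fib_info and queue a work item to handle
 * the event on the multipath workqueue.
 */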
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

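/* Create the multipath event workqueue and register the FIB notifier.
 * Safe to call more than once; registration happens only the first time.
 */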
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

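/* Unregister the FIB notifier and destroy the multipath event workqueue,
 * if they were set up.
 */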
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}