// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/events.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2
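/* Multipath offload requires both LAG ports to be present, the LAG to be
 * ready, no other LAG mode to be active and no more than two ports. The
 * E-Switch prerequisites of both devices are checked last.
 */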
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2);

	if (idx0 < 0 || idx1 < 0 || !mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev,
					 ldev->pf[idx1].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set the multipath TX port affinity
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2);
	struct lag_tracker tracker = {};

	if (idx0 < 0 || idx1 < 0 || !__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = true;
		tracker.netdev_state[idx1].tx_enabled = true;
		tracker.netdev_state[idx0].link_up = true;
		tracker.netdev_state[idx1].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = true;
		tracker.netdev_state[idx0].link_up = true;
		tracker.netdev_state[idx1].tx_enabled = false;
		tracker.netdev_state[idx1].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[idx0].tx_enabled = false;
		tracker.netdev_state[idx0].link_up = false;
		tracker.netdev_state[idx1].tx_enabled = true;
		tracker.netdev_state[idx1].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[idx0].dev,
			       "Invalid affinity port %d", port);
		return;
	}

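	/* Notify the devices that remain TX-enabled so listeners of
	 * MLX5_DEV_EVENT_PORT_AFFINITY can react to the new affinity.
	 */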
	if (tracker.netdev_state[idx0].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[idx1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

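/* Flush callback passed to register_fib_notifier(); the FIB notifier core
 * uses it to make sure all events queued on the multipath workqueue have
 * been processed.
 */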
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

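/* Remember the route currently tracked for multipath offload. */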
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
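	/* If a current device was given, skip nexthops up to and including
	 * it, so the lookup below returns the next nexthop device.
	 */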
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
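	/* Return the first remaining nexthop device that belongs to this LAG. */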
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;
	int i, dev_idx = 0;

	if (idx < 0)
		return;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop track */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

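	/* Find up to two nexthop devices of this route that belong to the LAG. */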
	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[idx].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
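			/* Only one nexthop is on a LAG port: steer all
			 * traffic to it. dev_idx becomes 1-based below,
			 * matching MLX5_LAG_P1_AFFINITY/MLX5_LAG_P2_AFFINITY.
			 */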
			mlx5_ldev_for_each(i, 0, ldev) {
				dev_idx++;
				if (ldev->pf[i].netdev == nh_dev0)
					break;
			}
			mlx5_lag_set_port_affinity(ldev, dev_idx);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int i, dev_idx = 0;

	/* Check the nh event is related to the route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
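		/* Find the LAG port of the removed nexthop and move the
		 * affinity to its peer port.
		 */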
		mlx5_ldev_for_each(i, 0, ldev) {
			if (ldev->pf[i].netdev == fib_nh->fib_nh_dev)
				break;
			dev_idx++;
		}

		if (dev_idx >= 0) {
			dev_idx = (dev_idx + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, dev_idx);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
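		/* Drop the reference taken in mlx5_lag_fib_event(). */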
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

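	/* The FIB notifier may run in atomic context, so the work item is
	 * allocated with GFP_ATOMIC and the handling is deferred to the
	 * multipath workqueue.
	 */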
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
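		/* Routes using nexthop objects are not offloaded. */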
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

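	/* The FIB notifier is already registered; nothing more to do. */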
	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}