1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/devcom.h"
39 #include "mlx5_core.h"
40 #include "eswitch.h"
41 #include "esw/acl/ofld.h"
42 #include "lag.h"
43 #include "mp.h"
44 #include "mpesw.h"
45 
46 
47 /* General purpose, use for short periods of time.
48  * Beware of lock dependencies (preferably, no locks should be acquired
49  * under it).
50  */
51 static DEFINE_SPINLOCK(lag_lock);
52 
53 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
54 {
55 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
56 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
57 
58 	if (mode == MLX5_LAG_MODE_MPESW)
59 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
60 
61 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
62 }
63 
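/* Build a bitmask of the ports that mlx5_infer_tx_enabled() reports as
 * active; used to program the active_port field of the LAG context when
 * the hash-based (port selection FT) mode is used.
 */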
64 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
65 {
66 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
67 	u8 active_port = 0;
68 	int num_enabled;
69 	int idx;
70 
71 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
72 			      &num_enabled);
73 	for (idx = 0; idx < num_enabled; idx++)
74 		active_port |= BIT_MASK(enabled_ports[idx]);
75 
76 	return active_port;
77 }
78 
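/* Issue the CREATE_LAG command. Depending on the port selection mode this
 * programs either the queue-affinity tx remap of the first two ports or,
 * when port_select_flow_table_bypass is supported, the initial
 * active_port bitmap for the hash-based mode.
 */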
79 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
80 			       int mode, unsigned long flags)
81 {
82 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
83 				     &flags);
84 	int port_sel_mode = get_port_sel_mode(mode, flags);
85 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
86 	u8 *ports = ldev->v2p_map;
87 	int idx0, idx1;
88 	void *lag_ctx;
89 
90 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
91 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
92 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
93 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
94 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
95 
96 	if (idx0 < 0 || idx1 < 0)
97 		return -EINVAL;
98 
99 	switch (port_sel_mode) {
100 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
101 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
102 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
103 		break;
104 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
105 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
106 			break;
107 
108 		MLX5_SET(lagc, lag_ctx, active_port,
109 			 lag_active_port_bits(mlx5_lag_dev(dev)));
110 		break;
111 	default:
112 		break;
113 	}
114 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
115 
116 	return mlx5_cmd_exec_in(dev, create_lag, in);
117 }
118 
119 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
120 			       u8 *ports)
121 {
122 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
123 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
124 	int idx0, idx1;
125 
126 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
127 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
128 	if (idx0 < 0 || idx1 < 0)
129 		return -EINVAL;
130 
131 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
132 	MLX5_SET(modify_lag_in, in, field_select, 0x1);
133 
134 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
135 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
136 
137 	return mlx5_cmd_exec_in(dev, modify_lag, in);
138 }
139 
140 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
141 {
142 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
143 
144 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
145 
146 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
147 }
148 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
149 
150 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
151 {
152 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
153 
154 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
155 
156 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
157 }
158 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
159 
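/* Collect the indices of the ports whose TX is disabled or whose link is
 * down according to the bond tracker.
 */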
160 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
161 				   u8 *ports, int *num_disabled)
162 {
163 	int i;
164 
165 	*num_disabled = 0;
166 	mlx5_ldev_for_each(i, 0, ldev)
167 		if (!tracker->netdev_state[i].tx_enabled ||
168 		    !tracker->netdev_state[i].link_up)
169 			ports[(*num_disabled)++] = i;
170 }
171 
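/* Collect the indices of the ports that are TX enabled and have link up.
 * If no port qualifies, fall back to the set of disabled ports so the
 * caller still gets a non-empty list.
 */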
172 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
173 			   u8 *ports, int *num_enabled)
174 {
175 	int i;
176 
177 	*num_enabled = 0;
178 	mlx5_ldev_for_each(i, 0, ldev)
179 		if (tracker->netdev_state[i].tx_enabled &&
180 		    tracker->netdev_state[i].link_up)
181 			ports[(*num_enabled)++] = i;
182 
183 	if (*num_enabled == 0)
184 		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
185 }
186 
187 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
188 				   struct mlx5_lag *ldev,
189 				   struct lag_tracker *tracker,
190 				   unsigned long flags)
191 {
192 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
193 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
194 	int written = 0;
195 	int num_enabled;
196 	int idx;
197 	int err;
198 	int i;
199 	int j;
200 
201 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
202 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
203 				      &num_enabled);
204 		for (i = 0; i < num_enabled; i++) {
205 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
206 			if (err != 3)
207 				return;
208 			written += err;
209 		}
210 		buf[written - 2] = 0;
211 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
212 	} else {
213 		mlx5_ldev_for_each(i, 0, ldev) {
214 			for (j = 0; j < ldev->buckets; j++) {
215 				idx = i * ldev->buckets + j;
216 				err = scnprintf(buf + written, 10,
217 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
218 				if (err != 9)
219 					return;
220 				written += err;
221 			}
222 		}
223 		mlx5_core_info(dev, "lag map:%s\n", buf);
224 	}
225 }
226 
227 static int mlx5_lag_netdev_event(struct notifier_block *this,
228 				 unsigned long event, void *ptr);
229 static void mlx5_do_bond_work(struct work_struct *work);
230 
231 static void mlx5_ldev_free(struct kref *ref)
232 {
233 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
234 
235 	if (ldev->nb.notifier_call)
236 		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
237 	mlx5_lag_mp_cleanup(ldev);
238 	cancel_delayed_work_sync(&ldev->bond_work);
239 	destroy_workqueue(ldev->wq);
240 	mutex_destroy(&ldev->lock);
241 	kfree(ldev);
242 }
243 
244 static void mlx5_ldev_put(struct mlx5_lag *ldev)
245 {
246 	kref_put(&ldev->ref, mlx5_ldev_free);
247 }
248 
249 static void mlx5_ldev_get(struct mlx5_lag *ldev)
250 {
251 	kref_get(&ldev->ref);
252 }
253 
254 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
255 {
256 	struct mlx5_lag *ldev;
257 	int err;
258 
259 	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
260 	if (!ldev)
261 		return NULL;
262 
263 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
264 	if (!ldev->wq) {
265 		kfree(ldev);
266 		return NULL;
267 	}
268 
269 	kref_init(&ldev->ref);
270 	mutex_init(&ldev->lock);
271 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
272 
273 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
274 	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
275 		ldev->nb.notifier_call = NULL;
276 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
277 	}
278 	ldev->mode = MLX5_LAG_MODE_NONE;
279 
280 	err = mlx5_lag_mp_init(ldev);
281 	if (err)
282 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
283 			      err);
284 
285 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
286 	ldev->buckets = 1;
287 
288 	return ldev;
289 }
290 
291 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
292 				struct net_device *ndev)
293 {
294 	int i;
295 
296 	mlx5_ldev_for_each(i, 0, ldev)
297 		if (ldev->pf[i].netdev == ndev)
298 			return i;
299 
300 	return -ENOENT;
301 }
302 
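/* Return the PF array index of the seq-th populated slot (0-based), or
 * -ENOENT if there are not that many devices. Commonly used with
 * MLX5_LAG_P1 to find the first present device.
 */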
303 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
304 {
305 	int i, num = 0;
306 
307 	if (!ldev)
308 		return -ENOENT;
309 
310 	mlx5_ldev_for_each(i, 0, ldev) {
311 		if (num == seq)
312 			return i;
313 		num++;
314 	}
315 	return -ENOENT;
316 }
317 
318 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
319 {
320 	int i, num = 0;
321 
322 	if (!ldev)
323 		return 0;
324 
325 	mlx5_ldev_for_each(i, 0, ldev) {
326 		(void)i;
327 		num++;
328 	}
329 	return num;
330 }
331 
332 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
333 {
334 	int i, num = 0;
335 
336 	if (!ldev)
337 		return 0;
338 
339 	mlx5_ldev_for_each(i, 0, ldev)
340 		if (ldev->pf[i].netdev)
341 			num++;
342 	return num;
343 }
344 
345 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
346 {
347 	return ldev->mode == MLX5_LAG_MODE_ROCE;
348 }
349 
350 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
351 {
352 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
353 }
354 
355 /* Create a mapping between steering slots and active ports.
356  * As we have ldev->buckets slots per port, first assume the native
357  * mapping should be used.
358  * If there are ports that are disabled fill the relevant slots
359  * with mapping that points to active ports.
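 * For example, with 2 ports and 2 buckets per port, if port 2 goes down
 * the native map 1 1 2 2 becomes 1 1 1 1 (port 2's buckets are remapped
 * to the remaining active port).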
360  */
361 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
362 					   struct mlx5_lag *ldev,
363 					   u8 buckets,
364 					   u8 *ports)
365 {
366 	int disabled[MLX5_MAX_PORTS] = {};
367 	int enabled[MLX5_MAX_PORTS] = {};
368 	int disabled_ports_num = 0;
369 	int enabled_ports_num = 0;
370 	int idx;
371 	u32 rand;
372 	int i;
373 	int j;
374 
375 	mlx5_ldev_for_each(i, 0, ldev) {
376 		if (tracker->netdev_state[i].tx_enabled &&
377 		    tracker->netdev_state[i].link_up)
378 			enabled[enabled_ports_num++] = i;
379 		else
380 			disabled[disabled_ports_num++] = i;
381 	}
382 
383 	/* Use native mapping by default where each port's buckets
384 	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
385 	 */
386 	mlx5_ldev_for_each(i, 0, ldev) {
387 		for (j = 0; j < buckets; j++) {
388 			idx = i * buckets + j;
389 			ports[idx] = i + 1;
390 		}
391 	}
392 
393 	/* If all ports are disabled/enabled keep native mapping */
394 	if (enabled_ports_num == ldev->ports ||
395 	    disabled_ports_num == ldev->ports)
396 		return;
397 
398 	/* Go over the disabled ports and for each assign a random active port */
399 	for (i = 0; i < disabled_ports_num; i++) {
400 		for (j = 0; j < buckets; j++) {
401 			get_random_bytes(&rand, 4);
402 			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
403 		}
404 	}
405 }
406 
407 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
408 {
409 	int i;
410 
411 	mlx5_ldev_for_each(i, 0, ldev)
412 		if (ldev->pf[i].has_drop)
413 			return true;
414 	return false;
415 }
416 
417 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
418 {
419 	int i;
420 
421 	mlx5_ldev_for_each(i, 0, ldev) {
422 		if (!ldev->pf[i].has_drop)
423 			continue;
424 
425 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
426 							     MLX5_VPORT_UPLINK);
427 		ldev->pf[i].has_drop = false;
428 	}
429 }
430 
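/* Re-evaluate the uplink ingress drop rules: remove the current ones and,
 * if the tracker reports inactive ports, install a drop rule on the
 * uplink vport of every port whose TX is disabled or link is down.
 */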
431 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
432 				     struct lag_tracker *tracker)
433 {
434 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
435 	struct mlx5_core_dev *dev;
436 	int disabled_index;
437 	int num_disabled;
438 	int err;
439 	int i;
440 
441 	/* First delete the current drop rule so there won't be any dropped
442 	 * packets
443 	 */
444 	mlx5_lag_drop_rule_cleanup(ldev);
445 
446 	if (!ldev->tracker.has_inactive)
447 		return;
448 
449 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
450 
451 	for (i = 0; i < num_disabled; i++) {
452 		disabled_index = disabled_ports[i];
453 		dev = ldev->pf[disabled_index].dev;
454 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
455 								  MLX5_VPORT_UPLINK);
456 		if (!err)
457 			ldev->pf[disabled_index].has_drop = true;
458 		else
459 			mlx5_core_err(dev,
460 				      "Failed to create lag drop rule, error: %d", err);
461 	}
462 }
463 
464 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
465 {
466 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
467 	void *lag_ctx;
468 
469 	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
470 
471 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
472 	MLX5_SET(modify_lag_in, in, field_select, 0x2);
473 
474 	MLX5_SET(lagc, lag_ctx, active_port, ports);
475 
476 	return mlx5_cmd_exec_in(dev, modify_lag, in);
477 }
478 
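/* Apply a new port-to-bucket mapping. In hash-based mode this updates the
 * port selection flow table (and the active_port bitmap when the bypass
 * capability is present); otherwise it issues MODIFY_LAG to update the
 * queue-affinity tx remap.
 */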
479 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
480 {
481 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
482 	struct mlx5_core_dev *dev0;
483 	u8 active_ports;
484 	int ret;
485 
486 	if (idx < 0)
487 		return -EINVAL;
488 
489 	dev0 = ldev->pf[idx].dev;
490 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
491 		ret = mlx5_lag_port_sel_modify(ldev, ports);
492 		if (ret ||
493 		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
494 			return ret;
495 
496 		active_ports = lag_active_port_bits(ldev);
497 
498 		return mlx5_cmd_modify_active_port(dev0, active_ports);
499 	}
500 	return mlx5_cmd_modify_lag(dev0, ldev, ports);
501 }
502 
503 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
504 {
505 	struct net_device *ndev = NULL;
506 	struct mlx5_lag *ldev;
507 	unsigned long flags;
508 	int i, last_idx;
509 
510 	spin_lock_irqsave(&lag_lock, flags);
511 	ldev = mlx5_lag_dev(dev);
512 
513 	if (!ldev)
514 		goto unlock;
515 
516 	mlx5_ldev_for_each(i, 0, ldev)
517 		if (ldev->tracker.netdev_state[i].tx_enabled)
518 			ndev = ldev->pf[i].netdev;
519 	if (!ndev) {
520 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
521 		if (last_idx < 0)
522 			goto unlock;
523 		ndev = ldev->pf[last_idx].netdev;
524 	}
525 
526 	if (ndev)
527 		dev_hold(ndev);
528 
529 unlock:
530 	spin_unlock_irqrestore(&lag_lock, flags);
531 
532 	return ndev;
533 }
534 
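/* Recompute the tx affinity mapping from the tracker state and push it to
 * the device if it changed. For active-backup bonds also refresh the drop
 * rules (unless in RoCE mode) and notify listeners about the new active
 * netdev.
 */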
535 void mlx5_modify_lag(struct mlx5_lag *ldev,
536 		     struct lag_tracker *tracker)
537 {
538 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
539 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
540 	struct mlx5_core_dev *dev0;
541 	int idx;
542 	int err;
543 	int i;
544 	int j;
545 
546 	if (first_idx < 0)
547 		return;
548 
549 	dev0 = ldev->pf[first_idx].dev;
550 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
551 
552 	mlx5_ldev_for_each(i, 0, ldev) {
553 		for (j = 0; j < ldev->buckets; j++) {
554 			idx = i * ldev->buckets + j;
555 			if (ports[idx] == ldev->v2p_map[idx])
556 				continue;
557 			err = _mlx5_modify_lag(ldev, ports);
558 			if (err) {
559 				mlx5_core_err(dev0,
560 					      "Failed to modify LAG (%d)\n",
561 					      err);
562 				return;
563 			}
564 			memcpy(ldev->v2p_map, ports, sizeof(ports));
565 
566 			mlx5_lag_print_mapping(dev0, ldev, tracker,
567 					       ldev->mode_flags);
568 			break;
569 		}
570 	}
571 
572 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
573 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
574 
575 		if (ldev->mode != MLX5_LAG_MODE_ROCE)
576 			mlx5_lag_drop_rule_setup(ldev, tracker);
577 		/* Only sriov and roce lag should have tracker->tx_type set so
578 		 * no need to check the mode
579 		 */
580 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
581 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
582 					     ndev);
583 		dev_put(ndev);
584 	}
585 }
586 
587 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
588 					   unsigned long *flags)
589 {
590 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
591 	struct mlx5_core_dev *dev0;
592 
593 	if (first_idx < 0)
594 		return -EINVAL;
595 
596 	dev0 = ldev->pf[first_idx].dev;
597 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
598 		if (ldev->ports > 2)
599 			return -EINVAL;
600 		return 0;
601 	}
602 
603 	if (ldev->ports > 2)
604 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
605 
606 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
607 
608 	return 0;
609 }
610 
611 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
612 						struct lag_tracker *tracker,
613 						enum mlx5_lag_mode mode,
614 						unsigned long *flags)
615 {
616 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
617 	struct lag_func *dev0;
618 
619 	if (first_idx < 0 || mode == MLX5_LAG_MODE_MPESW)
620 		return;
621 
622 	dev0 = &ldev->pf[first_idx];
623 	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
624 	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) {
625 		if (ldev->ports > 2)
626 			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
627 		set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
628 	}
629 }
630 
631 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
632 			      struct lag_tracker *tracker, bool shared_fdb,
633 			      unsigned long *flags)
634 {
635 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
636 
637 	*flags = 0;
638 	if (shared_fdb) {
639 		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
640 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
641 	}
642 
643 	if (mode == MLX5_LAG_MODE_MPESW)
644 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
645 
646 	if (roce_lag)
647 		return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
648 
649 	mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
650 	return 0;
651 }
652 
653 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
654 {
655 	int port_sel_mode = get_port_sel_mode(mode, flags);
656 
657 	switch (port_sel_mode) {
658 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
659 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
660 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
661 	default: return "invalid";
662 	}
663 }
664 
665 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
666 {
667 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
668 	struct mlx5_eswitch *master_esw;
669 	struct mlx5_core_dev *dev0;
670 	int i, j;
671 	int err;
672 
673 	if (first_idx < 0)
674 		return -EINVAL;
675 
676 	dev0 = ldev->pf[first_idx].dev;
677 	master_esw = dev0->priv.eswitch;
678 	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
679 		struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
680 
681 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
682 							       slave_esw, ldev->ports);
683 		if (err)
684 			goto err;
685 	}
686 	return 0;
687 err:
688 	mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
689 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
690 							 ldev->pf[j].dev->priv.eswitch);
691 	return err;
692 }
693 
694 static int mlx5_create_lag(struct mlx5_lag *ldev,
695 			   struct lag_tracker *tracker,
696 			   enum mlx5_lag_mode mode,
697 			   unsigned long flags)
698 {
699 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
700 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
701 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
702 	struct mlx5_core_dev *dev0;
703 	int err;
704 
705 	if (first_idx < 0)
706 		return -EINVAL;
707 
708 	dev0 = ldev->pf[first_idx].dev;
709 	if (tracker)
710 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
711 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
712 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
713 
714 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
715 	if (err) {
716 		mlx5_core_err(dev0,
717 			      "Failed to create LAG (%d)\n",
718 			      err);
719 		return err;
720 	}
721 
722 	if (shared_fdb) {
723 		err = mlx5_lag_create_single_fdb(ldev);
724 		if (err)
725 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
726 		else
727 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
728 	}
729 
730 	if (err) {
731 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
732 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
733 			mlx5_core_err(dev0,
734 				      "Failed to deactivate RoCE LAG; driver restart required\n");
735 	}
736 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
737 
738 	return err;
739 }
740 
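/* Activate hardware LAG: derive the mode flags, build the tx affinity
 * mapping and, when hash-based mode is chosen, create the port selection
 * flow table (both skipped for MPESW), then issue CREATE_LAG. For
 * active-backup non-RoCE bonds the ingress drop rules are also set up.
 * On success the new mode and flags are recorded in the ldev.
 */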
741 int mlx5_activate_lag(struct mlx5_lag *ldev,
742 		      struct lag_tracker *tracker,
743 		      enum mlx5_lag_mode mode,
744 		      bool shared_fdb)
745 {
746 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
747 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
748 	struct mlx5_core_dev *dev0;
749 	unsigned long flags = 0;
750 	int err;
751 
752 	if (first_idx < 0)
753 		return -EINVAL;
754 
755 	dev0 = ldev->pf[first_idx].dev;
756 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
757 	if (err)
758 		return err;
759 
760 	if (mode != MLX5_LAG_MODE_MPESW) {
761 		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
762 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
763 			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
764 						       ldev->v2p_map);
765 			if (err) {
766 				mlx5_core_err(dev0,
767 					      "Failed to create LAG port selection(%d)\n",
768 					      err);
769 				return err;
770 			}
771 		}
772 	}
773 
774 	err = mlx5_create_lag(ldev, tracker, mode, flags);
775 	if (err) {
776 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
777 			mlx5_lag_port_sel_destroy(ldev);
778 		if (roce_lag)
779 			mlx5_core_err(dev0,
780 				      "Failed to activate RoCE LAG\n");
781 		else
782 			mlx5_core_err(dev0,
783 				      "Failed to activate VF LAG\n"
784 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
785 		return err;
786 	}
787 
788 	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
789 	    !roce_lag)
790 		mlx5_lag_drop_rule_setup(ldev, tracker);
791 
792 	ldev->mode = mode;
793 	ldev->mode_flags = flags;
794 	return 0;
795 }
796 
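/* Tear down hardware LAG: reset the lag mode, detach the slave eswitches
 * when shared FDB was used, issue DESTROY_LAG, and remove the port
 * selection flow table and drop rules if they were created.
 */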
797 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
798 {
799 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
800 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
801 	bool roce_lag = __mlx5_lag_is_roce(ldev);
802 	unsigned long flags = ldev->mode_flags;
803 	struct mlx5_eswitch *master_esw;
804 	struct mlx5_core_dev *dev0;
805 	int err;
806 	int i;
807 
808 	if (first_idx < 0)
809 		return -EINVAL;
810 
811 	dev0 = ldev->pf[first_idx].dev;
812 	master_esw = dev0->priv.eswitch;
813 	ldev->mode = MLX5_LAG_MODE_NONE;
814 	ldev->mode_flags = 0;
815 	mlx5_lag_mp_reset(ldev);
816 
817 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
818 		mlx5_ldev_for_each(i, first_idx + 1, ldev)
819 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
820 								 ldev->pf[i].dev->priv.eswitch);
821 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
822 	}
823 
824 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
825 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
826 	if (err) {
827 		if (roce_lag) {
828 			mlx5_core_err(dev0,
829 				      "Failed to deactivate RoCE LAG; driver restart required\n");
830 		} else {
831 			mlx5_core_err(dev0,
832 				      "Failed to deactivate VF LAG; driver restart required\n"
833 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
834 		}
835 		return err;
836 	}
837 
838 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
839 		mlx5_lag_port_sel_destroy(ldev);
840 		ldev->buckets = 1;
841 	}
842 	if (mlx5_lag_has_drop_rule(ldev))
843 		mlx5_lag_drop_rule_cleanup(ldev);
844 
845 	return 0;
846 }
847 
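/* Check the prerequisites for activating LAG: all expected PFs are
 * present, devices with VFs are in switchdev mode, every eswitch uses the
 * same mode, and all ports agree on their RoCE state.
 */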
848 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
849 {
850 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
851 #ifdef CONFIG_MLX5_ESWITCH
852 	struct mlx5_core_dev *dev;
853 	u8 mode;
854 #endif
855 	bool roce_support;
856 	int i;
857 
858 	if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
859 		return false;
860 
861 #ifdef CONFIG_MLX5_ESWITCH
862 	mlx5_ldev_for_each(i, 0, ldev) {
863 		dev = ldev->pf[i].dev;
864 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
865 			return false;
866 	}
867 
868 	dev = ldev->pf[first_idx].dev;
869 	mode = mlx5_eswitch_mode(dev);
870 	mlx5_ldev_for_each(i, 0, ldev)
871 		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
872 			return false;
873 
874 #else
875 	mlx5_ldev_for_each(i, 0, ldev)
876 		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
877 			return false;
878 #endif
879 	roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
880 	mlx5_ldev_for_each(i, first_idx + 1, ldev)
881 		if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
882 			return false;
883 
884 	return true;
885 }
886 
887 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
888 {
889 	int i;
890 
891 	mlx5_ldev_for_each(i, 0, ldev) {
892 		if (ldev->pf[i].dev->priv.flags &
893 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
894 			continue;
895 
896 		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
897 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
898 	}
899 }
900 
901 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
902 {
903 	int i;
904 
905 	mlx5_ldev_for_each(i, 0, ldev) {
906 		if (ldev->pf[i].dev->priv.flags &
907 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
908 			continue;
909 
910 		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
911 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
912 	}
913 }
914 
915 void mlx5_disable_lag(struct mlx5_lag *ldev)
916 {
917 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
918 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
919 	struct mlx5_core_dev *dev0;
920 	bool roce_lag;
921 	int err;
922 	int i;
923 
924 	if (idx < 0)
925 		return;
926 
927 	dev0 = ldev->pf[idx].dev;
928 	roce_lag = __mlx5_lag_is_roce(ldev);
929 
930 	if (shared_fdb) {
931 		mlx5_lag_remove_devices(ldev);
932 	} else if (roce_lag) {
933 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
934 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
935 			mlx5_rescan_drivers_locked(dev0);
936 		}
937 		mlx5_ldev_for_each(i, idx + 1, ldev)
938 			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
939 	}
940 
941 	err = mlx5_deactivate_lag(ldev);
942 	if (err)
943 		return;
944 
945 	if (shared_fdb || roce_lag)
946 		mlx5_lag_add_devices(ldev);
947 
948 	if (shared_fdb)
949 		mlx5_ldev_for_each(i, 0, ldev)
950 			if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
951 				mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
952 }
953 
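/* Decide whether shared FDB can be used: all devices must be in switchdev
 * mode with vport match metadata enabled and see all their peers; the
 * slaves must support native FDB selection and a root FT on another
 * eswitch, and the master must have shared ingress ACL support and a
 * ready devcom component.
 */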
954 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
955 {
956 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
957 	struct mlx5_core_dev *dev;
958 	int i;
959 
960 	if (idx < 0)
961 		return false;
962 
963 	mlx5_ldev_for_each(i, idx + 1, ldev) {
964 		dev = ldev->pf[i].dev;
965 		if (is_mdev_switchdev_mode(dev) &&
966 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
967 		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
968 		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
969 		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
970 		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
971 			continue;
972 		return false;
973 	}
974 
975 	dev = ldev->pf[idx].dev;
976 	if (is_mdev_switchdev_mode(dev) &&
977 	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
978 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
979 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
980 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
981 		return true;
982 
983 	return false;
984 }
985 
986 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
987 {
988 	bool roce_lag = true;
989 	int i;
990 
991 	mlx5_ldev_for_each(i, 0, ldev)
992 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
993 
994 #ifdef CONFIG_MLX5_ESWITCH
995 	mlx5_ldev_for_each(i, 0, ldev)
996 		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
997 #endif
998 
999 	return roce_lag;
1000 }
1001 
1002 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1003 {
1004 	return do_bond && __mlx5_lag_is_active(ldev) &&
1005 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1006 }
1007 
1008 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1009 {
1010 	return !do_bond && __mlx5_lag_is_active(ldev) &&
1011 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1012 }
1013 
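/* Main bonding state machine, run from the work queue: decide from the
 * tracker whether LAG should be enabled, and activate, modify or disable
 * hardware LAG accordingly (RoCE LAG or SR-IOV LAG, optionally with
 * shared FDB).
 */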
1014 static void mlx5_do_bond(struct mlx5_lag *ldev)
1015 {
1016 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1017 	struct lag_tracker tracker = { };
1018 	struct mlx5_core_dev *dev0;
1019 	struct net_device *ndev;
1020 	bool do_bond, roce_lag;
1021 	int err;
1022 	int i;
1023 
1024 	if (idx < 0)
1025 		return;
1026 
1027 	dev0 = ldev->pf[idx].dev;
1028 	if (!mlx5_lag_is_ready(ldev)) {
1029 		do_bond = false;
1030 	} else {
1031 		/* VF LAG is in multipath mode, ignore bond change requests */
1032 		if (mlx5_lag_is_multipath(dev0))
1033 			return;
1034 
1035 		tracker = ldev->tracker;
1036 
1037 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1038 	}
1039 
1040 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1041 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1042 
1043 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1044 
1045 		if (shared_fdb || roce_lag)
1046 			mlx5_lag_remove_devices(ldev);
1047 
1048 		err = mlx5_activate_lag(ldev, &tracker,
1049 					roce_lag ? MLX5_LAG_MODE_ROCE :
1050 						   MLX5_LAG_MODE_SRIOV,
1051 					shared_fdb);
1052 		if (err) {
1053 			if (shared_fdb || roce_lag)
1054 				mlx5_lag_add_devices(ldev);
1055 			if (shared_fdb) {
1056 				mlx5_ldev_for_each(i, 0, ldev)
1057 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1058 			}
1059 
1060 			return;
1061 		} else if (roce_lag) {
1062 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1063 			mlx5_rescan_drivers_locked(dev0);
1064 			mlx5_ldev_for_each(i, idx + 1, ldev) {
1065 				if (mlx5_get_roce_state(ldev->pf[i].dev))
1066 					mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1067 			}
1068 		} else if (shared_fdb) {
1069 			int i;
1070 
1071 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1072 			mlx5_rescan_drivers_locked(dev0);
1073 
1074 			mlx5_ldev_for_each(i, 0, ldev) {
1075 				err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1076 				if (err)
1077 					break;
1078 			}
1079 
1080 			if (err) {
1081 				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1082 				mlx5_rescan_drivers_locked(dev0);
1083 				mlx5_deactivate_lag(ldev);
1084 				mlx5_lag_add_devices(ldev);
1085 				mlx5_ldev_for_each(i, 0, ldev)
1086 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1087 				mlx5_core_err(dev0, "Failed to enable lag\n");
1088 				return;
1089 			}
1090 		}
1091 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1092 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1093 			/* Only sriov and roce lag should have tracker->tx_type
1094 			 * set so no need to check the mode
1095 			 */
1096 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1097 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1098 						     ndev);
1099 			dev_put(ndev);
1100 		}
1101 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1102 		mlx5_modify_lag(ldev, &tracker);
1103 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1104 		mlx5_disable_lag(ldev);
1105 	}
1106 }
1107 
1108 /* The last mdev to unregister will destroy the workqueue before removing the
1109  * devcom component, and as all the mdevs use the same devcom component we are
1110  * guaranteed that the devcom is valid while the calling work is running.
1111  */
1112 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1113 {
1114 	struct mlx5_devcom_comp_dev *devcom = NULL;
1115 	int i;
1116 
1117 	mutex_lock(&ldev->lock);
1118 	i = mlx5_get_next_ldev_func(ldev, 0);
1119 	if (i < MLX5_MAX_PORTS)
1120 		devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1121 	mutex_unlock(&ldev->lock);
1122 	return devcom;
1123 }
1124 
1125 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1126 {
1127 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1128 }
1129 
1130 static void mlx5_do_bond_work(struct work_struct *work)
1131 {
1132 	struct delayed_work *delayed_work = to_delayed_work(work);
1133 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1134 					     bond_work);
1135 	struct mlx5_devcom_comp_dev *devcom;
1136 	int status;
1137 
1138 	devcom = mlx5_lag_get_devcom_comp(ldev);
1139 	if (!devcom)
1140 		return;
1141 
1142 	status = mlx5_devcom_comp_trylock(devcom);
1143 	if (!status) {
1144 		mlx5_queue_bond_work(ldev, HZ);
1145 		return;
1146 	}
1147 
1148 	mutex_lock(&ldev->lock);
1149 	if (ldev->mode_changes_in_progress) {
1150 		mutex_unlock(&ldev->lock);
1151 		mlx5_devcom_comp_unlock(devcom);
1152 		mlx5_queue_bond_work(ldev, HZ);
1153 		return;
1154 	}
1155 
1156 	mlx5_do_bond(ldev);
1157 	mutex_unlock(&ldev->lock);
1158 	mlx5_devcom_comp_unlock(devcom);
1159 }
1160 
1161 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1162 					 struct lag_tracker *tracker,
1163 					 struct netdev_notifier_changeupper_info *info)
1164 {
1165 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1166 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1167 	bool is_bonded, is_in_lag, mode_supported;
1168 	bool has_inactive = 0;
1169 	struct slave *slave;
1170 	u8 bond_status = 0;
1171 	int num_slaves = 0;
1172 	int changed = 0;
1173 	int i, idx = -1;
1174 
1175 	if (!netif_is_lag_master(upper))
1176 		return 0;
1177 
1178 	if (info->linking)
1179 		lag_upper_info = info->upper_info;
1180 
1181 	/* The event may still be of interest if the slave does not belong to
1182 	 * us, but is enslaved to a master which has one or more of our netdevs
1183 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1184 	 * of our netdevs, we should unbond).
1185 	 */
1186 	rcu_read_lock();
1187 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1188 		mlx5_ldev_for_each(i, 0, ldev) {
1189 			if (ldev->pf[i].netdev == ndev_tmp) {
1190 				idx++;
1191 				break;
1192 			}
1193 		}
1194 		if (i < MLX5_MAX_PORTS) {
1195 			slave = bond_slave_get_rcu(ndev_tmp);
1196 			if (slave)
1197 				has_inactive |= bond_is_slave_inactive(slave);
1198 			bond_status |= (1 << idx);
1199 		}
1200 
1201 		num_slaves++;
1202 	}
1203 	rcu_read_unlock();
1204 
1205 	/* None of this lagdev's netdevs are slaves of this master. */
1206 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1207 		return 0;
1208 
1209 	if (lag_upper_info) {
1210 		tracker->tx_type = lag_upper_info->tx_type;
1211 		tracker->hash_type = lag_upper_info->hash_type;
1212 	}
1213 
1214 	tracker->has_inactive = has_inactive;
1215 	/* Determine bonding status:
1216 	 * A device is considered bonded if all of its physical ports are
1217 	 * slaves of the same lag master, and they are its only slaves.
1218 	 */
1219 	is_in_lag = num_slaves == ldev->ports &&
1220 		bond_status == GENMASK(ldev->ports - 1, 0);
1221 
1222 	/* Lag mode must be activebackup or hash. */
1223 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1224 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1225 
1226 	is_bonded = is_in_lag && mode_supported;
1227 	if (tracker->is_bonded != is_bonded) {
1228 		tracker->is_bonded = is_bonded;
1229 		changed = 1;
1230 	}
1231 
1232 	if (!is_in_lag)
1233 		return changed;
1234 
1235 	if (!mlx5_lag_is_ready(ldev))
1236 		NL_SET_ERR_MSG_MOD(info->info.extack,
1237 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1238 	else if (!mode_supported)
1239 		NL_SET_ERR_MSG_MOD(info->info.extack,
1240 				   "Can't activate LAG offload, TX type isn't supported");
1241 
1242 	return changed;
1243 }
1244 
1245 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1246 					      struct lag_tracker *tracker,
1247 					      struct net_device *ndev,
1248 					      struct netdev_notifier_changelowerstate_info *info)
1249 {
1250 	struct netdev_lag_lower_state_info *lag_lower_info;
1251 	int idx;
1252 
1253 	if (!netif_is_lag_port(ndev))
1254 		return 0;
1255 
1256 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1257 	if (idx < 0)
1258 		return 0;
1259 
1260 	/* This information is used to determine virtual to physical
1261 	 * port mapping.
1262 	 */
1263 	lag_lower_info = info->lower_state_info;
1264 	if (!lag_lower_info)
1265 		return 0;
1266 
1267 	tracker->netdev_state[idx] = *lag_lower_info;
1268 
1269 	return 1;
1270 }
1271 
1272 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1273 					    struct lag_tracker *tracker,
1274 					    struct net_device *ndev)
1275 {
1276 	struct net_device *ndev_tmp;
1277 	struct slave *slave;
1278 	bool has_inactive = 0;
1279 	int idx;
1280 
1281 	if (!netif_is_lag_master(ndev))
1282 		return 0;
1283 
1284 	rcu_read_lock();
1285 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1286 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1287 		if (idx < 0)
1288 			continue;
1289 
1290 		slave = bond_slave_get_rcu(ndev_tmp);
1291 		if (slave)
1292 			has_inactive |= bond_is_slave_inactive(slave);
1293 	}
1294 	rcu_read_unlock();
1295 
1296 	if (tracker->has_inactive == has_inactive)
1297 		return 0;
1298 
1299 	tracker->has_inactive = has_inactive;
1300 
1301 	return 1;
1302 }
1303 
1304 /* this handler is always registered to netdev events */
1305 static int mlx5_lag_netdev_event(struct notifier_block *this,
1306 				 unsigned long event, void *ptr)
1307 {
1308 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1309 	struct lag_tracker tracker;
1310 	struct mlx5_lag *ldev;
1311 	int changed = 0;
1312 
1313 	if (event != NETDEV_CHANGEUPPER &&
1314 	    event != NETDEV_CHANGELOWERSTATE &&
1315 	    event != NETDEV_CHANGEINFODATA)
1316 		return NOTIFY_DONE;
1317 
1318 	ldev    = container_of(this, struct mlx5_lag, nb);
1319 
1320 	tracker = ldev->tracker;
1321 
1322 	switch (event) {
1323 	case NETDEV_CHANGEUPPER:
1324 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1325 		break;
1326 	case NETDEV_CHANGELOWERSTATE:
1327 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1328 							     ndev, ptr);
1329 		break;
1330 	case NETDEV_CHANGEINFODATA:
1331 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1332 		break;
1333 	}
1334 
1335 	ldev->tracker = tracker;
1336 
1337 	if (changed)
1338 		mlx5_queue_bond_work(ldev, 0);
1339 
1340 	return NOTIFY_DONE;
1341 }
1342 
1343 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1344 				struct mlx5_core_dev *dev,
1345 				struct net_device *netdev)
1346 {
1347 	unsigned int fn = mlx5_get_dev_index(dev);
1348 	unsigned long flags;
1349 
1350 	spin_lock_irqsave(&lag_lock, flags);
1351 	ldev->pf[fn].netdev = netdev;
1352 	ldev->tracker.netdev_state[fn].link_up = 0;
1353 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
1354 	spin_unlock_irqrestore(&lag_lock, flags);
1355 }
1356 
1357 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1358 				    struct net_device *netdev)
1359 {
1360 	unsigned long flags;
1361 	int i;
1362 
1363 	spin_lock_irqsave(&lag_lock, flags);
1364 	mlx5_ldev_for_each(i, 0, ldev) {
1365 		if (ldev->pf[i].netdev == netdev) {
1366 			ldev->pf[i].netdev = NULL;
1367 			break;
1368 		}
1369 	}
1370 	spin_unlock_irqrestore(&lag_lock, flags);
1371 }
1372 
1373 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1374 			      struct mlx5_core_dev *dev)
1375 {
1376 	unsigned int fn = mlx5_get_dev_index(dev);
1377 
1378 	ldev->pf[fn].dev = dev;
1379 	dev->priv.lag = ldev;
1380 }
1381 
1382 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1383 				  struct mlx5_core_dev *dev)
1384 {
1385 	int fn;
1386 
1387 	fn = mlx5_get_dev_index(dev);
1388 	if (ldev->pf[fn].dev != dev)
1389 		return;
1390 
1391 	ldev->pf[fn].dev = NULL;
1392 	dev->priv.lag = NULL;
1393 }
1394 
1395 /* Must be called with HCA devcom component lock held */
1396 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1397 {
1398 	struct mlx5_devcom_comp_dev *pos = NULL;
1399 	struct mlx5_lag *ldev = NULL;
1400 	struct mlx5_core_dev *tmp_dev;
1401 
1402 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1403 	if (tmp_dev)
1404 		ldev = mlx5_lag_dev(tmp_dev);
1405 
1406 	if (!ldev) {
1407 		ldev = mlx5_lag_dev_alloc(dev);
1408 		if (!ldev) {
1409 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
1410 			return 0;
1411 		}
1412 		mlx5_ldev_add_mdev(ldev, dev);
1413 		return 0;
1414 	}
1415 
1416 	mutex_lock(&ldev->lock);
1417 	if (ldev->mode_changes_in_progress) {
1418 		mutex_unlock(&ldev->lock);
1419 		return -EAGAIN;
1420 	}
1421 	mlx5_ldev_get(ldev);
1422 	mlx5_ldev_add_mdev(ldev, dev);
1423 	mutex_unlock(&ldev->lock);
1424 
1425 	return 0;
1426 }
1427 
1428 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1429 {
1430 	struct mlx5_lag *ldev;
1431 
1432 	ldev = mlx5_lag_dev(dev);
1433 	if (!ldev)
1434 		return;
1435 
1436 	/* mdev is being removed, might as well remove debugfs
1437 	 * as early as possible.
1438 	 */
1439 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1440 recheck:
1441 	mutex_lock(&ldev->lock);
1442 	if (ldev->mode_changes_in_progress) {
1443 		mutex_unlock(&ldev->lock);
1444 		msleep(100);
1445 		goto recheck;
1446 	}
1447 	mlx5_ldev_remove_mdev(ldev, dev);
1448 	mutex_unlock(&ldev->lock);
1449 	mlx5_ldev_put(ldev);
1450 }
1451 
1452 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1453 {
1454 	int err;
1455 
1456 	if (!mlx5_lag_is_supported(dev))
1457 		return;
1458 
1459 	if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
1460 		return;
1461 
1462 recheck:
1463 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1464 	err = __mlx5_lag_dev_add_mdev(dev);
1465 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1466 
1467 	if (err) {
1468 		msleep(100);
1469 		goto recheck;
1470 	}
1471 	mlx5_ldev_add_debugfs(dev);
1472 }
1473 
1474 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1475 			    struct net_device *netdev)
1476 {
1477 	struct mlx5_lag *ldev;
1478 	bool lag_is_active;
1479 
1480 	ldev = mlx5_lag_dev(dev);
1481 	if (!ldev)
1482 		return;
1483 
1484 	mutex_lock(&ldev->lock);
1485 	mlx5_ldev_remove_netdev(ldev, netdev);
1486 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1487 
1488 	lag_is_active = __mlx5_lag_is_active(ldev);
1489 	mutex_unlock(&ldev->lock);
1490 
1491 	if (lag_is_active)
1492 		mlx5_queue_bond_work(ldev, 0);
1493 }
1494 
1495 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1496 			 struct net_device *netdev)
1497 {
1498 	struct mlx5_lag *ldev;
1499 	int num = 0;
1500 
1501 	ldev = mlx5_lag_dev(dev);
1502 	if (!ldev)
1503 		return;
1504 
1505 	mutex_lock(&ldev->lock);
1506 	mlx5_ldev_add_netdev(ldev, dev, netdev);
1507 	num = mlx5_lag_num_netdevs(ldev);
1508 	if (num >= ldev->ports)
1509 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1510 	mutex_unlock(&ldev->lock);
1511 	mlx5_queue_bond_work(ldev, 0);
1512 }
1513 
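/* Return the highest populated PF index in [end_idx, start_idx], scanning
 * backwards from start_idx, or -1 if no slot in the range is populated.
 */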
1514 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1515 {
1516 	int i;
1517 
1518 	for (i = start_idx; i >= end_idx; i--)
1519 		if (ldev->pf[i].dev)
1520 			return i;
1521 	return -1;
1522 }
1523 
1524 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1525 {
1526 	int i;
1527 
1528 	for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1529 		if (ldev->pf[i].dev)
1530 			return i;
1531 	return MLX5_MAX_PORTS;
1532 }
1533 
1534 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1535 {
1536 	struct mlx5_lag *ldev;
1537 	unsigned long flags;
1538 	bool res;
1539 
1540 	spin_lock_irqsave(&lag_lock, flags);
1541 	ldev = mlx5_lag_dev(dev);
1542 	res  = ldev && __mlx5_lag_is_roce(ldev);
1543 	spin_unlock_irqrestore(&lag_lock, flags);
1544 
1545 	return res;
1546 }
1547 EXPORT_SYMBOL(mlx5_lag_is_roce);
1548 
1549 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1550 {
1551 	struct mlx5_lag *ldev;
1552 	unsigned long flags;
1553 	bool res;
1554 
1555 	spin_lock_irqsave(&lag_lock, flags);
1556 	ldev = mlx5_lag_dev(dev);
1557 	res  = ldev && __mlx5_lag_is_active(ldev);
1558 	spin_unlock_irqrestore(&lag_lock, flags);
1559 
1560 	return res;
1561 }
1562 EXPORT_SYMBOL(mlx5_lag_is_active);
1563 
1564 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1565 {
1566 	struct mlx5_lag *ldev;
1567 	unsigned long flags;
1568 	bool res = 0;
1569 
1570 	spin_lock_irqsave(&lag_lock, flags);
1571 	ldev = mlx5_lag_dev(dev);
1572 	if (ldev)
1573 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1574 	spin_unlock_irqrestore(&lag_lock, flags);
1575 
1576 	return res;
1577 }
1578 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1579 
1580 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1581 {
1582 	struct mlx5_lag *ldev;
1583 	unsigned long flags;
1584 	bool res = false;
1585 	int idx;
1586 
1587 	spin_lock_irqsave(&lag_lock, flags);
1588 	ldev = mlx5_lag_dev(dev);
1589 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1590 	res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1591 	spin_unlock_irqrestore(&lag_lock, flags);
1592 
1593 	return res;
1594 }
1595 EXPORT_SYMBOL(mlx5_lag_is_master);
1596 
1597 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1598 {
1599 	struct mlx5_lag *ldev;
1600 	unsigned long flags;
1601 	bool res;
1602 
1603 	spin_lock_irqsave(&lag_lock, flags);
1604 	ldev = mlx5_lag_dev(dev);
1605 	res  = ldev && __mlx5_lag_is_sriov(ldev);
1606 	spin_unlock_irqrestore(&lag_lock, flags);
1607 
1608 	return res;
1609 }
1610 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1611 
1612 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1613 {
1614 	struct mlx5_lag *ldev;
1615 	unsigned long flags;
1616 	bool res;
1617 
1618 	spin_lock_irqsave(&lag_lock, flags);
1619 	ldev = mlx5_lag_dev(dev);
1620 	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1621 	spin_unlock_irqrestore(&lag_lock, flags);
1622 
1623 	return res;
1624 }
1625 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1626 
1627 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1628 {
1629 	struct mlx5_lag *ldev;
1630 
1631 	ldev = mlx5_lag_dev(dev);
1632 	if (!ldev)
1633 		return;
1634 
1635 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1636 	mutex_lock(&ldev->lock);
1637 
1638 	ldev->mode_changes_in_progress++;
1639 	if (__mlx5_lag_is_active(ldev))
1640 		mlx5_disable_lag(ldev);
1641 
1642 	mutex_unlock(&ldev->lock);
1643 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1644 }
1645 
1646 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1647 {
1648 	struct mlx5_lag *ldev;
1649 
1650 	ldev = mlx5_lag_dev(dev);
1651 	if (!ldev)
1652 		return;
1653 
1654 	mutex_lock(&ldev->lock);
1655 	ldev->mode_changes_in_progress--;
1656 	mutex_unlock(&ldev->lock);
1657 	mlx5_queue_bond_work(ldev, 0);
1658 }
1659 
1660 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1661 			   struct net_device *slave)
1662 {
1663 	struct mlx5_lag *ldev;
1664 	unsigned long flags;
1665 	u8 port = 0;
1666 	int i;
1667 
1668 	spin_lock_irqsave(&lag_lock, flags);
1669 	ldev = mlx5_lag_dev(dev);
1670 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
1671 		goto unlock;
1672 
1673 	mlx5_ldev_for_each(i, 0, ldev) {
1674 		if (ldev->pf[i].netdev == slave) {
1675 			port = i;
1676 			break;
1677 		}
1678 	}
1679 
1680 	port = ldev->v2p_map[port * ldev->buckets];
1681 
1682 unlock:
1683 	spin_unlock_irqrestore(&lag_lock, flags);
1684 	return port;
1685 }
1686 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1687 
1688 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1689 {
1690 	struct mlx5_lag *ldev;
1691 
1692 	ldev = mlx5_lag_dev(dev);
1693 	if (!ldev)
1694 		return 0;
1695 
1696 	return ldev->ports;
1697 }
1698 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1699 
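/* Iterator over the peer devices of a LAG: *i holds the cursor position.
 * Returns the next mdev in the LAG that is not @dev and advances *i, or
 * NULL when the end is reached.
 */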
1700 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1701 {
1702 	struct mlx5_core_dev *peer_dev = NULL;
1703 	struct mlx5_lag *ldev;
1704 	unsigned long flags;
1705 	int idx;
1706 
1707 	spin_lock_irqsave(&lag_lock, flags);
1708 	ldev = mlx5_lag_dev(dev);
1709 	if (!ldev)
1710 		goto unlock;
1711 
1712 	if (*i == MLX5_MAX_PORTS)
1713 		goto unlock;
1714 	mlx5_ldev_for_each(idx, *i, ldev)
1715 		if (ldev->pf[idx].dev != dev)
1716 			break;
1717 
1718 	if (idx == MLX5_MAX_PORTS) {
1719 		*i = idx;
1720 		goto unlock;
1721 	}
1722 	*i = idx + 1;
1723 
1724 	peer_dev = ldev->pf[idx].dev;
1725 
1726 unlock:
1727 	spin_unlock_irqrestore(&lag_lock, flags);
1728 	return peer_dev;
1729 }
1730 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1731 
1732 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1733 				 u64 *values,
1734 				 int num_counters,
1735 				 size_t *offsets)
1736 {
1737 	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1738 	struct mlx5_core_dev **mdev;
1739 	int ret = 0, i, j, idx = 0;
1740 	struct mlx5_lag *ldev;
1741 	unsigned long flags;
1742 	int num_ports;
1743 	void *out;
1744 
1745 	out = kvzalloc(outlen, GFP_KERNEL);
1746 	if (!out)
1747 		return -ENOMEM;
1748 
1749 	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1750 	if (!mdev) {
1751 		ret = -ENOMEM;
1752 		goto free_out;
1753 	}
1754 
1755 	memset(values, 0, sizeof(*values) * num_counters);
1756 
1757 	spin_lock_irqsave(&lag_lock, flags);
1758 	ldev = mlx5_lag_dev(dev);
1759 	if (ldev && __mlx5_lag_is_active(ldev)) {
1760 		num_ports = ldev->ports;
1761 		mlx5_ldev_for_each(i, 0, ldev)
1762 			mdev[idx++] = ldev->pf[i].dev;
1763 	} else {
1764 		num_ports = 1;
1765 		mdev[MLX5_LAG_P1] = dev;
1766 	}
1767 	spin_unlock_irqrestore(&lag_lock, flags);
1768 
1769 	for (i = 0; i < num_ports; ++i) {
1770 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1771 
1772 		MLX5_SET(query_cong_statistics_in, in, opcode,
1773 			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1774 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1775 					  out);
1776 		if (ret)
1777 			goto free_mdev;
1778 
1779 		for (j = 0; j < num_counters; ++j)
1780 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1781 	}
1782 
1783 free_mdev:
1784 	kvfree(mdev);
1785 free_out:
1786 	kvfree(out);
1787 	return ret;
1788 }
1789 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1790