1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39 
40 #include "en.h"
41 #include "eswitch.h"
42 #include "ipsec.h"
43 #include "ipsec_rxtx.h"
44 #include "en_rep.h"
45 
46 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
47 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
48 
49 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
50 {
51 	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
52 }
53 
54 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
55 {
56 	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
57 }
58 
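/* Descriptive note (added for clarity): software lifetime check, re-armed
 * every MLX5_IPSEC_RESCHED. Once the xfrm state is EXPIRED the SA is switched
 * to drop via mlx5e_accel_ipsec_fs_modify(); while it is still VALID,
 * xfrm_state_check_expire() is run and the delayed work is rescheduled.
 */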
59 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
60 {
61 	struct mlx5e_ipsec_dwork *dwork =
62 		container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
63 	struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
64 	struct xfrm_state *x = sa_entry->x;
65 
66 	if (sa_entry->attrs.drop)
67 		return;
68 
69 	spin_lock_bh(&x->lock);
70 	if (x->km.state == XFRM_STATE_EXPIRED) {
71 		sa_entry->attrs.drop = true;
72 		spin_unlock_bh(&x->lock);
73 
74 		mlx5e_accel_ipsec_fs_modify(sa_entry);
75 		return;
76 	}
77 
78 	if (x->km.state != XFRM_STATE_VALID) {
79 		spin_unlock_bh(&x->lock);
80 		return;
81 	}
82 
83 	xfrm_state_check_expire(x);
84 	spin_unlock_bh(&x->lock);
85 
86 	queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
87 			   MLX5_IPSEC_RESCHED);
88 }
89 
90 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
91 {
92 	struct xfrm_state *x = sa_entry->x;
93 	u32 seq_bottom = 0;
94 	u32 esn, esn_msb;
95 	u8 overlap;
96 
97 	switch (x->xso.dir) {
98 	case XFRM_DEV_OFFLOAD_IN:
99 		esn = x->replay_esn->seq;
100 		esn_msb = x->replay_esn->seq_hi;
101 		break;
102 	case XFRM_DEV_OFFLOAD_OUT:
103 		esn = x->replay_esn->oseq;
104 		esn_msb = x->replay_esn->oseq_hi;
105 		break;
106 	default:
107 		WARN_ON(true);
108 		return false;
109 	}
110 
111 	overlap = sa_entry->esn_state.overlap;
112 
113 	if (!x->replay_esn->replay_window) {
114 		seq_bottom = esn;
115 	} else {
116 		if (esn >= x->replay_esn->replay_window)
117 			seq_bottom = esn - x->replay_esn->replay_window + 1;
118 
119 		if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
120 			esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
121 	}
122 
123 	if (sa_entry->esn_state.esn_msb)
124 		sa_entry->esn_state.esn = esn;
125 	else
126 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
127 		 * the first packet sent using a given SA will contain a sequence
128 		 * number of 1.
129 		 */
130 		sa_entry->esn_state.esn = max_t(u32, esn, 1);
131 	sa_entry->esn_state.esn_msb = esn_msb;
132 
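	/* Descriptive note (added for clarity): the low 32 bits of the sequence
	 * space are treated as two halves around MLX5E_IPSEC_ESN_SCOPE_MID.
	 * When the bottom of the replay window moves from one half to the
	 * other, flip the cached overlap bit and return true so the caller
	 * pushes the updated ESN state to the device.
	 */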
133 	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
134 		sa_entry->esn_state.overlap = 0;
135 		return true;
136 	} else if (unlikely(!overlap &&
137 			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
138 		sa_entry->esn_state.overlap = 1;
139 		return true;
140 	}
141 
142 	return false;
143 }
144 
145 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
146 				    struct mlx5_accel_esp_xfrm_attrs *attrs)
147 {
148 	struct xfrm_state *x = sa_entry->x;
149 	s64 start_value, n;
150 
151 	attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
152 	attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
153 	if (x->lft.soft_packet_limit == XFRM_INF)
154 		return;
155 
156 	/* Compute hard limit initial value and number of rounds.
157 	 *
158 	 * The counting pattern of hardware counter goes:
159 	 *                value  -> 2^31-1
160 	 *      2^31  | (2^31-1) -> 2^31-1
161 	 *      2^31  | (2^31-1) -> 2^31-1
162 	 *      [..]
163 	 *      2^31  | (2^31-1) -> 0
164 	 *
165 	 * The pattern is created by using an ASO operation to atomically set
166 	 * bit 31 after the down counter clears bit 31. This is effectively an
167 	 * atomic addition of 2**31 to the counter.
168 	 *
169 	 * We wish to configure the counter, within the above pattern, so that
170 	 * when it reaches 0, it has hit the hard limit. This is defined by this
171 	 * system of equations:
172 	 *
173 	 *      hard_limit == start_value + n * 2^31
174 	 *      n >= 0
175 	 *      start_value < 2^32, start_value >= 0
176 	 *
177 	 * These equations do not have a single solution; there are often two choices:
178 	 *      hard_limit == start_value + n * 2^31
179 	 *      hard_limit == (start_value+2^31) + (n-1) * 2^31
180 	 *
181 	 * The algorithm selects the solution that keeps the counter value
182 	 * above 2^31 until the final iteration.
183 	 */
184 
185 	/* Start by estimating n and compute start_value */
186 	n = attrs->lft.hard_packet_limit / BIT_ULL(31);
187 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
188 
189 	/* Choose the best of the two solutions: */
190 	if (n >= 1)
191 		n -= 1;
192 
193 	/* Computed values solve the system of equations: */
194 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
195 
196 	/* The best solution means: when there are multiple iterations, we must
197 	 * start above 2^31 and count down to 2^31 to get the interrupt.
198 	 */
199 	attrs->lft.hard_packet_limit = lower_32_bits(start_value);
200 	attrs->lft.numb_rounds_hard = (u64)n;
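	/* Worked example (illustration only, assuming the arithmetic above):
	 * for hard_packet_limit = 3 * 2^31 + 5, the first estimate gives
	 * n = 3, start_value = 5; taking the other solution gives n = 2,
	 * start_value = 2^31 + 5. The counter is programmed to 0x80000005
	 * with numb_rounds_hard = 2, i.e. it is topped up by 2^31 twice
	 * before the final countdown to zero:
	 *      (2^31 + 5) + 2 * 2^31 == 3 * 2^31 + 5.
	 */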
201 
202 	/* Compute soft limit initial value and number of rounds.
203 	 *
204 	 * The soft_limit is achieved by adjusting the counter's
205 	 * interrupt_value. This is embedded in the counting pattern created by
206 	 * hard packet calculations above.
207 	 *
208 	 * We wish to compute the interrupt_value for the soft_limit. This is
209 	 * defined by this system of equations:
210 	 *
211 	 *      soft_limit == start_value - soft_value + n * 2^31
212 	 *      n >= 0
213 	 *      soft_value < 2^32, soft_value >= 0
214 	 *      for n == 0 start_value > soft_value
215 	 *
216 	 * As with compute_hard_n_value() the equations are not single-solution.
217 	 * The algorithm selects the solution that has:
218 	 *      2^30 <= soft_limit < 2^31 + 2^30
219 	 * for the interior iterations, which guarantees a large guard band
220 	 * around the counter hard limit and next interrupt.
221 	 */
222 
223 	/* Start by estimating n and compute soft_value */
224 	n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
225 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
226 		      x->lft.soft_packet_limit;
227 
228 	/* Compare against constraints and adjust n */
229 	if (n < 0)
230 		n = 0;
231 	else if (start_value >= BIT_ULL(32))
232 		n -= 1;
233 	else if (start_value < 0)
234 		n += 1;
235 
236 	/* Choose the best of the two solutions: */
237 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
238 	if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
239 		n += 1;
240 
241 	/* Note that the upper limit of soft_value happens naturally because we
242 	 * always select the lowest soft_value.
243 	 */
244 
245 	/* Computed values solve the system of equations: */
246 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
247 
248 	/* The best solution means: when there are multiple iterations, we must
249 	 * not fall below 2^30, as that would get too close to the false
250 	 * hard_limit; and when we reach an interior iteration for soft_limit,
251 	 * it has to be far away from 2^32-1, which is the counter reset point
252 	 * after the +2^31, to accommodate latency.
253 	 */
254 	attrs->lft.soft_packet_limit = lower_32_bits(start_value);
255 	attrs->lft.numb_rounds_soft = (u64)n;
256 }
257 
258 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
259 				  struct mlx5_accel_esp_xfrm_attrs *attrs)
260 {
261 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
262 	struct xfrm_state *x = sa_entry->x;
263 	struct net_device *netdev;
264 	struct neighbour *n;
265 	u8 addr[ETH_ALEN];
266 	const void *pkey;
267 	u8 *dst, *src;
268 
269 	if (attrs->mode != XFRM_MODE_TUNNEL ||
270 	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
271 		return;
272 
273 	netdev = x->xso.real_dev;
274 
275 	mlx5_query_mac_address(mdev, addr);
276 	switch (attrs->dir) {
277 	case XFRM_DEV_OFFLOAD_IN:
278 		src = attrs->dmac;
279 		dst = attrs->smac;
280 		pkey = &attrs->saddr.a4;
281 		break;
282 	case XFRM_DEV_OFFLOAD_OUT:
283 		src = attrs->smac;
284 		dst = attrs->dmac;
285 		pkey = &attrs->daddr.a4;
286 		break;
287 	default:
288 		return;
289 	}
290 
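	/* Descriptive note (added for clarity): our own MAC goes on the local
	 * side of the tunnel; the peer MAC is resolved through the neighbour
	 * table. If no valid entry exists yet, trigger resolution and drop
	 * traffic on this SA until the netevent notifier
	 * (mlx5e_ipsec_netevent_event) learns the address.
	 */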
291 	ether_addr_copy(src, addr);
292 	n = neigh_lookup(&arp_tbl, pkey, netdev);
293 	if (!n) {
294 		n = neigh_create(&arp_tbl, pkey, netdev);
295 		if (IS_ERR(n))
296 			return;
297 		neigh_event_send(n, NULL);
298 		attrs->drop = true;
299 	} else {
300 		neigh_ha_snapshot(addr, n, netdev);
301 		ether_addr_copy(dst, addr);
302 	}
303 	neigh_release(n);
304 }
305 
306 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
307 					struct mlx5_accel_esp_xfrm_attrs *attrs)
308 {
309 	struct xfrm_state *x = sa_entry->x;
310 	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
311 	struct aead_geniv_ctx *geniv_ctx;
312 	struct crypto_aead *aead;
313 	unsigned int crypto_data_len, key_len;
314 	int ivsize;
315 
316 	memset(attrs, 0, sizeof(*attrs));
317 
318 	/* key */
319 	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
320 	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
321 
322 	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
323 	aes_gcm->key_len = key_len * 8;
324 
325 	/* salt and seq_iv */
326 	aead = x->data;
327 	geniv_ctx = crypto_aead_ctx(aead);
328 	ivsize = crypto_aead_ivsize(aead);
329 	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
330 	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
331 	       sizeof(aes_gcm->salt));
332 
333 	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
334 
335 	/* icv len */
336 	aes_gcm->icv_len = x->aead->alg_icv_len;
337 
338 	attrs->dir = x->xso.dir;
339 
340 	/* esn */
341 	if (x->props.flags & XFRM_STATE_ESN) {
342 		attrs->replay_esn.trigger = true;
343 		attrs->replay_esn.esn = sa_entry->esn_state.esn;
344 		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
345 		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
346 		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
347 			goto skip_replay_window;
348 
349 		switch (x->replay_esn->replay_window) {
350 		case 32:
351 			attrs->replay_esn.replay_window =
352 				MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
353 			break;
354 		case 64:
355 			attrs->replay_esn.replay_window =
356 				MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
357 			break;
358 		case 128:
359 			attrs->replay_esn.replay_window =
360 				MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
361 			break;
362 		case 256:
363 			attrs->replay_esn.replay_window =
364 				MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
365 			break;
366 		default:
367 			WARN_ON(true);
368 			return;
369 		}
370 	}
371 
372 skip_replay_window:
373 	/* spi */
374 	attrs->spi = be32_to_cpu(x->id.spi);
375 
376 	/* source, destination IPs */
377 	memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
378 	memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
379 	attrs->family = x->props.family;
380 	attrs->type = x->xso.type;
381 	attrs->reqid = x->props.reqid;
382 	attrs->upspec.dport = ntohs(x->sel.dport);
383 	attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
384 	attrs->upspec.sport = ntohs(x->sel.sport);
385 	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
386 	attrs->upspec.proto = x->sel.proto;
387 	attrs->mode = x->props.mode;
388 
389 	mlx5e_ipsec_init_limits(sa_entry, attrs);
390 	mlx5e_ipsec_init_macs(sa_entry, attrs);
391 
392 	if (x->encap) {
393 		attrs->encap = true;
394 		attrs->sport = x->encap->encap_sport;
395 		attrs->dport = x->encap->encap_dport;
396 	}
397 }
398 
399 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
400 				     struct xfrm_state *x,
401 				     struct netlink_ext_ack *extack)
402 {
403 	if (x->props.aalgo != SADB_AALG_NONE) {
404 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
405 		return -EINVAL;
406 	}
407 	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
408 		NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
409 		return -EINVAL;
410 	}
411 	if (x->props.calgo != SADB_X_CALG_NONE) {
412 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
413 		return -EINVAL;
414 	}
415 	if (x->props.flags & XFRM_STATE_ESN &&
416 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
417 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
418 		return -EINVAL;
419 	}
420 	if (x->props.family != AF_INET &&
421 	    x->props.family != AF_INET6) {
422 		NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
423 		return -EINVAL;
424 	}
425 	if (x->id.proto != IPPROTO_ESP) {
426 		NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
427 		return -EINVAL;
428 	}
429 	if (x->encap) {
430 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
431 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported");
432 			return -EINVAL;
433 		}
434 
435 		if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
436 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
437 			return -EINVAL;
438 		}
439 
440 		if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
441 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
442 			return -EINVAL;
443 		}
444 
445 		if (x->props.mode != XFRM_MODE_TRANSPORT) {
446 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
447 			return -EINVAL;
448 		}
449 	}
450 	if (!x->aead) {
451 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
452 		return -EINVAL;
453 	}
454 	if (x->aead->alg_icv_len != 128) {
455 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
456 		return -EINVAL;
457 	}
458 	if ((x->aead->alg_key_len != 128 + 32) &&
459 	    (x->aead->alg_key_len != 256 + 32)) {
460 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
461 		return -EINVAL;
462 	}
463 	if (x->tfcpad) {
464 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
465 		return -EINVAL;
466 	}
467 	if (!x->geniv) {
468 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
469 		return -EINVAL;
470 	}
471 	if (strcmp(x->geniv, "seqiv")) {
472 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
473 		return -EINVAL;
474 	}
475 
476 	if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
477 	    x->sel.proto != IPPROTO_TCP) {
478 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
479 		return -EINVAL;
480 	}
481 
482 	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
483 		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
484 		return -EINVAL;
485 	}
486 
487 	switch (x->xso.type) {
488 	case XFRM_DEV_OFFLOAD_CRYPTO:
489 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
490 			NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
491 			return -EINVAL;
492 		}
493 
494 		break;
495 	case XFRM_DEV_OFFLOAD_PACKET:
496 		if (!(mlx5_ipsec_device_caps(mdev) &
497 		      MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
498 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
499 			return -EINVAL;
500 		}
501 
502 		if (x->props.mode == XFRM_MODE_TUNNEL &&
503 		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
504 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
505 			return -EINVAL;
506 		}
507 
508 		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
509 		    x->replay_esn->replay_window != 32 &&
510 		    x->replay_esn->replay_window != 64 &&
511 		    x->replay_esn->replay_window != 128 &&
512 		    x->replay_esn->replay_window != 256) {
513 			NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
514 			return -EINVAL;
515 		}
516 
517 		if (!x->props.reqid) {
518 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
519 			return -EINVAL;
520 		}
521 
522 		if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
523 		    x->lft.hard_byte_limit != XFRM_INF) {
524 			/* XFRM stack doesn't prevent such configuration :(. */
525 			NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
526 			return -EINVAL;
527 		}
528 
529 		if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
530 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
531 			return -EINVAL;
532 		}
533 
534 		if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
535 		    x->lft.hard_packet_limit != XFRM_INF) {
536 			/* XFRM stack doesn't prevent such configuration :(. */
537 			NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
538 			return -EINVAL;
539 		}
540 
541 		if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
542 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
543 			return -EINVAL;
544 		}
545 		break;
546 	default:
547 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
548 		return -EINVAL;
549 	}
550 	return 0;
551 }
552 
553 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
554 {
555 	struct mlx5e_ipsec_work *work =
556 		container_of(_work, struct mlx5e_ipsec_work, work);
557 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
558 	struct mlx5_accel_esp_xfrm_attrs *attrs;
559 
560 	attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
561 
562 	mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
563 }
564 
565 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
566 {
567 	struct xfrm_state *x = sa_entry->x;
568 
569 	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
570 	    x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
571 		return;
572 
573 	if (x->props.flags & XFRM_STATE_ESN) {
574 		sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
575 		return;
576 	}
577 
578 	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
579 }
580 
581 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
582 {
583 	struct mlx5e_ipsec_work *work =
584 		container_of(_work, struct mlx5e_ipsec_work, work);
585 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
586 	struct mlx5e_ipsec_netevent_data *data = work->data;
587 	struct mlx5_accel_esp_xfrm_attrs *attrs;
588 
589 	attrs = &sa_entry->attrs;
590 
591 	switch (attrs->dir) {
592 	case XFRM_DEV_OFFLOAD_IN:
593 		ether_addr_copy(attrs->smac, data->addr);
594 		break;
595 	case XFRM_DEV_OFFLOAD_OUT:
596 		ether_addr_copy(attrs->dmac, data->addr);
597 		break;
598 	default:
599 		WARN_ON_ONCE(true);
600 	}
601 	attrs->drop = false;
602 	mlx5e_accel_ipsec_fs_modify(sa_entry);
603 }
604 
605 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
606 {
607 	struct xfrm_state *x = sa_entry->x;
608 	struct mlx5e_ipsec_work *work;
609 	void *data = NULL;
610 
611 	switch (x->xso.type) {
612 	case XFRM_DEV_OFFLOAD_CRYPTO:
613 		if (!(x->props.flags & XFRM_STATE_ESN))
614 			return 0;
615 		break;
616 	case XFRM_DEV_OFFLOAD_PACKET:
617 		if (x->props.mode != XFRM_MODE_TUNNEL)
618 			return 0;
619 		break;
620 	default:
621 		break;
622 	}
623 
624 	work = kzalloc(sizeof(*work), GFP_KERNEL);
625 	if (!work)
626 		return -ENOMEM;
627 
628 	switch (x->xso.type) {
629 	case XFRM_DEV_OFFLOAD_CRYPTO:
630 		data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
631 		if (!data)
632 			goto free_work;
633 
634 		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
635 		break;
636 	case XFRM_DEV_OFFLOAD_PACKET:
637 		data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
638 			       GFP_KERNEL);
639 		if (!data)
640 			goto free_work;
641 
642 		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
643 		break;
644 	default:
645 		break;
646 	}
647 
648 	work->data = data;
649 	work->sa_entry = sa_entry;
650 	sa_entry->work = work;
651 	return 0;
652 
653 free_work:
654 	kfree(work);
655 	return -ENOMEM;
656 }
657 
658 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
659 {
660 	struct xfrm_state *x = sa_entry->x;
661 	struct mlx5e_ipsec_dwork *dwork;
662 
663 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
664 		return 0;
665 
666 	if (x->lft.soft_packet_limit == XFRM_INF &&
667 	    x->lft.hard_packet_limit == XFRM_INF &&
668 	    x->lft.soft_byte_limit == XFRM_INF &&
669 	    x->lft.hard_byte_limit == XFRM_INF)
670 		return 0;
671 
672 	dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
673 	if (!dwork)
674 		return -ENOMEM;
675 
676 	dwork->sa_entry = sa_entry;
677 	INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
678 	sa_entry->dwork = dwork;
679 	return 0;
680 }
681 
682 static int mlx5e_xfrm_add_state(struct xfrm_state *x,
683 				struct netlink_ext_ack *extack)
684 {
685 	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
686 	struct net_device *netdev = x->xso.real_dev;
687 	struct mlx5e_ipsec *ipsec;
688 	struct mlx5e_priv *priv;
689 	gfp_t gfp;
690 	int err;
691 
692 	priv = netdev_priv(netdev);
693 	if (!priv->ipsec)
694 		return -EOPNOTSUPP;
695 
696 	ipsec = priv->ipsec;
697 	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
698 	sa_entry = kzalloc(sizeof(*sa_entry), gfp);
699 	if (!sa_entry)
700 		return -ENOMEM;
701 
702 	sa_entry->x = x;
703 	sa_entry->ipsec = ipsec;
704 	/* Check if this SA originated from an acquire-flow temporary SA */
705 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
706 		goto out;
707 
708 	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
709 	if (err)
710 		goto err_xfrm;
711 
712 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
713 		err = -EBUSY;
714 		goto err_xfrm;
715 	}
716 
717 	/* check esn */
718 	if (x->props.flags & XFRM_STATE_ESN)
719 		mlx5e_ipsec_update_esn_state(sa_entry);
720 	else
721 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
722 		 * the first packet sent using a given SA will contain a sequence
723 		 * number of 1.
724 		 */
725 		sa_entry->esn_state.esn = 1;
726 
727 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
728 
729 	err = mlx5_ipsec_create_work(sa_entry);
730 	if (err)
731 		goto unblock_ipsec;
732 
733 	err = mlx5e_ipsec_create_dwork(sa_entry);
734 	if (err)
735 		goto release_work;
736 
737 	/* create hw context */
738 	err = mlx5_ipsec_create_sa_ctx(sa_entry);
739 	if (err)
740 		goto release_dwork;
741 
742 	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
743 	if (err)
744 		goto err_hw_ctx;
745 
746 	if (x->props.mode == XFRM_MODE_TUNNEL &&
747 	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
748 	    !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
749 		NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
750 		err = -EINVAL;
751 		goto err_add_rule;
752 	}
753 
754 	/* We use the *_bh() variant because xfrm_timer_handler(), which runs
755 	 * in softirq context, can reach our state delete logic and we need
756 	 * xa_erase_bh() there.
757 	 */
758 	err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
759 			   GFP_KERNEL);
760 	if (err)
761 		goto err_add_rule;
762 
763 	mlx5e_ipsec_set_esn_ops(sa_entry);
764 
765 	if (sa_entry->dwork)
766 		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
767 				   MLX5_IPSEC_RESCHED);
768 
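	/* Descriptive note (added for clarity): tunnel-mode packet-offload SAs
	 * are marked in the SADB so that mlx5e_ipsec_netevent_event() can find
	 * them when a neighbour entry changes and refresh the cached MACs.
	 */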
769 	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
770 	    x->props.mode == XFRM_MODE_TUNNEL) {
771 		xa_lock_bh(&ipsec->sadb);
772 		__xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
773 			      MLX5E_IPSEC_TUNNEL_SA);
774 		xa_unlock_bh(&ipsec->sadb);
775 	}
776 
777 out:
778 	x->xso.offload_handle = (unsigned long)sa_entry;
779 	return 0;
780 
781 err_add_rule:
782 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
783 err_hw_ctx:
784 	mlx5_ipsec_free_sa_ctx(sa_entry);
785 release_dwork:
786 	kfree(sa_entry->dwork);
787 release_work:
788 	if (sa_entry->work)
789 		kfree(sa_entry->work->data);
790 	kfree(sa_entry->work);
791 unblock_ipsec:
792 	mlx5_eswitch_unblock_ipsec(priv->mdev);
793 err_xfrm:
794 	kfree(sa_entry);
795 	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
796 	return err;
797 }
798 
799 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
800 {
801 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
802 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
803 	struct mlx5e_ipsec_sa_entry *old;
804 
805 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
806 		return;
807 
808 	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
809 	WARN_ON(old != sa_entry);
810 }
811 
812 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
813 {
814 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
815 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
816 
817 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
818 		goto sa_entry_free;
819 
820 	if (sa_entry->work)
821 		cancel_work_sync(&sa_entry->work->work);
822 
823 	if (sa_entry->dwork)
824 		cancel_delayed_work_sync(&sa_entry->dwork->dwork);
825 
826 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
827 	mlx5_ipsec_free_sa_ctx(sa_entry);
828 	kfree(sa_entry->dwork);
829 	if (sa_entry->work)
830 		kfree(sa_entry->work->data);
831 	kfree(sa_entry->work);
832 	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
833 sa_entry_free:
834 	kfree(sa_entry);
835 }
836 
837 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
838 				      unsigned long event, void *ptr)
839 {
840 	struct mlx5_accel_esp_xfrm_attrs *attrs;
841 	struct mlx5e_ipsec_netevent_data *data;
842 	struct mlx5e_ipsec_sa_entry *sa_entry;
843 	struct mlx5e_ipsec *ipsec;
844 	struct neighbour *n = ptr;
845 	struct net_device *netdev;
846 	struct xfrm_state *x;
847 	unsigned long idx;
848 
849 	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
850 		return NOTIFY_DONE;
851 
852 	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
853 	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
854 		attrs = &sa_entry->attrs;
855 
856 		if (attrs->family == AF_INET) {
857 			if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
858 			    !neigh_key_eq32(n, &attrs->daddr.a4))
859 				continue;
860 		} else {
861 			if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
862 			    !neigh_key_eq128(n, &attrs->daddr.a4))
863 				continue;
864 		}
865 
866 		x = sa_entry->x;
867 		netdev = x->xso.real_dev;
868 		data = sa_entry->work->data;
869 
870 		neigh_ha_snapshot(data->addr, n, netdev);
871 		queue_work(ipsec->wq, &sa_entry->work->work);
872 	}
873 
874 	return NOTIFY_DONE;
875 }
876 
877 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
878 {
879 	struct mlx5e_ipsec *ipsec;
880 	int ret = -ENOMEM;
881 
882 	if (!mlx5_ipsec_device_caps(priv->mdev)) {
883 		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
884 		return;
885 	}
886 
887 	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
888 	if (!ipsec)
889 		return;
890 
891 	xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
892 	ipsec->mdev = priv->mdev;
893 	init_completion(&ipsec->comp);
894 	ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
895 				    priv->netdev->name);
896 	if (!ipsec->wq)
897 		goto err_wq;
898 
899 	if (mlx5_ipsec_device_caps(priv->mdev) &
900 	    MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
901 		ret = mlx5e_ipsec_aso_init(ipsec);
902 		if (ret)
903 			goto err_aso;
904 	}
905 
906 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
907 		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
908 		ret = register_netevent_notifier(&ipsec->netevent_nb);
909 		if (ret)
910 			goto clear_aso;
911 	}
912 
913 	ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
914 	ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
915 	if (ret)
916 		goto err_fs_init;
917 
918 	ipsec->fs = priv->fs;
919 	priv->ipsec = ipsec;
920 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
921 	return;
922 
923 err_fs_init:
924 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
925 		unregister_netevent_notifier(&ipsec->netevent_nb);
926 clear_aso:
927 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
928 		mlx5e_ipsec_aso_cleanup(ipsec);
929 err_aso:
930 	destroy_workqueue(ipsec->wq);
931 err_wq:
932 	kfree(ipsec);
933 	mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
934 	return;
935 }
936 
937 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
938 {
939 	struct mlx5e_ipsec *ipsec = priv->ipsec;
940 
941 	if (!ipsec)
942 		return;
943 
944 	mlx5e_accel_ipsec_fs_cleanup(ipsec);
945 	if (ipsec->netevent_nb.notifier_call) {
946 		unregister_netevent_notifier(&ipsec->netevent_nb);
947 		ipsec->netevent_nb.notifier_call = NULL;
948 	}
949 	if (ipsec->aso)
950 		mlx5e_ipsec_aso_cleanup(ipsec);
951 	destroy_workqueue(ipsec->wq);
952 	kfree(ipsec);
953 	priv->ipsec = NULL;
954 }
955 
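/* Descriptive note (added for clarity): this is the xdo_dev_offload_ok hook,
 * consulted per packet on the transmit path; returning false makes the xfrm
 * stack fall back to software ESP for that packet, e.g. when IPv4 options or
 * IPv6 extension headers are present.
 */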
956 static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
957 {
958 	if (x->props.family == AF_INET) {
959 		/* Offload with IPv4 options is not supported yet */
960 		if (ip_hdr(skb)->ihl > 5)
961 			return false;
962 	} else {
963 		/* Offload with IPv6 extension headers is not supported yet */
964 		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
965 			return false;
966 	}
967 
968 	return true;
969 }
970 
971 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
972 {
973 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
974 	struct mlx5e_ipsec_work *work = sa_entry->work;
975 	struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
976 	bool need_update;
977 
978 	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
979 	if (!need_update)
980 		return;
981 
982 	sa_entry_shadow = work->data;
983 	memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
984 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
985 	queue_work(sa_entry->ipsec->wq, &work->work);
986 }
987 
988 static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
989 {
990 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
991 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
992 	struct net *net = dev_net(x->xso.dev);
993 	u64 trailer_packets = 0, trailer_bytes = 0;
994 	u64 replay_packets = 0, replay_bytes = 0;
995 	u64 auth_packets = 0, auth_bytes = 0;
996 	u64 success_packets, success_bytes;
997 	u64 packets, bytes, lastuse;
998 	size_t headers;
999 
1000 	lockdep_assert(lockdep_is_held(&x->lock) ||
1001 		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) ||
1002 		       lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock));
1003 
1004 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
1005 		return;
1006 
1007 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1008 		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
1009 				     &auth_packets, &lastuse);
1010 		x->stats.integrity_failed += auth_packets;
1011 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
1012 
1013 		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
1014 				     &trailer_packets, &lastuse);
1015 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
1016 	}
1017 
1018 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1019 		return;
1020 
1021 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1022 		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
1023 				     &replay_packets, &lastuse);
1024 		x->stats.replay += replay_packets;
1025 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
1026 	}
1027 
1028 	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
1029 	success_packets = packets - auth_packets - trailer_packets - replay_packets;
1030 	x->curlft.packets += success_packets;
1031 	/* The NIC counts all bytes that pass through flow steering and cannot
1032 	 * count only the payload size, which is what SA accounting needs.
1033 	 *
1034 	 * To overcome this HW limitation, approximate the payload size by
1035 	 * subtracting the always-present headers.
1036 	 */
1037 	headers = sizeof(struct ethhdr);
1038 	if (sa_entry->attrs.family == AF_INET)
1039 		headers += sizeof(struct iphdr);
1040 	else
1041 		headers += sizeof(struct ipv6hdr);
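	/* For example (standard header sizes): an IPv4 SA subtracts
	 * 14 (ethernet) + 20 (base IPv4) = 34 bytes per successfully processed
	 * packet, an IPv6 SA subtracts 14 + 40 = 54 bytes.
	 */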
1042 
1043 	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
1044 	x->curlft.bytes += success_bytes - headers * success_packets;
1045 }
1046 
1047 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1048 				      struct xfrm_policy *x,
1049 				      struct netlink_ext_ack *extack)
1050 {
1051 	struct xfrm_selector *sel = &x->selector;
1052 
1053 	if (x->type != XFRM_POLICY_TYPE_MAIN) {
1054 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1055 		return -EINVAL;
1056 	}
1057 
1058 	/* Note that only a single template is supported */
1059 	if (x->xfrm_nr > 1) {
1060 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1061 		return -EINVAL;
1062 	}
1063 
1064 	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1065 	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1066 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1067 		return -EINVAL;
1068 	}
1069 
1070 	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1071 	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1072 		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1073 		return -EINVAL;
1074 	}
1075 
1076 	if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1077 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1078 		return -EINVAL;
1079 	}
1080 
1081 	if (x->selector.proto != IPPROTO_IP &&
1082 	    x->selector.proto != IPPROTO_UDP &&
1083 	    x->selector.proto != IPPROTO_TCP) {
1084 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1085 		return -EINVAL;
1086 	}
1087 
1088 	if (x->priority) {
1089 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1090 			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1091 			return -EINVAL;
1092 		}
1093 
1094 		if (x->priority == U32_MAX) {
1095 			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1096 			return -EINVAL;
1097 		}
1098 	}
1099 
1100 	if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1101 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1102 		NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1103 		return -EINVAL;
1104 	}
1105 
1106 	return 0;
1107 }
1108 
1109 static void
1110 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1111 				  struct mlx5_accel_pol_xfrm_attrs *attrs)
1112 {
1113 	struct xfrm_policy *x = pol_entry->x;
1114 	struct xfrm_selector *sel;
1115 
1116 	sel = &x->selector;
1117 	memset(attrs, 0, sizeof(*attrs));
1118 
1119 	memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr));
1120 	memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr));
1121 	attrs->family = sel->family;
1122 	attrs->dir = x->xdo.dir;
1123 	attrs->action = x->action;
1124 	attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1125 	attrs->reqid = x->xfrm_vec[0].reqid;
1126 	attrs->upspec.dport = ntohs(sel->dport);
1127 	attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1128 	attrs->upspec.sport = ntohs(sel->sport);
1129 	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1130 	attrs->upspec.proto = sel->proto;
1131 	attrs->prio = x->priority;
1132 }
1133 
1134 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1135 				 struct netlink_ext_ack *extack)
1136 {
1137 	struct net_device *netdev = x->xdo.real_dev;
1138 	struct mlx5e_ipsec_pol_entry *pol_entry;
1139 	struct mlx5e_priv *priv;
1140 	int err;
1141 
1142 	priv = netdev_priv(netdev);
1143 	if (!priv->ipsec) {
1144 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1145 		return -EOPNOTSUPP;
1146 	}
1147 
1148 	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1149 	if (err)
1150 		return err;
1151 
1152 	pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1153 	if (!pol_entry)
1154 		return -ENOMEM;
1155 
1156 	pol_entry->x = x;
1157 	pol_entry->ipsec = priv->ipsec;
1158 
1159 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1160 		err = -EBUSY;
1161 		goto ipsec_busy;
1162 	}
1163 
1164 	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1165 	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1166 	if (err)
1167 		goto err_fs;
1168 
1169 	x->xdo.offload_handle = (unsigned long)pol_entry;
1170 	return 0;
1171 
1172 err_fs:
1173 	mlx5_eswitch_unblock_ipsec(priv->mdev);
1174 ipsec_busy:
1175 	kfree(pol_entry);
1176 	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1177 	return err;
1178 }
1179 
1180 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1181 {
1182 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1183 
1184 	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1185 	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
1186 }
1187 
1188 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
1189 {
1190 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1191 
1192 	kfree(pol_entry);
1193 }
1194 
1195 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1196 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
1197 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
1198 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
1199 	.xdo_dev_offload_ok	= mlx5e_ipsec_offload_ok,
1200 	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1201 
1202 	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
1203 	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1204 	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1205 	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1206 };
1207 
1208 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1209 {
1210 	struct mlx5_core_dev *mdev = priv->mdev;
1211 	struct net_device *netdev = priv->netdev;
1212 
1213 	if (!mlx5_ipsec_device_caps(mdev))
1214 		return;
1215 
1216 	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1217 
1218 	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1219 	netdev->features |= NETIF_F_HW_ESP;
1220 	netdev->hw_enc_features |= NETIF_F_HW_ESP;
1221 
1222 	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1223 		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1224 		return;
1225 	}
1226 
1227 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1228 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1229 
1230 	if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1231 		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1232 		return;
1233 	}
1234 
1235 	netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1236 	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1237 	netdev->features |= NETIF_F_GSO_ESP;
1238 	netdev->hw_features |= NETIF_F_GSO_ESP;
1239 	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1240 }
1241