// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/*          Kai Shen <[email protected]> */
/* Copyright (c) 2020-2022, Alibaba Group. */

/* Authors: Bernard Metzler <[email protected]> */
/*          Fredy Neeser */
/*          Greg Joyce <[email protected]> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */

#include <linux/workqueue.h>
#include <trace/events/sock.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

static struct workqueue_struct *erdma_cm_wq;

static void erdma_cm_llp_state_change(struct sock *sk);
static void erdma_cm_llp_data_ready(struct sock *sk);
static void erdma_cm_llp_error_report(struct sock *sk);

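/*
 * Socket upcall management: save the socket's original sk upcalls in
 * the CEP and replace them with the CM's state change / data ready /
 * error report handlers. The originals are restored when the socket
 * is disassociated from the CEP.
 */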
static void erdma_sk_assign_cm_upcalls(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = erdma_cm_llp_state_change;
	sk->sk_data_ready = erdma_cm_llp_data_ready;
	sk->sk_error_report = erdma_cm_llp_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_save_upcalls(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	write_lock_bh(&sk->sk_callback_lock);
	cep->sk_state_change = sk->sk_state_change;
	cep->sk_data_ready = sk->sk_data_ready;
	cep->sk_error_report = sk->sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
{
	sk->sk_state_change = cep->sk_state_change;
	sk->sk_data_ready = cep->sk_data_ready;
	sk->sk_error_report = cep->sk_error_report;
	sk->sk_user_data = NULL;
}

static void erdma_socket_disassoc(struct socket *s)
{
	struct sock *sk = s->sk;
	struct erdma_cep *cep;

	if (sk) {
		write_lock_bh(&sk->sk_callback_lock);
		cep = sk_to_cep(sk);
		if (cep) {
			erdma_sk_restore_upcalls(sk, cep);
			erdma_cep_put(cep);
		} else {
			WARN_ON_ONCE(1);
		}
		write_unlock_bh(&sk->sk_callback_lock);
	} else {
		WARN_ON_ONCE(1);
	}
}

static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
{
	cep->sock = s;
	erdma_cep_get(cep);
	s->sk->sk_user_data = cep;

	erdma_sk_save_upcalls(s->sk);
	erdma_sk_assign_cm_upcalls(s->sk);
}

static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
{
	if (cep->listen_cep) {
		erdma_cep_put(cep->listen_cep);
		cep->listen_cep = NULL;
	}
}

static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
{
	struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	unsigned long flags;

	if (!cep)
		return NULL;

	INIT_LIST_HEAD(&cep->listenq);
	INIT_LIST_HEAD(&cep->devq);
	INIT_LIST_HEAD(&cep->work_freelist);

	kref_init(&cep->ref);
	cep->state = ERDMA_EPSTATE_IDLE;
	init_waitqueue_head(&cep->waitq);
	spin_lock_init(&cep->lock);
	cep->dev = dev;

	spin_lock_irqsave(&dev->lock, flags);
	list_add_tail(&cep->devq, &dev->cep_list);
	spin_unlock_irqrestore(&dev->lock, flags);

	return cep;
}

static void erdma_cm_free_work(struct erdma_cep *cep)
{
	struct list_head *w, *tmp;
	struct erdma_cm_work *work;

	list_for_each_safe(w, tmp, &cep->work_freelist) {
		work = list_entry(w, struct erdma_cm_work, list);
		list_del(&work->list);
		kfree(work);
	}
}

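/*
 * Cancel a pending MPA/connect timer, if armed. If the delayed work
 * was still queued, also drop the CEP reference taken when the timer
 * was set up and free its work element.
 */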
static void erdma_cancel_mpatimer(struct erdma_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			erdma_cep_put(cep);
			kfree(cep->mpa_timer);
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}

static void erdma_put_work(struct erdma_cm_work *work)
{
	INIT_LIST_HEAD(&work->list);
	spin_lock_bh(&work->cep->lock);
	list_add(&work->list, &work->cep->work_freelist);
	spin_unlock_bh(&work->cep->lock);
}

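/*
 * Serialize CEP state changes: erdma_cep_set_inuse() claims exclusive
 * ownership of the CEP, sleeping on cep->waitq until any current owner
 * calls erdma_cep_set_free(), which wakes up the next waiter.
 */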
static void erdma_cep_set_inuse(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	while (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);

		spin_lock_irqsave(&cep->lock, flags);
	}

	cep->in_use = 1;
	spin_unlock_irqrestore(&cep->lock, flags);
}

static void erdma_cep_set_free(struct erdma_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	cep->in_use = 0;
	spin_unlock_irqrestore(&cep->lock, flags);

	wake_up(&cep->waitq);
}

static void __erdma_cep_dealloc(struct kref *ref)
{
	struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
	struct erdma_dev *dev = cep->dev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	kfree(cep->private_data);
	kfree(cep->mpa.pdata);
	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		erdma_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	spin_lock_irqsave(&dev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&dev->lock, flags);
	kfree(cep);
}

static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
{
	struct erdma_cm_work *work = NULL;

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist)) {
		work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
				  list);
		list_del_init(&work->list);
	}

	spin_unlock_bh(&cep->lock);
	return work;
}

static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
{
	struct erdma_cm_work *work;

	while (num--) {
		work = kmalloc(sizeof(*work), GFP_KERNEL);
		if (!work) {
			if (!(list_empty(&cep->work_freelist)))
				erdma_cm_free_work(cep);
			return -ENOMEM;
		}
		work->cep = cep;
		INIT_LIST_HEAD(&work->list);
		list_add(&work->list, &cep->work_freelist);
	}

	return 0;
}

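/*
 * Deliver a CM event to the IWCM. For CONNECT_REQUEST events, the
 * cm_id of the listening CEP is used and the new CEP is passed as
 * provider_data. Received MPA private data, if any, and the local and
 * peer addresses are attached to CONNECT_REQUEST and CONNECT_REPLY
 * events.
 */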
static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
			   int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *cm_id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		cm_id = cep->listen_cep->cm_id;

		event.ird = cep->dev->attrs.max_ird;
		event.ord = cep->dev->attrs.max_ord;
	} else {
		cm_id = cep->cm_id;
	}

	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len && cep->mpa.pdata) {
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;
		}

		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}

	return cm_id->event_handler(cm_id, &event);
}

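/*
 * Drop any connection state attached to the QP: report the appropriate
 * close/reply event to the IWCM, release the cm_id and socket, and
 * detach the QP from its CEP.
 */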
void erdma_qp_cm_drop(struct erdma_qp *qp)
{
	struct erdma_cep *cep = qp->cep;

	if (!qp->cep)
		return;

	erdma_cep_set_inuse(cep);

	/* already closed. */
	if (cep->state == ERDMA_EPSTATE_CLOSED)
		goto out;

	if (cep->cm_id) {
		switch (cep->state) {
		case ERDMA_EPSTATE_AWAIT_MPAREP:
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-EINVAL);
			break;
		case ERDMA_EPSTATE_RDMA_MODE:
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			break;
		case ERDMA_EPSTATE_IDLE:
		case ERDMA_EPSTATE_LISTENING:
		case ERDMA_EPSTATE_CONNECTING:
		case ERDMA_EPSTATE_AWAIT_MPAREQ:
		case ERDMA_EPSTATE_RECVD_MPAREQ:
		case ERDMA_EPSTATE_CLOSED:
		default:
			break;
		}
		cep->cm_id->rem_ref(cep->cm_id);
		cep->cm_id = NULL;
		erdma_cep_put(cep);
	}
	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->sock) {
		erdma_socket_disassoc(cep->sock);
		sock_release(cep->sock);
		cep->sock = NULL;
	}

	if (cep->qp) {
		cep->qp = NULL;
		erdma_qp_put(qp);
	}
out:
	erdma_cep_set_free(cep);
}

void erdma_cep_put(struct erdma_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __erdma_cep_dealloc);
}

void erdma_cep_get(struct erdma_cep *cep)
{
	kref_get(&cep->ref);
}

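/*
 * Send the MPA Request/Reply header currently held in cep->mpa,
 * followed by the erdma extended data and, if pd_len != 0, the given
 * private data, in a single kernel_sendmsg() call.
 */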
static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
				u8 pd_len)
{
	struct socket *s = cep->sock;
	struct mpa_rr *rr = &cep->mpa.hdr;
	struct kvec iov[3];
	struct msghdr msg;
	int iovec_num = 0;
	int ret;
	int mpa_len;

	memset(&msg, 0, sizeof(msg));

	rr->params.pd_len = cpu_to_be16(pd_len);

	iov[iovec_num].iov_base = rr;
	iov[iovec_num].iov_len = sizeof(*rr);
	iovec_num++;
	mpa_len = sizeof(*rr);

	iov[iovec_num].iov_base = &cep->mpa.ext_data;
	iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
	iovec_num++;
	mpa_len += sizeof(cep->mpa.ext_data);

	if (pd_len) {
		iov[iovec_num].iov_base = (char *)pdata;
		iov[iovec_num].iov_len = pd_len;
		mpa_len += pd_len;
		iovec_num++;
	}

	ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);

	return ret < 0 ? ret : 0;
}

static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
			     int flags)
{
	struct kvec iov = { buf, size };
	struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };

	return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
}

static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
			  int hdr_size, int *rcvd_out)
{
	struct socket *s = cep->sock;
	int rcvd;

	*rcvd_out = 0;
	if (hdr_rcvd < hdr_size) {
		rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
				  MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return -EAGAIN;

		if (rcvd <= 0)
			return -ECONNABORTED;

		hdr_rcvd += rcvd;
		*rcvd_out = rcvd;

		if (hdr_rcvd < hdr_size)
			return -EAGAIN;
	}

	return 0;
}

static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
{
	*bits = (*bits & ~MPA_RR_MASK_REVISION) |
		(cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
}

static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
{
	__be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;

	return (u8)be16_to_cpu(rev);
}

static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
{
	*bits = (*bits & ~MPA_EXT_FLAG_CC) |
		(cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
}

static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
{
	__be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;

	return (u8)be32_to_cpu(cc);
}

/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if the complete MPA Request/Reply header, including
 * any private data, was received. Returns -EAGAIN if the header
 * was only partially received, or a negative error code otherwise.
 *
 * Context: May be called in process context only
 */
static int erdma_recv_mpa_rr(struct erdma_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv, ret, pd_rcvd;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
				     (char *)&cep->mpa.hdr,
				     sizeof(struct mpa_rr), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
	    __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
		return -EPROTO;

	if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
	    sizeof(struct erdma_mpa_ext)) {
		ret = __recv_mpa_hdr(
			cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
			(char *)&cep->mpa.ext_data,
			sizeof(struct erdma_mpa_ext), &rcvd);
		cep->mpa.bytes_rcvd += rcvd;
		if (ret)
			return ret;
	}

	pd_len = be16_to_cpu(hdr->params.pd_len);
	pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
		  sizeof(struct erdma_mpa_ext);
	to_rcv = pd_len - pd_rcvd;

	if (!to_rcv) {
		/*
		 * We have received the whole MPA Request/Reply message.
		 * Check against peer protocol violation.
		 */
		u32 word;

		ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
				     &rcvd);
		if (ret == -EAGAIN && rcvd == 0)
			return 0;

		if (ret)
			return ret;

		return -EPROTO;
	}

	/*
	 * At this point, the MPA header has been fully received, and
	 * pd_len != 0. So, begin to receive private data.
	 */
	if (!cep->mpa.pdata) {
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}

	rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
			  MSG_DONTWAIT);
	if (rcvd < 0)
		return rcvd;

	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd)
		return 0;

	return -EAGAIN;
}

/*
 * erdma_proc_mpareq()
 *
 * Read the MPA Request from the socket and signal a new connection
 * to the IWCM on success. The caller must hold the lock on the
 * corresponding listening CEP.
 */
static int erdma_proc_mpareq(struct erdma_cep *cep)
{
	struct mpa_rr *req;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		return ret;

	req = &cep->mpa.hdr;

	if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
		return -EPROTO;

	memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);

	/* Markers and CRC are currently not supported. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS ||
	    req->params.bits & MPA_RR_FLAG_CRC)
		goto reject_conn;

	cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	erdma_cep_get(cep);
	ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (ret)
		erdma_cep_put(cep);

	return ret;

reject_conn:
	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;
	req->params.bits &= ~MPA_RR_FLAG_CRC;

	kfree(cep->mpa.pdata);
	cep->mpa.pdata = NULL;
	erdma_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}

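/*
 * Process an MPA Reply on the active side: validate the reply, move
 * the QP to RTS on acceptance, and report the result to the IWCM via
 * a CONNECT_REPLY event.
 */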
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
	enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
	struct erdma_mod_qp_params_iwarp params;
	struct erdma_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int ret;

	ret = erdma_recv_mpa_rr(cep);
	if (ret)
		goto out_err;

	erdma_cancel_mpatimer(cep);

	rep = &cep->mpa.hdr;

	if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
		ret = -EPROTO;
		goto out_err;
	}

	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
		return -ECONNRESET;
	}

	/* Markers and CRC are currently not supported. */
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (rep->params.bits & MPA_RR_FLAG_CRC)) {
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		return -EINVAL;
	}

	memset(&params, 0, sizeof(params));
	params.state = ERDMA_QPS_IWARP_RTS;
	params.irq_size = cep->ird;
	params.orq_size = cep->ord;

	down_write(&qp->state_lock);
	if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}

	to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
			  ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
			  ERDMA_QPA_IWARP_ORD;

	params.qp_type = ERDMA_QP_ACTIVE;
	if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
		to_modify_attrs |= ERDMA_QPA_IWARP_CC;
		params.cc = COMPROMISE_CC;
	}

	ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);

	up_write(&qp->state_lock);

	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!ret)
			cep->state = ERDMA_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	if (ret != -EAGAIN)
		erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return ret;
}

static void erdma_accept_newconn(struct erdma_cep *cep)
{
	struct socket *s = cep->sock;
	struct socket *new_s = NULL;
	struct erdma_cep *new_cep = NULL;
	int ret = 0;

	if (cep->state != ERDMA_EPSTATE_LISTENING)
		goto error;

	new_cep = erdma_cep_alloc(cep->dev);
	if (!new_cep)
		goto error;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	if (erdma_cm_alloc_work(new_cep, 4) != 0)
		goto error;

	/*
	 * Copy saved socket callbacks from the listening CEP
	 * and associate the new socket with the new CEP.
	 */
	new_cep->sk_state_change = cep->sk_state_change;
	new_cep->sk_data_ready = cep->sk_data_ready;
	new_cep->sk_error_report = cep->sk_error_report;

	ret = kernel_accept(s, &new_s, O_NONBLOCK);
	if (ret != 0)
		goto error;

	new_cep->sock = new_s;
	erdma_cep_get(new_cep);
	new_s->sk->sk_user_data = new_cep;

	tcp_sock_set_nodelay(new_s->sk);
	new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;

	ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
	if (ret)
		goto error;

	new_cep->listen_cep = cep;
	erdma_cep_get(cep);

	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
		/* MPA REQ already queued */
		erdma_cep_set_inuse(new_cep);
		ret = erdma_proc_mpareq(new_cep);
		if (ret != -EAGAIN) {
			erdma_cep_put(cep);
			new_cep->listen_cep = NULL;
			if (ret) {
				erdma_cep_set_free(new_cep);
				goto error;
			}
		}
		erdma_cep_set_free(new_cep);
	}
	return;

error:
	if (new_cep) {
		new_cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cancel_mpatimer(new_cep);

		erdma_cep_put(new_cep);
	}

	if (new_s) {
		erdma_socket_disassoc(new_s);
		sock_release(new_s);
	}
}

static int erdma_newconn_connected(struct erdma_cep *cep)
{
	int ret = 0;

	cep->mpa.hdr.params.bits = 0;
	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);

	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);

	ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
	cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
	cep->mpa.hdr.params.pd_len = 0;

	if (ret >= 0)
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);

	return ret;
}

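/*
 * Central CM work handler, run from erdma_cm_wq. Dispatches on the
 * queued work type and, where the connection is no longer viable,
 * tears the CEP down: timers are cancelled, the QP is detached and
 * closed, and socket and cm_id references are dropped.
 */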
static void erdma_cm_work_handler(struct work_struct *w)
{
	struct erdma_cm_work *work;
	struct erdma_cep *cep;
	int release_cep = 0, ret = 0;

	work = container_of(w, struct erdma_cm_work, work.work);
	cep = work->cep;

	erdma_cep_set_inuse(cep);

	switch (work->type) {
	case ERDMA_CM_WORK_CONNECTED:
		erdma_cancel_mpatimer(cep);
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			ret = erdma_newconn_connected(cep);
			if (ret) {
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-EIO);
				release_cep = 1;
			}
		}
		break;
	case ERDMA_CM_WORK_CONNECTTIMEOUT:
		if (cep->state == ERDMA_EPSTATE_CONNECTING) {
			cep->mpa_timer = NULL;
			erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					-ETIMEDOUT);
			release_cep = 1;
		}
		break;
	case ERDMA_CM_WORK_ACCEPT:
		erdma_accept_newconn(cep);
		break;
	case ERDMA_CM_WORK_READ_MPAHDR:
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				erdma_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    ERDMA_EPSTATE_LISTENING)
					ret = erdma_proc_mpareq(cep);
				else
					ret = -EFAULT;

				erdma_cep_set_free(cep->listen_cep);

				if (ret != -EAGAIN) {
					erdma_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (ret)
						erdma_cep_put(cep);
				}
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			ret = erdma_proc_mpareply(cep);
		}

		if (ret && ret != -EAGAIN)
			release_cep = 1;
		break;
	case ERDMA_CM_WORK_CLOSE_LLP:
		if (cep->cm_id)
			erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == ERDMA_EPSTATE_CONNECTING ||
			    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection
				 * dropped.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ECONNRESET);
			} else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 *       to transition IWCM into CLOSING.
				 */
				erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* Socket close before MPA request received. */
			erdma_disassoc_listen_cep(cep);
			erdma_cep_put(cep);
		}
		release_cep = 1;
		break;
	case ERDMA_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;
		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and
			 * signal timeout.
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
						-ETIMEDOUT);
			release_cep = 1;
		} else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
			/* No MPA req received after peer TCP stream setup. */
			erdma_disassoc_listen_cep(cep);

			erdma_cep_put(cep);
			release_cep = 1;
		}
		break;
	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}

	if (release_cep) {
		erdma_cancel_mpatimer(cep);
		cep->state = ERDMA_EPSTATE_CLOSED;
		if (cep->qp) {
			struct erdma_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling erdma_qp_cm_drop().
			 */
			erdma_qp_get(qp);
			erdma_cep_set_free(cep);

			erdma_qp_llp_close(qp);
			erdma_qp_put(qp);

			erdma_cep_set_inuse(cep);
			cep->qp = NULL;
			erdma_qp_put(qp);
		}

		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			if (cep->state != ERDMA_EPSTATE_LISTENING)
				erdma_cep_put(cep);
		}
	}
	erdma_cep_set_free(cep);
	erdma_put_work(work);
	erdma_cep_put(cep);
}

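/*
 * Take a work element from the CEP's freelist and queue it on
 * erdma_cm_wq. Timer work (MPA/connect timeout) is queued delayed and
 * remembered in cep->mpa_timer. A CEP reference is held for the
 * lifetime of the work.
 */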
int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
{
	struct erdma_cm_work *work = erdma_get_work(cep);
	unsigned long delay = 0;

	if (!work)
		return -ENOMEM;

	work->type = type;
	work->cep = cep;

	erdma_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);

	if (type == ERDMA_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
			delay = MPAREP_TIMEOUT;
		else
			delay = MPAREQ_TIMEOUT;
	} else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
		cep->mpa_timer = work;

		delay = CONNECT_TIMEOUT;
	}

	queue_delayed_work(erdma_cm_wq, &work->work, delay);

	return 0;
}

static void erdma_cm_llp_data_ready(struct sock *sk)
{
	struct erdma_cep *cep;

	trace_sk_data_ready(sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep)
		goto out;

	if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
	    cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
		erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);

out:
	read_unlock(&sk->sk_callback_lock);
}

static void erdma_cm_llp_error_report(struct sock *sk)
{
	struct erdma_cep *cep = sk_to_cep(sk);

	if (cep)
		cep->sk_error_report(sk);
}

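/*
 * TCP state change upcall: a transition to ESTABLISHED queues either
 * CONNECTED work (active side) or ACCEPT work (passive side), while
 * CLOSE/CLOSE_WAIT queues PEER_CLOSE work for non-listening CEPs. The
 * socket's original state change callback is invoked afterwards.
 */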
static void erdma_cm_llp_state_change(struct sock *sk)
{
	struct erdma_cep *cep;
	void (*orig_state_change)(struct sock *sk);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	orig_state_change = cep->sk_state_change;

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		if (cep->state == ERDMA_EPSTATE_CONNECTING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		else
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
		break;
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->state != ERDMA_EPSTATE_LISTENING)
			erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
		break;
	default:
		break;
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}

static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
			      int laddrlen, struct sockaddr *raddr,
			      int raddrlen, int flags)
{
	int ret;

	sock_set_reuseaddr(s->sk);
	ret = s->ops->bind(s, laddr, laddrlen);
	if (ret)
		return ret;
	ret = s->ops->connect(s, raddr, raddrlen, flags);
	return ret < 0 ? ret : 0;
}

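/*
 * Active connection setup (iw_cm connect verb): look up the QP, create
 * a TCP socket, associate it with a new CEP, stash the private data,
 * and start a non-blocking bind + connect. Completion is handled
 * asynchronously via CONNECTED or CONNECTTIMEOUT work.
 */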
int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_qp *qp;
	struct erdma_cep *cep = NULL;
	struct socket *s = NULL;
	struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
	struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
	u16 pd_len = params->private_data_len;
	int ret;

	if (pd_len > MPA_MAX_PRIVDATA)
		return -EINVAL;

	if (params->ird > dev->attrs.max_ird ||
	    params->ord > dev->attrs.max_ord)
		return -EINVAL;

	if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
		return -EAFNOSUPPORT;

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		goto error_put_qp;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error_release_sock;
	}

	erdma_cep_set_inuse(cep);

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	/* Associate cm_id with CEP */
	id->add_ref(id);
	cep->cm_id = id;

	/*
	 * 6: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout, connected event
	 * and connect timeout.
	 */
	ret = erdma_cm_alloc_work(cep, 6);
	if (ret != 0) {
		ret = -ENOMEM;
		goto error_release_cep;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;
	cep->state = ERDMA_EPSTATE_CONNECTING;

	erdma_cep_socket_assoc(cep, s);

	if (pd_len) {
		cep->pd_len = pd_len;
		cep->private_data = kmalloc(pd_len, GFP_KERNEL);
		if (!cep->private_data) {
			ret = -ENOMEM;
			goto error_disassoc;
		}

		memcpy(cep->private_data, params->private_data,
		       params->private_data_len);
	}

	ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
				 sizeof(*raddr), O_NONBLOCK);
	if (ret != -EINPROGRESS && ret != 0) {
		goto error_disassoc;
	} else if (ret == 0) {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
		if (ret)
			goto error_disassoc;
	} else {
		ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
		if (ret)
			goto error_disassoc;
	}

	erdma_cep_set_free(cep);
	return 0;

error_disassoc:
	kfree(cep->private_data);
	cep->private_data = NULL;
	cep->pd_len = 0;

	erdma_socket_disassoc(s);

error_release_cep:
	/* disassoc with cm_id */
	cep->cm_id = NULL;
	id->rem_ref(id);

	/* disassoc with qp */
	qp->cep = NULL;
	erdma_cep_put(cep);
	cep->qp = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);

	/* release the cep. */
	erdma_cep_put(cep);

error_release_sock:
	if (s)
		sock_release(s);
error_put_qp:
	erdma_qp_put(qp);

	return ret;
}

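/*
 * Accept a pending connection request (iw_cm accept verb): validate
 * the parameters against device limits, bind the QP to the CEP, move
 * the QP to RTS, and send the MPA Reply, reporting ESTABLISHED to the
 * IWCM on success.
 */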
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
	struct erdma_mod_qp_params_iwarp mod_qp_params;
	enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
	struct erdma_dev *dev = to_edev(id->device);
	struct erdma_qp *qp;
	int ret;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	qp = find_qp_by_qpn(dev, params->qpn);
	if (!qp)
		return -ENOENT;
	erdma_qp_get(qp);

	down_write(&qp->state_lock);
	if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->ord > dev->attrs.max_ord ||
	    params->ird > dev->attrs.max_ord) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	if (params->private_data_len > MPA_MAX_PRIVDATA) {
		ret = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}

	cep->ird = params->ird;
	cep->ord = params->ord;

	cep->cm_id = id;
	id->add_ref(id);

	memset(&mod_qp_params, 0, sizeof(mod_qp_params));

	mod_qp_params.irq_size = params->ird;
	mod_qp_params.orq_size = params->ord;
	mod_qp_params.state = ERDMA_QPS_IWARP_RTS;

	/* Associate QP with CEP */
	erdma_cep_get(cep);
	qp->cep = cep;
	cep->qp = qp;

	cep->state = ERDMA_EPSTATE_RDMA_MODE;

	mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
	mod_qp_params.pd_len = params->private_data_len;

	to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
			  ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
			  ERDMA_QPA_IWARP_MPA;

	if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
		to_modify_attrs |= ERDMA_QPA_IWARP_CC;
		mod_qp_params.cc = COMPROMISE_CC;
	}

	/* move to rts */
	ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);

	up_write(&qp->state_lock);

	if (ret)
		goto error;

	cep->mpa.ext_data.bits = 0;
	__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
	cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);

	ret = erdma_send_mpareqrep(cep, params->private_data,
				   params->private_data_len);
	if (!ret) {
		ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (ret)
			goto error;

		erdma_cep_set_free(cep);

		return 0;
	}

error:
	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	if (cep->cm_id) {
		cep->cm_id->rem_ref(id);
		cep->cm_id = NULL;
	}

	if (qp->cep) {
		erdma_cep_put(cep);
		qp->cep = NULL;
	}

	cep->qp = NULL;
	erdma_qp_put(qp);

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return ret;
}

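/*
 * Reject a pending connection request (iw_cm reject verb): for an
 * MPA_REVISION_EXT_1 peer, send an MPA Reply with the reject flag
 * set, optionally carrying private data, then close the socket and
 * the CEP.
 */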
int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
{
	struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;

	erdma_cep_set_inuse(cep);
	erdma_cep_put(cep);

	erdma_cancel_mpatimer(cep);

	if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);

		return -ECONNRESET;
	}

	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		erdma_send_mpareqrep(cep, pdata, plen);
	}

	erdma_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = ERDMA_EPSTATE_CLOSED;

	erdma_cep_set_free(cep);
	erdma_cep_put(cep);

	return 0;
}

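/*
 * Set up a listening endpoint (iw_cm listen verb): create and bind a
 * TCP socket, attach it to a fresh CEP, preallocate one work element
 * per backlog slot, and link the CEP into the cm_id's listener list.
 */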
int erdma_create_listen(struct iw_cm_id *id, int backlog)
{
	struct socket *s;
	struct erdma_cep *cep = NULL;
	int ret = 0;
	struct erdma_dev *dev = to_edev(id->device);
	int addr_family = id->local_addr.ss_family;
	struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);

	if (addr_family != AF_INET)
		return -EAFNOSUPPORT;

	ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
	if (ret < 0)
		return ret;

	sock_set_reuseaddr(s->sk);

	/* For wildcard addr, limit binding to current device only */
	if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
		s->sk->sk_bound_dev_if = dev->netdev->ifindex;

	ret = s->ops->bind(s, (struct sockaddr *)laddr,
			   sizeof(struct sockaddr_in));
	if (ret)
		goto error;

	cep = erdma_cep_alloc(dev);
	if (!cep) {
		ret = -ENOMEM;
		goto error;
	}
	erdma_cep_socket_assoc(cep, s);

	ret = erdma_cm_alloc_work(cep, backlog);
	if (ret)
		goto error;

	ret = s->ops->listen(s, backlog);
	if (ret)
		goto error;

	cep->cm_id = id;
	id->add_ref(id);

	if (!id->provider_data) {
		id->provider_data =
			kmalloc(sizeof(struct list_head), GFP_KERNEL);
		if (!id->provider_data) {
			ret = -ENOMEM;
			goto error;
		}
		INIT_LIST_HEAD((struct list_head *)id->provider_data);
	}

	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
	cep->state = ERDMA_EPSTATE_LISTENING;

	return 0;

error:
	if (cep) {
		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		cep->sock = NULL;
		erdma_socket_disassoc(s);
		cep->state = ERDMA_EPSTATE_CLOSED;

		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
	sock_release(s);

	return ret;
}

static void erdma_drop_listeners(struct iw_cm_id *id)
{
	struct list_head *p, *tmp;
	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 */
	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
		struct erdma_cep *cep =
			list_entry(p, struct erdma_cep, listenq);

		list_del(p);

		erdma_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		if (cep->sock) {
			erdma_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		cep->state = ERDMA_EPSTATE_CLOSED;
		erdma_cep_set_free(cep);
		erdma_cep_put(cep);
	}
}

int erdma_destroy_listen(struct iw_cm_id *id)
{
	if (!id->provider_data)
		return 0;

	erdma_drop_listeners(id);
	kfree(id->provider_data);
	id->provider_data = NULL;

	return 0;
}

int erdma_cm_init(void)
{
	erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
	if (!erdma_cm_wq)
		return -ENOMEM;

	return 0;
}

void erdma_cm_exit(void)
{
	if (erdma_cm_wq)
		destroy_workqueue(erdma_cm_wq);
}