// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/*          Kai Shen <[email protected]> */
/* Copyright (c) 2020-2021, Alibaba Group */
/* Authors: Bernard Metzler <[email protected]> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include "erdma_cm.h"
#include "erdma_verbs.h"

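/*
 * The LLP (TCP) connection was closed: move the QP out of an operational
 * state and drop the reference to its connection endpoint.
 */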
void erdma_qp_llp_close(struct erdma_qp *qp)
{
	struct erdma_mod_qp_params_iwarp params;

	down_write(&qp->state_lock);

	switch (qp->attrs.iwarp.state) {
	case ERDMA_QPS_IWARP_RTS:
	case ERDMA_QPS_IWARP_RTR:
	case ERDMA_QPS_IWARP_IDLE:
	case ERDMA_QPS_IWARP_TERMINATE:
		params.state = ERDMA_QPS_IWARP_CLOSING;
		erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
		break;
	case ERDMA_QPS_IWARP_CLOSING:
		qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
		break;
	default:
		break;
	}

	if (qp->cep) {
		erdma_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);
}

struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
{
	struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);

	if (qp)
		return &qp->ibqp;

	return NULL;
}

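/*
 * Bring an iWARP QP to RTS: derive the TCP 4-tuple and sequence numbers
 * from the connected socket and post a MODIFY_QP command to the device.
 */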
static int
erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
			     struct erdma_mod_qp_params_iwarp *params,
			     enum erdma_qpa_mask_iwarp mask)
{
	int ret;
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;
	struct tcp_sock *tp;
	struct erdma_cep *cep = qp->cep;
	struct sockaddr_storage local_addr, remote_addr;

	if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
		return -EINVAL;

	if (!(mask & ERDMA_QPA_IWARP_MPA))
		return -EINVAL;

	if (!(mask & ERDMA_QPA_IWARP_CC))
		params->cc = qp->attrs.cc;

	ret = getname_local(cep->sock, &local_addr);
	if (ret < 0)
		return ret;

	ret = getname_peer(cep->sock, &remote_addr);
	if (ret < 0)
		return ret;

	tp = tcp_sk(qp->cep->sock->sk);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
	req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
	req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
	req.dport = to_sockaddr_in(remote_addr).sin_port;
	req.sport = to_sockaddr_in(local_addr).sin_port;

	req.send_nxt = tp->snd_nxt;
	/* Reserve TCP sequence space for the MPA response on the passive side. */
	if (params->qp_type == ERDMA_QP_PASSIVE)
		req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
	req.recv_nxt = tp->rcv_nxt;

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
				  true);
	if (ret)
		return ret;

	if (mask & ERDMA_QPA_IWARP_IRD)
		qp->attrs.irq_size = params->irq_size;

	if (mask & ERDMA_QPA_IWARP_ORD)
		qp->attrs.orq_size = params->orq_size;

	if (mask & ERDMA_QPA_IWARP_CC)
		qp->attrs.cc = params->cc;

	qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;

	return 0;
}

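/*
 * Move an iWARP QP into a non-operational state (CLOSING, TERMINATE or
 * ERROR) by posting a MODIFY_QP command, then record the new state.
 */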
static int
erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
			      struct erdma_mod_qp_params_iwarp *params,
			      enum erdma_qpa_mask_iwarp mask)
{
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;
	int ret;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
				  true);
	if (ret)
		return ret;

	qp->attrs.iwarp.state = params->state;

	return 0;
}

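/*
 * iWARP QP state machine: apply the requested transition, drop the CM
 * connection when leaving RTS, and schedule a flush of kernel QPs that
 * enter an error state.
 */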
int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
				struct erdma_mod_qp_params_iwarp *params,
				int mask)
{
	bool need_reflush = false;
	int drop_conn, ret = 0;

	if (!mask)
		return 0;

	if (!(mask & ERDMA_QPA_IWARP_STATE))
		return 0;

	switch (qp->attrs.iwarp.state) {
	case ERDMA_QPS_IWARP_IDLE:
	case ERDMA_QPS_IWARP_RTR:
		if (params->state == ERDMA_QPS_IWARP_RTS) {
			ret = erdma_modify_qp_state_to_rts(qp, params, mask);
		} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
			need_reflush = true;
			if (qp->cep) {
				erdma_cep_put(qp->cep);
				qp->cep = NULL;
			}
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
		}
		break;
	case ERDMA_QPS_IWARP_RTS:
		drop_conn = 0;

		if (params->state == ERDMA_QPS_IWARP_CLOSING ||
		    params->state == ERDMA_QPS_IWARP_TERMINATE ||
		    params->state == ERDMA_QPS_IWARP_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
			drop_conn = 1;
			need_reflush = true;
		}

		if (drop_conn)
			erdma_qp_cm_drop(qp);

		break;
	case ERDMA_QPS_IWARP_TERMINATE:
		if (params->state == ERDMA_QPS_IWARP_ERROR)
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
		break;
	case ERDMA_QPS_IWARP_CLOSING:
		if (params->state == ERDMA_QPS_IWARP_IDLE) {
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
		} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
		} else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
			return -ECONNABORTED;
		}
		break;
	default:
		break;
	}

	if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
		qp->flags |= ERDMA_QP_IN_FLUSHING;
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));
	}

	return ret;
}

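/* Build and post the RoCEv2 MODIFY_QP command for the attributes in attr_mask. */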
static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
				struct erdma_mod_qp_params_rocev2 *params,
				enum erdma_qpa_mask_rocev2 attr_mask)
{
	struct erdma_cmdq_mod_qp_req_rocev2 req;

	memset(&req, 0, sizeof(req));

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
				       params->state);

	if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
		req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
				      params->dst_qpn);

	if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
		req.qkey = params->qkey;

	if (attr_mask & ERDMA_QPA_ROCEV2_AV)
		erdma_set_av_cfg(&req.av_cfg, &params->av);

	if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
		req.sq_psn = params->sq_psn;

	if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
		req.rq_psn = params->rq_psn;

	req.attr_mask = attr_mask;

	return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
				   NULL, true);
}

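/*
 * Reset the software state of a kernel QP: rewind the SQ/RQ producer and
 * consumer indices, clear the WR tables and queue buffers, and purge any
 * stale CQEs that still reference this QP.
 */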
static void erdma_reset_qp(struct erdma_qp *qp)
{
	qp->kern_qp.sq_pi = 0;
	qp->kern_qp.sq_ci = 0;
	qp->kern_qp.rq_pi = 0;
	qp->kern_qp.rq_ci = 0;
	memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
	memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
	memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
	memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
	erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
	if (qp->rcq != qp->scq)
		erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
}

int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
				 struct erdma_mod_qp_params_rocev2 *params,
				 int attr_mask)
{
	struct erdma_dev *dev = to_edev(qp->ibqp.device);
	int ret;

	ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
	if (ret)
		return ret;

	if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
		qp->attrs.rocev2.state = params->state;

	if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
		qp->attrs.rocev2.qkey = params->qkey;

	if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
		qp->attrs.rocev2.dst_qpn = params->dst_qpn;

	if (attr_mask & ERDMA_QPA_ROCEV2_AV)
		memcpy(&qp->attrs.rocev2.av, &params->av,
		       sizeof(struct erdma_av));

	if (rdma_is_kernel_res(&qp->ibqp.res) &&
	    params->state == ERDMA_QPS_ROCEV2_RESET)
		erdma_reset_qp(qp);

	if (rdma_is_kernel_res(&qp->ibqp.res) &&
	    params->state == ERDMA_QPS_ROCEV2_ERROR) {
		qp->flags |= ERDMA_QP_IN_FLUSHING;
		mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));
	}

	return 0;
}

static void erdma_qp_safe_free(struct kref *ref)
{
	struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);

	complete(&qp->safe_free);
}

void erdma_qp_put(struct erdma_qp *qp)
{
	WARN_ON(kref_read(&qp->ref) < 1);
	kref_put(&qp->ref, erdma_qp_safe_free);
}

void erdma_qp_get(struct erdma_qp *qp)
{
	kref_get(&qp->ref);
}

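/*
 * Copy inline payload from the WR's SG list directly into the SQE,
 * wrapping across SQ entries (SQEBBs) as needed. Returns the number of
 * bytes copied, or -EINVAL if the payload exceeds ERDMA_MAX_INLINE.
 */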
static int fill_inline_data(struct erdma_qp *qp,
			    const struct ib_send_wr *send_wr, u16 wqe_idx,
			    u32 sgl_offset, __le32 *length_field)
{
	u32 remain_size, copy_size, data_off, bytes = 0;
	char *data;
	int i = 0;

	wqe_idx += (sgl_offset >> SQEBB_SHIFT);
	sgl_offset &= (SQEBB_SIZE - 1);
	data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
			       SQEBB_SHIFT);

	while (i < send_wr->num_sge) {
		bytes += send_wr->sg_list[i].length;
		if (bytes > (int)ERDMA_MAX_INLINE)
			return -EINVAL;

		remain_size = send_wr->sg_list[i].length;
		data_off = 0;

		while (1) {
			copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);

			memcpy(data + sgl_offset,
			       (void *)(uintptr_t)send_wr->sg_list[i].addr +
				       data_off,
			       copy_size);
			remain_size -= copy_size;
			data_off += copy_size;
			sgl_offset += copy_size;
			wqe_idx += (sgl_offset >> SQEBB_SHIFT);
			sgl_offset &= (SQEBB_SIZE - 1);

			data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
					       qp->attrs.sq_size, SQEBB_SHIFT);
			if (!remain_size)
				break;
		}

		i++;
	}
	*length_field = cpu_to_le32(bytes);

	return bytes;
}

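/*
 * Copy the WR's SG entries into the SGL area of the SQE, wrapping across
 * SQEBBs as needed, and report the total data length via *length_field.
 */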
static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
		    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
{
	int i = 0;
	u32 bytes = 0;
	char *sgl;

	if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
		return -EINVAL;

	if (sgl_offset & 0xF)
		return -EINVAL;

	while (i < send_wr->num_sge) {
		wqe_idx += (sgl_offset >> SQEBB_SHIFT);
		sgl_offset &= (SQEBB_SIZE - 1);
		sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
				      qp->attrs.sq_size, SQEBB_SHIFT);

		bytes += send_wr->sg_list[i].length;
		memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
		       sizeof(struct ib_sge));

		sgl_offset += sizeof(struct ib_sge);
		i++;
	}

	*length_field = cpu_to_le32(bytes);
	return 0;
}

static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
			     const struct ib_send_wr *wr, u32 *hw_op)
{
	u32 op = ERDMA_OP_SEND;

	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		op = ERDMA_OP_SEND_WITH_IMM;
		sqe->imm_data = wr->ex.imm_data;
	} else if (wr->opcode == IB_WR_SEND_WITH_INV) {
		op = ERDMA_OP_SEND_WITH_INV;
		sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
	}

	*hw_op = op;
}

static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
			     const struct ib_send_wr *wr, u32 *hw_op)
{
	const struct ib_ud_wr *uwr = ud_wr(wr);
	struct erdma_ah *ah = to_eah(uwr->ah);
	u32 op = ERDMA_OP_SEND;

	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		op = ERDMA_OP_SEND_WITH_IMM;
		sqe->imm_data = wr->ex.imm_data;
	}

	*hw_op = op;

	sqe->ahn = cpu_to_le32(ah->ahn);
	sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
	/* Not allowed to send control qkey */
	if (uwr->remote_qkey & 0x80000000)
		sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
	else
		sqe->qkey = cpu_to_le32(uwr->remote_qkey);
}

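/*
 * Build one SQE (possibly spanning several SQEBBs) for the given send WR
 * and advance the SQ producer index accordingly.
 */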
static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
			      const struct ib_send_wr *send_wr)
{
	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
	u32 idx = *pi & (qp->attrs.sq_size - 1);
	enum ib_wr_opcode op = send_wr->opcode;
	struct erdma_send_sqe_rc *rc_send_sqe;
	struct erdma_send_sqe_ud *ud_send_sqe;
	struct erdma_atomic_sqe *atomic_sqe;
	struct erdma_readreq_sqe *read_sqe;
	struct erdma_reg_mr_sqe *regmr_sge;
	struct erdma_write_sqe *write_sqe;
	struct ib_rdma_wr *rdma_wr;
	struct erdma_sge *sge;
	__le32 *length_field;
	struct erdma_mr *mr;
	u64 wqe_hdr, *entry;
	u32 attrs;
	int ret;

	if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
	    send_wr->opcode != IB_WR_SEND_WITH_IMM)
		return -EINVAL;

	entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
				SQEBB_SHIFT);

	/* Clear the SQE header section. */
	*entry = 0;

	qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
	flags = send_wr->send_flags;
	wqe_hdr = FIELD_PREP(
		ERDMA_SQE_HDR_CE_MASK,
		((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
			      flags & IB_SEND_SOLICITED ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
			      flags & IB_SEND_FENCE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
			      flags & IB_SEND_INLINE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));

	switch (op) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		hw_op = ERDMA_OP_WRITE;
		if (op == IB_WR_RDMA_WRITE_WITH_IMM)
			hw_op = ERDMA_OP_WRITE_WITH_IMM;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		write_sqe = (struct erdma_write_sqe *)entry;

		write_sqe->imm_data = send_wr->ex.imm_data;
		write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
		write_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
		write_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));

		length_field = &write_sqe->length;
		wqe_size = sizeof(struct erdma_write_sqe);
		sgl_offset = wqe_size;
		break;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		read_sqe = (struct erdma_readreq_sqe *)entry;
		if (unlikely(send_wr->num_sge != 1))
			return -EINVAL;
		hw_op = ERDMA_OP_READ;
		if (op == IB_WR_RDMA_READ_WITH_INV) {
			hw_op = ERDMA_OP_READ_WITH_INV;
			read_sqe->invalid_stag =
				cpu_to_le32(send_wr->ex.invalidate_rkey);
		}

		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
		read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
		read_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
		read_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));

		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
				      qp->attrs.sq_size, SQEBB_SHIFT);
		sge->addr = cpu_to_le64(rdma_wr->remote_addr);
		sge->key = cpu_to_le32(rdma_wr->rkey);
		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
		wqe_size = sizeof(struct erdma_readreq_sqe) +
			   send_wr->num_sge * sizeof(struct ib_sge);

		goto out;
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
		if (qp->ibqp.qp_type == IB_QPT_RC) {
			rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
			init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
			length_field = &rc_send_sqe->length;
			wqe_size = sizeof(struct erdma_send_sqe_rc);
		} else {
			ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
			init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
			length_field = &ud_send_sqe->length;
			wqe_size = sizeof(struct erdma_send_sqe_ud);
		}

		sgl_offset = wqe_size;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		break;
	case IB_WR_REG_MR:
		wqe_hdr |=
			FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		mr = to_emr(reg_wr(send_wr)->mr);

		mr->access = ERDMA_MR_ACC_LR |
			     to_erdma_access_flags(reg_wr(send_wr)->access);
		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
		attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
				   mr->mem.mtt_nents);

		if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
			/* Copy the MTT entries inline into the SQE to speed up registration. */
			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
					       qp->attrs.sq_size, SQEBB_SHIFT),
			       mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
				   MTT_SIZE(mr->mem.mtt_nents);
		} else {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
			wqe_size = sizeof(struct erdma_reg_mr_sqe);
		}

		regmr_sge->attrs = cpu_to_le32(attrs);
		goto out;
	case IB_WR_LOCAL_INV:
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
				      ERDMA_OP_LOCAL_INV);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
		wqe_size = sizeof(struct erdma_reg_mr_sqe);
		goto out;
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		atomic_sqe = (struct erdma_atomic_sqe *)entry;
		if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
					      ERDMA_OP_ATOMIC_CAS);
			atomic_sqe->fetchadd_swap_data =
				cpu_to_le64(atomic_wr(send_wr)->swap);
			atomic_sqe->cmp_data =
				cpu_to_le64(atomic_wr(send_wr)->compare_add);
		} else {
			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
					      ERDMA_OP_ATOMIC_FAA);
			atomic_sqe->fetchadd_swap_data =
				cpu_to_le64(atomic_wr(send_wr)->compare_add);
		}

		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
				      qp->attrs.sq_size, SQEBB_SHIFT);
		sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
		sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
		sge++;

		sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
		sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
		sge->length = cpu_to_le32(send_wr->sg_list[0].length);

		wqe_size = sizeof(*atomic_sqe);
		goto out;
	default:
		return -EOPNOTSUPP;
	}

	if (flags & IB_SEND_INLINE) {
		ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
				       length_field);
		if (ret < 0)
			return -EINVAL;
		wqe_size += ret;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
	} else {
		ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
		if (ret)
			return -EINVAL;
		wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
				      send_wr->num_sge);
	}

out:
	wqebb_cnt = SQEBB_COUNT(wqe_size);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
	*pi += wqebb_cnt;
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);

	*entry = wqe_hdr;

	return 0;
}

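/* Record the new SQ producer index in the doorbell record and ring the SQ doorbell. */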
static void kick_sq_db(struct erdma_qp *qp, u16 pi)
{
	u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
		      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);

	*(u64 *)qp->kern_qp.sq_dbrec = db_data;
	writeq(db_data, qp->kern_qp.hw_sq_db);
}

int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
		    const struct ib_send_wr **bad_send_wr)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;
	const struct ib_send_wr *wr = send_wr;
	unsigned long flags;
	u16 sq_pi;

	if (!send_wr)
		return -EINVAL;

	spin_lock_irqsave(&qp->lock, flags);
	sq_pi = qp->kern_qp.sq_pi;

	while (wr) {
		if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
			ret = -ENOMEM;
			*bad_send_wr = send_wr;
			break;
		}

		ret = erdma_push_one_sqe(qp, &sq_pi, wr);
		if (ret) {
			*bad_send_wr = wr;
			break;
		}
		qp->kern_qp.sq_pi = sq_pi;
		kick_sq_db(qp, sq_pi);

		wr = wr->next;
	}
	spin_unlock_irqrestore(&qp->lock, flags);

	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));

	return ret;
}

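/*
 * Build a single RQE for the receive WR (at most one SGE is supported),
 * ring the RQ doorbell, and record the WR ID for completion handling.
 */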
static int erdma_post_recv_one(struct erdma_qp *qp,
			       const struct ib_recv_wr *recv_wr)
{
	struct erdma_rqe *rqe =
		get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
				qp->attrs.rq_size, RQE_SHIFT);

	rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
	rqe->qpn = cpu_to_le32(QP_ID(qp));

	if (recv_wr->num_sge == 0) {
		rqe->length = 0;
	} else if (recv_wr->num_sge == 1) {
		rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
		rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
		rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
	} else {
		return -EINVAL;
	}

	*(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
	writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);

	qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
		recv_wr->wr_id;
	qp->kern_qp.rq_pi++;

	return 0;
}

int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
		    const struct ib_recv_wr **bad_recv_wr)
{
	const struct ib_recv_wr *wr = recv_wr;
	struct erdma_qp *qp = to_eqp(ibqp);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->lock, flags);

	while (wr) {
		ret = erdma_post_recv_one(qp, wr);
		if (ret) {
			*bad_recv_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->lock, flags);

	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));

	return ret;
}