// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <[email protected]> */
/*          Kai Shen <[email protected]> */
/* Copyright (c) 2020-2021, Alibaba Group */
/* Authors: Bernard Metzler <[email protected]> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include "erdma_cm.h"
#include "erdma_verbs.h"

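/*
 * Called when the underlying LLP (TCP) connection is closed: move the QP
 * towards CLOSING/IDLE as appropriate and drop the reference to the
 * connection endpoint (CEP), if any.
 */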
void erdma_qp_llp_close(struct erdma_qp *qp)
{
	struct erdma_mod_qp_params_iwarp params;

	down_write(&qp->state_lock);

	switch (qp->attrs.iwarp.state) {
	case ERDMA_QPS_IWARP_RTS:
	case ERDMA_QPS_IWARP_RTR:
	case ERDMA_QPS_IWARP_IDLE:
	case ERDMA_QPS_IWARP_TERMINATE:
		params.state = ERDMA_QPS_IWARP_CLOSING;
		erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
		break;
	case ERDMA_QPS_IWARP_CLOSING:
		qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
		break;
	default:
		break;
	}

	if (qp->cep) {
		erdma_cep_put(qp->cep);
		qp->cep = NULL;
	}

	up_write(&qp->state_lock);
}

struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
{
	struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);

	if (qp)
		return &qp->ibqp;

	return NULL;
}

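/*
 * Transition an iWarp QP to RTS: collect the connection addresses, MPA
 * cookie and TCP sequence numbers from the attached CEP/socket and push
 * them to the device via a MODIFY_QP command. For passive (server) QPs the
 * send sequence is advanced to reserve room for the pending MPA response.
 */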
static int
erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
			     struct erdma_mod_qp_params_iwarp *params,
			     enum erdma_qpa_mask_iwarp mask)
{
	int ret;
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;
	struct tcp_sock *tp;
	struct erdma_cep *cep = qp->cep;
	struct sockaddr_storage local_addr, remote_addr;

	if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
		return -EINVAL;

	if (!(mask & ERDMA_QPA_IWARP_MPA))
		return -EINVAL;

	if (!(mask & ERDMA_QPA_IWARP_CC))
		params->cc = qp->attrs.cc;

	ret = getname_local(cep->sock, &local_addr);
	if (ret < 0)
		return ret;

	ret = getname_peer(cep->sock, &remote_addr);
	if (ret < 0)
		return ret;

	tp = tcp_sk(qp->cep->sock->sk);

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
	req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
	req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
	req.dport = to_sockaddr_in(remote_addr).sin_port;
	req.sport = to_sockaddr_in(local_addr).sin_port;

	req.send_nxt = tp->snd_nxt;
	/* rsvd tcp seq for mpa-rsp in server. */
	if (params->qp_type == ERDMA_QP_PASSIVE)
		req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
	req.recv_nxt = tp->rcv_nxt;

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
				  true);
	if (ret)
		return ret;

	if (mask & ERDMA_QPA_IWARP_IRD)
		qp->attrs.irq_size = params->irq_size;

	if (mask & ERDMA_QPA_IWARP_ORD)
		qp->attrs.orq_size = params->orq_size;

	if (mask & ERDMA_QPA_IWARP_CC)
		qp->attrs.cc = params->cc;

	qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;

	return 0;
}

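/*
 * Move an iWarp QP into a non-operational state (CLOSING/TERMINATE/ERROR)
 * by issuing a MODIFY_QP command, then record the new state in software.
 */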
static int
erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
			      struct erdma_mod_qp_params_iwarp *params,
			      enum erdma_qpa_mask_iwarp mask)
{
	struct erdma_dev *dev = qp->dev;
	struct erdma_cmdq_modify_qp_req req;
	int ret;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
				  true);
	if (ret)
		return ret;

	qp->attrs.iwarp.state = params->state;

	return 0;
}

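/*
 * iWarp QP state machine: check the requested transition against the
 * current state, issue the corresponding hardware command, drop the
 * connection when leaving RTS, and schedule a flush of outstanding WRs
 * for kernel QPs that enter an error state.
 */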
int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
				struct erdma_mod_qp_params_iwarp *params,
				int mask)
{
	bool need_reflush = false;
	int drop_conn, ret = 0;

	if (!mask)
		return 0;

	if (!(mask & ERDMA_QPA_IWARP_STATE))
		return 0;

	switch (qp->attrs.iwarp.state) {
	case ERDMA_QPS_IWARP_IDLE:
	case ERDMA_QPS_IWARP_RTR:
		if (params->state == ERDMA_QPS_IWARP_RTS) {
			ret = erdma_modify_qp_state_to_rts(qp, params, mask);
		} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
			need_reflush = true;
			if (qp->cep) {
				erdma_cep_put(qp->cep);
				qp->cep = NULL;
			}
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
		}
		break;
	case ERDMA_QPS_IWARP_RTS:
		drop_conn = 0;

		if (params->state == ERDMA_QPS_IWARP_CLOSING ||
		    params->state == ERDMA_QPS_IWARP_TERMINATE ||
		    params->state == ERDMA_QPS_IWARP_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
			drop_conn = 1;
			need_reflush = true;
		}

		if (drop_conn)
			erdma_qp_cm_drop(qp);

		break;
	case ERDMA_QPS_IWARP_TERMINATE:
		if (params->state == ERDMA_QPS_IWARP_ERROR)
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
		break;
	case ERDMA_QPS_IWARP_CLOSING:
		if (params->state == ERDMA_QPS_IWARP_IDLE) {
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
		} else if (params->state == ERDMA_QPS_IWARP_ERROR) {
			ret = erdma_modify_qp_state_to_stop(qp, params, mask);
			qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
		} else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
			return -ECONNABORTED;
		}
		break;
	default:
		break;
	}

	if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
		qp->flags |= ERDMA_QP_IN_FLUSHING;
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));
	}

	return ret;
}

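/* Build and post a MODIFY_QP command for a RoCEv2 QP from the given attribute mask. */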
static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
				struct erdma_mod_qp_params_rocev2 *params,
				enum erdma_qpa_mask_rocev2 attr_mask)
{
	struct erdma_cmdq_mod_qp_req_rocev2 req;

	memset(&req, 0, sizeof(req));

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
				CMDQ_OPCODE_MODIFY_QP);

	req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));

	if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
				       params->state);

	if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
		req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
				      params->dst_qpn);

	if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
		req.qkey = params->qkey;

	if (attr_mask & ERDMA_QPA_ROCEV2_AV)
		erdma_set_av_cfg(&req.av_cfg, &params->av);

	if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
		req.sq_psn = params->sq_psn;

	if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
		req.rq_psn = params->rq_psn;

	req.attr_mask = attr_mask;

	return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
				   NULL, true);
}

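/*
 * Reset a kernel QP to its initial state: rewind producer/consumer indices,
 * clear the WR tables and queue buffers, and purge any CQEs of this QP that
 * are still sitting in the send/receive CQs.
 */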
static void erdma_reset_qp(struct erdma_qp *qp)
{
	qp->kern_qp.sq_pi = 0;
	qp->kern_qp.sq_ci = 0;
	qp->kern_qp.rq_pi = 0;
	qp->kern_qp.rq_ci = 0;
	memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
	memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
	memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
	memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
	erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
	if (qp->rcq != qp->scq)
		erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
}

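/*
 * RoCEv2 QP state change: issue the MODIFY_QP command first, then update the
 * software copies of the changed attributes. Kernel QPs are additionally
 * reset on a transition to RESET and flushed on a transition to ERROR.
 */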
int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
				 struct erdma_mod_qp_params_rocev2 *params,
				 int attr_mask)
{
	struct erdma_dev *dev = to_edev(qp->ibqp.device);
	int ret;

	ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
	if (ret)
		return ret;

	if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
		qp->attrs.rocev2.state = params->state;

	if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
		qp->attrs.rocev2.qkey = params->qkey;

	if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
		qp->attrs.rocev2.dst_qpn = params->dst_qpn;

	if (attr_mask & ERDMA_QPA_ROCEV2_AV)
		memcpy(&qp->attrs.rocev2.av, &params->av,
		       sizeof(struct erdma_av));

	if (rdma_is_kernel_res(&qp->ibqp.res) &&
	    params->state == ERDMA_QPS_ROCEV2_RESET)
		erdma_reset_qp(qp);

	if (rdma_is_kernel_res(&qp->ibqp.res) &&
	    params->state == ERDMA_QPS_ROCEV2_ERROR) {
		qp->flags |= ERDMA_QP_IN_FLUSHING;
		mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));
	}

	return 0;
}

static void erdma_qp_safe_free(struct kref *ref)
{
	struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);

	complete(&qp->safe_free);
}

void erdma_qp_put(struct erdma_qp *qp)
{
	WARN_ON(kref_read(&qp->ref) < 1);
	kref_put(&qp->ref, erdma_qp_safe_free);
}

void erdma_qp_get(struct erdma_qp *qp)
{
	kref_get(&qp->ref);
}

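/*
 * Copy inline payload from the WR's SG list directly into the SQE buffer,
 * wrapping across SQ entries (SQEBBs) as needed. Returns the number of
 * bytes copied, or -EINVAL if the payload exceeds ERDMA_MAX_INLINE.
 */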
static int fill_inline_data(struct erdma_qp *qp,
			    const struct ib_send_wr *send_wr, u16 wqe_idx,
			    u32 sgl_offset, __le32 *length_field)
{
	u32 remain_size, copy_size, data_off, bytes = 0;
	char *data;
	int i = 0;

	wqe_idx += (sgl_offset >> SQEBB_SHIFT);
	sgl_offset &= (SQEBB_SIZE - 1);
	data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
			       SQEBB_SHIFT);

	while (i < send_wr->num_sge) {
		bytes += send_wr->sg_list[i].length;
		if (bytes > (int)ERDMA_MAX_INLINE)
			return -EINVAL;

		remain_size = send_wr->sg_list[i].length;
		data_off = 0;

		while (1) {
			copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);

			memcpy(data + sgl_offset,
			       (void *)(uintptr_t)send_wr->sg_list[i].addr +
				       data_off,
			       copy_size);
			remain_size -= copy_size;
			data_off += copy_size;
			sgl_offset += copy_size;
			wqe_idx += (sgl_offset >> SQEBB_SHIFT);
			sgl_offset &= (SQEBB_SIZE - 1);

			data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
					       qp->attrs.sq_size, SQEBB_SHIFT);
			if (!remain_size)
				break;
		}

		i++;
	}
	*length_field = cpu_to_le32(bytes);

	return bytes;
}

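/*
 * Append the WR's SG list after the SQE header, wrapping across SQEBBs as
 * needed, and store the total payload length in the SQE's length field.
 */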
static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
		    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
{
	int i = 0;
	u32 bytes = 0;
	char *sgl;

	if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
		return -EINVAL;

	if (sgl_offset & 0xF)
		return -EINVAL;

	while (i < send_wr->num_sge) {
		wqe_idx += (sgl_offset >> SQEBB_SHIFT);
		sgl_offset &= (SQEBB_SIZE - 1);
		sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
				      qp->attrs.sq_size, SQEBB_SHIFT);

		bytes += send_wr->sg_list[i].length;
		memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
		       sizeof(struct ib_sge));

		sgl_offset += sizeof(struct ib_sge);
		i++;
	}

	*length_field = cpu_to_le32(bytes);
	return 0;
}

static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
			     const struct ib_send_wr *wr, u32 *hw_op)
{
	u32 op = ERDMA_OP_SEND;

	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		op = ERDMA_OP_SEND_WITH_IMM;
		sqe->imm_data = wr->ex.imm_data;
	} else if (wr->opcode == IB_WR_SEND_WITH_INV) {
		op = ERDMA_OP_SEND_WITH_INV;
		sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
	}

	*hw_op = op;
}

static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
			     const struct ib_send_wr *wr, u32 *hw_op)
{
	const struct ib_ud_wr *uwr = ud_wr(wr);
	struct erdma_ah *ah = to_eah(uwr->ah);
	u32 op = ERDMA_OP_SEND;

	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
		op = ERDMA_OP_SEND_WITH_IMM;
		sqe->imm_data = wr->ex.imm_data;
	}

	*hw_op = op;

	sqe->ahn = cpu_to_le32(ah->ahn);
	sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
	/* Not allowed to send control qkey */
	if (uwr->remote_qkey & 0x80000000)
		sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
	else
		sqe->qkey = cpu_to_le32(uwr->remote_qkey);
}

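/*
 * Translate one ib_send_wr into an erdma SQE at the current producer index:
 * build the SQE header, fill the opcode-specific section, then append either
 * inline data or an SG list. Advances *pi by the number of SQEBBs consumed.
 */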
static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
			      const struct ib_send_wr *send_wr)
{
	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
	u32 idx = *pi & (qp->attrs.sq_size - 1);
	enum ib_wr_opcode op = send_wr->opcode;
	struct erdma_send_sqe_rc *rc_send_sqe;
	struct erdma_send_sqe_ud *ud_send_sqe;
	struct erdma_atomic_sqe *atomic_sqe;
	struct erdma_readreq_sqe *read_sqe;
	struct erdma_reg_mr_sqe *regmr_sge;
	struct erdma_write_sqe *write_sqe;
	struct ib_rdma_wr *rdma_wr;
	struct erdma_sge *sge;
	__le32 *length_field;
	struct erdma_mr *mr;
	u64 wqe_hdr, *entry;
	u32 attrs;
	int ret;

	if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
	    send_wr->opcode != IB_WR_SEND_WITH_IMM)
		return -EINVAL;

	entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
				SQEBB_SHIFT);

	/* Clear the SQE header section. */
	*entry = 0;

	qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
	flags = send_wr->send_flags;
	wqe_hdr = FIELD_PREP(
		ERDMA_SQE_HDR_CE_MASK,
		((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
			      flags & IB_SEND_SOLICITED ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
			      flags & IB_SEND_FENCE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
			      flags & IB_SEND_INLINE ? 1 : 0);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));

	switch (op) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		hw_op = ERDMA_OP_WRITE;
		if (op == IB_WR_RDMA_WRITE_WITH_IMM)
			hw_op = ERDMA_OP_WRITE_WITH_IMM;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		write_sqe = (struct erdma_write_sqe *)entry;

		write_sqe->imm_data = send_wr->ex.imm_data;
		write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
		write_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
		write_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));

		length_field = &write_sqe->length;
		wqe_size = sizeof(struct erdma_write_sqe);
		sgl_offset = wqe_size;
		break;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		read_sqe = (struct erdma_readreq_sqe *)entry;
		if (unlikely(send_wr->num_sge != 1))
			return -EINVAL;
		hw_op = ERDMA_OP_READ;
		if (op == IB_WR_RDMA_READ_WITH_INV) {
			hw_op = ERDMA_OP_READ_WITH_INV;
			read_sqe->invalid_stag =
				cpu_to_le32(send_wr->ex.invalidate_rkey);
		}

		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
		read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
		read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
		read_sqe->sink_to_l =
			cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
		read_sqe->sink_to_h =
			cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));

		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
				      qp->attrs.sq_size, SQEBB_SHIFT);
		sge->addr = cpu_to_le64(rdma_wr->remote_addr);
		sge->key = cpu_to_le32(rdma_wr->rkey);
		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
		wqe_size = sizeof(struct erdma_readreq_sqe) +
			   send_wr->num_sge * sizeof(struct ib_sge);

		goto out;
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
		if (qp->ibqp.qp_type == IB_QPT_RC) {
			rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
			init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
			length_field = &rc_send_sqe->length;
			wqe_size = sizeof(struct erdma_send_sqe_rc);
		} else {
			ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
			init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
			length_field = &ud_send_sqe->length;
			wqe_size = sizeof(struct erdma_send_sqe_ud);
		}

		sgl_offset = wqe_size;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
		break;
	case IB_WR_REG_MR:
		wqe_hdr |=
			FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		mr = to_emr(reg_wr(send_wr)->mr);

		mr->access = ERDMA_MR_ACC_LR |
			     to_erdma_access_flags(reg_wr(send_wr)->access);
		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
		attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
				   mr->mem.mtt_nents);

		if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
			/* Copy SGLs to SQE content to accelerate */
			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
					       qp->attrs.sq_size, SQEBB_SHIFT),
			       mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
				   MTT_SIZE(mr->mem.mtt_nents);
		} else {
			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
			wqe_size = sizeof(struct erdma_reg_mr_sqe);
		}

		regmr_sge->attrs = cpu_to_le32(attrs);
		goto out;
	case IB_WR_LOCAL_INV:
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
				      ERDMA_OP_LOCAL_INV);
		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
		wqe_size = sizeof(struct erdma_reg_mr_sqe);
		goto out;
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		atomic_sqe = (struct erdma_atomic_sqe *)entry;
		if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
					      ERDMA_OP_ATOMIC_CAS);
			atomic_sqe->fetchadd_swap_data =
				cpu_to_le64(atomic_wr(send_wr)->swap);
			atomic_sqe->cmp_data =
				cpu_to_le64(atomic_wr(send_wr)->compare_add);
		} else {
			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
					      ERDMA_OP_ATOMIC_FAA);
			atomic_sqe->fetchadd_swap_data =
				cpu_to_le64(atomic_wr(send_wr)->compare_add);
		}

		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
				      qp->attrs.sq_size, SQEBB_SHIFT);
		sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
		sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
		sge++;

		sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
		sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
		sge->length = cpu_to_le32(send_wr->sg_list[0].length);

		wqe_size = sizeof(*atomic_sqe);
		goto out;
	default:
		return -EOPNOTSUPP;
	}

	if (flags & IB_SEND_INLINE) {
		ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
				       length_field);
		if (ret < 0)
			return -EINVAL;
		wqe_size += ret;
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
	} else {
		ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
		if (ret)
			return -EINVAL;
		wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
				      send_wr->num_sge);
	}

out:
	wqebb_cnt = SQEBB_COUNT(wqe_size);
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
	*pi += wqebb_cnt;
	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);

	*entry = wqe_hdr;

	return 0;
}

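/* Update the SQ doorbell record and ring the hardware SQ doorbell. */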
static void kick_sq_db(struct erdma_qp *qp, u16 pi)
{
	u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
		      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);

	*(u64 *)qp->kern_qp.sq_dbrec = db_data;
	writeq(db_data, qp->kern_qp.hw_sq_db);
}

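/*
 * verbs post_send: push each WR onto the SQ under the QP lock, ringing the
 * doorbell per WR, and stop at the first WR that fails or does not fit.
 */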
int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
		    const struct ib_send_wr **bad_send_wr)
{
	struct erdma_qp *qp = to_eqp(ibqp);
	int ret = 0;
	const struct ib_send_wr *wr = send_wr;
	unsigned long flags;
	u16 sq_pi;

	if (!send_wr)
		return -EINVAL;

	spin_lock_irqsave(&qp->lock, flags);
	sq_pi = qp->kern_qp.sq_pi;

	while (wr) {
		if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
			ret = -ENOMEM;
			*bad_send_wr = send_wr;
			break;
		}

		ret = erdma_push_one_sqe(qp, &sq_pi, wr);
		if (ret) {
			*bad_send_wr = wr;
			break;
		}
		qp->kern_qp.sq_pi = sq_pi;
		kick_sq_db(qp, sq_pi);

		wr = wr->next;
	}
	spin_unlock_irqrestore(&qp->lock, flags);

	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));

	return ret;
}

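/*
 * Build one RQE (at most a single SGE is supported), publish it through the
 * RQ doorbell record and doorbell, and record the WR id for completion
 * handling.
 */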
static int erdma_post_recv_one(struct erdma_qp *qp,
			       const struct ib_recv_wr *recv_wr)
{
	struct erdma_rqe *rqe =
		get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
				qp->attrs.rq_size, RQE_SHIFT);

	rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
	rqe->qpn = cpu_to_le32(QP_ID(qp));

	if (recv_wr->num_sge == 0) {
		rqe->length = 0;
	} else if (recv_wr->num_sge == 1) {
		rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
		rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
		rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
	} else {
		return -EINVAL;
	}

	*(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
	writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);

	qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
		recv_wr->wr_id;
	qp->kern_qp.rq_pi++;

	return 0;
}

int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
		    const struct ib_recv_wr **bad_recv_wr)
{
	const struct ib_recv_wr *wr = recv_wr;
	struct erdma_qp *qp = to_eqp(ibqp);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->lock, flags);

	while (wr) {
		ret = erdma_post_recv_one(qp, wr);
		if (ret) {
			*bad_recv_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->lock, flags);

	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
				 usecs_to_jiffies(100));

	return ret;
}