// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2, given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = _s; \
	u64 _res; \
	_res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
	_res; \
})

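/*
 * Translation entries are passed to the device in 16-byte octwords. Each
 * 8-byte MTT entry occupies half an octword, so halve the page count,
 * rounding up.
 */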
static int get_octo_len(u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	int npages;

	npages = ALIGN(len, page_size) >> page_shift;
	return (npages + 1) / 2;
}

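/*
 * The 5-bit access mode is split across two mkey context fields:
 * bits 1:0 go in access_mode_1_0, bits 4:2 in access_mode_4_2.
 */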
static void mlx5_set_access_mode(void *mkc, int mode)
{
	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}

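/*
 * Walk the DMA-mapped scatterlist and emit one MTT entry per
 * (1 << mr->log_size)-byte block, up to mr->nsg entries in total.
 */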
static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
	struct scatterlist *sg;
	int nsg = mr->nsg;
	u64 dma_addr;
	u64 dma_len;
	int j = 0;
	int i;

	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
		     nsg && dma_len;
		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
			mtt[j++] = cpu_to_be64(dma_addr);
	}
}

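/*
 * Mailbox memory for an async CREATE_MKEY command. 'out' is the first
 * member so the containing structure can be recovered from the out
 * pointer with container_of() once the command completes.
 */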
struct mlx5_create_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(create_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
	__be64 mtt[];
};

struct mlx5_destroy_mkey_mem {
	u8 out[MLX5_ST_SZ_BYTES(destroy_mkey_out)];
	u8 in[MLX5_ST_SZ_BYTES(destroy_mkey_in)];
};

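/*
 * Prepare a CREATE_MKEY command for a direct mkey: an MTT-based key whose
 * translation entries point at the DMA addresses backing
 * [mr->start, mr->end), with read/write permissions taken from the vhost
 * map permissions.
 */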
static void fill_create_direct_mr(struct mlx5_vdpa_dev *mvdev,
				  struct mlx5_vdpa_direct_mr *mr,
				  struct mlx5_create_mkey_mem *mem)
{
	void *in = &mem->in;
	void *mkc;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(mr->end - mr->start, mr->log_size));
	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));

	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
}

static void create_direct_mr_end(struct mlx5_vdpa_dev *mvdev,
				 struct mlx5_vdpa_direct_mr *mr,
				 struct mlx5_create_mkey_mem *mem)
{
	u32 mkey_index = MLX5_GET(create_mkey_out, mem->out, mkey_index);

	mr->mr = mlx5_idx_to_mkey(mkey_index);
}

static void fill_destroy_direct_mr(struct mlx5_vdpa_dev *mvdev,
				   struct mlx5_vdpa_direct_mr *mr,
				   struct mlx5_destroy_mkey_mem *mem)
{
	void *in = &mem->in;

	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mr->mr));
}

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	if (!mr->mr)
		return;

	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
	return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

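/*
 * The list of direct MRs is built in ascending address order, so the range
 * covered by the indirect mkey runs from the first entry's start to the
 * last entry's end.
 */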
static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_START_ADDR;

	return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
	struct mlx5_vdpa_direct_mr *s;
	struct mlx5_vdpa_direct_mr *e;

	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
	if (!s)
		return MLX5_VDPA_INVALID_LEN;

	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

	return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
	return (u32)size;
}

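/*
 * Build the KLM array for the indirect mkey. A direct MR that starts where
 * the previous one ended is entered as-is; a hole before a direct MR is
 * first covered with null-mkey KLMs of at most MAX_KLM_SIZE bytes each,
 * which is what the 'again' loop does.
 */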
static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_klm *klmarr;
	struct mlx5_klm *klm;
	bool first = true;
	u64 preve;
	int i;

	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	i = 0;
	list_for_each_entry(dmr, &mkey->head, list) {
again:
		klm = &klmarr[i++];
		if (first) {
			preve = dmr->start;
			first = false;
		}

		if (preve == dmr->start) {
			klm->key = cpu_to_be32(dmr->mr);
			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
			preve = dmr->end;
		} else {
			u64 bcount = min_t(u64, dmr->start - preve, MAX_KLM_SIZE);

			klm->key = cpu_to_be32(mvdev->res.null_mkey);
			klm->bcount = cpu_to_be32(klm_bcount(bcount));
			preve += bcount;

			goto again;
		}
	}
}

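/* Each KLM entry is 16 bytes; the entry count is padded to a multiple of four. */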
static int klm_byte_size(int nklms)
{
	return 16 * ALIGN(nklms, 4);
}

#define MLX5_VDPA_MTT_ALIGN 16

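/*
 * Create all direct mkeys in one go: build a CREATE_MKEY command per direct
 * MR, execute the whole batch asynchronously, then collect the resulting
 * mkeys (or errors) per command.
 */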
static int create_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_async_cmd *cmds;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_create_mkey_mem *cmd_mem;
		int mttlen, mttcount;

		mttlen = roundup(MLX5_ST_SZ_BYTES(mtt) * dmr->nsg, MLX5_VDPA_MTT_ALIGN);
		mttcount = mttlen / sizeof(cmd_mem->mtt[0]);
		cmd_mem = kvcalloc(1, struct_size(cmd_mem, mtt, mttcount), GFP_KERNEL);
		if (!cmd_mem) {
			err = -ENOMEM;
			goto done;
		}

		cmds[i].out = cmd_mem->out;
		cmds[i].outlen = sizeof(cmd_mem->out);
		cmds[i].in = cmd_mem->in;
		cmds[i].inlen = struct_size(cmd_mem, mtt, mttcount);

		fill_create_direct_mr(mvdev, dmr, cmd_mem);

		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey creation for direct mrs: %d\n", err);
		goto done;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmd->out, struct mlx5_create_mkey_mem, out);

		if (!cmd->err) {
			create_direct_mr_end(mvdev, dmr, cmd_mem);
		} else {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error creating MTT mkey [0x%llx, 0x%llx]: %d\n",
				dmr->start, dmr->end, cmd->err);
		}
	}

done:
	for (i = i - 1; i >= 0; i--) {
		struct mlx5_create_mkey_mem *cmd_mem;

		cmd_mem = container_of(cmds[i].out, struct mlx5_create_mkey_mem, out);
		kvfree(cmd_mem);
	}

	kvfree(cmds);
	return err;
}

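/* Scope-based cleanup helpers (__free()) for the async destroy path below. */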
DEFINE_FREE(free_cmds, struct mlx5_vdpa_async_cmd *, kvfree(_T))
DEFINE_FREE(free_cmd_mem, struct mlx5_destroy_mkey_mem *, kvfree(_T))

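/*
 * Asynchronous counterpart of create_direct_keys(): issue one DESTROY_MKEY
 * command per direct MR as a single batch.
 */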
static int destroy_direct_keys(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_destroy_mkey_mem *cmd_mem __free(free_cmd_mem) = NULL;
	struct mlx5_vdpa_async_cmd *cmds __free(free_cmds) = NULL;
	struct mlx5_vdpa_direct_mr *dmr;
	int err = 0;
	int i = 0;

	cmds = kvcalloc(mr->num_directs, sizeof(*cmds), GFP_KERNEL);
	cmd_mem = kvcalloc(mr->num_directs, sizeof(*cmd_mem), GFP_KERNEL);
	if (!cmds || !cmd_mem)
		return -ENOMEM;

	list_for_each_entry(dmr, &mr->head, list) {
		cmds[i].out = cmd_mem[i].out;
		cmds[i].outlen = sizeof(cmd_mem[i].out);
		cmds[i].in = cmd_mem[i].in;
		cmds[i].inlen = sizeof(cmd_mem[i].in);
		fill_destroy_direct_mr(mvdev, dmr, &cmd_mem[i]);
		i++;
	}

	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, mr->num_directs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing MTT mkey deletion for direct mrs: %d\n", err);
		return err;
	}

	i = 0;
	list_for_each_entry(dmr, &mr->head, list) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i++];

		dmr->mr = 0;
		if (cmd->err) {
			err = err ? err : cmd->err;
			mlx5_vdpa_err(mvdev, "error deleting MTT mkey [0x%llx, 0x%llx]: %d\n",
				dmr->start, dmr->end, cmd->err);
		}
	}

	return err;
}

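/*
 * Create the indirect (KLM) mkey that stitches the direct mkeys together,
 * covering the range from the first direct MR's start to the last one's end.
 */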
static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen;
	void *mkc;
	void *in;
	int err;
	u64 start;
	u64 len;

	start = indir_start_addr(mr);
	len = indir_len(mr);
	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
		return -EINVAL;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET64(mkc, mkc, start_addr, start);
	MLX5_SET64(mkc, mkc, len, len);
	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
	fill_indir(mvdev, mr, in);
	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	kfree(in);
	return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
}

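/*
 * Build and DMA-map a scatterlist describing the physical memory behind
 * [mr->start, mr->end). A single MTT page size is used for the whole
 * region: the largest power of two not exceeding the GCD of all mapping
 * lengths, so every mapping chunk can be described with fixed-size entries.
 */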
static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
			 struct vhost_iotlb *iotlb)
{
	struct vhost_iotlb_map *map;
	unsigned long lgcd = 0;
	int log_entity_size;
	unsigned long size;
	int err;
	struct page *pg;
	unsigned int nsg;
	int sglen;
	u64 pa, offset;
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->vdev.dma_dev;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		size = maplen(map, mr);
		lgcd = gcd(lgcd, size);
	}
	log_entity_size = ilog2(lgcd);

	sglen = 1 << log_entity_size;
	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
	if (err)
		return err;

	sg = mr->sg_head.sgl;
	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
		offset = mr->start > map->start ? mr->start - map->start : 0;
		pa = map->addr + offset;
		paend = map->addr + offset + maplen(map, mr);
		for (; pa < paend; pa += sglen) {
			pg = pfn_to_page(__phys_to_pfn(pa));
			if (!sg) {
				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
					       map->start, map->last + 1);
				err = -ENOMEM;
				goto err_map;
			}
			sg_set_page(sg, pg, sglen, 0);
			sg = sg_next(sg);
			if (!sg)
				goto done;
		}
	}
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!mr->nent) {
		err = -ENOMEM;
		goto err_map;
	}

	return 0;

err_map:
	sg_free_table(&mr->sg_head);
	return err;
}

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
	struct device *dma = mvdev->vdev.dma_dev;

	destroy_direct_mr(mvdev, mr);
	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	sg_free_table(&mr->sg_head);
}

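/*
 * Split a contiguous range with uniform permissions into direct MRs of at
 * most MAX_KLM_SIZE bytes each, so that every direct MR fits in a single
 * KLM entry of the indirect mkey.
 */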
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
			    struct mlx5_vdpa_mr *mr,
			    u64 start,
			    u64 size,
			    u8 perm,
			    struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	LIST_HEAD(tmp);
	u64 st;
	u64 sz;
	int err;

	st = start;
	while (size) {
		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
		if (!dmr) {
			err = -ENOMEM;
			goto err_alloc;
		}

		dmr->start = st;
		dmr->end = st + sz;
		dmr->perm = perm;
		err = map_direct_mr(mvdev, dmr, iotlb);
		if (err) {
			kfree(dmr);
			goto err_alloc;
		}

		list_add_tail(&dmr->list, &tmp);
		size -= sz;
		mr->num_directs++;
		mr->num_klms++;
		st += sz;
	}
	list_splice_tail(&tmp, &mr->head);
	return 0;

err_alloc:
	/* Release the not-yet-spliced entries on the local chain; anything
	 * already on mr->head is released by the caller's error path.
	 */
	list_for_each_entry_safe(dmr, n, &tmp, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
			  struct mlx5_vdpa_mr *mr,
			  struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;
	struct vhost_iotlb_map *map;
	u32 pperm = U16_MAX;
	u64 last = U64_MAX;
	u64 ps = U64_MAX;
	u64 pe = U64_MAX;
	u64 start = 0;
	int err = 0;
	int nnuls;

	INIT_LIST_HEAD(&mr->head);
	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
	     map = vhost_iotlb_itree_next(map, start, last)) {
		start = map->start;
		if (pe == map->start && pperm == map->perm) {
			pe = map->last + 1;
		} else {
			if (ps != U64_MAX) {
				if (pe < map->start) {
					/* We have a hole in the map. Check how
					 * many null keys are required to fill it.
					 */
					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
								       LOG_MAX_KLM_SIZE);
					mr->num_klms += nnuls;
				}
				err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
				if (err)
					goto err_chain;
			}
			ps = map->start;
			pe = map->last + 1;
			pperm = map->perm;
		}
	}
	err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
	if (err)
		goto err_chain;

	err = create_direct_keys(mvdev, mr);
	if (err)
		goto err_chain;

	/* Create the memory key that defines the guest's address space. This
	 * memory key refers to the direct keys that contain the MTT
	 * translations.
	 */
	err = create_indirect_key(mvdev, mr);
	if (err)
		goto err_chain;

	mr->user_mr = true;
	return 0;

err_chain:
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
	return err;
}

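/*
 * With no iotlb to translate through, create a physical-address mkey that
 * spans the entire address space (length64), i.e. an identity DMA mapping.
 */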
static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
	if (!err)
		mr->user_mr = false;

	kfree(in);
	return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
}

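/*
 * Copy all mappings from src into dst. A NULL src denotes an identity
 * mapping of the whole address space (the DMA MR case).
 */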
static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{
	struct vhost_iotlb_map *map;
	u64 start = 0, last = ULLONG_MAX;
	int err;

	if (dst == src)
		return -EINVAL;

	if (!src) {
		err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
		return err;
	}

	for (map = vhost_iotlb_itree_first(src, start, last); map;
		map = vhost_iotlb_itree_next(map, start, last)) {
		err = vhost_iotlb_add_range(dst, map->start, map->last,
					    map->addr, map->perm);
		if (err)
			return err;
	}
	return 0;
}

static void prune_iotlb(struct vhost_iotlb *iotlb)
{
	vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_direct_mr *dmr;
	struct mlx5_vdpa_direct_mr *n;

	destroy_indirect_key(mvdev, mr);
	destroy_direct_keys(mvdev, mr);
	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
		list_del_init(&dmr->list);
		unmap_direct_mr(mvdev, dmr);
		kfree(dmr);
	}
}

static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
	if (WARN_ON(!mr))
		return;

	if (mr->user_mr)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	vhost_iotlb_free(mr->iotlb);

	list_del(&mr->mr_list);

	kfree(mr);
}

/* There can be multiple .set_map() operations in quick succession.
 * This large delay is a simple way to prevent the MR cleanup from blocking
 * .set_map() MR creation in this scenario.
 */
#define MLX5_VDPA_MR_GC_TRIGGER_MS 2000

static void mlx5_vdpa_mr_gc_handler(struct work_struct *work)
{
	struct mlx5_vdpa_mr_resources *mres;
	struct mlx5_vdpa_mr *mr, *tmp;
	struct mlx5_vdpa_dev *mvdev;

	mres = container_of(work, struct mlx5_vdpa_mr_resources, gc_dwork_ent.work);

	if (atomic_read(&mres->shutdown)) {
		mutex_lock(&mres->lock);
	} else if (!mutex_trylock(&mres->lock)) {
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
		return;
	}

	mvdev = container_of(mres, struct mlx5_vdpa_dev, mres);

	list_for_each_entry_safe(mr, tmp, &mres->mr_gc_list_head, mr_list) {
		_mlx5_vdpa_destroy_mr(mvdev, mr);
	}

	mutex_unlock(&mres->lock);
}

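/*
 * Drop a reference; on the last one, move the MR to the GC list and let the
 * delayed worker destroy it. Must be called with mres->lock held.
 */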
static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	if (!mr)
		return;

	if (refcount_dec_and_test(&mr->refcount)) {
		list_move_tail(&mr->mr_list, &mres->mr_gc_list_head);
		queue_delayed_work(mres->wq_gc, &mres->gc_dwork_ent,
				   msecs_to_jiffies(MLX5_VDPA_MR_GC_TRIGGER_MS));
	}
}

void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_put_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
			      struct mlx5_vdpa_mr *mr)
{
	if (!mr)
		return;

	refcount_inc(&mr->refcount);
}

void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
		      struct mlx5_vdpa_mr *mr)
{
	mutex_lock(&mvdev->mres.lock);
	_mlx5_vdpa_get_mr(mvdev, mr);
	mutex_unlock(&mvdev->mres.lock);
}

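/*
 * Install new_mr as the MR serving @asid, dropping the reference held on
 * the previous one. new_mr is installed without taking an additional
 * reference; the caller's reference is handed over to the table.
 */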
void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
			 struct mlx5_vdpa_mr *new_mr,
			 unsigned int asid)
{
	struct mlx5_vdpa_mr *old_mr = mvdev->mres.mr[asid];

	mutex_lock(&mvdev->mres.lock);

	_mlx5_vdpa_put_mr(mvdev, old_mr);
	mvdev->mres.mr[asid] = new_mr;

	mutex_unlock(&mvdev->mres.lock);
}

static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mutex_lock(&mvdev->mres.lock);

	list_for_each_entry(mr, &mvdev->mres.mr_list_head, mr_list) {
		mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
				      "mr: %p, mkey: 0x%x, refcount: %u\n",
				       mr, mr->mkey, refcount_read(&mr->refcount));
	}

	mutex_unlock(&mvdev->mres.lock);
}

void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev)
{
	if (!mvdev->res.valid)
		return;

	for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
		mlx5_vdpa_update_mr(mvdev, NULL, i);

	prune_iotlb(mvdev->cvq.iotlb);

	mlx5_vdpa_show_mr_leaks(mvdev);
}

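/*
 * Create a user MR when an iotlb is given, or an identity DMA MR otherwise,
 * and keep a private duplicate of the iotlb with the MR. Called with
 * mres->lock held.
 */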
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *mr,
				struct vhost_iotlb *iotlb)
{
	int err;

	if (iotlb)
		err = create_user_mr(mvdev, mr, iotlb);
	else
		err = create_dma_mr(mvdev, mr);

	if (err)
		return err;

	mr->iotlb = vhost_iotlb_alloc(0, 0);
	if (!mr->iotlb) {
		err = -ENOMEM;
		goto err_mr;
	}

	err = dup_iotlb(mr->iotlb, iotlb);
	if (err)
		goto err_iotlb;

	list_add_tail(&mr->mr_list, &mvdev->mres.mr_list_head);

	return 0;

err_iotlb:
	vhost_iotlb_free(mr->iotlb);

err_mr:
	if (iotlb)
		destroy_user_mr(mvdev, mr);
	else
		destroy_dma_mr(mvdev, mr);

	return err;
}

struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
					 struct vhost_iotlb *iotlb)
{
	struct mlx5_vdpa_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mvdev->mres.lock);
	err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
	mutex_unlock(&mvdev->mres.lock);

	if (err)
		goto out_err;

	refcount_set(&mr->refcount, 1);

	return mr;

out_err:
	kfree(mr);
	return ERR_PTR(err);
}

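/*
 * Refresh the CVQ's private iotlb copy, but only when the CVQ group is
 * owned by the given ASID.
 */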
int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
				struct vhost_iotlb *iotlb,
				unsigned int asid)
{
	int err;

	if (mvdev->mres.group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
		return 0;

	spin_lock(&mvdev->cvq.iommu_lock);

	prune_iotlb(mvdev->cvq.iotlb);
	err = dup_iotlb(mvdev->cvq.iotlb, iotlb);

	spin_unlock(&mvdev->cvq.iommu_lock);

	return err;
}

int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr *mr;

	mr = mlx5_vdpa_create_mr(mvdev, NULL);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	mlx5_vdpa_update_mr(mvdev, mr, 0);

	return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
}

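/*
 * Drop the MR serving @asid. For ASID 0 on devices with the umem_uid_0
 * capability, a fresh identity DMA MR is created in its place; otherwise
 * only the CVQ iotlb is refreshed.
 */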
int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
	if (asid >= MLX5_VDPA_NUM_AS)
		return -EINVAL;

	mlx5_vdpa_update_mr(mvdev, NULL, asid);

	if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
	} else {
		mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
	}

	return 0;
}

int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	mres->wq_gc = create_singlethread_workqueue("mlx5_vdpa_mr_gc");
	if (!mres->wq_gc)
		return -ENOMEM;

	INIT_DELAYED_WORK(&mres->gc_dwork_ent, mlx5_vdpa_mr_gc_handler);

	mutex_init(&mres->lock);

	INIT_LIST_HEAD(&mres->mr_list_head);
	INIT_LIST_HEAD(&mres->mr_gc_list_head);

	return 0;
}

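/*
 * Setting the shutdown flag makes the GC handler block on the lock instead
 * of rescheduling itself, so flush_delayed_work() can reliably drain any
 * pending destruction work before the workqueue goes away.
 */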
void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
	struct mlx5_vdpa_mr_resources *mres = &mvdev->mres;

	atomic_set(&mres->shutdown, 1);

	flush_delayed_work(&mres->gc_dwork_ent);
	destroy_workqueue(mres->wq_gc);
	mres->wq_gc = NULL;
	mutex_destroy(&mres->lock);
}