xref: /aosp_15_r20/external/ublksrv/include/ublksrv_priv.h (revision 94c4a1e103eb1715230460aab379dff275992c20)
1*94c4a1e1SFrank Piva // SPDX-License-Identifier: MIT or GPL-2.0-only
2*94c4a1e1SFrank Piva 
3*94c4a1e1SFrank Piva #if !defined(UBLKSRV_INTERNAL_H_)
4*94c4a1e1SFrank Piva #error "Never include <ublksrv_priv.h> directly; use <ublksrv.h> instead."
5*94c4a1e1SFrank Piva #endif
6*94c4a1e1SFrank Piva 
7*94c4a1e1SFrank Piva #ifndef UBLKSRV_PRIVATE_INC_H
8*94c4a1e1SFrank Piva #define UBLKSRV_PRIVATE_INC_H
9*94c4a1e1SFrank Piva 
10*94c4a1e1SFrank Piva #include <unistd.h>
11*94c4a1e1SFrank Piva #include <stdlib.h>
12*94c4a1e1SFrank Piva #include <stddef.h>
13*94c4a1e1SFrank Piva #include <signal.h>
14*94c4a1e1SFrank Piva #include <limits.h>
15*94c4a1e1SFrank Piva #include <pthread.h>
16*94c4a1e1SFrank Piva #include <string.h>
17*94c4a1e1SFrank Piva #include <sys/types.h>
18*94c4a1e1SFrank Piva #include <sys/eventfd.h>
19*94c4a1e1SFrank Piva #include <sys/epoll.h>
20*94c4a1e1SFrank Piva #include <sys/poll.h>
21*94c4a1e1SFrank Piva 
22*94c4a1e1SFrank Piva #include "ublk_cmd.h"
23*94c4a1e1SFrank Piva #include "ublksrv_utils.h"
24*94c4a1e1SFrank Piva #include "ublksrv.h"
25*94c4a1e1SFrank Piva #include "ublksrv_aio.h"
26*94c4a1e1SFrank Piva 
27*94c4a1e1SFrank Piva 
/* todo: replace the hardcoded name with /dev/char/maj:min */
29*94c4a1e1SFrank Piva #define UBLKC_DEV	"/dev/ublkc"
30*94c4a1e1SFrank Piva #define UBLKC_PATH_MAX	32
31*94c4a1e1SFrank Piva 
32*94c4a1e1SFrank Piva #ifdef __cplusplus
33*94c4a1e1SFrank Piva extern "C" {
34*94c4a1e1SFrank Piva #endif
35*94c4a1e1SFrank Piva 
/*
 * Control-side state for one ublk device: the io_uring used for control
 * commands, the device info negotiated with the driver, and the selected
 * target type/ops.
 */
struct ublksrv_ctrl_dev {
	/* ring used to issue ublk control commands to the driver */
	struct io_uring ring;

	int ctrl_fd;		/* control-device fd the ctrl commands target */
	unsigned bs_shift;	/* block size shift (presumably log2 of block size) */
	struct ublksrv_ctrl_dev_info  dev_info;

	/* target type name and its operation table */
	const char *tgt_type;
	const struct ublksrv_tgt_type *tgt_ops;

	/*
	 * default is UBLKSRV_RUN_DIR but can be specified via command line,
	 * pid file will be saved there
	 */
	const char *run_dir;

	/* the two views overlap; tgt_argc == -1 selects the recovery view */
	union {
		/* used by ->init_tgt() */
		struct {
			int tgt_argc;
			char **tgt_argv;
		};
		/* used by ->recovery_tgt(), tgt_argc == -1 */
		struct {
			int padding;
			const char *recovery_jbuf;
		};
	};

	/* per-queue cpu affinity storage; see ublksrv_get_queue_affinity() */
	cpu_set_t *queues_cpuset;

	unsigned long reserved[4];	/* reserved for future extension */
};
69*94c4a1e1SFrank Piva 
/* Per-tag io state the daemon tracks for each inflight request slot. */
struct ublk_io {
	char *buf_addr;		/* io data buffer for this tag */

/* values for ->flags below: lifecycle of one io slot */
#define UBLKSRV_NEED_FETCH_RQ		(1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP	(1UL << 1)
#define UBLKSRV_IO_FREE			(1UL << 2)
#define UBLKSRV_NEED_GET_DATA		(1UL << 3)
	unsigned int flags;

	/* result is updated after all target ios are done */
	unsigned int result;

	struct ublk_io_data  data;
};
84*94c4a1e1SFrank Piva 
85*94c4a1e1SFrank Piva struct _ublksrv_queue {
86*94c4a1e1SFrank Piva 	/********** part of API, can't change ************/
87*94c4a1e1SFrank Piva 	int q_id;
88*94c4a1e1SFrank Piva 	int q_depth;
89*94c4a1e1SFrank Piva 
90*94c4a1e1SFrank Piva 	struct io_uring *ring_ptr;
91*94c4a1e1SFrank Piva 	struct _ublksrv_dev *dev;
92*94c4a1e1SFrank Piva 	void *private_data;
93*94c4a1e1SFrank Piva 	/*************************************************/
94*94c4a1e1SFrank Piva 
95*94c4a1e1SFrank Piva 	/*
96*94c4a1e1SFrank Piva 	 * Read only by ublksrv daemon, setup via mmap on /dev/ublkcN.
97*94c4a1e1SFrank Piva 	 *
98*94c4a1e1SFrank Piva 	 * ublksrv_io_desc(iod) is stored in this buffer, so iod
99*94c4a1e1SFrank Piva 	 * can be retrieved by request's tag directly.
100*94c4a1e1SFrank Piva 	 *
101*94c4a1e1SFrank Piva 	 * ublksrv writes the iod into this array, and notify ublksrv daemon
102*94c4a1e1SFrank Piva 	 * by issued io_uring command beforehand.
103*94c4a1e1SFrank Piva 	 * */
104*94c4a1e1SFrank Piva 	char *io_cmd_buf;
105*94c4a1e1SFrank Piva 	char *io_buf;
106*94c4a1e1SFrank Piva 
107*94c4a1e1SFrank Piva 	unsigned cmd_inflight, tgt_io_inflight;	//obsolete
108*94c4a1e1SFrank Piva 	unsigned state;
109*94c4a1e1SFrank Piva 
110*94c4a1e1SFrank Piva 	/* eventfd */
111*94c4a1e1SFrank Piva 	int efd;
112*94c4a1e1SFrank Piva 
113*94c4a1e1SFrank Piva 	/* cache tgt ops */
114*94c4a1e1SFrank Piva 	const struct ublksrv_tgt_type *tgt_ops;
115*94c4a1e1SFrank Piva 
116*94c4a1e1SFrank Piva 	/*
117*94c4a1e1SFrank Piva 	 * ring for submit io command to ublk driver, can only be issued
118*94c4a1e1SFrank Piva 	 * from ublksrv daemon.
119*94c4a1e1SFrank Piva 	 *
120*94c4a1e1SFrank Piva 	 * ring depth == dev_info->queue_depth.
121*94c4a1e1SFrank Piva 	 */
122*94c4a1e1SFrank Piva 	struct io_uring ring;
123*94c4a1e1SFrank Piva 
124*94c4a1e1SFrank Piva 	unsigned  tid;
125*94c4a1e1SFrank Piva 
126*94c4a1e1SFrank Piva #define UBLKSRV_NR_CTX_BATCH 4
127*94c4a1e1SFrank Piva 	int nr_ctxs;
128*94c4a1e1SFrank Piva 	struct ublksrv_aio_ctx *ctxs[UBLKSRV_NR_CTX_BATCH];
129*94c4a1e1SFrank Piva 
130*94c4a1e1SFrank Piva 	unsigned long reserved[8];
131*94c4a1e1SFrank Piva 
132*94c4a1e1SFrank Piva 	struct ublk_io ios[0];
133*94c4a1e1SFrank Piva };
134*94c4a1e1SFrank Piva 
/*
 * Library-internal per-device daemon state; the leading 'tgt' member must
 * keep the public struct ublksrv_dev layout (see local_to_tdev()).
 */
struct _ublksrv_dev {
	//keep same with ublksrv_dev
	/********** part of API, can't change ************/
	struct ublksrv_tgt_info tgt;
	/************************************************/

	struct _ublksrv_queue *__queues[MAX_NR_HW_QUEUES];
	char	*io_buf_start;	/* base of the io buffer area for all queues */
	pthread_t *thread;	/* worker thread(s); presumably one per queue — confirm at setup */
	int cdev_fd;		/* fd of the /dev/ublkcN char device */
	int pid_file_fd;	/* fd kept open on the pid file, see create_pid_file() */

	const struct ublksrv_ctrl_dev *ctrl_dev;
	void	*target_data;	/* opaque per-target data */
	int	cq_depth;	/* CQ depth used when setting up queue rings */
	int	pad;
 
	/* reserved isn't necessary any more */
	unsigned long reserved[3];
};
155*94c4a1e1SFrank Piva 
/*
 * Cast helpers between public API types (struct ublksrv_queue /
 * struct ublksrv_dev) and the internal representations; valid because the
 * internal structs start with the API-visible layout (see the
 * "part of API, can't change" sections above).
 */
#define local_to_tq(q)	((struct ublksrv_queue *)(q))
#define tq_to_local(q)	((struct _ublksrv_queue *)(q))

#define local_to_tdev(d)	((struct ublksrv_dev *)(d))
#define tdev_to_local(d)	((struct _ublksrv_dev *)(d))
161*94c4a1e1SFrank Piva 
ublk_is_unprivileged(const struct ublksrv_ctrl_dev * ctrl_dev)162*94c4a1e1SFrank Piva static inline bool ublk_is_unprivileged(const struct ublksrv_ctrl_dev *ctrl_dev)
163*94c4a1e1SFrank Piva {
164*94c4a1e1SFrank Piva 	return !!(ctrl_dev->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV);
165*94c4a1e1SFrank Piva }
166*94c4a1e1SFrank Piva 
ublksrv_get_queue_affinity(const struct ublksrv_ctrl_dev * dev,int qid)167*94c4a1e1SFrank Piva static inline cpu_set_t *ublksrv_get_queue_affinity(
168*94c4a1e1SFrank Piva 		const struct ublksrv_ctrl_dev *dev, int qid)
169*94c4a1e1SFrank Piva {
170*94c4a1e1SFrank Piva 	unsigned char *buf = (unsigned char *)&dev->queues_cpuset[qid];
171*94c4a1e1SFrank Piva 
172*94c4a1e1SFrank Piva 	if (ublk_is_unprivileged(dev))
173*94c4a1e1SFrank Piva 		return (cpu_set_t *)&buf[UBLKC_PATH_MAX];
174*94c4a1e1SFrank Piva 
175*94c4a1e1SFrank Piva 	return &dev->queues_cpuset[qid];
176*94c4a1e1SFrank Piva }
177*94c4a1e1SFrank Piva 
ublksrv_mark_io_done(struct ublk_io * io,int res)178*94c4a1e1SFrank Piva static inline void ublksrv_mark_io_done(struct ublk_io *io, int res)
179*94c4a1e1SFrank Piva {
180*94c4a1e1SFrank Piva 	/*
181*94c4a1e1SFrank Piva 	 * mark io done by target, so that ->ubq_daemon can commit its
182*94c4a1e1SFrank Piva 	 * result and fetch new request via io_uring command.
183*94c4a1e1SFrank Piva 	 */
184*94c4a1e1SFrank Piva 	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
185*94c4a1e1SFrank Piva 
186*94c4a1e1SFrank Piva 	io->result = res;
187*94c4a1e1SFrank Piva }
188*94c4a1e1SFrank Piva 
ublksrv_io_done(struct ublk_io * io)189*94c4a1e1SFrank Piva static inline bool ublksrv_io_done(struct ublk_io *io)
190*94c4a1e1SFrank Piva {
191*94c4a1e1SFrank Piva 	return io->flags & UBLKSRV_IO_FREE;
192*94c4a1e1SFrank Piva }
193*94c4a1e1SFrank Piva 
194*94c4a1e1SFrank Piva int create_pid_file(const char *pid_file, int *pid_fd);
195*94c4a1e1SFrank Piva 
196*94c4a1e1SFrank Piva extern void ublksrv_build_cpu_str(char *buf, int len, const cpu_set_t *cpuset);
197*94c4a1e1SFrank Piva 
198*94c4a1e1SFrank Piva /* bit63: target io, bit62: eventfd data */
/*
 * Build the cqe user_data tag for eventfd completions: sets both bit 63
 * (target io) and bit 62 (eventfd data), so it matches is_target_io()
 * and is_eventfd_io().
 *
 * Note: parameter list is (void) — an empty () would declare an
 * unprototyped function in C prior to C23.
 */
static inline __u64 build_eventfd_data(void)
{
	return 0x3ULL << 62;
}
203*94c4a1e1SFrank Piva 
/* True if this cqe user_data carries the eventfd tag (bit 62). */
static inline int is_eventfd_io(__u64 user_data)
{
	return !!(user_data & (1ULL << 62));
}
208*94c4a1e1SFrank Piva 
/* True if this cqe user_data carries the target-io tag (bit 63). */
static inline int is_target_io(__u64 user_data)
{
	return !!(user_data & (1ULL << 63));
}
213*94c4a1e1SFrank Piva 
214*94c4a1e1SFrank Piva /* two helpers for setting up io_uring */
ublksrv_setup_ring(struct io_uring * r,int depth,int cq_depth,unsigned flags)215*94c4a1e1SFrank Piva static inline int ublksrv_setup_ring(struct io_uring *r, int depth,
216*94c4a1e1SFrank Piva 		int cq_depth, unsigned flags)
217*94c4a1e1SFrank Piva {
218*94c4a1e1SFrank Piva 	struct io_uring_params p;
219*94c4a1e1SFrank Piva 
220*94c4a1e1SFrank Piva 	memset(&p, 0, sizeof(p));
221*94c4a1e1SFrank Piva 	p.flags = flags | IORING_SETUP_CQSIZE;
222*94c4a1e1SFrank Piva 	p.cq_entries = cq_depth;
223*94c4a1e1SFrank Piva 
224*94c4a1e1SFrank Piva 	return io_uring_queue_init_params(depth, r, &p);
225*94c4a1e1SFrank Piva }
226*94c4a1e1SFrank Piva 
ublksrv_uring_get_sqe(struct io_uring * r,int idx,bool is_sqe128)227*94c4a1e1SFrank Piva static inline struct io_uring_sqe *ublksrv_uring_get_sqe(struct io_uring *r,
228*94c4a1e1SFrank Piva 		int idx, bool is_sqe128)
229*94c4a1e1SFrank Piva {
230*94c4a1e1SFrank Piva 	if (is_sqe128)
231*94c4a1e1SFrank Piva 		return  &r->sq.sqes[idx << 1];
232*94c4a1e1SFrank Piva 	return  &r->sq.sqes[idx];
233*94c4a1e1SFrank Piva }
234*94c4a1e1SFrank Piva 
ublksrv_get_sqe_cmd(struct io_uring_sqe * sqe)235*94c4a1e1SFrank Piva static inline void *ublksrv_get_sqe_cmd(struct io_uring_sqe *sqe)
236*94c4a1e1SFrank Piva {
237*94c4a1e1SFrank Piva 	return (void *)&sqe->addr3;
238*94c4a1e1SFrank Piva }
239*94c4a1e1SFrank Piva 
/*
 * Store the 32-bit command opcode into the sqe at the offset of ->off and
 * zero the following 32 bits (presumably the cmd_op field of the uring_cmd
 * SQE layout — confirm against io_uring.h). Deliberately written as two
 * 32-bit stores; collapsing into one 64-bit assignment would change the
 * byte placement on big-endian hosts.
 */
static inline void ublksrv_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}
247*94c4a1e1SFrank Piva 
/*
 * ublksrv_aio_ctx is used to offload IO handling from the ublksrv io_uring
 * context.
 *
 * ublksrv_aio_ctx is bound to one single pthread which has to belong
 * to the same process as the io_uring where the IO originates, so we can
 * support handling IO from multiple queues of the same device. By
 * default, ublksrv_aio_ctx handles device-wide aio or io offloading,
 * unless UBLKSRV_AIO_QUEUE_WIDE is set.
 *
 * Meanwhile ublksrv_aio_ctx can be created per each queue, and then it
 * only handles IOs from that queue.
 *
 * The final io handling in the aio context depends on the user's
 * implementation; both sync and async IO submission are supported.
 */
struct ublksrv_aio_ctx {
	/* list of ios submitted to this context */
	struct ublksrv_aio_list submit;

	/* per-queue completion list */
	struct ublksrv_aio_list *complete;

	int efd;		//eventfd, for waking us up

	/* when set, this ctx serves a single queue rather than the device */
#define UBLKSRV_AIO_QUEUE_WIDE	(1U << 0)
	unsigned int		flags;
	bool dead;		/* presumably set on shutdown — confirm at call sites */

	const struct ublksrv_dev *dev;

	/* opaque data owned by the user's aio implementation */
	void *ctx_data;

	unsigned long reserved[8];
};
282*94c4a1e1SFrank Piva 
283*94c4a1e1SFrank Piva #ifdef __cplusplus
284*94c4a1e1SFrank Piva }
285*94c4a1e1SFrank Piva #endif
286*94c4a1e1SFrank Piva 
287*94c4a1e1SFrank Piva #endif
288