xref: /aosp_15_r20/external/e2fsprogs/lib/ext2fs/unix_io.c (revision 6a54128f25917bfc36a8a6e9d722c04a0b4641b6)
1 /*
2  * unix_io.c --- This is the Unix (well, really POSIX) implementation
3  *	of the I/O manager.
4  *
5  * Implements a one-block write-through cache.
6  *
7  * Includes support for Windows NT support under Cygwin.
8  *
9  * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
10  *	2002 by Theodore Ts'o.
11  *
12  * %Begin-Header%
13  * This file may be redistributed under the terms of the GNU Library
14  * General Public License, version 2.
15  * %End-Header%
16  */
17 
18 #if !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
19 #define _XOPEN_SOURCE 600
20 #define _DARWIN_C_SOURCE
21 #define _FILE_OFFSET_BITS 64
22 #ifndef _LARGEFILE_SOURCE
23 #define _LARGEFILE_SOURCE
24 #endif
25 #ifndef _LARGEFILE64_SOURCE
26 #define _LARGEFILE64_SOURCE
27 #endif
28 #ifndef _GNU_SOURCE
29 #define _GNU_SOURCE
30 #endif
31 #endif
32 
33 #include "config.h"
34 #include <stdio.h>
35 #include <string.h>
36 #if HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 #if HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #include <fcntl.h>
43 #include <time.h>
44 #ifdef __linux__
45 #include <sys/utsname.h>
46 #endif
47 #if HAVE_SYS_TYPES_H
48 #include <sys/types.h>
49 #endif
50 #ifdef HAVE_SYS_IOCTL_H
51 #include <sys/ioctl.h>
52 #endif
53 #ifdef HAVE_SYS_MOUNT_H
54 #include <sys/mount.h>
55 #endif
56 #ifdef HAVE_SYS_PRCTL_H
57 #include <sys/prctl.h>
58 #else
59 #define PR_GET_DUMPABLE 3
60 #endif
61 #if HAVE_SYS_STAT_H
62 #include <sys/stat.h>
63 #endif
64 #if HAVE_SYS_RESOURCE_H
65 #include <sys/resource.h>
66 #endif
67 #if HAVE_LINUX_FALLOC_H
68 #include <linux/falloc.h>
69 #endif
70 #ifdef HAVE_PTHREAD
71 #include <pthread.h>
72 #endif
73 
74 #if defined(__linux__) && defined(_IO) && !defined(BLKROGET)
75 #define BLKROGET   _IO(0x12, 94) /* Get read-only status (0 = read_write).  */
76 #endif
77 
78 #undef ALIGN_DEBUG
79 
80 #include "ext2_fs.h"
81 #include "ext2fs.h"
82 #include "ext2fsP.h"
83 
84 /*
85  * For checking structure magic numbers...
86  */
87 
88 #define EXT2_CHECK_MAGIC(struct, code) \
89 	  if ((struct)->magic != (code)) return (code)
90 
91 struct unix_cache {
92 	char			*buf;
93 	unsigned long long	block;
94 	int			access_time;
95 	unsigned		dirty:1;
96 	unsigned		in_use:1;
97 	unsigned		write_err:1;
98 };
99 
100 #define CACHE_SIZE 8
101 #define WRITE_DIRECT_SIZE 4	/* Must be smaller than CACHE_SIZE */
102 #define READ_DIRECT_SIZE 4	/* Should be smaller than CACHE_SIZE */
103 
104 struct unix_private_data {
105 	int	magic;
106 	int	dev;
107 	int	flags;
108 	int	align;
109 	int	access_time;
110 	ext2_loff_t offset;
111 	struct unix_cache cache[CACHE_SIZE];
112 	void	*bounce;
113 	struct struct_io_stats io_stats;
114 #ifdef HAVE_PTHREAD
115 	pthread_mutex_t cache_mutex;
116 	pthread_mutex_t bounce_mutex;
117 	pthread_mutex_t stats_mutex;
118 #endif
119 };
120 
121 #define IS_ALIGNED(n, align) ((((uintptr_t) n) & \
122 			       ((uintptr_t) ((align)-1))) == 0)
123 
124 typedef enum lock_kind {
125 	CACHE_MTX, BOUNCE_MTX, STATS_MTX
126 } kind_t;
127 
128 #ifdef HAVE_PTHREAD
get_mutex(struct unix_private_data * data,kind_t kind)129 static inline pthread_mutex_t *get_mutex(struct unix_private_data *data,
130 					 kind_t kind)
131 {
132 	if (data->flags & IO_FLAG_THREADS) {
133 		switch (kind) {
134 		case CACHE_MTX:
135 			return &data->cache_mutex;
136 		case BOUNCE_MTX:
137 			return &data->bounce_mutex;
138 		case STATS_MTX:
139 			return &data->stats_mutex;
140 		}
141 	}
142 	return NULL;
143 }
144 #endif
145 
mutex_lock(struct unix_private_data * data,kind_t kind)146 static inline void mutex_lock(struct unix_private_data *data, kind_t kind)
147 {
148 #ifdef HAVE_PTHREAD
149 	pthread_mutex_t *mtx = get_mutex(data,kind);
150 
151 	if (mtx)
152 		pthread_mutex_lock(mtx);
153 #endif
154 }
155 
mutex_unlock(struct unix_private_data * data,kind_t kind)156 static inline void mutex_unlock(struct unix_private_data *data, kind_t kind)
157 {
158 #ifdef HAVE_PTHREAD
159 	pthread_mutex_t *mtx = get_mutex(data,kind);
160 
161 	if (mtx)
162 		pthread_mutex_unlock(mtx);
163 #endif
164 }
165 
unix_get_stats(io_channel channel,io_stats * stats)166 static errcode_t unix_get_stats(io_channel channel, io_stats *stats)
167 {
168 	errcode_t	retval = 0;
169 
170 	struct unix_private_data *data;
171 
172 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
173 	data = (struct unix_private_data *) channel->private_data;
174 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
175 
176 	if (stats) {
177 		mutex_lock(data, STATS_MTX);
178 		*stats = &data->io_stats;
179 		mutex_unlock(data, STATS_MTX);
180 	}
181 
182 	return retval;
183 }
184 
safe_getenv(const char * arg)185 static char *safe_getenv(const char *arg)
186 {
187 	if ((getuid() != geteuid()) || (getgid() != getegid()))
188 		return NULL;
189 #ifdef HAVE_PRCTL
190 	if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0) == 0)
191 		return NULL;
192 #else
193 #if (defined(linux) && defined(SYS_prctl))
194 	if (syscall(SYS_prctl, PR_GET_DUMPABLE, 0, 0, 0, 0) == 0)
195 		return NULL;
196 #endif
197 #endif
198 
199 #if defined(HAVE_SECURE_GETENV)
200 	return secure_getenv(arg);
201 #elif defined(HAVE___SECURE_GETENV)
202 	return __secure_getenv(arg);
203 #else
204 	return getenv(arg);
205 #endif
206 }
207 
208 /*
209  * Here are the raw I/O functions
210  */
raw_read_blk(io_channel channel,struct unix_private_data * data,unsigned long long block,int count,void * bufv)211 static errcode_t raw_read_blk(io_channel channel,
212 			      struct unix_private_data *data,
213 			      unsigned long long block,
214 			      int count, void *bufv)
215 {
216 	errcode_t	retval;
217 	ssize_t		size;
218 	ext2_loff_t	location;
219 	int		actual = 0;
220 	unsigned char	*buf = bufv;
221 	ssize_t		really_read = 0;
222 	unsigned long long aligned_blk;
223 	int		align_size, offset;
224 
225 	size = (count < 0) ? -count : (ext2_loff_t) count * channel->block_size;
226 	mutex_lock(data, STATS_MTX);
227 	data->io_stats.bytes_read += size;
228 	mutex_unlock(data, STATS_MTX);
229 	location = ((ext2_loff_t) block * channel->block_size) + data->offset;
230 
231 	if (data->flags & IO_FLAG_FORCE_BOUNCE)
232 		goto bounce_read;
233 
234 #ifdef HAVE_PREAD64
235 	/* Try an aligned pread */
236 	if ((channel->align == 0) ||
237 	    (IS_ALIGNED(buf, channel->align) &&
238 	     IS_ALIGNED(location, channel->align) &&
239 	     IS_ALIGNED(size, channel->align))) {
240 		actual = pread64(data->dev, buf, size, location);
241 		if (actual == size)
242 			return 0;
243 		actual = 0;
244 	}
245 #elif HAVE_PREAD
246 	/* Try an aligned pread */
247 	if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
248 	    ((channel->align == 0) ||
249 	     (IS_ALIGNED(buf, channel->align) &&
250 	      IS_ALIGNED(location, channel->align) &&
251 	      IS_ALIGNED(size, channel->align)))) {
252 		actual = pread(data->dev, buf, size, location);
253 		if (actual == size)
254 			return 0;
255 		actual = 0;
256 	}
257 #endif /* HAVE_PREAD */
258 
259 	if ((channel->align == 0) ||
260 	    (IS_ALIGNED(buf, channel->align) &&
261 	     IS_ALIGNED(location, channel->align) &&
262 	     IS_ALIGNED(size, channel->align))) {
263 		mutex_lock(data, BOUNCE_MTX);
264 		if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
265 			retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
266 			goto error_unlock;
267 		}
268 		actual = read(data->dev, buf, size);
269 		if (actual != size) {
270 		short_read:
271 			if (actual < 0) {
272 				retval = errno;
273 				actual = 0;
274 			} else
275 				retval = EXT2_ET_SHORT_READ;
276 			goto error_unlock;
277 		}
278 		goto success_unlock;
279 	}
280 
281 #ifdef ALIGN_DEBUG
282 	printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf,
283 	       (unsigned long) size);
284 #endif
285 
286 	/*
287 	 * The buffer or size which we're trying to read isn't aligned
288 	 * to the O_DIRECT rules, so we need to do this the hard way...
289 	 */
290 bounce_read:
291 	if (channel->align == 0)
292 		channel->align = 1;
293 	if ((channel->block_size > channel->align) &&
294 	    (channel->block_size % channel->align) == 0)
295 		align_size = channel->block_size;
296 	else
297 		align_size = channel->align;
298 	aligned_blk = location / align_size;
299 	offset = location % align_size;
300 
301 	mutex_lock(data, BOUNCE_MTX);
302 	if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
303 		retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
304 		goto error_unlock;
305 	}
306 	while (size > 0) {
307 		actual = read(data->dev, data->bounce, align_size);
308 		if (actual != align_size) {
309 			mutex_unlock(data, BOUNCE_MTX);
310 			actual = really_read;
311 			buf -= really_read;
312 			size += really_read;
313 			goto short_read;
314 		}
315 		if ((actual + offset) > align_size)
316 			actual = align_size - offset;
317 		if (actual > size)
318 			actual = size;
319 		memcpy(buf, (char *)data->bounce + offset, actual);
320 
321 		really_read += actual;
322 		size -= actual;
323 		buf += actual;
324 		offset = 0;
325 		aligned_blk++;
326 	}
327 success_unlock:
328 	mutex_unlock(data, BOUNCE_MTX);
329 	return 0;
330 
331 error_unlock:
332 	mutex_unlock(data, BOUNCE_MTX);
333 	if (actual >= 0 && actual < size)
334 		memset((char *) buf+actual, 0, size-actual);
335 	if (channel->read_error)
336 		retval = (channel->read_error)(channel, block, count, buf,
337 					       size, actual, retval);
338 	return retval;
339 }
340 
341 #define RAW_WRITE_NO_HANDLER	1
342 
raw_write_blk(io_channel channel,struct unix_private_data * data,unsigned long long block,int count,const void * bufv,int flags)343 static errcode_t raw_write_blk(io_channel channel,
344 			       struct unix_private_data *data,
345 			       unsigned long long block,
346 			       int count, const void *bufv,
347 			       int flags)
348 {
349 	ssize_t		size;
350 	ext2_loff_t	location;
351 	int		actual = 0;
352 	errcode_t	retval;
353 	const unsigned char *buf = bufv;
354 	unsigned long long aligned_blk;
355 	int		align_size, offset;
356 
357 	if (count == 1)
358 		size = channel->block_size;
359 	else {
360 		if (count < 0)
361 			size = -count;
362 		else
363 			size = (ext2_loff_t) count * channel->block_size;
364 	}
365 	mutex_lock(data, STATS_MTX);
366 	data->io_stats.bytes_written += size;
367 	mutex_unlock(data, STATS_MTX);
368 
369 	location = ((ext2_loff_t) block * channel->block_size) + data->offset;
370 
371 	if (data->flags & IO_FLAG_FORCE_BOUNCE)
372 		goto bounce_write;
373 
374 #ifdef HAVE_PWRITE64
375 	/* Try an aligned pwrite */
376 	if ((channel->align == 0) ||
377 	    (IS_ALIGNED(buf, channel->align) &&
378 	     IS_ALIGNED(location, channel->align) &&
379 	     IS_ALIGNED(size, channel->align))) {
380 		actual = pwrite64(data->dev, buf, size, location);
381 		if (actual == size)
382 			return 0;
383 	}
384 #elif HAVE_PWRITE
385 	/* Try an aligned pwrite */
386 	if ((sizeof(off_t) >= sizeof(ext2_loff_t)) &&
387 	    ((channel->align == 0) ||
388 	     (IS_ALIGNED(buf, channel->align) &&
389 	      IS_ALIGNED(location, channel->align) &&
390 	      IS_ALIGNED(size, channel->align)))) {
391 		actual = pwrite(data->dev, buf, size, location);
392 		if (actual == size)
393 			return 0;
394 	}
395 #endif /* HAVE_PWRITE */
396 
397 	if ((channel->align == 0) ||
398 	    (IS_ALIGNED(buf, channel->align) &&
399 	     IS_ALIGNED(location, channel->align) &&
400 	     IS_ALIGNED(size, channel->align))) {
401 		mutex_lock(data, BOUNCE_MTX);
402 		if (ext2fs_llseek(data->dev, location, SEEK_SET) < 0) {
403 			retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
404 			goto error_unlock;
405 		}
406 		actual = write(data->dev, buf, size);
407 		mutex_unlock(data, BOUNCE_MTX);
408 		if (actual < 0) {
409 			retval = errno;
410 			goto error_out;
411 		}
412 		if (actual != size) {
413 		short_write:
414 			retval = EXT2_ET_SHORT_WRITE;
415 			goto error_out;
416 		}
417 		return 0;
418 	}
419 
420 #ifdef ALIGN_DEBUG
421 	printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf,
422 	       (unsigned long) size);
423 #endif
424 	/*
425 	 * The buffer or size which we're trying to write isn't aligned
426 	 * to the O_DIRECT rules, so we need to do this the hard way...
427 	 */
428 bounce_write:
429 	if (channel->align == 0)
430 		channel->align = 1;
431 	if ((channel->block_size > channel->align) &&
432 	    (channel->block_size % channel->align) == 0)
433 		align_size = channel->block_size;
434 	else
435 		align_size = channel->align;
436 	aligned_blk = location / align_size;
437 	offset = location % align_size;
438 
439 	while (size > 0) {
440 		int actual_w;
441 
442 		mutex_lock(data, BOUNCE_MTX);
443 		if (size < align_size || offset) {
444 			if (ext2fs_llseek(data->dev, aligned_blk * align_size,
445 					  SEEK_SET) < 0) {
446 				retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
447 				goto error_unlock;
448 			}
449 			actual = read(data->dev, data->bounce,
450 				      align_size);
451 			if (actual != align_size) {
452 				if (actual < 0) {
453 					retval = errno;
454 					goto error_unlock;
455 				}
456 				memset((char *) data->bounce + actual, 0,
457 				       align_size - actual);
458 			}
459 		}
460 		actual = size;
461 		if ((actual + offset) > align_size)
462 			actual = align_size - offset;
463 		if (actual > size)
464 			actual = size;
465 		memcpy(((char *)data->bounce) + offset, buf, actual);
466 		if (ext2fs_llseek(data->dev, aligned_blk * align_size, SEEK_SET) < 0) {
467 			retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
468 			goto error_unlock;
469 		}
470 		actual_w = write(data->dev, data->bounce, align_size);
471 		mutex_unlock(data, BOUNCE_MTX);
472 		if (actual_w < 0) {
473 			retval = errno;
474 			goto error_out;
475 		}
476 		if (actual_w != align_size)
477 			goto short_write;
478 		size -= actual;
479 		buf += actual;
480 		location += actual;
481 		aligned_blk++;
482 		offset = 0;
483 	}
484 	return 0;
485 
486 error_unlock:
487 	mutex_unlock(data, BOUNCE_MTX);
488 error_out:
489 	if (((flags & RAW_WRITE_NO_HANDLER) == 0) && channel->write_error)
490 		retval = (channel->write_error)(channel, block, count, buf,
491 						size, actual, retval);
492 	return retval;
493 }
494 
495 
496 /*
497  * Here we implement the cache functions
498  */
499 
500 /* Allocate the cache buffers */
alloc_cache(io_channel channel,struct unix_private_data * data)501 static errcode_t alloc_cache(io_channel channel,
502 			     struct unix_private_data *data)
503 {
504 	errcode_t		retval;
505 	struct unix_cache	*cache;
506 	int			i;
507 
508 	data->access_time = 0;
509 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
510 		cache->block = 0;
511 		cache->access_time = 0;
512 		cache->dirty = 0;
513 		cache->in_use = 0;
514 		if (cache->buf)
515 			ext2fs_free_mem(&cache->buf);
516 		retval = io_channel_alloc_buf(channel, 0, &cache->buf);
517 		if (retval)
518 			return retval;
519 	}
520 	if (channel->align || data->flags & IO_FLAG_FORCE_BOUNCE) {
521 		if (data->bounce)
522 			ext2fs_free_mem(&data->bounce);
523 		retval = io_channel_alloc_buf(channel, 0, &data->bounce);
524 	}
525 	return retval;
526 }
527 
528 /* Free the cache buffers */
free_cache(struct unix_private_data * data)529 static void free_cache(struct unix_private_data *data)
530 {
531 	struct unix_cache	*cache;
532 	int			i;
533 
534 	data->access_time = 0;
535 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
536 		cache->block = 0;
537 		cache->access_time = 0;
538 		cache->dirty = 0;
539 		cache->in_use = 0;
540 		if (cache->buf)
541 			ext2fs_free_mem(&cache->buf);
542 	}
543 	if (data->bounce)
544 		ext2fs_free_mem(&data->bounce);
545 }
546 
547 #ifndef NO_IO_CACHE
548 /*
549  * Try to find a block in the cache.  If the block is not found, and
550  * eldest is a non-zero pointer, then fill in eldest with the cache
551  * entry to that should be reused.
552  */
find_cached_block(struct unix_private_data * data,unsigned long long block,struct unix_cache ** eldest)553 static struct unix_cache *find_cached_block(struct unix_private_data *data,
554 					    unsigned long long block,
555 					    struct unix_cache **eldest)
556 {
557 	struct unix_cache	*cache, *unused_cache, *oldest_cache;
558 	int			i;
559 
560 	unused_cache = oldest_cache = 0;
561 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
562 		if (!cache->in_use) {
563 			if (!unused_cache)
564 				unused_cache = cache;
565 			continue;
566 		}
567 		if (cache->block == block) {
568 			cache->access_time = ++data->access_time;
569 			return cache;
570 		}
571 		if (!oldest_cache ||
572 		    (cache->access_time < oldest_cache->access_time))
573 			oldest_cache = cache;
574 	}
575 	if (eldest)
576 		*eldest = (unused_cache) ? unused_cache : oldest_cache;
577 	return 0;
578 }
579 
580 /*
581  * Reuse a particular cache entry for another block.
582  */
reuse_cache(io_channel channel,struct unix_private_data * data,struct unix_cache * cache,unsigned long long block)583 static errcode_t reuse_cache(io_channel channel,
584 		struct unix_private_data *data, struct unix_cache *cache,
585 		unsigned long long block)
586 {
587 	if (cache->dirty && cache->in_use) {
588 		errcode_t retval;
589 
590 		retval = raw_write_blk(channel, data, cache->block, 1,
591 				       cache->buf, RAW_WRITE_NO_HANDLER);
592 		if (retval) {
593 			cache->write_err = 1;
594 			return retval;
595 		}
596 	}
597 
598 	cache->in_use = 1;
599 	cache->dirty = 0;
600 	cache->write_err = 0;
601 	cache->block = block;
602 	cache->access_time = ++data->access_time;
603 	return 0;
604 }
605 
606 #define FLUSH_INVALIDATE	0x01
607 #define FLUSH_NOLOCK		0x02
608 
609 /*
610  * Flush all of the blocks in the cache
611  */
flush_cached_blocks(io_channel channel,struct unix_private_data * data,int flags)612 static errcode_t flush_cached_blocks(io_channel channel,
613 				     struct unix_private_data *data,
614 				     int flags)
615 {
616 	struct unix_cache	*cache;
617 	errcode_t		retval, retval2 = 0;
618 	int			i;
619 	int			errors_found = 0;
620 
621 	if ((flags & FLUSH_NOLOCK) == 0)
622 		mutex_lock(data, CACHE_MTX);
623 	for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
624 		if (!cache->in_use || !cache->dirty)
625 			continue;
626 		retval = raw_write_blk(channel, data,
627 				       cache->block, 1, cache->buf,
628 				       RAW_WRITE_NO_HANDLER);
629 		if (retval) {
630 			cache->write_err = 1;
631 			errors_found = 1;
632 			retval2 = retval;
633 		} else {
634 			cache->dirty = 0;
635 			cache->write_err = 0;
636 			if (flags & FLUSH_INVALIDATE)
637 				cache->in_use = 0;
638 		}
639 	}
640 	if ((flags & FLUSH_NOLOCK) == 0)
641 		mutex_unlock(data, CACHE_MTX);
642 retry:
643 	while (errors_found) {
644 		if ((flags & FLUSH_NOLOCK) == 0)
645 			mutex_lock(data, CACHE_MTX);
646 		errors_found = 0;
647 		for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
648 			if (!cache->in_use || !cache->write_err)
649 				continue;
650 			errors_found = 1;
651 			if (cache->write_err && channel->write_error) {
652 				char *err_buf = NULL;
653 				unsigned long long err_block = cache->block;
654 
655 				cache->dirty = 0;
656 				cache->in_use = 0;
657 				cache->write_err = 0;
658 				if (io_channel_alloc_buf(channel, 0,
659 							 &err_buf))
660 					err_buf = NULL;
661 				else
662 					memcpy(err_buf, cache->buf,
663 					       channel->block_size);
664 				mutex_unlock(data, CACHE_MTX);
665 				(channel->write_error)(channel, err_block,
666 					1, err_buf, channel->block_size, -1,
667 					retval2);
668 				if (err_buf)
669 					ext2fs_free_mem(&err_buf);
670 				goto retry;
671 			} else
672 				cache->write_err = 0;
673 		}
674 		if ((flags & FLUSH_NOLOCK) == 0)
675 			mutex_unlock(data, CACHE_MTX);
676 	}
677 	return retval2;
678 }
679 #endif /* NO_IO_CACHE */
680 
681 #ifdef __linux__
682 #ifndef BLKDISCARDZEROES
683 #define BLKDISCARDZEROES _IO(0x12,124)
684 #endif
685 #endif
686 
ext2fs_open_file(const char * pathname,int flags,mode_t mode)687 int ext2fs_open_file(const char *pathname, int flags, mode_t mode)
688 {
689 	if (mode)
690 #if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
691 		return open64(pathname, flags, mode);
692 	else
693 		return open64(pathname, flags);
694 #else
695 		return open(pathname, flags, mode);
696 	else
697 		return open(pathname, flags);
698 #endif
699 }
700 
ext2fs_stat(const char * path,ext2fs_struct_stat * buf)701 int ext2fs_stat(const char *path, ext2fs_struct_stat *buf)
702 {
703 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
704 	return stat64(path, buf);
705 #else
706 	return stat(path, buf);
707 #endif
708 }
709 
ext2fs_fstat(int fd,ext2fs_struct_stat * buf)710 int ext2fs_fstat(int fd, ext2fs_struct_stat *buf)
711 {
712 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
713 	return fstat64(fd, buf);
714 #else
715 	return fstat(fd, buf);
716 #endif
717 }
718 
719 
unix_open_channel(const char * name,int fd,int flags,io_channel * channel,io_manager io_mgr)720 static errcode_t unix_open_channel(const char *name, int fd,
721 				   int flags, io_channel *channel,
722 				   io_manager io_mgr)
723 {
724 	io_channel	io = NULL;
725 	struct unix_private_data *data = NULL;
726 	errcode_t	retval;
727 	ext2fs_struct_stat st;
728 #ifdef __linux__
729 	struct		utsname ut;
730 #endif
731 
732 	if (safe_getenv("UNIX_IO_FORCE_BOUNCE"))
733 		flags |= IO_FLAG_FORCE_BOUNCE;
734 
735 #ifdef __linux__
736 	/*
737 	 * We need to make sure any previous errors in the block
738 	 * device are thrown away, sigh.
739 	 */
740 	(void) fsync(fd);
741 #endif
742 
743 	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
744 	if (retval)
745 		goto cleanup;
746 	memset(io, 0, sizeof(struct struct_io_channel));
747 	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
748 	retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data);
749 	if (retval)
750 		goto cleanup;
751 
752 	io->manager = io_mgr;
753 	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
754 	if (retval)
755 		goto cleanup;
756 
757 	strcpy(io->name, name);
758 	io->private_data = data;
759 	io->block_size = 1024;
760 	io->read_error = 0;
761 	io->write_error = 0;
762 	io->refcount = 1;
763 	io->flags = 0;
764 
765 	memset(data, 0, sizeof(struct unix_private_data));
766 	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
767 	data->io_stats.num_fields = 2;
768 	data->flags = flags;
769 	data->dev = fd;
770 
771 #if defined(O_DIRECT)
772 	if (flags & IO_FLAG_DIRECT_IO)
773 		io->align = ext2fs_get_dio_alignment(data->dev);
774 #elif defined(F_NOCACHE)
775 	if (flags & IO_FLAG_DIRECT_IO)
776 		io->align = 4096;
777 #endif
778 
779 	/*
780 	 * If the device is really a block device, then set the
781 	 * appropriate flag, otherwise we can set DISCARD_ZEROES flag
782 	 * because we are going to use punch hole instead of discard
783 	 * and if it succeed, subsequent read from sparse area returns
784 	 * zero.
785 	 */
786 	if (ext2fs_fstat(data->dev, &st) == 0) {
787 		if (ext2fsP_is_disk_device(st.st_mode))
788 			io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE;
789 		else
790 			io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
791 	}
792 
793 #ifdef BLKDISCARDZEROES
794 	{
795 		int zeroes = 0;
796 		if (ioctl(data->dev, BLKDISCARDZEROES, &zeroes) == 0 &&
797 		    zeroes)
798 			io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES;
799 	}
800 #endif
801 
802 #if defined(__CYGWIN__)
803 	/*
804 	 * Some operating systems require that the buffers be aligned,
805 	 * regardless of O_DIRECT
806 	 */
807 	if (!io->align)
808 		io->align = 512;
809 #endif
810 
811 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
812 	if (io->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
813 		int dio_align = ext2fs_get_dio_alignment(fd);
814 
815 		if (io->align < dio_align)
816 			io->align = dio_align;
817 	}
818 #endif
819 
820 	if ((retval = alloc_cache(io, data)))
821 		goto cleanup;
822 
823 #ifdef BLKROGET
824 	if (flags & IO_FLAG_RW) {
825 		int error;
826 		int readonly = 0;
827 
828 		/* Is the block device actually writable? */
829 		error = ioctl(data->dev, BLKROGET, &readonly);
830 		if (!error && readonly) {
831 			retval = EPERM;
832 			goto cleanup;
833 		}
834 	}
835 #endif
836 
837 #ifdef __linux__
838 #undef RLIM_INFINITY
839 #if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
840 #define RLIM_INFINITY	((unsigned long)(~0UL>>1))
841 #else
842 #define RLIM_INFINITY  (~0UL)
843 #endif
844 	/*
845 	 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
846 	 * block devices are wrongly getting hit by the filesize
847 	 * limit.  This workaround isn't perfect, since it won't work
848 	 * if glibc wasn't built against 2.2 header files.  (Sigh.)
849 	 *
850 	 */
851 	if ((flags & IO_FLAG_RW) &&
852 	    (uname(&ut) == 0) &&
853 	    ((ut.release[0] == '2') && (ut.release[1] == '.') &&
854 	     (ut.release[2] == '4') && (ut.release[3] == '.') &&
855 	     (ut.release[4] == '1') && (ut.release[5] >= '0') &&
856 	     (ut.release[5] < '8')) &&
857 	    (ext2fs_fstat(data->dev, &st) == 0) &&
858 	    (ext2fsP_is_disk_device(st.st_mode))) {
859 		struct rlimit	rlim;
860 
861 		rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
862 		setrlimit(RLIMIT_FSIZE, &rlim);
863 		getrlimit(RLIMIT_FSIZE, &rlim);
864 		if (((unsigned long) rlim.rlim_cur) <
865 		    ((unsigned long) rlim.rlim_max)) {
866 			rlim.rlim_cur = rlim.rlim_max;
867 			setrlimit(RLIMIT_FSIZE, &rlim);
868 		}
869 	}
870 #endif
871 #ifdef HAVE_PTHREAD
872 	if (flags & IO_FLAG_THREADS) {
873 		io->flags |= CHANNEL_FLAGS_THREADS;
874 		retval = pthread_mutex_init(&data->cache_mutex, NULL);
875 		if (retval)
876 			goto cleanup;
877 		retval = pthread_mutex_init(&data->bounce_mutex, NULL);
878 		if (retval) {
879 			pthread_mutex_destroy(&data->cache_mutex);
880 			goto cleanup;
881 		}
882 		retval = pthread_mutex_init(&data->stats_mutex, NULL);
883 		if (retval) {
884 			pthread_mutex_destroy(&data->cache_mutex);
885 			pthread_mutex_destroy(&data->bounce_mutex);
886 			goto cleanup;
887 		}
888 	}
889 #endif
890 	*channel = io;
891 	return 0;
892 
893 cleanup:
894 	if (data) {
895 		if (data->dev >= 0)
896 			close(data->dev);
897 		free_cache(data);
898 		ext2fs_free_mem(&data);
899 	}
900 	if (io) {
901 		if (io->name) {
902 			ext2fs_free_mem(&io->name);
903 		}
904 		ext2fs_free_mem(&io);
905 	}
906 	return retval;
907 }
908 
unixfd_open(const char * str_fd,int flags,io_channel * channel)909 static errcode_t unixfd_open(const char *str_fd, int flags,
910 			     io_channel *channel)
911 {
912 	int fd;
913 	int fd_flags;
914 
915 	fd = atoi(str_fd);
916 #if defined(HAVE_FCNTL)
917 	fd_flags = fcntl(fd, F_GETFD);
918 	if (fd_flags == -1)
919 		return EBADF;
920 
921 	flags = 0;
922 	if (fd_flags & O_RDWR)
923 		flags |= IO_FLAG_RW;
924 	if (fd_flags & O_EXCL)
925 		flags |= IO_FLAG_EXCLUSIVE;
926 #if defined(O_DIRECT)
927 	if (fd_flags & O_DIRECT)
928 		flags |= IO_FLAG_DIRECT_IO;
929 #endif
930 #endif  /* HAVE_FCNTL */
931 
932 	return unix_open_channel(str_fd, fd, flags, channel, unixfd_io_manager);
933 }
934 
unix_open(const char * name,int flags,io_channel * channel)935 static errcode_t unix_open(const char *name, int flags,
936 			   io_channel *channel)
937 {
938 	int fd = -1;
939 	int open_flags;
940 
941 	if (name == 0)
942 		return EXT2_ET_BAD_DEVICE_NAME;
943 
944 	open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
945 	if (flags & IO_FLAG_EXCLUSIVE)
946 		open_flags |= O_EXCL;
947 #if defined(O_DIRECT)
948 	if (flags & IO_FLAG_DIRECT_IO)
949 		open_flags |= O_DIRECT;
950 #endif
951 	fd = ext2fs_open_file(name, open_flags, 0);
952 	if (fd < 0)
953 		return errno;
954 #if defined(F_NOCACHE) && !defined(IO_DIRECT)
955 	if (flags & IO_FLAG_DIRECT_IO) {
956 		if (fcntl(fd, F_NOCACHE, 1) < 0)
957 			return errno;
958 	}
959 #endif
960 	return unix_open_channel(name, fd, flags, channel, unix_io_manager);
961 }
962 
unix_close(io_channel channel)963 static errcode_t unix_close(io_channel channel)
964 {
965 	struct unix_private_data *data;
966 	errcode_t	retval = 0;
967 
968 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
969 	data = (struct unix_private_data *) channel->private_data;
970 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
971 
972 	if (--channel->refcount > 0)
973 		return 0;
974 
975 #ifndef NO_IO_CACHE
976 	retval = flush_cached_blocks(channel, data, 0);
977 #endif
978 
979 	if (close(data->dev) < 0)
980 		retval = errno;
981 	free_cache(data);
982 #ifdef HAVE_PTHREAD
983 	if (data->flags & IO_FLAG_THREADS) {
984 		pthread_mutex_destroy(&data->cache_mutex);
985 		pthread_mutex_destroy(&data->bounce_mutex);
986 		pthread_mutex_destroy(&data->stats_mutex);
987 	}
988 #endif
989 
990 	ext2fs_free_mem(&channel->private_data);
991 	if (channel->name)
992 		ext2fs_free_mem(&channel->name);
993 	ext2fs_free_mem(&channel);
994 	return retval;
995 }
996 
unix_set_blksize(io_channel channel,int blksize)997 static errcode_t unix_set_blksize(io_channel channel, int blksize)
998 {
999 	struct unix_private_data *data;
1000 	errcode_t		retval = 0;
1001 
1002 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1003 	data = (struct unix_private_data *) channel->private_data;
1004 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1005 
1006 	if (channel->block_size != blksize) {
1007 		mutex_lock(data, CACHE_MTX);
1008 		mutex_lock(data, BOUNCE_MTX);
1009 #ifndef NO_IO_CACHE
1010 		if ((retval = flush_cached_blocks(channel, data, FLUSH_NOLOCK))){
1011 			mutex_unlock(data, BOUNCE_MTX);
1012 			mutex_unlock(data, CACHE_MTX);
1013 			return retval;
1014 		}
1015 #endif
1016 
1017 		channel->block_size = blksize;
1018 		free_cache(data);
1019 		retval = alloc_cache(channel, data);
1020 		mutex_unlock(data, BOUNCE_MTX);
1021 		mutex_unlock(data, CACHE_MTX);
1022 	}
1023 	return retval;
1024 }
1025 
unix_read_blk64(io_channel channel,unsigned long long block,int count,void * buf)1026 static errcode_t unix_read_blk64(io_channel channel, unsigned long long block,
1027 			       int count, void *buf)
1028 {
1029 	struct unix_private_data *data;
1030 	struct unix_cache *cache;
1031 	errcode_t	retval;
1032 	char		*cp;
1033 	int		i, j;
1034 
1035 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1036 	data = (struct unix_private_data *) channel->private_data;
1037 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1038 
1039 #ifdef NO_IO_CACHE
1040 	return raw_read_blk(channel, data, block, count, buf);
1041 #else
1042 	if (data->flags & IO_FLAG_NOCACHE)
1043 		return raw_read_blk(channel, data, block, count, buf);
1044 	/*
1045 	 * If we're doing an odd-sized read or a very large read,
1046 	 * flush out the cache and then do a direct read.
1047 	 */
1048 	if (count < 0 || count > WRITE_DIRECT_SIZE) {
1049 		if ((retval = flush_cached_blocks(channel, data, 0)))
1050 			return retval;
1051 		return raw_read_blk(channel, data, block, count, buf);
1052 	}
1053 
1054 	cp = buf;
1055 	mutex_lock(data, CACHE_MTX);
1056 	while (count > 0) {
1057 		/* If it's in the cache, use it! */
1058 		if ((cache = find_cached_block(data, block, NULL))) {
1059 #ifdef DEBUG
1060 			printf("Using cached block %lu\n", block);
1061 #endif
1062 			memcpy(cp, cache->buf, channel->block_size);
1063 			count--;
1064 			block++;
1065 			cp += channel->block_size;
1066 			continue;
1067 		}
1068 
1069 		/*
1070 		 * Find the number of uncached blocks so we can do a
1071 		 * single read request
1072 		 */
1073 		for (i=1; i < count; i++)
1074 			if (find_cached_block(data, block+i, NULL))
1075 				break;
1076 #ifdef DEBUG
1077 		printf("Reading %d blocks starting at %lu\n", i, block);
1078 #endif
1079 		mutex_unlock(data, CACHE_MTX);
1080 		if ((retval = raw_read_blk(channel, data, block, i, cp)))
1081 			return retval;
1082 		mutex_lock(data, CACHE_MTX);
1083 
1084 		/* Save the results in the cache */
1085 		for (j=0; j < i; j++) {
1086 			if (!find_cached_block(data, block, &cache)) {
1087 				retval = reuse_cache(channel, data,
1088 						     cache, block);
1089 				if (retval)
1090 					goto call_write_handler;
1091 				memcpy(cache->buf, cp, channel->block_size);
1092 			}
1093 			count--;
1094 			block++;
1095 			cp += channel->block_size;
1096 		}
1097 	}
1098 	mutex_unlock(data, CACHE_MTX);
1099 	return 0;
1100 
1101 call_write_handler:
1102 	if (cache->write_err && channel->write_error) {
1103 		char *err_buf = NULL;
1104 		unsigned long long err_block = cache->block;
1105 
1106 		cache->dirty = 0;
1107 		cache->in_use = 0;
1108 		cache->write_err = 0;
1109 		if (io_channel_alloc_buf(channel, 0, &err_buf))
1110 			err_buf = NULL;
1111 		else
1112 			memcpy(err_buf, cache->buf, channel->block_size);
1113 		mutex_unlock(data, CACHE_MTX);
1114 		(channel->write_error)(channel, err_block, 1, err_buf,
1115 				       channel->block_size, -1,
1116 				       retval);
1117 		if (err_buf)
1118 			ext2fs_free_mem(&err_buf);
1119 	} else
1120 		mutex_unlock(data, CACHE_MTX);
1121 	return retval;
1122 #endif /* NO_IO_CACHE */
1123 }
1124 
unix_read_blk(io_channel channel,unsigned long block,int count,void * buf)1125 static errcode_t unix_read_blk(io_channel channel, unsigned long block,
1126 			       int count, void *buf)
1127 {
1128 	return unix_read_blk64(channel, block, count, buf);
1129 }
1130 
unix_write_blk64(io_channel channel,unsigned long long block,int count,const void * buf)1131 static errcode_t unix_write_blk64(io_channel channel, unsigned long long block,
1132 				int count, const void *buf)
1133 {
1134 	struct unix_private_data *data;
1135 	struct unix_cache *cache, *reuse;
1136 	errcode_t	retval = 0;
1137 	const char	*cp;
1138 	int		writethrough;
1139 
1140 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1141 	data = (struct unix_private_data *) channel->private_data;
1142 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1143 
1144 #ifdef NO_IO_CACHE
1145 	return raw_write_blk(channel, data, block, count, buf, 0);
1146 #else
1147 	if (data->flags & IO_FLAG_NOCACHE)
1148 		return raw_write_blk(channel, data, block, count, buf, 0);
1149 	/*
1150 	 * If we're doing an odd-sized write or a very large write,
1151 	 * flush out the cache completely and then do a direct write.
1152 	 */
1153 	if (count < 0 || count > WRITE_DIRECT_SIZE) {
1154 		if ((retval = flush_cached_blocks(channel, data,
1155 						  FLUSH_INVALIDATE)))
1156 			return retval;
1157 		return raw_write_blk(channel, data, block, count, buf, 0);
1158 	}
1159 
1160 	/*
1161 	 * For a moderate-sized multi-block write, first force a write
1162 	 * if we're in write-through cache mode, and then fill the
1163 	 * cache with the blocks.
1164 	 */
1165 	writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
1166 	if (writethrough)
1167 		retval = raw_write_blk(channel, data, block, count, buf, 0);
1168 
1169 	cp = buf;
1170 	mutex_lock(data, CACHE_MTX);
1171 	while (count > 0) {
1172 		cache = find_cached_block(data, block, &reuse);
1173 		if (!cache) {
1174 			errcode_t err;
1175 
1176 			cache = reuse;
1177 			err = reuse_cache(channel, data, cache, block);
1178 			if (err)
1179 				goto call_write_handler;
1180 		}
1181 		if (cache->buf != cp)
1182 			memcpy(cache->buf, cp, channel->block_size);
1183 		cache->dirty = !writethrough;
1184 		count--;
1185 		block++;
1186 		cp += channel->block_size;
1187 	}
1188 	mutex_unlock(data, CACHE_MTX);
1189 	return retval;
1190 
1191 call_write_handler:
1192 	if (cache->write_err && channel->write_error) {
1193 		char *err_buf = NULL;
1194 		unsigned long long err_block = cache->block;
1195 
1196 		cache->dirty = 0;
1197 		cache->in_use = 0;
1198 		cache->write_err = 0;
1199 		if (io_channel_alloc_buf(channel, 0, &err_buf))
1200 			err_buf = NULL;
1201 		else
1202 			memcpy(err_buf, cache->buf, channel->block_size);
1203 		mutex_unlock(data, CACHE_MTX);
1204 		(channel->write_error)(channel, err_block, 1, err_buf,
1205 				       channel->block_size, -1,
1206 				       retval);
1207 		if (err_buf)
1208 			ext2fs_free_mem(&err_buf);
1209 	} else
1210 		mutex_unlock(data, CACHE_MTX);
1211 	return retval;
1212 #endif /* NO_IO_CACHE */
1213 }
1214 
unix_cache_readahead(io_channel channel,unsigned long long block,unsigned long long count)1215 static errcode_t unix_cache_readahead(io_channel channel,
1216 				      unsigned long long block,
1217 				      unsigned long long count)
1218 {
1219 #ifdef POSIX_FADV_WILLNEED
1220 	struct unix_private_data *data;
1221 
1222 	data = (struct unix_private_data *)channel->private_data;
1223 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1224 	return posix_fadvise(data->dev,
1225 			     (ext2_loff_t)block * channel->block_size + data->offset,
1226 			     (ext2_loff_t)count * channel->block_size,
1227 			     POSIX_FADV_WILLNEED);
1228 #else
1229 	return EXT2_ET_OP_NOT_SUPPORTED;
1230 #endif
1231 }
1232 
unix_write_blk(io_channel channel,unsigned long block,int count,const void * buf)1233 static errcode_t unix_write_blk(io_channel channel, unsigned long block,
1234 				int count, const void *buf)
1235 {
1236 	return unix_write_blk64(channel, block, count, buf);
1237 }
1238 
unix_write_byte(io_channel channel,unsigned long offset,int size,const void * buf)1239 static errcode_t unix_write_byte(io_channel channel, unsigned long offset,
1240 				 int size, const void *buf)
1241 {
1242 	struct unix_private_data *data;
1243 	errcode_t	retval = 0;
1244 	ssize_t		actual;
1245 
1246 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1247 	data = (struct unix_private_data *) channel->private_data;
1248 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1249 
1250 	if (channel->align != 0) {
1251 #ifdef ALIGN_DEBUG
1252 		printf("unix_write_byte: O_DIRECT fallback\n");
1253 #endif
1254 		return EXT2_ET_UNIMPLEMENTED;
1255 	}
1256 
1257 #ifndef NO_IO_CACHE
1258 	/*
1259 	 * Flush out the cache completely
1260 	 */
1261 	if ((retval = flush_cached_blocks(channel, data, FLUSH_INVALIDATE)))
1262 		return retval;
1263 #endif
1264 
1265 	if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
1266 		return errno;
1267 
1268 	actual = write(data->dev, buf, size);
1269 	if (actual < 0)
1270 		return errno;
1271 	if (actual != size)
1272 		return EXT2_ET_SHORT_WRITE;
1273 
1274 	return 0;
1275 }
1276 
1277 /*
1278  * Flush data buffers to disk.
1279  */
unix_flush(io_channel channel)1280 static errcode_t unix_flush(io_channel channel)
1281 {
1282 	struct unix_private_data *data;
1283 	errcode_t retval = 0;
1284 
1285 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1286 	data = (struct unix_private_data *) channel->private_data;
1287 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1288 
1289 #ifndef NO_IO_CACHE
1290 	retval = flush_cached_blocks(channel, data, 0);
1291 #endif
1292 #ifdef HAVE_FSYNC
1293 	if (!retval && fsync(data->dev) != 0)
1294 		return errno;
1295 #endif
1296 	return retval;
1297 }
1298 
unix_set_option(io_channel channel,const char * option,const char * arg)1299 static errcode_t unix_set_option(io_channel channel, const char *option,
1300 				 const char *arg)
1301 {
1302 	struct unix_private_data *data;
1303 	unsigned long long tmp;
1304 	errcode_t retval;
1305 	char *end;
1306 
1307 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1308 	data = (struct unix_private_data *) channel->private_data;
1309 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1310 
1311 	if (!strcmp(option, "offset")) {
1312 		if (!arg)
1313 			return EXT2_ET_INVALID_ARGUMENT;
1314 
1315 		tmp = strtoull(arg, &end, 0);
1316 		if (*end)
1317 			return EXT2_ET_INVALID_ARGUMENT;
1318 		data->offset = tmp;
1319 		if (data->offset < 0)
1320 			return EXT2_ET_INVALID_ARGUMENT;
1321 		return 0;
1322 	}
1323 	if (!strcmp(option, "cache")) {
1324 		if (!arg)
1325 			return EXT2_ET_INVALID_ARGUMENT;
1326 		if (!strcmp(arg, "on")) {
1327 			data->flags &= ~IO_FLAG_NOCACHE;
1328 			return 0;
1329 		}
1330 		if (!strcmp(arg, "off")) {
1331 			retval = flush_cached_blocks(channel, data, 0);
1332 			data->flags |= IO_FLAG_NOCACHE;
1333 			return retval;
1334 		}
1335 		return EXT2_ET_INVALID_ARGUMENT;
1336 	}
1337 	return EXT2_ET_INVALID_ARGUMENT;
1338 }
1339 
1340 #if defined(__linux__) && !defined(BLKDISCARD)
1341 #define BLKDISCARD		_IO(0x12,119)
1342 #endif
1343 
unix_discard(io_channel channel,unsigned long long block,unsigned long long count)1344 static errcode_t unix_discard(io_channel channel, unsigned long long block,
1345 			      unsigned long long count)
1346 {
1347 	struct unix_private_data *data;
1348 	int		ret;
1349 
1350 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1351 	data = (struct unix_private_data *) channel->private_data;
1352 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1353 
1354 	if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) {
1355 #ifdef BLKDISCARD
1356 		__u64 range[2];
1357 
1358 		range[0] = (__u64)(block) * channel->block_size + data->offset;
1359 		range[1] = (__u64)(count) * channel->block_size;
1360 
1361 		ret = ioctl(data->dev, BLKDISCARD, &range);
1362 #else
1363 		goto unimplemented;
1364 #endif
1365 	} else {
1366 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
1367 		/*
1368 		 * If we are not on block device, try to use punch hole
1369 		 * to reclaim free space.
1370 		 */
1371 		ret = fallocate(data->dev,
1372 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1373 				(off_t)(block) * channel->block_size + data->offset,
1374 				(off_t)(count) * channel->block_size);
1375 #else
1376 		goto unimplemented;
1377 #endif
1378 	}
1379 	if (ret < 0) {
1380 		if (errno == EOPNOTSUPP)
1381 			goto unimplemented;
1382 		return errno;
1383 	}
1384 	return 0;
1385 unimplemented:
1386 	return EXT2_ET_UNIMPLEMENTED;
1387 }
1388 
1389 /*
1390  * If we know about ZERO_RANGE, try that before we try PUNCH_HOLE because
1391  * ZERO_RANGE doesn't unmap preallocated blocks.  We prefer fallocate because
1392  * it always invalidates page cache, and libext2fs requires that reads after
1393  * ZERO_RANGE return zeroes.
1394  */
__unix_zeroout(int fd,off_t offset,off_t len)1395 static int __unix_zeroout(int fd, off_t offset, off_t len)
1396 {
1397 	int ret = -1;
1398 
1399 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_ZERO_RANGE)
1400 	ret = fallocate(fd, FALLOC_FL_ZERO_RANGE, offset, len);
1401 	if (ret == 0)
1402 		return 0;
1403 #endif
1404 #if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
1405 	ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1406 			offset,  len);
1407 	if (ret == 0)
1408 		return 0;
1409 #endif
1410 	errno = EOPNOTSUPP;
1411 	return ret;
1412 }
1413 
1414 /* parameters might not be used if OS doesn't support zeroout */
1415 #if __GNUC_PREREQ (4, 6)
1416 #pragma GCC diagnostic push
1417 #pragma GCC diagnostic ignored "-Wunused-parameter"
1418 #endif
unix_zeroout(io_channel channel,unsigned long long block,unsigned long long count)1419 static errcode_t unix_zeroout(io_channel channel, unsigned long long block,
1420 			      unsigned long long count)
1421 {
1422 	struct unix_private_data *data;
1423 	int		ret;
1424 
1425 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
1426 	data = (struct unix_private_data *) channel->private_data;
1427 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
1428 
1429 	if (safe_getenv("UNIX_IO_NOZEROOUT"))
1430 		goto unimplemented;
1431 
1432 	if (!(channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE)) {
1433 		/* Regular file, try to use truncate/punch/zero. */
1434 		struct stat statbuf;
1435 
1436 		if (count == 0)
1437 			return 0;
1438 		/*
1439 		 * If we're trying to zero a range past the end of the file,
1440 		 * extend the file size, then truncate everything.
1441 		 */
1442 		ret = fstat(data->dev, &statbuf);
1443 		if (ret)
1444 			goto err;
1445 		if ((unsigned long long) statbuf.st_size <
1446 			(block + count) * channel->block_size + data->offset) {
1447 			ret = ftruncate(data->dev,
1448 					(block + count) * channel->block_size + data->offset);
1449 			if (ret)
1450 				goto err;
1451 		}
1452 	}
1453 
1454 	ret = __unix_zeroout(data->dev,
1455 			(off_t)(block) * channel->block_size + data->offset,
1456 			(off_t)(count) * channel->block_size);
1457 err:
1458 	if (ret < 0) {
1459 		if (errno == EOPNOTSUPP)
1460 			goto unimplemented;
1461 		return errno;
1462 	}
1463 	return 0;
1464 unimplemented:
1465 	return EXT2_ET_UNIMPLEMENTED;
1466 }
1467 #if __GNUC_PREREQ (4, 6)
1468 #pragma GCC diagnostic pop
1469 #endif
1470 
1471 static struct struct_io_manager struct_unix_manager = {
1472 	.magic		= EXT2_ET_MAGIC_IO_MANAGER,
1473 	.name		= "Unix I/O Manager",
1474 	.open		= unix_open,
1475 	.close		= unix_close,
1476 	.set_blksize	= unix_set_blksize,
1477 	.read_blk	= unix_read_blk,
1478 	.write_blk	= unix_write_blk,
1479 	.flush		= unix_flush,
1480 	.write_byte	= unix_write_byte,
1481 	.set_option	= unix_set_option,
1482 	.get_stats	= unix_get_stats,
1483 	.read_blk64	= unix_read_blk64,
1484 	.write_blk64	= unix_write_blk64,
1485 	.discard	= unix_discard,
1486 	.cache_readahead	= unix_cache_readahead,
1487 	.zeroout	= unix_zeroout,
1488 };
1489 
1490 io_manager unix_io_manager = &struct_unix_manager;
1491 
1492 static struct struct_io_manager struct_unixfd_manager = {
1493 	.magic		= EXT2_ET_MAGIC_IO_MANAGER,
1494 	.name		= "Unix fd I/O Manager",
1495 	.open		= unixfd_open,
1496 	.close		= unix_close,
1497 	.set_blksize	= unix_set_blksize,
1498 	.read_blk	= unix_read_blk,
1499 	.write_blk	= unix_write_blk,
1500 	.flush		= unix_flush,
1501 	.write_byte	= unix_write_byte,
1502 	.set_option	= unix_set_option,
1503 	.get_stats	= unix_get_stats,
1504 	.read_blk64	= unix_read_blk64,
1505 	.write_blk64	= unix_write_blk64,
1506 	.discard	= unix_discard,
1507 	.cache_readahead	= unix_cache_readahead,
1508 	.zeroout	= unix_zeroout,
1509 };
1510 
1511 io_manager unixfd_io_manager = &struct_unixfd_manager;
1512