xref: /aosp_15_r20/external/armnn/third-party/stb/stb_image.h (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright (c) 2017 Sean Barrett
3 // SPDX-License-Identifier: MIT
4 //
5 
6 /* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h
7                                      no warranty implied; use at your own risk
8 
9    Do this:
10       #define STB_IMAGE_IMPLEMENTATION
11    before you include this file in *one* C or C++ file to create the implementation.
12 
13    // i.e. it should look like this:
14    #include ...
15    #include ...
16    #include ...
17    #define STB_IMAGE_IMPLEMENTATION
18    #include "stb_image.h"
19 
20    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
21    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
22 
23 
24    QUICK NOTES:
25       Primarily of interest to game developers and other people who can
26           avoid problematic images and only need the trivial interface
27 
28       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
29       PNG 1/2/4/8/16-bit-per-channel
30 
31       TGA (not sure what subset, if a subset)
32       BMP non-1bpp, non-RLE
33       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
34 
35       GIF (*comp always reports as 4-channel)
36       HDR (radiance rgbE format)
37       PIC (Softimage PIC)
38       PNM (PPM and PGM binary only)
39 
40       Animated GIF still needs a proper API, but here's one way to do it:
41           http://gist.github.com/urraka/685d9a6340b26b830d49
42 
43       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
44       - decode from arbitrary I/O callbacks
45       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
46 
47    Full documentation under "DOCUMENTATION" below.
48 
49 
50 LICENSE
51 
52   See end of file for license information.
53 
54 RECENT REVISION HISTORY:
55 
56       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
57       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
58       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
59       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
60       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
61       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
62                          RGB-format JPEG; remove white matting in PSD;
63                          allocate large structures on the stack;
64                          correct channel count for PNG & BMP
65       2.10  (2016-01-22) avoid warning introduced in 2.09
66       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
67 
68    See end of file for full revision history.
69 
70 
71  ============================    Contributors    =========================
72 
73  Image formats                          Extensions, features
74     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
75     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
76     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
77     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
78     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
79     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
80     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
81     github:urraka (animated gif)           Junggon Kim (PNM comments)
82                                            Daniel Gibson (16-bit TGA)
83                                            socks-the-fox (16-bit PNG)
84                                            Jeremy Sawicki (handle all ImageNet JPGs)
85  Optimizations & bugfixes
86     Fabian "ryg" Giesen
87     Arseny Kapoulkine
88     John-Mark Allen
89 
90  Bug & warning fixes
91     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
92     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
93     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
94     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
95     the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
96     Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
97     Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
98     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
99     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
100     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
101     Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
102     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
103     Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
104     Christian Floisand      Kevin Schmidt                         github:poppolopoppo
105 */
106 
107 #ifndef STBI_INCLUDE_STB_IMAGE_H
108 #define STBI_INCLUDE_STB_IMAGE_H
109 
110 // DOCUMENTATION
111 //
112 // Limitations:
//    - 16-bit-per-channel output is only available through the stbi_load_16* functions
114 //    - no 12-bit-per-channel JPEG
115 //    - no JPEGs with arithmetic coding
116 //    - no 1-bit BMP
117 //    - GIF always returns *comp=4
118 //
119 // Basic usage (see HDR discussion below for HDR usage):
120 //    int x,y,n;
121 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
122 //    // ... process data if not NULL ...
123 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
124 //    // ... replace '0' with '1'..'4' to force that many components per pixel
125 //    // ... but 'n' will always be the number that it would have been if you said 0
126 //    stbi_image_free(data)
127 //
128 // Standard parameters:
129 //    int *x                 -- outputs image width in pixels
130 //    int *y                 -- outputs image height in pixels
131 //    int *channels_in_file  -- outputs # of image components in image file
132 //    int desired_channels   -- if non-zero, # of image components requested in result
133 //
134 // The return value from an image loader is an 'unsigned char *' which points
135 // to the pixel data, or NULL on an allocation failure or if the image is
136 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
137 // with each pixel consisting of N interleaved 8-bit components; the first
138 // pixel pointed to is top-left-most in the image. There is no padding between
139 // image scanlines or between pixels, regardless of format. The number of
140 // components N is 'desired_channels' if desired_channels is non-zero, or
141 // *channels_in_file otherwise. If desired_channels is non-zero,
142 // *channels_in_file has the number of components that _would_ have been
143 // output otherwise. E.g. if you set desired_channels to 4, you will always
144 // get RGBA output, but you can check *channels_in_file to see if it's trivially
145 // opaque because e.g. there were only 3 channels in the source image.
146 //
147 // An output image with N components has the following components interleaved
148 // in this order in each pixel:
149 //
150 //     N=#comp     components
151 //       1           grey
152 //       2           grey, alpha
153 //       3           red, green, blue
154 //       4           red, green, blue, alpha
155 //
156 // If image loading fails for any reason, the return value will be NULL,
157 // and *x, *y, *channels_in_file will be unchanged. The function
158 // stbi_failure_reason() can be queried for an extremely brief, end-user
159 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
160 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
161 // more user-friendly ones.
162 //
163 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
164 //
165 // ===========================================================================
166 //
167 // Philosophy
168 //
169 // stb libraries are designed with the following priorities:
170 //
171 //    1. easy to use
172 //    2. easy to maintain
173 //    3. good performance
174 //
175 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
176 // and for best performance I may provide less-easy-to-use APIs that give higher
177 // performance, in addition to the easy to use ones. Nevertheless, it's important
178 // to keep in mind that from the standpoint of you, a client of this library,
179 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
180 //
181 // Some secondary priorities arise directly from the first two, some of which
182 // make more explicit reasons why performance can't be emphasized.
183 //
184 //    - Portable ("ease of use")
185 //    - Small source code footprint ("easy to maintain")
186 //    - No dependencies ("ease of use")
187 //
188 // ===========================================================================
189 //
190 // I/O callbacks
191 //
192 // I/O callbacks allow you to read from arbitrary sources, like packaged
193 // files or some other source. Data read from callbacks are processed
194 // through a small internal buffer (currently 128 bytes) to try to reduce
195 // overhead.
196 //
197 // The three functions you must define are "read" (reads some bytes of data),
198 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
199 //
200 // ===========================================================================
201 //
202 // SIMD support
203 //
204 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
205 // supported by the compiler. For ARM Neon support, you must explicitly
206 // request it.
207 //
208 // (The old do-it-yourself SIMD API is no longer supported in the current
209 // code.)
210 //
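// On x86, SSE2 is used automatically where possible: MSVC builds check for it
// with a run-time test and fall back to the generic C versions if it is absent,
// while GCC/Clang builds decide at compile time (32-bit builds use SSE2 only
// when compiled with -msse2; there is no run-time detection). On ARM targets,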
213 // the typical path is to have separate builds for NEON and non-NEON devices
214 // (at least this is true for iOS and Android). Therefore, the NEON support is
215 // toggled by a build flag: define STBI_NEON to get NEON loops.
216 //
217 // If for some reason you do not want to use any of SIMD code, or if
218 // you have issues compiling it, you can disable it entirely by
219 // defining STBI_NO_SIMD.
220 //
221 // ===========================================================================
222 //
223 // HDR image support   (disable by defining STBI_NO_HDR)
224 //
225 // stb_image now supports loading HDR images in general, and currently
226 // the Radiance .HDR file format, although the support is provided
227 // generically. You can still load any file through the existing interface;
228 // if you attempt to load an HDR file, it will be automatically remapped to
229 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
230 // both of these constants can be reconfigured through this interface:
231 //
232 //     stbi_hdr_to_ldr_gamma(2.2f);
233 //     stbi_hdr_to_ldr_scale(1.0f);
234 //
235 // (note, do not use _inverse_ constants; stbi_image will invert them
236 // appropriately).
237 //
238 // Additionally, there is a new, parallel interface for loading files as
239 // (linear) floats to preserve the full dynamic range:
240 //
241 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
242 //
243 // If you load LDR images through this interface, those images will
244 // be promoted to floating point values, run through the inverse of
245 // constants corresponding to the above:
246 //
247 //     stbi_ldr_to_hdr_scale(1.0f);
248 //     stbi_ldr_to_hdr_gamma(2.2f);
249 //
250 // Finally, given a filename (or an open file or memory block--see header
251 // file for details) containing image data, you can query for the "most
252 // appropriate" interface to use (that is, whether the image is HDR or
253 // not), using:
254 //
255 //     stbi_is_hdr(char *filename);
256 //
257 // ===========================================================================
258 //
259 // iPhone PNG support:
260 //
261 // By default we convert iphone-formatted PNGs back to RGB, even though
262 // they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
264 // you will always just get the native iphone "format" through (which
265 // is BGR stored in RGB).
266 //
267 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
268 // pixel to remove any premultiplied alpha *only* if the image file explicitly
269 // says there's premultiplied data (currently only happens in iPhone images,
270 // and only if iPhone convert-to-rgb processing is on).
271 //
272 // ===========================================================================
273 //
274 // ADDITIONAL CONFIGURATION
275 //
276 //  - You can suppress implementation of any of the decoders to reduce
277 //    your code footprint by #defining one or more of the following
278 //    symbols before creating the implementation.
279 //
280 //        STBI_NO_JPEG
281 //        STBI_NO_PNG
282 //        STBI_NO_BMP
283 //        STBI_NO_PSD
284 //        STBI_NO_TGA
285 //        STBI_NO_GIF
286 //        STBI_NO_HDR
287 //        STBI_NO_PIC
288 //        STBI_NO_PNM   (.ppm and .pgm)
289 //
290 //  - You can request *only* certain decoders and suppress all other ones
291 //    (this will be more forward-compatible, as addition of new decoders
292 //    doesn't require you to disable them explicitly):
293 //
294 //        STBI_ONLY_JPEG
295 //        STBI_ONLY_PNG
296 //        STBI_ONLY_BMP
297 //        STBI_ONLY_PSD
298 //        STBI_ONLY_TGA
299 //        STBI_ONLY_GIF
300 //        STBI_ONLY_HDR
301 //        STBI_ONLY_PIC
302 //        STBI_ONLY_PNM   (.ppm and .pgm)
303 //
304 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
305 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
306 //
307 
308 
309 #ifndef STBI_NO_STDIO
310 #include <stdio.h>
311 #endif // STBI_NO_STDIO
312 
313 #define STBI_VERSION 1
314 
// Channel-count names for 'desired_channels' / '*channels_in_file'. Each
// value equals the number of interleaved components per pixel.
enum
{
   STBI_default    = 0, // only used for desired_channels

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
324 
typedef unsigned char  stbi_uc; // sample type of the 8-bits-per-channel interface
typedef unsigned short stbi_us; // sample type of the 16-bits-per-channel interface
327 
328 #ifdef __cplusplus
329 extern "C" {
330 #endif
331 
// STBIDEF is the linkage prefix of the public declarations: 'static' when
// STB_IMAGE_STATIC is defined, 'extern' otherwise.
#if defined(STB_IMAGE_STATIC)
   #define STBIDEF static
#else
   #define STBIDEF extern
#endif
337 
338 //////////////////////////////////////////////////////////////////////////////
339 //
340 // PRIMARY API - works on images of any type
341 //
342 
343 //
344 // load image by filename, open file, or memory buffer
345 //
346 
// Caller-supplied stream operations. 'user' is the caller's opaque context
// pointer (the 'user' argument of the *_from_callbacks functions).
typedef struct
{
   // fill 'data' with 'size' bytes; return the number of bytes actually read
   int  (*read)(void *user, char *data, int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if 'n' is negative
   void (*skip)(void *user, int n);

   // return nonzero if we are at end of file/data
   int  (*eof)(void *user);
} stbi_io_callbacks;
353 
354 ////////////////////////////////////
355 //
356 // 8-bits-per-channel interface
357 //
358 
359 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
360 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
361 
362 #ifndef STBI_NO_STDIO
363 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
364 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
365 // for stbi_load_from_file, file pointer is left pointing immediately after image
366 #endif
367 
368 ////////////////////////////////////
369 //
370 // 16-bits-per-channel interface
371 //
372 
373 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
374 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
375 
376 #ifndef STBI_NO_STDIO
377 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
378 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
379 #endif
380 
381 ////////////////////////////////////
382 //
383 // float-per-channel interface
384 //
385 #ifndef STBI_NO_LINEAR
386    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
387    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
388 
389    #ifndef STBI_NO_STDIO
390    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
391    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
392    #endif
393 #endif
394 
395 #ifndef STBI_NO_HDR
396    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
397    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
398 #endif // STBI_NO_HDR
399 
400 #ifndef STBI_NO_LINEAR
401    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
402    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
403 #endif // STBI_NO_LINEAR
404 
405 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
406 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
407 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
408 #ifndef STBI_NO_STDIO
409 STBIDEF int      stbi_is_hdr          (char const *filename);
410 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
411 #endif // STBI_NO_STDIO
412 
413 
414 // get a VERY brief reason for failure
415 // NOT THREADSAFE
416 STBIDEF const char *stbi_failure_reason  (void);
417 
418 // free the loaded image -- this is just free()
419 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
420 
421 // get image dimensions & components without fully decoding
422 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
423 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
424 
425 #ifndef STBI_NO_STDIO
426 STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
427 STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
428 
429 #endif
430 
431 
432 
// for image formats that explicitly notate that they have premultiplied alpha,
// we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
436 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
437 
438 // indicate whether we should process iphone images back to canonical format,
439 // or just pass them through "as-is"
440 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
441 
442 // flip the image vertically, so the first pixel in the output array is the bottom left
443 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
444 
445 // ZLIB client - used by PNG, available for other purposes
446 
447 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
448 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
449 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
450 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
451 
452 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
453 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
454 
455 
456 #ifdef __cplusplus
457 }
458 #endif
459 
460 //
461 //
462 ////   end header file   /////////////////////////////////////////////////////
463 #endif // STBI_INCLUDE_STB_IMAGE_H
464 
465 #ifdef STB_IMAGE_IMPLEMENTATION
466 
// If any STBI_ONLY_<format> symbol is defined, disable every decoder whose
// own STBI_ONLY_<format> was not requested by defining its STBI_NO_<format>.
// STBI_ONLY_ZLIB takes part in the trigger but has no STBI_NO_ counterpart
// in this list.
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
  || defined(STBI_ONLY_ZLIB)
   #if !defined(STBI_ONLY_JPEG)
      #define STBI_NO_JPEG
   #endif
   #if !defined(STBI_ONLY_PNG)
      #define STBI_NO_PNG
   #endif
   #if !defined(STBI_ONLY_BMP)
      #define STBI_NO_BMP
   #endif
   #if !defined(STBI_ONLY_PSD)
      #define STBI_NO_PSD
   #endif
   #if !defined(STBI_ONLY_TGA)
      #define STBI_NO_TGA
   #endif
   #if !defined(STBI_ONLY_GIF)
      #define STBI_NO_GIF
   #endif
   #if !defined(STBI_ONLY_HDR)
      #define STBI_NO_HDR
   #endif
   #if !defined(STBI_ONLY_PIC)
      #define STBI_NO_PIC
   #endif
   #if !defined(STBI_ONLY_PNM)
      #define STBI_NO_PNM
   #endif
#endif
499 
// With the PNG decoder disabled, the zlib decoder is disabled as well unless
// the client asked to keep it by defining STBI_SUPPORT_ZLIB.
#ifdef STBI_NO_PNG
   #if !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
      #define STBI_NO_ZLIB
   #endif
#endif
503 
504 
505 #include <stdarg.h>
506 #include <stddef.h> // ptrdiff_t on osx
507 #include <stdlib.h>
508 #include <string.h>
509 #include <limits.h>
510 
511 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
512 #include <math.h>  // ldexp
513 #endif
514 
515 #ifndef STBI_NO_STDIO
516 #include <stdio.h>
517 #endif
518 
519 #ifndef STBI_ASSERT
520 #include <assert.h>
521 #define STBI_ASSERT(x) assert(x)
522 #endif
523 
524 
// stbi_inline: __forceinline under MSVC, 'inline' for other C++ compilers,
// and nothing at all for other C compilers.
#if defined(_MSC_VER)
   #define stbi_inline __forceinline
#elif defined(__cplusplus)
   #define stbi_inline inline
#else
   #define stbi_inline
#endif
534 
535 
// Internal 16- and 32-bit integer aliases. Non-MSVC builds take them from
// <stdint.h>; MSVC builds spell them with the built-in short/int types.
#ifndef _MSC_VER
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#else
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#endif

// compile-time size check: the array size becomes -1 (a compile error)
// unless stbi__uint32 is exactly 4 bytes wide
typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
551 
// STBI_NOTUSED(v): reference 'v' so it does not count as unused. MSVC gets a
// cast to void; other compilers get a sizeof expression.
#ifndef _MSC_VER
#define STBI_NOTUSED(v)  (void)sizeof(v)
#else
#define STBI_NOTUSED(v)  (void)(v)
#endif
557 
// stbi_lrot(x,y): rotate x left by y bits. MSVC maps it to the _lrotl
// intrinsic; other compilers use the shift-based fallback below.
// NOTE(review): the fallback assumes x is a 32-bit unsigned value -- confirm
// at the call sites.
#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // Both shift counts are masked to 0..31. Without the mask a rotate by 0
   // would shift right by 32, the full width of the operand, which is
   // undefined behaviour in C. For y in 1..31 the result is unchanged.
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
567 
// Allocator hooks. A client either overrides STBI_MALLOC and STBI_FREE
// together with STBI_REALLOC or STBI_REALLOC_SIZED, or overrides none of
// them; any other combination is rejected at compile time.
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
// ok: complete client-supplied allocator
#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
// ok: nothing overridden, the defaults below apply
#else
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
#endif

// defaults: the C library allocator
#if !defined(STBI_MALLOC)
#define STBI_MALLOC(sz)           malloc(sz)
#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
#define STBI_FREE(p)              free(p)
#endif

// default sized realloc: the old size is dropped and STBI_REALLOC is used
#if !defined(STBI_REALLOC_SIZED)
#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
#endif
585 
// x86/x64 detection: defines at most one of STBI__X64_TARGET and
// STBI__X86_TARGET; the 64-bit check is made first.
#if defined(__x86_64__) || defined(_M_X64)
   #define STBI__X64_TARGET
#else
   #if defined(__i386) || defined(_M_IX86)
      #define STBI__X86_TARGET
   #endif
#endif
592 
#if !defined(STBI_NO_SIMD) && defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__)
// GCC only exposes the SSE2 intrinsics when compiling with -msse2, and that
// flag also lets the compiler emit SSE2 everywhere. Earlier attempts to ship
// the SSE2 functions behind run-time detection caused numerous issues; the
// way GCC/Clang expose architecture extensions does not suit one-file
// libraries. Current behaviour: with -msse2 we use SSE2 without any
// detection; without it we do not use SSE2 at all.
#define STBI_NO_SIMD
#endif
603 
#if !defined(STBI_NO_SIMD) && defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2)
// __MINGW32__ does not actually mean 32-bit, which is why this test keys on
// STBI__X86_TARGET so that STBI__X64_TARGET builds are left alone.
//
// 32-bit MinGW wants ESP to be 16-byte aligned, but that is not part of the
// Windows ABI, and neither VC++ nor Windows DLLs maintain the invariant.
// Enabling SSE2 on 32-bit MinGW is therefore dangerous unless
// "-mstackrealign" is enabled at the same time.
//
// See https://github.com/nothings/stb/issues/81 for more information.
//
// So the default is no SSE2 on 32-bit MinGW. If you have added
// -mstackrealign to your build settings, #define STBI_MINGW_ENABLE_SSE2.
#define STBI_NO_SIMD
#endif
618 
619 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
620 #define STBI_SSE2
621 #include <emmintrin.h>
622 
623 #ifdef _MSC_VER
624 
625 #if _MSC_VER >= 1400  // not VC6
626 #include <intrin.h> // __cpuid
// Runs CPUID with function id 1 through the MSVC __cpuid intrinsic and
// returns the fourth register word it reports (EDX, matching the inline-asm
// variant of this function).
static int stbi__cpuid3(void)
{
   int regs[4];

   __cpuid(regs, 1);
   return regs[3];
}
633 #else
stbi__cpuid3(void)634 static int stbi__cpuid3(void)
635 {
636    int res;
637    __asm {
638       mov  eax,1
639       cpuid
640       mov  res,edx
641    }
642    return res;
643 }
644 #endif
645 
// MSVC spelling of a 16-byte-aligned declaration
#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

// Run-time SSE2 check (MSVC builds): nonzero when bit 26 of the CPUID
// feature word returned by stbi__cpuid3() is set.
static int stbi__sse2_available(void)
{
   return (stbi__cpuid3() & (1 << 26)) != 0;
}
653 #else // assume GCC-style if not VC++
// GCC-style spelling of a 16-byte-aligned declaration
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

// No run-time test on GCC/Clang: this code is only compiled when -msse2 is
// on, which already allows the compiler to use SSE2 instructions at will,
// so SSE2 is reported as always available.
static int stbi__sse2_available(void)
{
   return 1;
}
663 #endif
664 #endif
665 
// ARM NEON: STBI_NO_SIMD overrides a client's STBI_NEON request
#if defined(STBI_NEON) && defined(STBI_NO_SIMD)
#undef STBI_NEON
#endif

#if defined(STBI_NEON)
#include <arm_neon.h>
// assume GCC or Clang on ARM targets
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif

// no SIMD path defined the macro: fall back to an ordinary declaration
#if !defined(STBI_SIMD_ALIGN)
#define STBI_SIMD_ALIGN(type, name) type name
#endif
680 
681 ///////////////////////////////////////////////
682 //
683 //  stbi__context struct and start_xxx functions
684 
685 // stbi__context structure is our basic context used by all images, so it
686 // contains all the IO context, plus some basic image information
687 typedef struct
688 {
689    stbi__uint32 img_x, img_y;
690    int img_n, img_out_n;
691 
692    stbi_io_callbacks io;
693    void *io_user_data;
694 
695    int read_from_callbacks;
696    int buflen;
697    stbi_uc buffer_start[128];
698 
699    stbi_uc *img_buffer, *img_buffer_end;
700    stbi_uc *img_buffer_original, *img_buffer_original_end;
701 } stbi__context;
702 
703 
704 static void stbi__refill_buffer(stbi__context *s);
705 
706 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)707 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
708 {
709    s->io.read = NULL;
710    s->read_from_callbacks = 0;
711    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
712    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
713 }
714 
715 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)716 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
717 {
718    s->io = *c;
719    s->io_user_data = user;
720    s->buflen = sizeof(s->buffer_start);
721    s->read_from_callbacks = 1;
722    s->img_buffer_original = s->buffer_start;
723    stbi__refill_buffer(s);
724    s->img_buffer_original_end = s->img_buffer_end;
725 }
726 
727 #ifndef STBI_NO_STDIO
728 
// stdio 'read' callback: 'user' is the FILE*. Reads up to 'size' bytes into
// 'data' and returns the number of bytes fread() delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
733 
// stdio 'skip' callback: 'user' is the FILE*. Moves the file position by 'n'
// bytes relative to the current position (a negative 'n' moves backwards).
// The fseek() result is not checked.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   fseek(stream, n, SEEK_CUR);
}
738 
// stdio 'eof' callback: 'user' is the FILE*. Returns nonzero once the stream
// can supply no more data. A stream whose error indicator is set counts as
// finished too: feof() alone stays 0 after a read error, so a caller polling
// this callback would never see the input end.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream) || ferror(stream);
}
743 
744 static stbi_io_callbacks stbi__stdio_callbacks =
745 {
746    stbi__stdio_read,
747    stbi__stdio_skip,
748    stbi__stdio_eof,
749 };
750 
// Initialize a context that reads from an open FILE*: the stream becomes the
// 'user' pointer of the stdio callback table.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *stream = (void *) f;
   stbi__start_callbacks(s, &stbi__stdio_callbacks, stream);
}
755 
756 //static void stop_file(stbi__context *s) { }
757 
758 #endif // !STBI_NO_STDIO
759 
// Restore the current data window to the "original" window recorded when
// the context was started.
static void stbi__rewind(stbi__context *s)
{
   // Conceptually a rewind SHOULD go back to the beginning of the stream.
   // We only go back to the beginning of the initial buffer, because rewind
   // is only used after a 'test', which never looks at more than 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
768 
// Channel-order tags.
// NOTE(review): presumably the values stored in stbi__result_info.channel_order
// -- confirm at the use sites.
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
774 
// Extra information about a decoded image; the format loaders declared below
// each take a pointer to one of these.
typedef struct
{
   int bits_per_channel;   // bits in each channel sample
   int num_channels;       // number of channels
   int channel_order;      // NOTE(review): presumably STBI_ORDER_RGB or STBI_ORDER_BGR -- confirm
} stbi__result_info;
781 
782 #ifndef STBI_NO_JPEG
783 static int      stbi__jpeg_test(stbi__context *s);
784 static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
785 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
786 #endif
787 
788 #ifndef STBI_NO_PNG
789 static int      stbi__png_test(stbi__context *s);
790 static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
791 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
792 #endif
793 
794 #ifndef STBI_NO_BMP
795 static int      stbi__bmp_test(stbi__context *s);
796 static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
797 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
798 #endif
799 
800 #ifndef STBI_NO_TGA
801 static int      stbi__tga_test(stbi__context *s);
802 static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
803 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
804 #endif
805 
806 #ifndef STBI_NO_PSD
807 static int      stbi__psd_test(stbi__context *s);
808 static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
809 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
810 #endif
811 
812 #ifndef STBI_NO_HDR
813 static int      stbi__hdr_test(stbi__context *s);
814 static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
815 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
816 #endif
817 
818 #ifndef STBI_NO_PIC
819 static int      stbi__pic_test(stbi__context *s);
820 static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
821 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
822 #endif
823 
824 #ifndef STBI_NO_GIF
825 static int      stbi__gif_test(stbi__context *s);
826 static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
827 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
828 #endif
829 
830 #ifndef STBI_NO_PNM
831 static int      stbi__pnm_test(stbi__context *s);
832 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
833 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
834 #endif
835 
836 // this is not threadsafe
837 static const char *stbi__g_failure_reason;
838 
stbi_failure_reason(void)839 STBIDEF const char *stbi_failure_reason(void)
840 {
841    return stbi__g_failure_reason;
842 }
843 
stbi__err(const char * str)844 static int stbi__err(const char *str)
845 {
846    stbi__g_failure_reason = str;
847    return 0;
848 }
849 
// Single allocation entry point: forwards to the configurable STBI_MALLOC.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
854 
855 // stb_image uses ints pervasively, including for offset calculations.
856 // therefore the largest decoded image size we can support with the
857 // current code, even on 64-bit targets, is INT_MAX. this is not a
858 // significant limitation for the intended use case.
859 //
860 // we do, however, need to make sure our size calculations don't
861 // overflow. hence a few helper functions for size calculations that
862 // multiply integers together, making sure that they're non-negative
863 // and no overflow occurs.
864 
// Returns 1 if the sum "a + b" is valid, 0 on overflow.
// Negative terms are considered invalid: a negative a or a negative b
// yields 0, matching the other size helpers.
static int stbi__addsizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
876 
// Reports whether the product a*b fits in an int: 1 if it does, 0 on overflow.
// A negative factor is rejected outright.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // multiplying by zero is always safe (and b == 0 must not reach the division)
   if (b == 0) return 1;
   // portable overflow check: compare against the quotient instead of forming a*b
   return (INT_MAX/b >= a) ? 1 : 0;
}
886 
// Returns 1 when "a*b + add" has no negative terms/factors and does not
// overflow, 0 otherwise. The product is only formed once it is known to fit.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add) != 0;
}
892 
// Returns 1 when "a*b*c + add" has no negative terms/factors and does not
// overflow, 0 otherwise. Each partial product is validated before it is used.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add) != 0;
}
899 
// Returns 1 when "a*b*c*d + add" has no negative terms/factors and does not
// overflow, 0 otherwise. Each partial product is validated before it is used.
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add) != 0;
}
906 
// mallocs with size overflow checking

// Allocates a*b + add bytes, or returns NULL when that size is not valid.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a*b + add) : NULL;
}
913 
// Allocates a*b*c + add bytes, or returns NULL when that size is not valid.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a*b*c + add) : NULL;
}
919 
// Allocates a*b*c*d + add bytes, or returns NULL when that size is not valid.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a*b*c*d + add) : NULL;
}
925 
// stbi__err    - record an error; the expression evaluates to 0
// stbi__errpf  - record an error; the expression is a NULL pointer to float
// stbi__errpuc - record an error; the expression is a NULL pointer to unsigned char
//
// The two-argument stbi__err macro selects what reaches the one-argument
// function: nothing at all with STBI_NO_FAILURE_STRINGS, the second message (y)
// with STBI_FAILURE_USERMSG, and the first message (x) otherwise.

#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both arms of the conditional are NULL; it exists only to evaluate stbi__err
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
940 
stbi_image_free(void * retval_from_stbi_load)941 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
942 {
943    STBI_FREE(retval_from_stbi_load);
944 }
945 
946 #ifndef STBI_NO_LINEAR
947 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
948 #endif
949 
950 #ifndef STBI_NO_HDR
951 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
952 #endif
953 
954 static int stbi__vertically_flip_on_load = 0;
955 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)956 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
957 {
958     stbi__vertically_flip_on_load = flag_true_if_should_flip;
959 }
960 
// Identifies the image format in `s` and hands off to the matching decoder.
//
// The compiled-in format tests run in a fixed order and the first one that
// matches decides which loader is called; its return value is returned as is.
// HDR input is decoded to float and then converted down with
// stbi__hdr_to_ldr. `bpc` is only forwarded to the PSD loader. When no test
// matches, the failure reason is set and NULL is returned.
//
// `ri` is reset to defaults (8 bits per channel, RGB order) before any
// decoder runs.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   // zero the whole struct first so fields added later start out initialized
   memset(ri, 0, sizeof(*ri));
   ri->num_channels = 0;
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->bits_per_channel = 8;           // default is 8 so most paths don't have to be changed

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) {
      return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s)) {
      return stbi__png_load(s, x, y, comp, req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s)) {
      return stbi__bmp_load(s, x, y, comp, req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s)) {
      return stbi__gif_load(s, x, y, comp, req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s)) {
      return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
   }
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s)) {
      return stbi__pic_load(s, x, y, comp, req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s)) {
      return stbi__pnm_load(s, x, y, comp, req_comp, ri);
   }
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
      int out_comp = req_comp ? req_comp : *comp;
      return stbi__hdr_to_ldr(hdr, *x, *y, out_comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s)) {
      return stbi__tga_load(s, x, y, comp, req_comp, ri);
   }
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1005 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1006 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1007 {
1008    int i;
1009    int img_len = w * h * channels;
1010    stbi_uc *reduced;
1011 
1012    reduced = (stbi_uc *) stbi__malloc(img_len);
1013    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1014 
1015    for (i = 0; i < img_len; ++i)
1016       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1017 
1018    STBI_FREE(orig);
1019    return reduced;
1020 }
1021 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1022 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1023 {
1024    int i;
1025    int img_len = w * h * channels;
1026    stbi__uint16 *enlarged;
1027 
1028    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1029    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1030 
1031    for (i = 0; i < img_len; ++i)
1032       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1033 
1034    STBI_FREE(orig);
1035    return enlarged;
1036 }
1037 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1038 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1039 {
1040    int row;
1041    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1042    stbi_uc temp[2048];
1043    stbi_uc *bytes = (stbi_uc *)image;
1044 
1045    for (row = 0; row < (h>>1); row++) {
1046       stbi_uc *row0 = bytes + row*bytes_per_row;
1047       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1048       // swap row0 with row1
1049       size_t bytes_left = bytes_per_row;
1050       while (bytes_left) {
1051          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1052          memcpy(temp, row0, bytes_copy);
1053          memcpy(row0, row1, bytes_copy);
1054          memcpy(row1, temp, bytes_copy);
1055          row0 += bytes_copy;
1056          row1 += bytes_copy;
1057          bytes_left -= bytes_copy;
1058       }
1059    }
1060 }
1061 
// Decodes the image in `s` and returns it with 8 bits per channel.
//
// Data that the decoder produced at 16 bits per channel is reduced to 8, and
// the image is flipped vertically when the global flip flag is set.
//
// Returns the pixel buffer, or NULL if decoding or the bit-depth conversion
// failed.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates a new buffer and can fail; stop here rather
      // than handing a NULL image to the vertical flip below
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1085 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1086 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1087 {
1088    stbi__result_info ri;
1089    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1090 
1091    if (result == NULL)
1092       return NULL;
1093 
1094    if (ri.bits_per_channel != 16) {
1095       STBI_ASSERT(ri.bits_per_channel == 8);
1096       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1097       ri.bits_per_channel = 16;
1098    }
1099 
1100    // @TODO: move stbi__convert_format16 to here
1101    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1102 
1103    if (stbi__vertically_flip_on_load) {
1104       int channels = req_comp ? req_comp : *comp;
1105       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1106    }
1107 
1108    return (stbi__uint16 *) result;
1109 }
1110 
1111 #ifndef STBI_NO_HDR
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1112 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1113 {
1114    if (stbi__vertically_flip_on_load && result != NULL) {
1115       int channels = req_comp ? req_comp : *comp;
1116       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1117    }
1118 }
1119 #endif
1120 
1121 #ifndef STBI_NO_STDIO
1122 
// Opens `filename` with the given fopen-style `mode`. Uses fopen_s on MSVC
// 2005 and later, plain fopen everywhere else. Returns NULL on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *f;
   if (fopen_s(&f, filename, mode) != 0)
      f = 0;
   return f;
#else
   return fopen(filename, mode);
#endif
}
1134 
1135 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1136 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1137 {
1138    FILE *f = stbi__fopen(filename, "rb");
1139    unsigned char *result;
1140    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1141    result = stbi_load_from_file(f,x,y,comp,req_comp);
1142    fclose(f);
1143    return result;
1144 }
1145 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1146 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1147 {
1148    unsigned char *result;
1149    stbi__context s;
1150    stbi__start_file(&s,f);
1151    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1152    if (result) {
1153       // need to 'unget' all the characters in the IO buffer
1154       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1155    }
1156    return result;
1157 }
1158 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1159 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1160 {
1161    stbi__uint16 *result;
1162    stbi__context s;
1163    stbi__start_file(&s,f);
1164    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1165    if (result) {
1166       // need to 'unget' all the characters in the IO buffer
1167       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1168    }
1169    return result;
1170 }
1171 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1172 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1173 {
1174    FILE *f = stbi__fopen(filename, "rb");
1175    stbi__uint16 *result;
1176    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1177    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1178    fclose(f);
1179    return result;
1180 }
1181 
1182 
1183 #endif //!STBI_NO_STDIO
1184 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1185 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1186 {
1187    stbi__context s;
1188    stbi__start_mem(&s,buffer,len);
1189    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1190 }
1191 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1192 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1193 {
1194    stbi__context s;
1195    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1196    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1197 }
1198 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1199 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1200 {
1201    stbi__context s;
1202    stbi__start_mem(&s,buffer,len);
1203    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1204 }
1205 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1206 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1207 {
1208    stbi__context s;
1209    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1210    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1211 }
1212 
1213 #ifndef STBI_NO_LINEAR
// Loads an image from `s` as float data.
//
// HDR input (when HDR support is compiled in) is decoded directly to float
// and only post-processed. Anything else goes through the 8-bit path and is
// converted with stbi__ldr_to_hdr. If the 8-bit load fails, the failure reason
// is set to "unknown image type" and NULL is returned.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
      if (hdr != NULL)
         stbi__float_postprocess(hdr, x, y, comp, req_comp);
      return hdr;
   }
   #endif

   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");

   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1231 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1232 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1233 {
1234    stbi__context s;
1235    stbi__start_mem(&s,buffer,len);
1236    return stbi__loadf_main(&s,x,y,comp,req_comp);
1237 }
1238 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1239 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1240 {
1241    stbi__context s;
1242    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1243    return stbi__loadf_main(&s,x,y,comp,req_comp);
1244 }
1245 
1246 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1247 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1248 {
1249    float *result;
1250    FILE *f = stbi__fopen(filename, "rb");
1251    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1252    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1253    fclose(f);
1254    return result;
1255 }
1256 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1257 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1258 {
1259    stbi__context s;
1260    stbi__start_file(&s,f);
1261    return stbi__loadf_main(&s,x,y,comp,req_comp);
1262 }
1263 #endif // !STBI_NO_STDIO
1264 
1265 #endif // !STBI_NO_LINEAR
1266 
1267 // these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
1268 // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
1269 // reports false!
1270 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1271 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1272 {
1273    #ifndef STBI_NO_HDR
1274    stbi__context s;
1275    stbi__start_mem(&s,buffer,len);
1276    return stbi__hdr_test(&s);
1277    #else
1278    STBI_NOTUSED(buffer);
1279    STBI_NOTUSED(len);
1280    return 0;
1281    #endif
1282 }
1283 
1284 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1285 STBIDEF int      stbi_is_hdr          (char const *filename)
1286 {
1287    FILE *f = stbi__fopen(filename, "rb");
1288    int result=0;
1289    if (f) {
1290       result = stbi_is_hdr_from_file(f);
1291       fclose(f);
1292    }
1293    return result;
1294 }
1295 
stbi_is_hdr_from_file(FILE * f)1296 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
1297 {
1298    #ifndef STBI_NO_HDR
1299    stbi__context s;
1300    stbi__start_file(&s,f);
1301    return stbi__hdr_test(&s);
1302    #else
1303    STBI_NOTUSED(f);
1304    return 0;
1305    #endif
1306 }
1307 #endif // !STBI_NO_STDIO
1308 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1309 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1310 {
1311    #ifndef STBI_NO_HDR
1312    stbi__context s;
1313    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1314    return stbi__hdr_test(&s);
1315    #else
1316    STBI_NOTUSED(clbk);
1317    STBI_NOTUSED(user);
1318    return 0;
1319    #endif
1320 }
1321 
1322 #ifndef STBI_NO_LINEAR
1323 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1324 
stbi_ldr_to_hdr_gamma(float gamma)1325 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1326 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1327 #endif
1328 
1329 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1330 
stbi_hdr_to_ldr_gamma(float gamma)1331 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1332 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1333 
1334 
1335 //////////////////////////////////////////////////////////////////////////////
1336 //
1337 // Common code used by all image loaders
1338 //
1339 
// Scan modes shared by the image loaders.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1346 
// Refills the context's buffer from the read callback and points the read
// cursor at the start of it.
//
// A read of 0 bytes is treated as end of file: callback reading is switched
// off and the buffer is set up to hold a single 0 byte, so the cursor always
// points at safe memory (e.g. for a 0-byte file).
static void stbi__refill_buffer(stbi__context *s)
{
   int got = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);

   s->img_buffer = s->buffer_start;
   if (got != 0) {
      s->img_buffer_end = s->buffer_start + got;
      return;
   }

   // at end of file, treat same as if from memory
   s->read_from_callbacks = 0;
   s->img_buffer_end = s->buffer_start+1;
   *s->img_buffer = 0;
}
1362 
stbi__get8(stbi__context * s)1363 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1364 {
1365    if (s->img_buffer < s->img_buffer_end)
1366       return *s->img_buffer++;
1367    if (s->read_from_callbacks) {
1368       stbi__refill_buffer(s);
1369       return *s->img_buffer++;
1370    }
1371    return 0;
1372 }
1373 
stbi__at_eof(stbi__context * s)1374 stbi_inline static int stbi__at_eof(stbi__context *s)
1375 {
1376    if (s->io.read) {
1377       if (!(s->io.eof)(s->io_user_data)) return 0;
1378       // if feof() is true, check if buffer = end
1379       // special case: we've only got the special 0 character at the end
1380       if (s->read_from_callbacks == 0) return 1;
1381    }
1382 
1383    return s->img_buffer >= s->img_buffer_end;
1384 }
1385 
// Advances the read position of `s` by n bytes.
//
// A negative n moves the cursor to the end of the current buffer. With a read
// callback installed and fewer than n bytes buffered, the buffer is drained
// and the skip callback is asked to skip the remainder.
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (n > buffered) {
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - buffered);
         return;
      }
   }

   s->img_buffer += n;
}
1402 
// Copies n bytes from `s` into `buffer`. Returns 1 when all n bytes were
// obtained and 0 otherwise.
//
// With a read callback installed and fewer than n bytes buffered, the
// buffered bytes are copied first and the rest is read straight into `buffer`
// through the callback.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (buffered < n) {
         int wanted = n - buffered;
         int got;

         memcpy(buffer, s->img_buffer, buffered);

         got = (s->io.read)(s->io_user_data, (char*) buffer + buffered, wanted);
         s->img_buffer = s->img_buffer_end;
         return got == wanted;
      }
   }

   if (s->img_buffer+n > s->img_buffer_end)
      return 0;

   memcpy(buffer, s->img_buffer, n);
   s->img_buffer += n;
   return 1;
}
1426 
// Reads two bytes from `s` as a big-endian 16-bit value.
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1432 
stbi__get32be(stbi__context * s)1433 static stbi__uint32 stbi__get32be(stbi__context *s)
1434 {
1435    stbi__uint32 z = stbi__get16be(s);
1436    return (z << 16) + stbi__get16be(s);
1437 }
1438 
1439 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1440 // nothing
1441 #else
stbi__get16le(stbi__context * s)1442 static int stbi__get16le(stbi__context *s)
1443 {
1444    int z = stbi__get8(s);
1445    return z + (stbi__get8(s) << 8);
1446 }
1447 #endif
1448 
1449 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1450 static stbi__uint32 stbi__get32le(stbi__context *s)
1451 {
1452    stbi__uint32 z = stbi__get16le(s);
1453    return z + (stbi__get16le(s) << 16);
1454 }
1455 #endif
1456 
// truncate int to byte without warnings
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xFF))
1458 
1459 
1460 //////////////////////////////////////////////////////////////////////////////
1461 //
1462 //  generic converter from built-in img_n to req_comp
1463 //    individual types do this automatically as much as possible (e.g. jpeg
1464 //    does all cases internally since it needs to colorspace convert anyway,
1465 //    and it never has alpha, so very few cases ). png can automatically
1466 //    interleave an alpha=255 channel, but falls back to this for other cases
1467 //
1468 //  assume data buffer is malloced, so malloc a new one and free that one
1469 //  only failure mode is malloc failing
1470 
stbi__compute_y(int r,int g,int b)1471 static stbi_uc stbi__compute_y(int r, int g, int b)
1472 {
1473    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1474 }
1475 
stbi__convert_format(unsigned char * data,int img_n,int req_comp,unsigned int x,unsigned int y)1476 static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1477 {
1478    int i,j;
1479    unsigned char *good;
1480 
1481    if (req_comp == img_n) return data;
1482    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1483 
1484    good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
1485    if (good == NULL) {
1486       STBI_FREE(data);
1487       return stbi__errpuc("outofmem", "Out of memory");
1488    }
1489 
1490    for (j=0; j < (int) y; ++j) {
1491       unsigned char *src  = data + j * x * img_n   ;
1492       unsigned char *dest = good + j * x * req_comp;
1493 
1494       #define STBI__COMBO(a,b)  ((a)*8+(b))
1495       #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1496       // convert source image with img_n components to one with req_comp components;
1497       // avoid switch per pixel, so use switch per scanline and massive macros
1498       switch (STBI__COMBO(img_n, req_comp)) {
1499          STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
1500          STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1501          STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
1502          STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
1503          STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1504          STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
1505          STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
1506          STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1507          STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
1508          STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1509          STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
1510          STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
1511          default: STBI_ASSERT(0);
1512       }
1513       #undef STBI__CASE
1514    }
1515 
1516    STBI_FREE(data);
1517    return good;
1518 }
1519 
stbi__compute_y_16(int r,int g,int b)1520 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1521 {
1522    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1523 }
1524 
stbi__convert_format16(stbi__uint16 * data,int img_n,int req_comp,unsigned int x,unsigned int y)1525 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1526 {
1527    int i,j;
1528    stbi__uint16 *good;
1529 
1530    if (req_comp == img_n) return data;
1531    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1532 
1533    good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1534    if (good == NULL) {
1535       STBI_FREE(data);
1536       return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1537    }
1538 
1539    for (j=0; j < (int) y; ++j) {
1540       stbi__uint16 *src  = data + j * x * img_n   ;
1541       stbi__uint16 *dest = good + j * x * req_comp;
1542 
1543       #define STBI__COMBO(a,b)  ((a)*8+(b))
1544       #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1545       // convert source image with img_n components to one with req_comp components;
1546       // avoid switch per pixel, so use switch per scanline and massive macros
1547       switch (STBI__COMBO(img_n, req_comp)) {
1548          STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
1549          STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
1550          STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
1551          STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
1552          STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
1553          STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
1554          STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
1555          STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
1556          STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
1557          STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
1558          STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
1559          STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
1560          default: STBI_ASSERT(0);
1561       }
1562       #undef STBI__CASE
1563    }
1564 
1565    STBI_FREE(data);
1566    return good;
1567 }
1568 
1569 #ifndef STBI_NO_LINEAR
stbi__ldr_to_hdr(stbi_uc * data,int x,int y,int comp)1570 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1571 {
1572    int i,k,n;
1573    float *output;
1574    if (!data) return NULL;
1575    output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1576    if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1577    // compute number of non-alpha components
1578    if (comp & 1) n = comp; else n = comp-1;
1579    for (i=0; i < x*y; ++i) {
1580       for (k=0; k < n; ++k) {
1581          output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1582       }
1583       if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1584    }
1585    STBI_FREE(data);
1586    return output;
1587 }
1588 #endif
1589 
1590 #ifndef STBI_NO_HDR
1591 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1592 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1593 {
1594    int i,k,n;
1595    stbi_uc *output;
1596    if (!data) return NULL;
1597    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1598    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1599    // compute number of non-alpha components
1600    if (comp & 1) n = comp; else n = comp-1;
1601    for (i=0; i < x*y; ++i) {
1602       for (k=0; k < n; ++k) {
1603          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1604          if (z < 0) z = 0;
1605          if (z > 255) z = 255;
1606          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1607       }
1608       if (k < comp) {
1609          float z = data[i*comp+k] * 255 + 0.5f;
1610          if (z < 0) z = 0;
1611          if (z > 255) z = 255;
1612          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1613       }
1614    }
1615    STBI_FREE(data);
1616    return output;
1617 }
1618 #endif
1619 
1620 //////////////////////////////////////////////////////////////////////////////
1621 //
1622 //  "baseline" JPEG/JFIF decoder
1623 //
1624 //    simple implementation
1625 //      - doesn't support delayed output of y-dimension
1626 //      - simple interface (only one output format: 8-bit interleaved RGB)
1627 //      - doesn't try to recover corrupt jpegs
1628 //      - doesn't allow partial loading, loading multiple at once
1629 //      - still fast on x86 (copying globals into locals doesn't help x86)
1630 //      - allocates lots of intermediate memory (full size of all components)
1631 //        - non-interleaved case requires this anyway
1632 //        - allows good upsampling (see next)
1633 //    high-quality
1634 //      - upsampled channels are bilinearly interpolated, even across blocks
1635 //      - quality integer IDCT derived from IJG's 'slow'
1636 //    performance
1637 //      - fast huffman; reasonable integer IDCT
1638 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1639 //      - uses a lot of intermediate memory, could cache poorly
1640 
1641 #ifndef STBI_NO_JPEG
1642 
1643 // huffman decoding acceleration
1644 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1645 
1646 typedef struct
1647 {
1648    stbi_uc  fast[1 << FAST_BITS];
1649    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1650    stbi__uint16 code[256];
1651    stbi_uc  values[256];
1652    stbi_uc  size[257];
1653    unsigned int maxcode[18];
1654    int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1655 } stbi__huffman;
1656 
1657 typedef struct
1658 {
1659    stbi__context *s;
1660    stbi__huffman huff_dc[4];
1661    stbi__huffman huff_ac[4];
1662    stbi__uint16 dequant[4][64];
1663    stbi__int16 fast_ac[4][1 << FAST_BITS];
1664 
1665 // sizes for components, interleaved MCUs
1666    int img_h_max, img_v_max;
1667    int img_mcu_x, img_mcu_y;
1668    int img_mcu_w, img_mcu_h;
1669 
1670 // definition of jpeg image component
1671    struct
1672    {
1673       int id;
1674       int h,v;
1675       int tq;
1676       int hd,ha;
1677       int dc_pred;
1678 
1679       int x,y,w2,h2;
1680       stbi_uc *data;
1681       void *raw_data, *raw_coeff;
1682       stbi_uc *linebuf;
1683       short   *coeff;   // progressive only
1684       int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1685    } img_comp[4];
1686 
1687    stbi__uint32   code_buffer; // jpeg entropy-coded buffer
1688    int            code_bits;   // number of valid bits
1689    unsigned char  marker;      // marker seen while filling entropy buffer
1690    int            nomore;      // flag if we saw a marker so must stop
1691 
1692    int            progressive;
1693    int            spec_start;
1694    int            spec_end;
1695    int            succ_high;
1696    int            succ_low;
1697    int            eob_run;
1698    int            jfif;
1699    int            app14_color_transform; // Adobe APP14 tag
1700    int            rgb;
1701 
1702    int scan_n, order[4];
1703    int restart_interval, todo;
1704 
1705 // kernels
1706    void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1707    void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1708    stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1709 } stbi__jpeg;
1710 
// Build the decode tables of `h` from a code-length histogram.
//   count[i] (i = 0..15) is the number of codes that are i+1 bits long.
// Fills h->size, h->code, h->delta, h->maxcode and the h->fast lookup;
// h->values is not written here.
// Returns 1 on success. A histogram that describes more symbols than the
// tables can hold, or code lengths that cannot form a prefix code, is rejected
// by returning the result of stbi__err().
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0,code;
   // code[] and values[] hold this many symbols; size[] has one extra
   // slot for the 0 terminator written after the loop
   const int max_symbols = (int) (sizeof(h->code) / sizeof(h->code[0]));

   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // the counts come straight from the file; without this check a
         // corrupt table would write past the end of size[]/code[]
         if (k >= max_symbols) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // sentinel (j == 17 here): guarantees the length search in the decoder terminates
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1751 
1752 // build a table that decodes both magnitude and value of small ACs in
1753 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1754 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1755 {
1756    int i;
1757    for (i=0; i < (1 << FAST_BITS); ++i) {
1758       stbi_uc fast = h->fast[i];
1759       fast_ac[i] = 0;
1760       if (fast < 255) {
1761          int rs = h->values[fast];
1762          int run = (rs >> 4) & 15;
1763          int magbits = rs & 15;
1764          int len = h->size[fast];
1765 
1766          if (magbits && len + magbits <= FAST_BITS) {
1767             // magnitude code followed by receive_extend code
1768             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1769             int m = 1 << (magbits - 1);
1770             if (k < m) k += (~0U << magbits) + 1;
1771             // if the result is small enough, we can fit it in fast_ac table
1772             if (k >= -128 && k <= 127)
1773                fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1774          }
1775       }
1776    }
1777 }
1778 
// Refill the entropy bit buffer of `j` one byte at a time until it holds more
// than 24 valid bits.
//  - A 0xff byte followed by 0x00 is kept as a data byte 0xff.
//  - A 0xff byte followed (after any 0xff fill bytes) by a non-zero byte is a
//    marker: it is stored in j->marker, j->nomore is set, and the function
//    returns immediately without adding bits.
//  - Once j->nomore is set, zero bytes are appended instead of reading input.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      // unsigned so that placing a byte >= 0x80 in the top 8 bits of the
      // buffer is not a signed-overflow left shift
      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1796 
1797 // (1 << n) - 1
1798 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1799 
1800 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1801 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1802 {
1803    unsigned int temp;
1804    int c,k;
1805 
1806    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1807 
1808    // look at the top FAST_BITS and determine what symbol ID it is,
1809    // if the code is <= FAST_BITS
1810    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1811    k = h->fast[c];
1812    if (k < 255) {
1813       int s = h->size[k];
1814       if (s > j->code_bits)
1815          return -1;
1816       j->code_buffer <<= s;
1817       j->code_bits -= s;
1818       return h->values[k];
1819    }
1820 
1821    // naive test is to shift the code_buffer down so k bits are
1822    // valid, then test against maxcode. To speed this up, we've
1823    // preshifted maxcode left so that it has (16-k) 0s at the
1824    // end; in other words, regardless of the number of bits, it
1825    // wants to be compared against something shifted to have 16;
1826    // that way we don't need to shift inside the loop.
1827    temp = j->code_buffer >> 16;
1828    for (k=FAST_BITS+1 ; ; ++k)
1829       if (temp < h->maxcode[k])
1830          break;
1831    if (k == 17) {
1832       // error! code not found
1833       j->code_bits -= 16;
1834       return -1;
1835    }
1836 
1837    if (k > j->code_bits)
1838       return -1;
1839 
1840    // convert the huffman code to the symbol id
1841    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1842    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1843 
1844    // convert the id to a symbol
1845    j->code_bits -= k;
1846    j->code_buffer <<= k;
1847    return h->values[c];
1848 }
1849 
// stbi__jbias[n] = (-1 << n) + 1, i.e. 1 - 2^n, for n = 0..15
static const int stbi__jbias[16] =
{
       0,     -1,     -3,     -7,
     -15,    -31,    -63,   -127,
    -255,   -511,  -1023,  -2047,
   -4095,  -8191, -16383, -32767
};
1852 
1853 // combined JPEG 'receive' and JPEG 'extend', since baseline
1854 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1855 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1856 {
1857    unsigned int k;
1858    int sgn;
1859    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1860 
1861    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1862    k = stbi_lrot(j->code_buffer, n);
1863    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1864    j->code_buffer = k & ~stbi__bmask[n];
1865    k &= stbi__bmask[n];
1866    j->code_bits -= n;
1867    return k + (stbi__jbias[n] & ~sgn);
1868 }
1869 
1870 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1871 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1872 {
1873    unsigned int k;
1874    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1875    k = stbi_lrot(j->code_buffer, n);
1876    j->code_buffer = k & ~stbi__bmask[n];
1877    k &= stbi__bmask[n];
1878    j->code_bits -= n;
1879    return k;
1880 }
1881 
stbi__jpeg_get_bit(stbi__jpeg * j)1882 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1883 {
1884    unsigned int k;
1885    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1886    k = j->code_buffer;
1887    j->code_buffer <<= 1;
1888    --j->code_bits;
1889    return k & 0x80000000;
1890 }
1891 
1892 // given a value that's at position X in the zigzag stream,
1893 // where does it appear in the 8x8 matrix coded as row-major?
1894 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1895 {
1896     0,  1,  8, 16,  9,  2,  3, 10,
1897    17, 24, 32, 25, 18, 11,  4,  5,
1898    12, 19, 26, 33, 40, 48, 41, 34,
1899    27, 20, 13,  6,  7, 14, 21, 28,
1900    35, 42, 49, 56, 57, 50, 43, 36,
1901    29, 22, 15, 23, 30, 37, 44, 51,
1902    58, 59, 52, 45, 38, 31, 39, 46,
1903    53, 60, 61, 54, 47, 55, 62, 63,
1904    // let corrupt input sample past end
1905    63, 63, 63, 63, 63, 63, 63, 63,
1906    63, 63, 63, 63, 63, 63, 63
1907 };
1908 
1909 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1910 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1911 {
1912    int diff,dc,k;
1913    int t;
1914 
1915    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1916    t = stbi__jpeg_huff_decode(j, hdc);
1917    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1918 
1919    // 0 all the ac values now so we can do it 32-bits at a time
1920    memset(data,0,64*sizeof(data[0]));
1921 
1922    diff = t ? stbi__extend_receive(j, t) : 0;
1923    dc = j->img_comp[b].dc_pred + diff;
1924    j->img_comp[b].dc_pred = dc;
1925    data[0] = (short) (dc * dequant[0]);
1926 
1927    // decode AC components, see JPEG spec
1928    k = 1;
1929    do {
1930       unsigned int zig;
1931       int c,r,s;
1932       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1933       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1934       r = fac[c];
1935       if (r) { // fast-AC path
1936          k += (r >> 4) & 15; // run
1937          s = r & 15; // combined length
1938          j->code_buffer <<= s;
1939          j->code_bits -= s;
1940          // decode into unzigzag'd location
1941          zig = stbi__jpeg_dezigzag[k++];
1942          data[zig] = (short) ((r >> 8) * dequant[zig]);
1943       } else {
1944          int rs = stbi__jpeg_huff_decode(j, hac);
1945          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1946          s = rs & 15;
1947          r = rs >> 4;
1948          if (s == 0) {
1949             if (rs != 0xf0) break; // end block
1950             k += 16;
1951          } else {
1952             k += r;
1953             // decode into unzigzag'd location
1954             zig = stbi__jpeg_dezigzag[k++];
1955             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1956          }
1957       }
1958    } while (k < 64);
1959    return 1;
1960 }
1961 
// Decode the DC coefficient of one block in a progressive scan.
//   data : the block's 64 coefficients; only data[0] is produced/refined here
//   hdc  : huffman table for the DC difference
//   b    : index into j->img_comp, used for the running DC predictor
// When j->succ_high is 0 this is the first DC scan: the block is zeroed and
// data[0] is set to the predicted DC scaled by 2^succ_low. Otherwise it is a
// refinement scan that adds 2^succ_low when the next bit is set.
// Returns 1 on success; on a corrupt stream returns the result of stbi__err().
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // a failed decode (-1) or a bit count above 15 must not reach
      // stbi__extend_receive, which indexes its tables with t
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply rather than shift: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
1986 
1987 // @OPTIMIZE: store non-zigzagged during the decode passes,
1988 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)1989 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1990 {
1991    int k;
1992    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1993 
1994    if (j->succ_high == 0) {
1995       int shift = j->succ_low;
1996 
1997       if (j->eob_run) {
1998          --j->eob_run;
1999          return 1;
2000       }
2001 
2002       k = j->spec_start;
2003       do {
2004          unsigned int zig;
2005          int c,r,s;
2006          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2007          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2008          r = fac[c];
2009          if (r) { // fast-AC path
2010             k += (r >> 4) & 15; // run
2011             s = r & 15; // combined length
2012             j->code_buffer <<= s;
2013             j->code_bits -= s;
2014             zig = stbi__jpeg_dezigzag[k++];
2015             data[zig] = (short) ((r >> 8) << shift);
2016          } else {
2017             int rs = stbi__jpeg_huff_decode(j, hac);
2018             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2019             s = rs & 15;
2020             r = rs >> 4;
2021             if (s == 0) {
2022                if (r < 15) {
2023                   j->eob_run = (1 << r);
2024                   if (r)
2025                      j->eob_run += stbi__jpeg_get_bits(j, r);
2026                   --j->eob_run;
2027                   break;
2028                }
2029                k += 16;
2030             } else {
2031                k += r;
2032                zig = stbi__jpeg_dezigzag[k++];
2033                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2034             }
2035          }
2036       } while (k <= j->spec_end);
2037    } else {
2038       // refinement scan for these AC coefficients
2039 
2040       short bit = (short) (1 << j->succ_low);
2041 
2042       if (j->eob_run) {
2043          --j->eob_run;
2044          for (k = j->spec_start; k <= j->spec_end; ++k) {
2045             short *p = &data[stbi__jpeg_dezigzag[k]];
2046             if (*p != 0)
2047                if (stbi__jpeg_get_bit(j))
2048                   if ((*p & bit)==0) {
2049                      if (*p > 0)
2050                         *p += bit;
2051                      else
2052                         *p -= bit;
2053                   }
2054          }
2055       } else {
2056          k = j->spec_start;
2057          do {
2058             int r,s;
2059             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2060             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2061             s = rs & 15;
2062             r = rs >> 4;
2063             if (s == 0) {
2064                if (r < 15) {
2065                   j->eob_run = (1 << r) - 1;
2066                   if (r)
2067                      j->eob_run += stbi__jpeg_get_bits(j, r);
2068                   r = 64; // force end of block
2069                } else {
2070                   // r=15 s=0 should write 16 0s, so we just do
2071                   // a run of 15 0s and then write s (which is 0),
2072                   // so we don't have to do anything special here
2073                }
2074             } else {
2075                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2076                // sign bit
2077                if (stbi__jpeg_get_bit(j))
2078                   s = bit;
2079                else
2080                   s = -bit;
2081             }
2082 
2083             // advance by r
2084             while (k <= j->spec_end) {
2085                short *p = &data[stbi__jpeg_dezigzag[k++]];
2086                if (*p != 0) {
2087                   if (stbi__jpeg_get_bit(j))
2088                      if ((*p & bit)==0) {
2089                         if (*p > 0)
2090                            *p += bit;
2091                         else
2092                            *p -= bit;
2093                      }
2094                } else {
2095                   if (r == 0) {
2096                      *p = (short) s;
2097                      break;
2098                   }
2099                   --r;
2100                }
2101             }
2102          } while (k <= j->spec_end);
2103       }
2104    }
2105    return 1;
2106 }
2107 
2108 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2109 stbi_inline static stbi_uc stbi__clamp(int x)
2110 {
2111    // trick to use a single test to catch both cases
2112    if ((unsigned int) x > 255) {
2113       if (x < 0) return 0;
2114       if (x > 255) return 255;
2115    }
2116    return (stbi_uc) x;
2117 }
2118 
// stbi__f2f: convert a floating-point constant to fixed point with 12
//            fractional bits (multiply by 4096, add 0.5, truncate to int).
// stbi__fsh: scale an integer expression into the same 12-bit fixed-point
//            format. Written as a multiply instead of "<< 12" because the
//            operand can be negative and left-shifting a negative value is
//            undefined behavior.
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 8-point 1D IDCT on inputs s0..s7. The macro DECLARES the locals
// t0..t3, p1..p5 and x0..x3 in the enclosing scope, so it can be expanded only
// once per block scope. On exit x0..x3 hold the even-part terms and t0..t3
// the odd-part terms, all scaled up by 1<<12; the caller forms the outputs as
// x0+t3, x1+t2, x2+t1, x3+t0 and x3-t0, x2-t1, x1-t2, x0-t3.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2159 
// Generic integer 8x8 inverse DCT.
//   out        : destination for 8 rows of 8 bytes
//   out_stride : distance in bytes between consecutive output rows
//   data       : 64 coefficients in row-major order
// A column pass writes intermediate values (with 2 extra bits of precision)
// into val[]; a row pass then produces the final samples, offset by +128 and
// clamped to 0..255.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // multiply rather than "<< 2": the DC term may be negative
         int dcterm = d[0] * 4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2218 
2219 #ifdef STBI_SSE2
2220 // sse2 integer IDCT. not the fastest possible implementation but it
2221 // produces bit-identical results to the generic C version so it's
2222 // fully "transparent".
// SSE2 8x8 inverse DCT.
//   out        : destination; 8 rows of 8 bytes are stored, one row per
//                _mm_storel_epi64, advancing by out_stride between rows
//   out_stride : distance in bytes between consecutive output rows
//   data       : 64 16-bit coefficients, read as eight 8-element rows with
//                _mm_load_si128 (an aligned load, so data is expected to be
//                16-byte aligned)
// Structure: column pass (bias 512, shift 10), 16-bit transpose, row pass
// (bias 65536 + (128<<17), shift 17), saturating pack to bytes, 8-bit
// transpose, store. All helper macros are local and #undef'd before returning.
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2223 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2224 {
2225    // This is constructed to match our regular (generic) integer IDCT exactly.
2226    __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2227    __m128i tmp;
2228 
2229    // dot product constant: even elems=x, odd elems=y
2230    #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2231 
2232    // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2233    // out(1) = c1[even]*x + c1[odd]*y
2234    #define dct_rot(out0,out1, x,y,c0,c1) \
2235       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2236       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2237       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2238       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2239       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2240       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2241 
2242    // out = in << 12  (in 16-bit, out 32-bit)
2243    #define dct_widen(out, in) \
2244       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2245       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2246 
2247    // wide add
2248    #define dct_wadd(out, a, b) \
2249       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2250       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2251 
2252    // wide sub
2253    #define dct_wsub(out, a, b) \
2254       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2255       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2256 
2257    // butterfly a/b, add bias, then shift by "s" and pack
2258    #define dct_bfly32o(out0, out1, a,b,bias,s) \
2259       { \
2260          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2261          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2262          dct_wadd(sum, abiased, b); \
2263          dct_wsub(dif, abiased, b); \
2264          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2265          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2266       }
2267 
2268    // 8-bit interleave step (for transposes)
2269    #define dct_interleave8(a, b) \
2270       tmp = a; \
2271       a = _mm_unpacklo_epi8(a, b); \
2272       b = _mm_unpackhi_epi8(tmp, b)
2273 
2274    // 16-bit interleave step (for transposes)
2275    #define dct_interleave16(a, b) \
2276       tmp = a; \
2277       a = _mm_unpacklo_epi16(a, b); \
2278       b = _mm_unpackhi_epi16(tmp, b)
2279 
   // One full 1D pass over all eight rows at once: reads row0..row7 and
   // overwrites them with the results, which are (butterfly + bias) >> shift
   // packed back to 16 bits with signed saturation.
2280    #define dct_pass(bias,shift) \
2281       { \
2282          /* even part */ \
2283          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2284          __m128i sum04 = _mm_add_epi16(row0, row4); \
2285          __m128i dif04 = _mm_sub_epi16(row0, row4); \
2286          dct_widen(t0e, sum04); \
2287          dct_widen(t1e, dif04); \
2288          dct_wadd(x0, t0e, t3e); \
2289          dct_wsub(x3, t0e, t3e); \
2290          dct_wadd(x1, t1e, t2e); \
2291          dct_wsub(x2, t1e, t2e); \
2292          /* odd part */ \
2293          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2294          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2295          __m128i sum17 = _mm_add_epi16(row1, row7); \
2296          __m128i sum35 = _mm_add_epi16(row3, row5); \
2297          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2298          dct_wadd(x4, y0o, y4o); \
2299          dct_wadd(x5, y1o, y5o); \
2300          dct_wadd(x6, y2o, y5o); \
2301          dct_wadd(x7, y3o, y4o); \
2302          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2303          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2304          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2305          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2306       }
2307 
   // Each rotation constant pairs two fixed-point multipliers so that one
   // multiply-add computes x*a + y*b. Entries written as a sum of two
   // stbi__f2f terms fold a shared factor into the pair:
   // x*c + y*(c+d) == (x+y)*c + y*d.
2308    __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2309    __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2310    __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2311    __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2312    __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2313    __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2314    __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2315    __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2316 
2317    // rounding biases in column/row passes, see stbi__idct_block for explanation.
2318    __m128i bias_0 = _mm_set1_epi32(512);
2319    __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2320 
2321    // load
2322    row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2323    row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2324    row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2325    row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2326    row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2327    row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2328    row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2329    row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2330 
2331    // column pass
2332    dct_pass(bias_0, 10);
2333 
2334    {
2335       // 16bit 8x8 transpose pass 1
2336       dct_interleave16(row0, row4);
2337       dct_interleave16(row1, row5);
2338       dct_interleave16(row2, row6);
2339       dct_interleave16(row3, row7);
2340 
2341       // transpose pass 2
2342       dct_interleave16(row0, row2);
2343       dct_interleave16(row1, row3);
2344       dct_interleave16(row4, row6);
2345       dct_interleave16(row5, row7);
2346 
2347       // transpose pass 3
2348       dct_interleave16(row0, row1);
2349       dct_interleave16(row2, row3);
2350       dct_interleave16(row4, row5);
2351       dct_interleave16(row6, row7);
2352    }
2353 
2354    // row pass
2355    dct_pass(bias_1, 17);
2356 
2357    {
2358       // pack
      // (unsigned saturation to 0..255 takes the place of the generic
      // version's stbi__clamp calls)
2359       __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2360       __m128i p1 = _mm_packus_epi16(row2, row3);
2361       __m128i p2 = _mm_packus_epi16(row4, row5);
2362       __m128i p3 = _mm_packus_epi16(row6, row7);
2363 
2364       // 8bit 8x8 transpose pass 1
2365       dct_interleave8(p0, p2); // a0e0a1e1...
2366       dct_interleave8(p1, p3); // c0g0c1g1...
2367 
2368       // transpose pass 2
2369       dct_interleave8(p0, p1); // a0c0e0g0...
2370       dct_interleave8(p2, p3); // b0d0f0h0...
2371 
2372       // transpose pass 3
2373       dct_interleave8(p0, p2); // a0b0c0d0...
2374       dct_interleave8(p1, p3); // a4b4c4d4...
2375 
2376       // store
      // each register holds two output rows: the low 8 bytes are stored
      // directly, and the 0x4e shuffle swaps the 64-bit halves so the high
      // 8 bytes can be stored the same way
2377       _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2378       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2379       _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2380       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2381       _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2382       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2383       _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2384       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2385    }
2386 
2387 #undef dct_const
2388 #undef dct_rot
2389 #undef dct_widen
2390 #undef dct_wadd
2391 #undef dct_wsub
2392 #undef dct_bfly32o
2393 #undef dct_interleave8
2394 #undef dct_interleave16
2395 #undef dct_pass
2396 }
2397 
2398 #endif // STBI_SSE2
2399 
2400 #ifdef STBI_NEON
2401 
2402 // NEON integer IDCT. should produce bit-identical
2403 // results to the generic C version.
// 8x8 inverse DCT of one coefficient block using NEON intrinsics.
//   out        - destination; eight rows of eight bytes are stored, the
//                pointer being advanced by out_stride between rows
//   out_stride - distance in bytes from one output row to the next
//   data       - 64 coefficients, loaded as eight vectors of eight shorts
//                (data + 0*8 .. data + 7*8)
// The same 1-D pass (dct_pass) is applied twice: once right after the load
// ("column pass") and once after a 16-bit 8x8 transpose ("row pass"). The
// result is narrowed to unsigned 8 bits, transposed back and stored.
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2404 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2405 {
2406    int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2407
   // Rotation constants, each broadcast to all four 16-bit lanes.
   // stbi__f2f is defined elsewhere in this file; presumably it converts the
   // float literal to the fixed-point scale used here (12 fractional bits
   // would match the shift in dct_widen below) -- TODO confirm.
2408    int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2409    int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2410    int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2411    int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2412    int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2413    int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2414    int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2415    int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2416    int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2417    int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2418    int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2419    int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2420
// All "wide" helpers below work on a pair of 32-bit vectors named <x>_l and
// <x>_h, holding the low and high four lanes of an 8-lane value.

// declares out_l/out_h = widening 16x16->32 product of inq's lanes with coeff
2421 #define dct_long_mul(out, inq, coeff) \
2422    int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2423    int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2424
// declares out_l/out_h = acc_l/acc_h plus the widening product inq * coeff
2425 #define dct_long_mac(out, acc, inq, coeff) \
2426    int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2427    int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2428
// declares out_l/out_h = inq's lanes widened to 32 bits and shifted left by 12
2429 #define dct_widen(out, inq) \
2430    int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2431    int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2432
2433 // wide add
2434 #define dct_wadd(out, a, b) \
2435    int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2436    int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2437
2438 // wide sub
2439 #define dct_wsub(out, a, b) \
2440    int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2441    int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2442
2443 // butterfly a/b, then shift using "shiftop" by "s" and pack
// (out0 receives the narrowed a+b, out1 the narrowed a-b)
2444 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2445    { \
2446       dct_wadd(sum, a, b); \
2447       dct_wsub(dif, a, b); \
2448       out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2449       out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2450    }
2451
// One 1-D pass over row0..row7. The even part is built from rows 0,2,4,6
// (x0..x3) and the odd part from rows 1,3,5,7 (x4..x7), all in 32 bits; the
// final butterflies pair x0/x7, x1/x6, x2/x5, x3/x4 and write the narrowed
// sums and differences back into row0..row7 using shiftop(value, shift).
2452 #define dct_pass(shiftop, shift) \
2453    { \
2454       /* even part */ \
2455       int16x8_t sum26 = vaddq_s16(row2, row6); \
2456       dct_long_mul(p1e, sum26, rot0_0); \
2457       dct_long_mac(t2e, p1e, row6, rot0_1); \
2458       dct_long_mac(t3e, p1e, row2, rot0_2); \
2459       int16x8_t sum04 = vaddq_s16(row0, row4); \
2460       int16x8_t dif04 = vsubq_s16(row0, row4); \
2461       dct_widen(t0e, sum04); \
2462       dct_widen(t1e, dif04); \
2463       dct_wadd(x0, t0e, t3e); \
2464       dct_wsub(x3, t0e, t3e); \
2465       dct_wadd(x1, t1e, t2e); \
2466       dct_wsub(x2, t1e, t2e); \
2467       /* odd part */ \
2468       int16x8_t sum15 = vaddq_s16(row1, row5); \
2469       int16x8_t sum17 = vaddq_s16(row1, row7); \
2470       int16x8_t sum35 = vaddq_s16(row3, row5); \
2471       int16x8_t sum37 = vaddq_s16(row3, row7); \
2472       int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2473       dct_long_mul(p5o, sumodd, rot1_0); \
2474       dct_long_mac(p1o, p5o, sum17, rot1_1); \
2475       dct_long_mac(p2o, p5o, sum35, rot1_2); \
2476       dct_long_mul(p3o, sum37, rot2_0); \
2477       dct_long_mul(p4o, sum15, rot2_1); \
2478       dct_wadd(sump13o, p1o, p3o); \
2479       dct_wadd(sump24o, p2o, p4o); \
2480       dct_wadd(sump23o, p2o, p3o); \
2481       dct_wadd(sump14o, p1o, p4o); \
2482       dct_long_mac(x4, sump13o, row7, rot3_0); \
2483       dct_long_mac(x5, sump24o, row5, rot3_1); \
2484       dct_long_mac(x6, sump23o, row3, rot3_2); \
2485       dct_long_mac(x7, sump14o, row1, rot3_3); \
2486       dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2487       dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2488       dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2489       dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2490    }
2491
2492    // load
2493    row0 = vld1q_s16(data + 0*8);
2494    row1 = vld1q_s16(data + 1*8);
2495    row2 = vld1q_s16(data + 2*8);
2496    row3 = vld1q_s16(data + 3*8);
2497    row4 = vld1q_s16(data + 4*8);
2498    row5 = vld1q_s16(data + 5*8);
2499    row6 = vld1q_s16(data + 6*8);
2500    row7 = vld1q_s16(data + 7*8);
2501
2502    // add DC bias
   // (the added vector is 1024 in lane 0 and 0 elsewhere, so only the first
   // coefficient, data[0], is changed)
2503    row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2504
2505    // column pass
   // (narrowing uses vrshrn_n_s32, i.e. a rounding right shift by 10)
2506    dct_pass(vrshrn_n_s32, 10);
2507
2508    // 16bit 8x8 transpose
2509    {
2510 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2511 // whether compilers actually get this is another story, sadly.
2512 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2513 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2514 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2515
2516       // pass 1
2517       dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2518       dct_trn16(row2, row3);
2519       dct_trn16(row4, row5);
2520       dct_trn16(row6, row7);
2521
2522       // pass 2
2523       dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2524       dct_trn32(row1, row3);
2525       dct_trn32(row4, row6);
2526       dct_trn32(row5, row7);
2527
2528       // pass 3
2529       dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2530       dct_trn64(row1, row5);
2531       dct_trn64(row2, row6);
2532       dct_trn64(row3, row7);
2533
2534 #undef dct_trn16
2535 #undef dct_trn32
2536 #undef dct_trn64
2537    }
2538
2539    // row pass
2540    // vrshrn_n_s32 only supports shifts up to 16, we need
2541    // 17. so do a non-rounding shift of 16 first then follow
2542    // up with a rounding shift by 1.
2543    dct_pass(vshrn_n_s32, 16);
2544
2545    {
2546       // pack and round
      // (vqrshrun_n_s16 does the remaining rounding shift by 1 and saturates
      // each lane to the unsigned 8-bit range)
2547       uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2548       uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2549       uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2550       uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2551       uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2552       uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2553       uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2554       uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2555
2556       // again, these can translate into one instruction, but often don't.
2557 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2558 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2559 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2560
2561       // sadly can't use interleaved stores here since we only write
2562       // 8 bytes to each scan line!
2563
2564       // 8x8 8-bit transpose pass 1
2565       dct_trn8_8(p0, p1);
2566       dct_trn8_8(p2, p3);
2567       dct_trn8_8(p4, p5);
2568       dct_trn8_8(p6, p7);
2569
2570       // pass 2
2571       dct_trn8_16(p0, p2);
2572       dct_trn8_16(p1, p3);
2573       dct_trn8_16(p4, p6);
2574       dct_trn8_16(p5, p7);
2575
2576       // pass 3
2577       dct_trn8_32(p0, p4);
2578       dct_trn8_32(p1, p5);
2579       dct_trn8_32(p2, p6);
2580       dct_trn8_32(p3, p7);
2581
2582       // store
      // (eight bytes per output row; out is not advanced after the last row)
2583       vst1_u8(out, p0); out += out_stride;
2584       vst1_u8(out, p1); out += out_stride;
2585       vst1_u8(out, p2); out += out_stride;
2586       vst1_u8(out, p3); out += out_stride;
2587       vst1_u8(out, p4); out += out_stride;
2588       vst1_u8(out, p5); out += out_stride;
2589       vst1_u8(out, p6); out += out_stride;
2590       vst1_u8(out, p7);
2591
2592 #undef dct_trn8_8
2593 #undef dct_trn8_16
2594 #undef dct_trn8_32
2595    }
2596
// the helper macros are local to this function; remove them again
2597 #undef dct_long_mul
2598 #undef dct_long_mac
2599 #undef dct_widen
2600 #undef dct_wadd
2601 #undef dct_wsub
2602 #undef dct_bfly32o
2603 #undef dct_pass
2604 }
2605 
2606 #endif // STBI_NEON
2607 
// Sentinel meaning "no marker": stored in stbi__jpeg.marker when nothing is
// pending, and returned by stbi__get_marker when the next byte is not 0xff.
2608 #define STBI__MARKER_none  0xff
2609 // if there's a pending marker from the entropy stream, return that
2610 // otherwise, fetch from the stream and get a marker. if there's no
2611 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2612 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2613 {
2614    stbi_uc x;
2615    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2616    x = stbi__get8(j->s);
2617    if (x != 0xff) return STBI__MARKER_none;
2618    while (x == 0xff)
2619       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2620    return x;
2621 }
2622 
2623 // in each scan, we'll have scan_n components, and the order
2624 // of the components is specified by order[]
// STBI__RESTART(x) is true for marker codes 0xd0..0xd7 (RST0..RST7 in the
// JPEG marker table). Note that x is evaluated twice.
2625 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2626 
2627 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2628 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2629 static void stbi__jpeg_reset(stbi__jpeg *j)
2630 {
2631    j->code_bits = 0;
2632    j->code_buffer = 0;
2633    j->nomore = 0;
2634    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2635    j->marker = STBI__MARKER_none;
2636    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2637    j->eob_run = 0;
2638    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2639    // since we don't even allow 1<<30 pixels
2640 }
2641 
// Count one finished MCU against the restart interval.
// Returns 0 when decoding should simply continue. Returns 1 when the interval
// has run out and the pending marker is not a restart marker; the caller then
// ends the scan (reporting success, so we get corrupt data rather than no
// data). When a restart marker is pending the decoder state is reset.
static int stbi__jpeg_mcu_countdown(stbi__jpeg *z)
{
   if (--z->todo > 0)
      return 0;
   if (z->code_bits < 24)
      stbi__grow_buffer_unsafe(z);
   if (!STBI__RESTART(z->marker))
      return 1;
   stbi__jpeg_reset(z);
   return 0;
}

// Decode the entropy-coded data of the current scan.
// Four cases are handled, selected by z->progressive and z->scan_n:
//   - baseline, one component:   decode + IDCT block by block in scanline order
//   - baseline, interleaved:     decode + IDCT MCU by MCU
//   - progressive, one component: accumulate DC (spec_start == 0) or AC
//                                 coefficients into the coeff buffer
//   - progressive, interleaved:  accumulate DC coefficients MCU by MCU
// Returns 0 if a block fails to decode, 1 otherwise (including the case where
// the scan is cut short because an expected restart marker is missing).
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   int single_component;

   stbi__jpeg_reset(z);
   single_component = (z->scan_n == 1);

   if (!z->progressive && single_component) {
      int bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      int n = z->order[0];
      // the block count depends only on how many actual "pixels" this
      // component has, independent of interleaved MCU blocking
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            int ha = z->img_comp[n].ha;
            if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
               return 0;
            z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*by*8+bx*8, z->img_comp[n].w2, data);
            // here every block is an MCU of its own
            if (stbi__jpeg_mcu_countdown(z))
               return 1;
         }
      }
      return 1;
   } else if (!z->progressive) {
      int mx,my,k,bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            // one interleaved MCU: the scan's components in order[] order,
            // each contributing h x v blocks
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int x2 = (mx*z->img_comp[n].h + bx)*8;
                     int y2 = (my*z->img_comp[n].v + by)*8;
                     int ha = z->img_comp[n].ha;
                     if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
                        return 0;
                     z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                  }
               }
            }
            // all components done: that was one interleaved MCU
            if (stbi__jpeg_mcu_countdown(z))
               return 1;
         }
      }
      return 1;
   } else if (single_component) {
      int bx,by;
      int n = z->order[0];
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *coef = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
            int ok;
            if (z->spec_start == 0) {
               ok = stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[n].hd], n);
            } else {
               int ha = z->img_comp[n].ha;
               ok = stbi__jpeg_decode_block_prog_ac(z, coef, &z->huff_ac[ha], z->fast_ac[ha]);
            }
            if (!ok)
               return 0;
            // here every block is an MCU of its own
            if (stbi__jpeg_mcu_countdown(z))
               return 1;
         }
      }
      return 1;
   } else {
      int mx,my,k,bx,by;
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int x2 = (mx*z->img_comp[n].h + bx);
                     int y2 = (my*z->img_comp[n].v + by);
                     short *coef = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                     if (!stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[n].hd], n))
                        return 0;
                  }
               }
            }
            // all components done: that was one interleaved MCU
            if (stbi__jpeg_mcu_countdown(z))
               return 1;
         }
      }
      return 1;
   }
}
2765 
// Multiply each of the 64 coefficients in data, in place, by the entry at the
// same index in dequant.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   stbi__uint16 *scale = dequant;

   while (coef != stop) {
      *coef *= *scale;
      ++coef;
      ++scale;
   }
}
2772 
// Final step for progressive images; does nothing for non-progressive ones.
// For every component, each 8x8 block of accumulated coefficients is
// dequantized in place with the component's table and then run through the
// IDCT kernel into the component's pixel plane.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp;

   if (!z->progressive)
      return;

   for (comp=0; comp < z->s->img_n; ++comp) {
      int bx,by;
      int blocks_w = (z->img_comp[comp].x+7) >> 3;
      int blocks_h = (z->img_comp[comp].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *coef = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi__jpeg_dequantize(coef, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8, z->img_comp[comp].w2, coef);
         }
      }
   }
}
2791 
// Consume one marker segment other than SOI/SOF/SOS/EOI.
//   z - decoder state; the segment body is read from z->s
//   m - marker code, as returned by stbi__get_marker
// Handles DRI (restart interval), DQT (quantization tables), DHT (huffman
// tables), and skips APPn / COM segments after picking out the JFIF tag and
// the Adobe APP14 color-transform byte.
// Returns 1 on success and 0 on failure; most failures record a reason via
// stbi__err.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0);   // p: 0 = 8-bit entries, 1 = 16-bit entries
            int t = q & 15,i;                     // t: destination table index
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;    // table class: 0 = DC, 1 = AC
            int th = q & 15;    // destination table index
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // n is the number of symbol bytes that follow and that get written
            // into the table's values[] below. Symbols are single bytes, so a
            // well-formed table cannot define more than 256 of them; without
            // this check a corrupt file could claim up to 16*255 and overrun
            // the table storage.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all five bytes are always consumed, even after a mismatch
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // whatever remains of the segment is not interpreted
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2893 
2894 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)2895 static int stbi__process_scan_header(stbi__jpeg *z)
2896 {
2897    int i;
2898    int Ls = stbi__get16be(z->s);
2899    z->scan_n = stbi__get8(z->s);
2900    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2901    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2902    for (i=0; i < z->scan_n; ++i) {
2903       int id = stbi__get8(z->s), which;
2904       int q = stbi__get8(z->s);
2905       for (which = 0; which < z->s->img_n; ++which)
2906          if (z->img_comp[which].id == id)
2907             break;
2908       if (which == z->s->img_n) return 0; // no match
2909       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2910       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2911       z->order[i] = which;
2912    }
2913 
2914    {
2915       int aa;
2916       z->spec_start = stbi__get8(z->s);
2917       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
2918       aa = stbi__get8(z->s);
2919       z->succ_high = (aa >> 4);
2920       z->succ_low  = (aa & 15);
2921       if (z->progressive) {
2922          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2923             return stbi__err("bad SOS", "Corrupt JPEG");
2924       } else {
2925          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2926          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2927          z->spec_end = 63;
2928       }
2929    }
2930 
2931    return 1;
2932 }
2933 
// Release the per-component buffers (pixel plane, coefficient buffer, line
// buffer) of the first ncomp components and clear the pointers that referred
// to them. Buffers that were never allocated are left alone.
// Returns why unchanged, so a caller can write
//    return stbi__free_jpeg_components(z, n, <result>);
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;

   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         // data is an aligned pointer into raw_data, so it goes too
         z->img_comp[c].raw_data = NULL;
         z->img_comp[c].data = NULL;
      }

      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         z->img_comp[c].raw_coeff = NULL;
         z->img_comp[c].coeff = NULL;
      }

      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }

      ++c;
   }

   return why;
}
2955 
// Parse the frame header that follows an SOF marker.
//   z    - decoder state; image dimensions and component count go to z->s
//   scan - when it is not STBI__SCAN_load only the header fields are parsed
//          and validated; with STBI__SCAN_load the MCU geometry is computed
//          and the per-component buffers are allocated as well
// Returns 1 on success, 0 on failure with a reason recorded via stbi__err.
// If an allocation fails, the buffers allocated so far are released.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   // z->rgb counts how many of the three component ids spell 'R','G','B'
   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // Every component's sampling factor must divide the maximum factor, so
   // that each plane is upscaled by a whole number. The resampler interface
   // (resample_row_func) takes an integer expansion factor `hs`; a fractional
   // ratio such as 3:2 cannot be expressed with it.
   // NOTE(review): the resampler setup is outside this block -- confirm it
   // derives its factors as h_max / h and v_max / v.
   for (i=0; i < s->img_n; ++i) {
      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      // 15 extra bytes leave room to round the start up to a 16-byte boundary
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
3038 
// Marker-code predicates. Each takes a marker code as returned by
// stbi__get_marker and yields non-zero when it matches.
3039 // use comparisons since in some cases we handle more than one case (e.g. SOF)
// 0xdc: DNL (define number of lines)
3040 #define stbi__DNL(x)         ((x) == 0xdc)
// 0xd8: SOI (start of image)
3041 #define stbi__SOI(x)         ((x) == 0xd8)
// 0xd9: EOI (end of image)
3042 #define stbi__EOI(x)         ((x) == 0xd9)
// 0xc0, 0xc1, 0xc2: the start-of-frame variants accepted here; x is evaluated
// up to three times
3043 #define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
// 0xda: SOS (start of scan)
3044 #define stbi__SOS(x)         ((x) == 0xda)
3045
// of the accepted SOF codes, only 0xc2 marks a progressive frame
3046 #define stbi__SOF_progressive(x)   ((x) == 0xc2)
3047
// Read a JPEG stream from its start up to and including the frame header.
//   scan == STBI__SCAN_type: only check for the SOI marker, then return 1
//   otherwise: process every marker segment up to the first SOF, record
//   whether the frame is progressive, and hand the SOF to
//   stbi__process_frame_header (which receives scan unchanged)
// Returns 1 on success, 0 on failure.
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int marker;

   z->jfif = 0;
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->marker = STBI__MARKER_none; // nothing cached yet

   marker = stbi__get_marker(z);
   if (!stbi__SOI(marker))
      return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type)
      return 1;

   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
      if (!stbi__process_marker(z, marker))
         return 0;
      // some files have extra padding after their blocks, so keep scanning
      // until a marker turns up or the input runs out
      for (;;) {
         marker = stbi__get_marker(z);
         if (marker != STBI__MARKER_none)
            break;
         if (stbi__at_eof(z->s))
            return stbi__err("no SOF", "Corrupt JPEG");
      }
   }

   z->progressive = stbi__SOF_progressive(marker);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3071 
3072 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3073 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3074 {
3075    int m;
3076    for (m = 0; m < 4; m++) {
3077       j->img_comp[m].raw_data = NULL;
3078       j->img_comp[m].raw_coeff = NULL;
3079    }
3080    j->restart_interval = 0;
3081    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3082    m = stbi__get_marker(j);
3083    while (!stbi__EOI(m)) {
3084       if (stbi__SOS(m)) {
3085          if (!stbi__process_scan_header(j)) return 0;
3086          if (!stbi__parse_entropy_coded_data(j)) return 0;
3087          if (j->marker == STBI__MARKER_none ) {
3088             // handle 0s at the end of image data from IP Kamera 9060
3089             while (!stbi__at_eof(j->s)) {
3090                int x = stbi__get8(j->s);
3091                if (x == 255) {
3092                   j->marker = stbi__get8(j->s);
3093                   break;
3094                }
3095             }
3096             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3097          }
3098       } else if (stbi__DNL(m)) {
3099          int Ld = stbi__get16be(j->s);
3100          stbi__uint32 NL = stbi__get16be(j->s);
3101          if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
3102          if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
3103       } else {
3104          if (!stbi__process_marker(j, m)) return 0;
3105       }
3106       m = stbi__get_marker(j);
3107    }
3108    if (j->progressive)
3109       stbi__jpeg_finish(j);
3110    return 1;
3111 }
3112 
3113 // static jfif-centered resampling (across block boundaries)
3114 
3115 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3116                                     int w, int hs);
3117 
3118 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3119 
// No expansion in either axis: the source row is already the result, so it
// is returned directly and nothing is written to `out`.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return in_near;
}
3128 
// Vertical 2x expansion: each output sample is (3*near + far + 2) / 4 of the
// vertically adjacent source samples.  Writes w samples to `out` and returns it.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      int blend = 3*in_near[x] + in_far[x] + 2; // +2 rounds the divide by 4
      out[x] = stbi__div4(blend);
      ++x;
   }
   return out;
}
3138 
// Horizontal 2x expansion of one row.
//   out     - destination, receives 2*w samples
//   in_near - source row of w samples (w is expected to be >= 1)
//   in_far, hs - unused
// Every source sample i yields two outputs: out[2i] leans towards its left
// neighbour and out[2i+1] towards its right neighbour, each weighted
// 3:1 in favour of sample i.  The first and last outputs have no outer
// neighbour and are plain copies.  Returns `out`.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate two samples horizontally for every one in input
   int i;
   stbi_uc *input = in_near;

   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[0] = out[1] = input[0];
      return out;
   }

   out[0] = input[0];
   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
   for (i=1; i < w-1; ++i) {
      int n = 3*input[i]+2;
      out[i*2+0] = stbi__div4(n+input[i-1]);
      out[i*2+1] = stbi__div4(n+input[i+1]);
   }
   // i == w-1 here.  The left-leaning output of the last sample must weight
   // the last sample itself by 3 (same formula as inside the loop); the
   // previous code weighted input[w-2] by 3 instead, which flattened the
   // right edge of every row.
   out[i*2+0] = stbi__div4(input[w-1]*3 + input[w-2] + 2);
   out[i*2+1] = input[w-1];

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   return out;
}
3166 
3167 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3168 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3169 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3170 {
3171    // need to generate 2x2 samples for every one in input
3172    int i,t0,t1;
3173    if (w == 1) {
3174       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3175       return out;
3176    }
3177 
3178    t1 = 3*in_near[0] + in_far[0];
3179    out[0] = stbi__div4(t1+2);
3180    for (i=1; i < w; ++i) {
3181       t0 = t1;
3182       t1 = 3*in_near[i]+in_far[i];
3183       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3184       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3185    }
3186    out[w*2-1] = stbi__div4(t1+2);
3187 
3188    STBI_NOTUSED(hs);
3189 
3190    return out;
3191 }
3192 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD flavour of the 2x2 row upsampler.  Groups of eight source columns are
// handled with SSE2 or NEON; the remaining columns (always including the last
// one, which needs the edge rule) are handled by scalar code at the end.
// Writes 2*w samples to `out` and returns it.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   int left;       // vertical blend (3*near + far) of the previous column
   int cur;        // vertical blend of the current column
   int simd_end;   // first column the vector loop must not start at

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   cur = 3*in_near[0] + in_far[0];

   // The vector loop reads column x+8 as the right neighbour of its last
   // lane, so it only runs while a full group plus one more column exists.
   simd_end = (w-1) & ~7;
   while (x < simd_end) {
#if defined(STBI_SSE2)
      // vertical pass, using 3*near + far = 4*near + (far - near)
      __m128i zero    = _mm_setzero_si128();
      __m128i far8    = _mm_loadl_epi64((__m128i *) (in_far + x));
      __m128i near8   = _mm_loadl_epi64((__m128i *) (in_near + x));
      __m128i far16   = _mm_unpacklo_epi8(far8, zero);
      __m128i near16  = _mm_unpacklo_epi8(near8, zero);
      __m128i delta   = _mm_sub_epi16(far16, near16);
      __m128i near_x4 = _mm_slli_epi16(near16, 2);
      __m128i vert    = _mm_add_epi16(near_x4, delta);

      // neighbours: "prev" has every lane's left neighbour (lane 0 comes
      // from the previous group via `cur`), "next" has every lane's right
      // neighbour (lane 7 is the first column of the following group)
      __m128i prev_lanes = _mm_slli_si128(vert, 2);
      __m128i next_lanes = _mm_srli_si128(vert, 2);
      __m128i prev = _mm_insert_epi16(prev_lanes, cur, 0);
      __m128i next = _mm_insert_epi16(next_lanes, 3*in_near[x+8] + in_far[x+8], 7);

      // horizontal pass:
      //   even output = 3*vert + prev = 4*vert + (prev - vert)
      //   odd  output = 3*vert + next = 4*vert + (next - vert)
      // the rounded 4*vert term is shared by both
      __m128i round8  = _mm_set1_epi16(8);
      __m128i vert_x4 = _mm_slli_epi16(vert, 2);
      __m128i to_prev = _mm_sub_epi16(prev, vert);
      __m128i to_next = _mm_sub_epi16(next, vert);
      __m128i base    = _mm_add_epi16(vert_x4, round8);
      __m128i even    = _mm_add_epi16(to_prev, base);
      __m128i odd     = _mm_add_epi16(to_next, base);

      // interleave even/odd, drop the scale factor of 16, pack to bytes
      __m128i mix_lo = _mm_unpacklo_epi16(even, odd);
      __m128i mix_hi = _mm_unpackhi_epi16(even, odd);
      __m128i res_lo = _mm_srli_epi16(mix_lo, 4);
      __m128i res_hi = _mm_srli_epi16(mix_hi, 4);
      __m128i packed = _mm_packus_epi16(res_lo, res_hi);
      _mm_storeu_si128((__m128i *) (out + x*2), packed);
#elif defined(STBI_NEON)
      // vertical pass, using 3*near + far = 4*near + (far - near)
      uint8x8_t far8    = vld1_u8(in_far + x);
      uint8x8_t near8   = vld1_u8(in_near + x);
      int16x8_t delta   = vreinterpretq_s16_u16(vsubl_u8(far8, near8));
      int16x8_t near_x4 = vreinterpretq_s16_u16(vshll_n_u8(near8, 2));
      int16x8_t vert    = vaddq_s16(near_x4, delta);

      // neighbours: lane 0 of "prev" comes from the previous group via
      // `cur`, lane 7 of "next" is the first column of the following group
      int16x8_t prev_lanes = vextq_s16(vert, vert, 7);
      int16x8_t next_lanes = vextq_s16(vert, vert, 1);
      int16x8_t prev = vsetq_lane_s16(cur, prev_lanes, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[x+8] + in_far[x+8], next_lanes, 7);

      // horizontal pass:
      //   even output = 3*vert + prev = 4*vert + (prev - vert)
      //   odd  output = 3*vert + next = 4*vert + (next - vert)
      int16x8_t vert_x4 = vshlq_n_s16(vert, 2);
      int16x8_t to_prev = vsubq_s16(prev, vert);
      int16x8_t to_next = vsubq_s16(next, vert);
      int16x8_t even    = vaddq_s16(vert_x4, to_prev);
      int16x8_t odd     = vaddq_s16(vert_x4, to_next);

      // rounding narrowing shift drops the scale; vst2 interleaves the phases
      uint8x8x2_t pair;
      pair.val[0] = vqrshrun_n_s16(even, 4);
      pair.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + x*2, pair);
#endif

      // the last column of this group is the left neighbour of the next one
      cur = 3*in_near[x+7] + in_far[x+7];
      x += 8;
   }

   // scalar remainder: first finish the even output of column x ...
   left = cur;
   cur = 3*in_near[x] + in_far[x];
   out[x*2] = stbi__div16(3*cur + left + 8);

   // ... then the remaining columns exactly as the scalar routine does
   ++x;
   while (x < w) {
      left = cur;
      cur = 3*in_near[x]+in_far[x];
      out[x*2-1] = stbi__div16(3*left + cur + 8);
      out[x*2  ] = stbi__div16(3*cur + left + 8);
      ++x;
   }
   out[w*2-1] = stbi__div4(cur+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3309 
// Fallback upsampler for any horizontal factor: nearest-neighbour, i.e. every
// source sample is simply repeated hs times.  Writes w*hs samples to `out`
// and returns it; in_far is not used.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int src, rep;
   stbi_uc *dst = out;
   STBI_NOTUSED(in_far);
   for (src=0; src < w; ++src) {
      for (rep=0; rep < hs; ++rep)
         *dst++ = in_near[src];
   }
   return out;
}
3320 
3321 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3322 // to make sure the code produces the same results in both SIMD and scalar
3323 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
stbi__YCbCr_to_RGB_row(stbi_uc * out,const stbi_uc * y,const stbi_uc * pcb,const stbi_uc * pcr,int count,int step)3324 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3325 {
3326    int i;
3327    for (i=0; i < count; ++i) {
3328       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3329       int r,g,b;
3330       int cr = pcr[i] - 128;
3331       int cb = pcb[i] - 128;
3332       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3333       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3334       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3335       r >>= 20;
3336       g >>= 20;
3337       b >>= 20;
3338       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3339       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3340       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3341       out[0] = (stbi_uc)r;
3342       out[1] = (stbi_uc)g;
3343       out[2] = (stbi_uc)b;
3344       out[3] = 255;
3345       out += step;
3346    }
3347 }
3348 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD YCbCr -> RGB conversion with the same contract as
// stbi__YCbCr_to_RGB_row.  Only step == 4 is vectorised, eight pixels at a
// time; any other step, and the pixels left over after the vector loop, go
// through the scalar loop at the bottom.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int px = 0;

#ifdef STBI_SSE2
   // step == 3 is awkward on the final interleave and is not accelerated
   if (step == 4) {
      // a straightforward implementation, not heavily tuned
      __m128i bias128 = _mm_set1_epi8(-0x80);
      __m128i k_cr_r  = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i k_cr_g  = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i k_cb_g  = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i k_cb_b  = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_round = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i alpha   = _mm_set1_epi16(255); // alpha channel

      for (; px+7 < count; px += 8) {
         // load eight samples of each plane; xor with 0x80 subtracts 128
         __m128i y8     = _mm_loadl_epi64((__m128i *) (y+px));
         __m128i cr8    = _mm_loadl_epi64((__m128i *) (pcr+px));
         __m128i cb8    = _mm_loadl_epi64((__m128i *) (pcb+px));
         __m128i cr_s8  = _mm_xor_si128(cr8, bias128);
         __m128i cb_s8  = _mm_xor_si128(cb8, bias128);

         // widen to 16 bits with the sample in the high byte (i.e. << 8);
         // for luma the low byte carries the rounding constant
         __m128i y_hi   = _mm_unpacklo_epi8(y_round, y8);
         __m128i cr16   = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_s8);
         __m128i cb16   = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_s8);

         // colour transform, results scaled by 16
         __m128i y16    = _mm_srli_epi16(y_hi, 4);
         __m128i r_cr   = _mm_mulhi_epi16(k_cr_r, cr16);
         __m128i g_cb   = _mm_mulhi_epi16(k_cb_g, cb16);
         __m128i b_cb   = _mm_mulhi_epi16(cb16, k_cb_b);
         __m128i g_cr   = _mm_mulhi_epi16(cr16, k_cr_g);
         __m128i r_x16  = _mm_add_epi16(r_cr, y16);
         __m128i g_part = _mm_add_epi16(g_cb, y16);
         __m128i b_x16  = _mm_add_epi16(y16, b_cb);
         __m128i g_x16  = _mm_add_epi16(g_part, g_cr);

         // remove the scale
         __m128i r16    = _mm_srai_epi16(r_x16, 4);
         __m128i b16    = _mm_srai_epi16(b_x16, 4);
         __m128i g16    = _mm_srai_epi16(g_x16, 4);

         // saturate to bytes, paired up ready for the transpose
         __m128i rb     = _mm_packus_epi16(r16, b16);
         __m128i ga     = _mm_packus_epi16(g16, alpha);

         // transpose so that the channels of each pixel are adjacent
         __m128i mix_lo = _mm_unpacklo_epi8(rb, ga);
         __m128i mix_hi = _mm_unpackhi_epi8(rb, ga);
         __m128i pix_lo = _mm_unpacklo_epi16(mix_lo, mix_hi);
         __m128i pix_hi = _mm_unpackhi_epi16(mix_lo, mix_hi);

         // eight 4-byte pixels = 32 bytes
         _mm_storeu_si128((__m128i *) (out + 0), pix_lo);
         _mm_storeu_si128((__m128i *) (out + 16), pix_hi);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // step == 3 would be easy to support here too, but is not implemented
   if (step == 4) {
      // a straightforward implementation, not heavily tuned
      uint8x8_t bias128 = vdup_n_u8(0x80);
      int16x8_t k_cr_r  = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t k_cr_g  = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t k_cb_g  = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t k_cb_b  = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; px+7 < count; px += 8) {
         // load eight samples of each plane and re-centre chroma on zero
         uint8x8_t y8    = vld1_u8(y + px);
         uint8x8_t cr8   = vld1_u8(pcr + px);
         uint8x8_t cb8   = vld1_u8(pcb + px);
         int8x8_t  cr_s8 = vreinterpret_s8_u8(vsub_u8(cr8, bias128));
         int8x8_t  cb_s8 = vreinterpret_s8_u8(vsub_u8(cb8, bias128));

         // widen to signed 16 bits
         int16x8_t y16   = vreinterpretq_s16_u16(vshll_n_u8(y8, 4));
         int16x8_t cr16  = vshll_n_s8(cr_s8, 7);
         int16x8_t cb16  = vshll_n_s8(cb_s8, 7);

         // colour transform
         int16x8_t r_cr  = vqdmulhq_s16(cr16, k_cr_r);
         int16x8_t g_cb  = vqdmulhq_s16(cb16, k_cb_g);
         int16x8_t g_cr  = vqdmulhq_s16(cr16, k_cr_g);
         int16x8_t b_cb  = vqdmulhq_s16(cb16, k_cb_b);
         int16x8_t r_x16 = vaddq_s16(y16, r_cr);
         int16x8_t g_x16 = vaddq_s16(vaddq_s16(y16, g_cb), g_cr);
         int16x8_t b_x16 = vaddq_s16(y16, b_cb);

         // remove the scale with rounding and saturate to bytes
         uint8x8x4_t pix;
         pix.val[0] = vqrshrun_n_s16(r_x16, 4);
         pix.val[1] = vqrshrun_n_s16(g_x16, 4);
         pix.val[2] = vqrshrun_n_s16(b_x16, 4);
         pix.val[3] = vdup_n_u8(255);

         // interleaving store: r,g,b,a for each of the eight pixels
         vst4_u8(out, pix);
         out += 8*4;
      }
   }
#endif

   // scalar path for whatever the vector loops did not cover
   for (; px < count; ++px, out += step) {
      int luma = (y[px] << 20) + (1<<19); // +0.5 in fixed point, for rounding
      int cr = pcr[px] - 128;
      int cb = pcb[px] - 128;
      int r = luma + cr* stbi__float2fixed(1.40200f);
      int g = luma + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      int b = luma + cb* stbi__float2fixed(1.77200f);

      r >>= 20;
      g >>= 20;
      b >>= 20;

      // clamp to the byte range
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
   }
}
#endif
3483 
3484 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3485 static void stbi__setup_jpeg(stbi__jpeg *j)
3486 {
3487    j->idct_block_kernel = stbi__idct_block;
3488    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3489    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3490 
3491 #ifdef STBI_SSE2
3492    if (stbi__sse2_available()) {
3493       j->idct_block_kernel = stbi__idct_simd;
3494       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3495       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3496    }
3497 #endif
3498 
3499 #ifdef STBI_NEON
3500    j->idct_block_kernel = stbi__idct_simd;
3501    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3502    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3503 #endif
3504 }
3505 
3506 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3507 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3508 {
3509    stbi__free_jpeg_components(j, j->s->img_n, 0);
3510 }
3511 
3512 typedef struct
3513 {
3514    resample_row_func resample;
3515    stbi_uc *line0,*line1;
3516    int hs,vs;   // expansion factor in each axis
3517    int w_lores; // horizontal pixels pre-expansion
3518    int ystep;   // how far through vertical expansion we are
3519    int ypos;    // which pre-expansion row we're on
3520 } stbi__resample;
3521 
3522 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3523 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3524 {
3525    unsigned int t = x*y + 128;
3526    return (stbi_uc) ((t + (t >>8)) >> 8);
3527 }
3528 
load_jpeg_image(stbi__jpeg * z,int * out_x,int * out_y,int * comp,int req_comp)3529 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3530 {
3531    int n, decode_n, is_rgb;
3532    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3533 
3534    // validate req_comp
3535    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3536 
3537    // load a jpeg image from whichever source, but leave in YCbCr format
3538    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3539 
3540    // determine actual number of components to generate
3541    n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3542 
3543    is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3544 
3545    if (z->s->img_n == 3 && n < 3 && !is_rgb)
3546       decode_n = 1;
3547    else
3548       decode_n = z->s->img_n;
3549 
3550    // resample and color-convert
3551    {
3552       int k;
3553       unsigned int i,j;
3554       stbi_uc *output;
3555       stbi_uc *coutput[4];
3556 
3557       stbi__resample res_comp[4];
3558 
3559       for (k=0; k < decode_n; ++k) {
3560          stbi__resample *r = &res_comp[k];
3561 
3562          // allocate line buffer big enough for upsampling off the edges
3563          // with upsample factor of 4
3564          z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3565          if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3566 
3567          r->hs      = z->img_h_max / z->img_comp[k].h;
3568          r->vs      = z->img_v_max / z->img_comp[k].v;
3569          r->ystep   = r->vs >> 1;
3570          r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3571          r->ypos    = 0;
3572          r->line0   = r->line1 = z->img_comp[k].data;
3573 
3574          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3575          else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3576          else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3577          else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3578          else                               r->resample = stbi__resample_row_generic;
3579       }
3580 
3581       // can't error after this so, this is safe
3582       output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3583       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3584 
3585       // now go ahead and resample
3586       for (j=0; j < z->s->img_y; ++j) {
3587          stbi_uc *out = output + n * z->s->img_x * j;
3588          for (k=0; k < decode_n; ++k) {
3589             stbi__resample *r = &res_comp[k];
3590             int y_bot = r->ystep >= (r->vs >> 1);
3591             coutput[k] = r->resample(z->img_comp[k].linebuf,
3592                                      y_bot ? r->line1 : r->line0,
3593                                      y_bot ? r->line0 : r->line1,
3594                                      r->w_lores, r->hs);
3595             if (++r->ystep >= r->vs) {
3596                r->ystep = 0;
3597                r->line0 = r->line1;
3598                if (++r->ypos < z->img_comp[k].y)
3599                   r->line1 += z->img_comp[k].w2;
3600             }
3601          }
3602          if (n >= 3) {
3603             stbi_uc *y = coutput[0];
3604             if (z->s->img_n == 3) {
3605                if (is_rgb) {
3606                   for (i=0; i < z->s->img_x; ++i) {
3607                      out[0] = y[i];
3608                      out[1] = coutput[1][i];
3609                      out[2] = coutput[2][i];
3610                      out[3] = 255;
3611                      out += n;
3612                   }
3613                } else {
3614                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3615                }
3616             } else if (z->s->img_n == 4) {
3617                if (z->app14_color_transform == 0) { // CMYK
3618                   for (i=0; i < z->s->img_x; ++i) {
3619                      stbi_uc m = coutput[3][i];
3620                      out[0] = stbi__blinn_8x8(coutput[0][i], m);
3621                      out[1] = stbi__blinn_8x8(coutput[1][i], m);
3622                      out[2] = stbi__blinn_8x8(coutput[2][i], m);
3623                      out[3] = 255;
3624                      out += n;
3625                   }
3626                } else if (z->app14_color_transform == 2) { // YCCK
3627                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3628                   for (i=0; i < z->s->img_x; ++i) {
3629                      stbi_uc m = coutput[3][i];
3630                      out[0] = stbi__blinn_8x8(255 - out[0], m);
3631                      out[1] = stbi__blinn_8x8(255 - out[1], m);
3632                      out[2] = stbi__blinn_8x8(255 - out[2], m);
3633                      out += n;
3634                   }
3635                } else { // YCbCr + alpha?  Ignore the fourth channel for now
3636                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3637                }
3638             } else
3639                for (i=0; i < z->s->img_x; ++i) {
3640                   out[0] = out[1] = out[2] = y[i];
3641                   out[3] = 255; // not used if n==3
3642                   out += n;
3643                }
3644          } else {
3645             if (is_rgb) {
3646                if (n == 1)
3647                   for (i=0; i < z->s->img_x; ++i)
3648                      *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3649                else {
3650                   for (i=0; i < z->s->img_x; ++i, out += 2) {
3651                      out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3652                      out[1] = 255;
3653                   }
3654                }
3655             } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3656                for (i=0; i < z->s->img_x; ++i) {
3657                   stbi_uc m = coutput[3][i];
3658                   stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3659                   stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3660                   stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3661                   out[0] = stbi__compute_y(r, g, b);
3662                   out[1] = 255;
3663                   out += n;
3664                }
3665             } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3666                for (i=0; i < z->s->img_x; ++i) {
3667                   out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3668                   out[1] = 255;
3669                   out += n;
3670                }
3671             } else {
3672                stbi_uc *y = coutput[0];
3673                if (n == 1)
3674                   for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3675                else
3676                   for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3677             }
3678          }
3679       }
3680       stbi__cleanup_jpeg(z);
3681       *out_x = z->s->img_x;
3682       *out_y = z->s->img_y;
3683       if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3684       return output;
3685    }
3686 }
3687 
// Entry point for loading a JPEG from context `s`.
//   x, y, comp - receive dimensions and source component count
//                (see load_jpeg_image)
//   req_comp   - requested channel count, 0 for the source default
//   ri         - unused by the JPEG loader
// Returns the decoded pixels, or NULL on failure (including failure to
// allocate the temporary decoder state).
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   // the decoder state is too large for the stack, so it is heap-allocated;
   // bail out instead of dereferencing NULL if that allocation fails
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   STBI_NOTUSED(ri);
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3699 
// Report whether the data in `s` starts like a JPEG (i.e. with an SOI
// marker).  The context is rewound afterwards so another loader can look at
// the same bytes.  Returns non-zero for JPEG, 0 otherwise; 0 is also
// returned if the temporary decoder state cannot be allocated, in which case
// nothing has been read from `s`.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // do not dereference a failed allocation
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3711 
// Parse just the JPEG header through `j` and report the image properties.
// x, y and comp may each be NULL if the caller is not interested.  comp
// receives 3 for sources with three or more components, otherwise 1.
// Returns 1 on success; on failure the context is rewound and 0 is returned.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   int header_ok = stbi__decode_jpeg_header(j, STBI__SCAN_header);
   if (header_ok) {
      if (x) *x = j->s->img_x;
      if (y) *y = j->s->img_y;
      if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
      return 1;
   }
   stbi__rewind( j->s );
   return 0;
}
3723 
// Query dimensions and component count of the JPEG in context `s` without
// decoding pixel data.  x, y and comp are passed through to
// stbi__jpeg_info_raw.  Returns 1 on success and 0 on failure (including
// failure to allocate the temporary decoder state).
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // do not dereference a failed allocation
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3733 #endif
3734 
3735 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3736 //    simple implementation
3737 //      - all input must be provided in an upfront buffer
3738 //      - all output is written to a single output buffer (can malloc/realloc)
3739 //    performance
3740 //      - fast huffman
3741 
3742 #ifndef STBI_NO_ZLIB
3743 
3744 // fast-way is faster to check than jpeg huffman, but slow way is slower
3745 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
3746 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3747 
3748 // zlib-style huffman encoding
3749 // (jpegs packs from left, zlib from right, so can't share code)
3750 typedef struct
3751 {
3752    stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3753    stbi__uint16 firstcode[16];
3754    int maxcode[17];
3755    stbi__uint16 firstsymbol[16];
3756    stbi_uc  size[288];
3757    stbi__uint16 value[288];
3758 } stbi__zhuffman;
3759 
stbi__bitreverse16(int n)3760 stbi_inline static int stbi__bitreverse16(int n)
3761 {
3762   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3763   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3764   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3765   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3766   return n;
3767 }
3768 
stbi__bit_reverse(int v,int bits)3769 stbi_inline static int stbi__bit_reverse(int v, int bits)
3770 {
3771    STBI_ASSERT(bits <= 16);
3772    // to bit reverse n bits, reverse 16 and shift
3773    // e.g. 11 bits, bit reverse and shift away 5
3774    return stbi__bitreverse16(v) >> (16-bits);
3775 }
3776 
// Build the decoding tables in `z` from a list of code lengths.
//   sizelist - code length of each symbol, 0 meaning "symbol not used"
//   num      - number of entries in sizelist
// Codes are assigned as described in the DEFLATE spec: shorter codes first
// and, within one length, in increasing symbol order.
// Returns 1 on success, 0 if the lengths do not describe a valid code.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int len, sym;
   int code = 0;         // first unassigned code of the current length
   int first_sym = 0;    // number of symbols with a shorter code
   int next_code[16];    // next code to hand out, per length
   int count[17];        // number of symbols per length

   memset(count, 0, sizeof(count));
   memset(z->fast, 0, sizeof(z->fast));

   // histogram of code lengths; length 0 does not take part
   for (sym=0; sym < num; ++sym)
      ++count[sizelist[sym]];
   count[0] = 0;

   for (len=1; len < 16; ++len) {
      if (count[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // first code / first symbol slot of every length
   for (len=1; len < 16; ++len) {
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) first_sym;
      code += count[len];
      if (count[len] && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      first_sym += count[len];
   }
   z->maxcode[16] = 0x10000; // sentinel

   // hand out the codes and fill the lookup tables
   for (sym=0; sym < num; ++sym) {
      int s = sizelist[sym];
      int slot, idx;
      stbi__uint16 fastv;

      if (s == 0)
         continue;

      slot  = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
      fastv = (stbi__uint16) ((s << 9) | sym);
      z->size [slot] = (stbi_uc     ) s;
      z->value[slot] = (stbi__uint16) sym;

      if (s <= STBI__ZFAST_BITS) {
         // the input is consumed lsb-first, so the table is indexed by the
         // bit-reversed code; every index whose low s bits match gets the entry
         for (idx = stbi__bit_reverse(next_code[s],s); idx < (1 << STBI__ZFAST_BITS); idx += (1 << s))
            z->fast[idx] = fastv;
      }
      ++next_code[s];
   }
   return 1;
}
3823 
3824 // zlib-from-memory implementation for PNG reading
3825 //    because PNG allows splitting the zlib stream arbitrarily,
3826 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3827 //    we require PNG read all the IDATs and combine them into a single
3828 //    memory buffer
3829 
3830 typedef struct
3831 {
3832    stbi_uc *zbuffer, *zbuffer_end;
3833    int num_bits;
3834    stbi__uint32 code_buffer;
3835 
3836    char *zout;
3837    char *zout_start;
3838    char *zout_end;
3839    int   z_expandable;
3840 
3841    stbi__zhuffman z_length, z_distance;
3842 } stbi__zbuf;
3843 
stbi__zget8(stbi__zbuf * z)3844 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3845 {
3846    if (z->zbuffer >= z->zbuffer_end) return 0;
3847    return *z->zbuffer++;
3848 }
3849 
// Top up the bit reservoir one byte at a time until it holds more than 24 bits.
// New bytes are placed above the bits that are already buffered.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int incoming;
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      incoming = (unsigned int) stbi__zget8(z);
      z->code_buffer |= incoming << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24)
         break;
   }
}
3858 
stbi__zreceive(stbi__zbuf * z,int n)3859 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3860 {
3861    unsigned int k;
3862    if (z->num_bits < n) stbi__fill_bits(z);
3863    k = z->code_buffer & ((1 << n) - 1);
3864    z->code_buffer >>= n;
3865    z->num_bits -= n;
3866    return k;
3867 }
3868 
// Decode one Huffman symbol whose code was not resolved by the fast table.
// Returns the symbol value, or -1 if the buffered bits do not form a valid
// code for table z (corrupt stream or inconsistent table).
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // the stream is untrusted: a damaged table can produce an index outside the
   // symbol arrays, or one whose recorded length disagrees with s. Report that
   // as a decode failure instead of asserting / indexing out of bounds.
   if (b < 0 || (unsigned int) b >= sizeof (z->size)) return -1;
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3886 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3887 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3888 {
3889    int b,s;
3890    if (a->num_bits < 16) stbi__fill_bits(a);
3891    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3892    if (b) {
3893       s = b >> 9;
3894       a->code_buffer >>= s;
3895       a->num_bits -= s;
3896       return b & 511;
3897    }
3898    return stbi__zhuffman_decode_slowpath(a, z);
3899 }
3900 
// Grow the output buffer so that at least n more bytes fit after zout.
// zout is the caller's current write cursor; it is stored into z->zout first.
// The capacity is doubled until it is large enough. On success z->zout_start,
// z->zout and z->zout_end describe the (possibly moved) buffer and 1 is
// returned. Returns 0 (with a failure reason set) if the buffer is not
// expandable, the required size does not fit in an int, or realloc fails;
// in the failure cases the old buffer is left untouched.
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   const int int_max = (int) (~0u >> 1);
   char *q;
   int cur, limit, old_limit;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // cur + n must be representable before it is compared against limit
   if (n < 0 || cur > int_max - n) return stbi__err("outofmem", "Out of memory");
   // a zero-sized buffer can never be grown by doubling; start from one byte
   if (limit <= 0) limit = 1;
   while (cur + n > limit) {
      // doubling again would overflow int
      if (limit > int_max / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3919 
// Base match length for each length symbol, indexed by (symbol - 257).
// The trailing zero entries pad the table for symbols with no assigned length.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// Number of extra bits that follow each length symbol, same indexing.
static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Base match distance for each distance symbol.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// Number of extra bits that follow each distance symbol (last two entries are padding).
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0};
3933 
// Decode one Huffman-compressed DEFLATE block using a->z_length and
// a->z_distance, appending the output at a->zout (growing the buffer through
// stbi__zexpand when needed). Returns 1 when the end-of-block symbol (256) is
// reached, with a->zout advanced; returns 0 with a failure reason on corrupt
// data or when the output cannot be grown.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // length symbols 286 and 287 are reserved and must not appear in the
         // data; their table entries are padding (length 0)
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // distance symbols 30 and 31 are reserved; accepting them would give
         // dist == 0 and copy from the not-yet-written output position
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare remaining room instead of forming a pointer past the buffer end
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
3975 
// Read the header of a dynamic-Huffman DEFLATE block and build a->z_length and
// a->z_distance from it. The code lengths are themselves Huffman-coded with a
// small "code length" alphabet of 19 symbols: 0..15 are literal lengths, 16
// repeats the previous length, 17 and 18 emit runs of zeros.
// Returns 1 on success, 0 (failure reason set) on a malformed header.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i, filled;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   // code-length code lengths arrive in the fixed "dezigzag" order, 3 bits each;
   // the ones not transmitted stay 0
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i)
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   // decode hlit + hdist code lengths into lencodes
   filled = 0;
   while (filled < ntot) {
      int run;
      stbi_uc fill;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (sym < 16) {
         lencodes[filled++] = (stbi_uc) sym;
         continue;
      }
      fill = 0;
      if (sym == 16) {
         // repeat the previous length 3..6 times
         run = stbi__zreceive(a,2)+3;
         if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
         fill = lencodes[filled-1];
      } else if (sym == 17) {
         // 3..10 zeros
         run = stbi__zreceive(a,3)+3;
      } else {
         // 11..138 zeros
         STBI_ASSERT(sym == 18);
         run = stbi__zreceive(a,7)+11;
      }
      if (ntot - filled < run) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, fill, run);
      filled += run;
   }
   if (filled != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4024 
// Copy one stored (uncompressed) DEFLATE block to the output.
// The block starts at the next byte boundary with a 4-byte header: LEN and its
// one's complement NLEN, both little-endian 16-bit. Returns 1 on success,
// 0 (failure reason set) if the header is inconsistent, the input is too
// short, or the output cannot be grown.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   // was an assert; an inconsistent bit count means the stream state is corrupt
   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare against the remaining byte counts; forming zbuffer+len or zout+len
   // beyond the end of the buffers would be undefined behaviour
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4053 
// Validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 if acceptable, 0 with a failure reason otherwise.
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf    = stbi__zget8(a);
   int flg    = stbi__zget8(a);
   int method = cmf & 15;          // low nibble of CMF is the compression method
   int check  = cmf*256 + flg;     // the 16-bit header value must be a multiple of 31
   /* int cinfo = cmf >> 4; */

   if (check % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32)
      return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
4066 
4067 static const stbi_uc stbi__zdefault_length[288] =
4068 {
4069    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4070    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4071    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4072    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4073    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4074    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4075    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4076    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4077    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4078 };
4079 static const stbi_uc stbi__zdefault_distance[32] =
4080 {
4081    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4082 };
4083 /*
4084 Init algorithm:
4085 {
4086    int i;   // use <= to match clearly with spec
4087    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4088    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4089    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4090    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4091 
4092    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4093 }
4094 */
4095 
// Inflate a whole stream into the output buffer described by a.
// If parse_header is nonzero the two-byte zlib header is validated first;
// otherwise the input is treated as raw DEFLATE data. Blocks are processed
// until one with the "final" bit set has been decoded.
// Returns 1 on success, 0 on failure (a failure reason is recorded).
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      // 3-bit block header: final flag, then 2-bit block type
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      if (type == 0) {
         if (!stbi__parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         // reserved block type: record a reason like every other error path,
         // so the caller does not see a stale or missing failure message
         return stbi__err("bad block type","Corrupt PNG");
      } else {
         if (type == 1) {
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
         } else {
            if (!stbi__compute_huffman_codes(a)) return 0;
         }
         if (!stbi__parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}
4123 
// Point the decoder at an output buffer of olen bytes starting at obuf, record
// whether that buffer may be grown (exp), and run the inflate.
// Returns the result of stbi__parse_zlib.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = a->zout = obuf;
   a->zout_end     = obuf + olen;

   return stbi__parse_zlib(a, parse_header);
}
4133 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4134 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4135 {
4136    stbi__zbuf a;
4137    char *p = (char *) stbi__malloc(initial_size);
4138    if (p == NULL) return NULL;
4139    a.zbuffer = (stbi_uc *) buffer;
4140    a.zbuffer_end = (stbi_uc *) buffer + len;
4141    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4142       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4143       return a.zout_start;
4144    } else {
4145       STBI_FREE(a.zout_start);
4146       return NULL;
4147    }
4148 }
4149 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4150 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4151 {
4152    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4153 }
4154 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4155 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4156 {
4157    stbi__zbuf a;
4158    char *p = (char *) stbi__malloc(initial_size);
4159    if (p == NULL) return NULL;
4160    a.zbuffer = (stbi_uc *) buffer;
4161    a.zbuffer_end = (stbi_uc *) buffer + len;
4162    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4163       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4164       return a.zout_start;
4165    } else {
4166       STBI_FREE(a.zout_start);
4167       return NULL;
4168    }
4169 }
4170 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4171 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4172 {
4173    stbi__zbuf a;
4174    a.zbuffer = (stbi_uc *) ibuffer;
4175    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4176    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4177       return (int) (a.zout - a.zout_start);
4178    else
4179       return -1;
4180 }
4181 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4182 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4183 {
4184    stbi__zbuf a;
4185    char *p = (char *) stbi__malloc(16384);
4186    if (p == NULL) return NULL;
4187    a.zbuffer = (stbi_uc *) buffer;
4188    a.zbuffer_end = (stbi_uc *) buffer+len;
4189    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4190       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4191       return a.zout_start;
4192    } else {
4193       STBI_FREE(a.zout_start);
4194       return NULL;
4195    }
4196 }
4197 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4198 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4199 {
4200    stbi__zbuf a;
4201    a.zbuffer = (stbi_uc *) ibuffer;
4202    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4203    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4204       return (int) (a.zout - a.zout_start);
4205    else
4206       return -1;
4207 }
4208 #endif
4209 
4210 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4211 //    simple implementation
4212 //      - only 8-bit samples
4213 //      - no CRC checking
4214 //      - allocates lots of intermediate memory
4215 //        - avoids problem of streaming data between subsystems
4216 //        - avoids explicit window management
4217 //    performance
4218 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4219 
4220 #ifndef STBI_NO_PNG
4221 typedef struct
4222 {
4223    stbi__uint32 length;
4224    stbi__uint32 type;
4225 } stbi__pngchunk;
4226 
stbi__get_chunk_header(stbi__context * s)4227 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4228 {
4229    stbi__pngchunk c;
4230    c.length = stbi__get32be(s);
4231    c.type   = stbi__get32be(s);
4232    return c;
4233 }
4234 
// Consume the 8-byte PNG signature from the stream.
// Returns 1 if it matches; stops at the first mismatching byte and returns 0
// with the failure reason "bad png sig".
static int stbi__check_png_header(stbi__context *s)
{
   static stbi_uc png_sig[8] = { 0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a };
   int i = 0;
   while (i < 8) {
      if (stbi__get8(s) != png_sig[i])
         return stbi__err("bad png sig","Not a PNG");
      ++i;
   }
   return 1;
}
4243 
4244 typedef struct
4245 {
4246    stbi__context *s;
4247    stbi_uc *idata, *expanded, *out;
4248    int depth;
4249 } stbi__png;
4250 
4251 
// PNG scanline filter types (0..4 are the values stored in the file).
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4262 
4263 static stbi_uc first_row_filter[5] =
4264 {
4265    STBI__F_none,
4266    STBI__F_sub,
4267    STBI__F_none,
4268    STBI__F_avg_first,
4269    STBI__F_paeth_first
4270 };
4271 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
4282 
4283 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4284 
4285 // create the png data from post-deflated data
// Turn inflated PNG scanline data into pixels.
//   a        decoder state; a->out receives a newly allocated x*y*out_n buffer
//            (times 2 bytes per component when depth == 16)
//   raw      filtered scanlines: one filter-type byte followed by the row data
//   raw_len  number of bytes available at raw
//   out_n    components per output pixel: s->img_n, or s->img_n+1 to add alpha
//   x, y     dimensions of this (sub)image in pixels
//   depth    bits per sample (1, 2, 4, 8 or 16)
//   color    PNG color type; 0 makes low-bit-depth samples scale to 0..255
// Returns 1 on success, 0 with a failure reason otherwise. a->out is assigned
// before any validation, so on failure it holds either NULL or the new buffer.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes;
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   // img_n * x * depth + 7 is computed in 32-bit unsigned arithmetic below; if
   // it wrapped, img_len would be too small and the raw_len check would pass
   // for data that is actually too short. Reject rows whose bit count does not
   // fit in a positive int.
   if (x != 0 && (stbi__uint32) (img_n * depth) > (0x7fffffffu - 7) / x)
      return stbi__err("too large", "Corrupt PNG");
   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   img_len = (img_width_bytes + 1) * y;
   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         // was an assert, but the values come from the file: fail cleanly instead
         if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walk backwards so the in-place widening never overwrites unread samples)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4495 
// Build the final pixel buffer in a->out from inflated PNG data.
// Non-interlaced images are handed straight to stbi__create_png_image_raw.
// Interlaced images are decoded as seven sub-images (one per pass), each of
// which is scattered into its positions in the full-size image.
// Returns 1 on success, 0 with a failure reason otherwise.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // the passes below memcpy into 'final', so a failed allocation must stop here
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // per-pass origin and spacing of the pixels within the full image
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes of raw data consumed by this pass: (row bytes + filter byte) * rows
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4539 
// Apply color-key transparency to an 8-bit image in z->out.
// tc holds the transparent color; out_n must be 2 (gray+alpha) or 4 (RGBA).
// For out_n == 2 alpha is rewritten for every pixel (0 on a match, else 255);
// for out_n == 4 alpha is only cleared on matching pixels, relying on it
// already being 255 elsewhere. Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   // compute color-based transparency, assuming we've
   // already got 255 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4564 
// 16-bit counterpart of stbi__compute_transparency: z->out is treated as an
// array of 16-bit samples. For out_n == 2 alpha becomes 0 on a match and 65535
// otherwise; for out_n == 4 alpha is only cleared on matching pixels.
// Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   // compute color-based transparency, assuming we've
   // already got 65535 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 65535;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4589 
// Replace the palette-index image in a->out with an expanded color image.
// Each index selects a 4-byte palette entry; pal_img_n == 3 copies the first
// three bytes per pixel, any other value copies all four. The old buffer is
// freed and a->out points at the new one. len is unused.
// Returns 1 on success, 0 ("outofmem") if the new buffer cannot be allocated.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 idx, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and free(out) below, exiting would leak
   dst = expanded;

   if (pal_img_n == 3) {
      for (idx = 0; idx < pixel_count; ++idx, dst += 3) {
         const stbi_uc *entry = palette + indices[idx]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
      }
   } else {
      for (idx = 0; idx < pixel_count; ++idx, dst += 4) {
         const stbi_uc *entry = palette + indices[idx]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
      }
   }
   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4626 
4627 static int stbi__unpremultiply_on_load = 0;
4628 static int stbi__de_iphone_flag = 0;
4629 
stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)4630 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4631 {
4632    stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4633 }
4634 
stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)4635 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4636 {
4637    stbi__de_iphone_flag = flag_true_if_should_convert;
4638 }
4639 
// Convert the decoded image in z->out from BGR(A) to RGB(A) byte order, in
// place. For 4-channel images, when stbi__unpremultiply_on_load is set, the
// colour channels are additionally divided by alpha (rounded); pixels with
// alpha 0 are only swapped.
//
// z->s->img_out_n must be 3 or 4.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi_uc *px = z->out;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;

   if (ctx->img_out_n == 3) {
      // bgr -> rgb
      for (; remaining > 0; --remaining, px += 3) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // bgra -> rgba, alpha left alone
      for (; remaining > 0; --remaining, px += 4) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   // bgra -> rgba and unpremultiply
   for (; remaining > 0; --remaining, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha == 0) {
         // nothing to divide by: just swap
         px[0] = px[2];
         px[2] = first;
      } else {
         stbi_uc half = alpha / 2;   // rounding term for the division
         px[0] = (stbi_uc) ((px[2] * 255 + half) / alpha);
         px[1] = (stbi_uc) ((px[1] * 255 + half) / alpha);
         px[2] = (stbi_uc) ((first * 255 + half) / alpha);
      }
   }
}
4682 
// Pack four chunk-name characters into one integer, first character in the
// most significant byte, so chunk types can be used as switch case labels.
#define STBI__PNG_TYPE(a,b,c,d)  ((d) + ((c) << 8) + ((b) << 16) + ((a) << 24))
4684 
// Walk the chunks of a PNG stream and, depending on `scan`, stop after the
// signature, after the header information is known, or after the whole image
// has been decoded into z->out.
//
//   z         decoder state; z->s is the input. z->expanded, z->idata and
//             z->out are reset to NULL on entry. On failure they may be left
//             non-NULL and are not freed here.
//   scan      STBI__SCAN_type   : only check the PNG signature
//             STBI__SCAN_header : return once img_x/img_y/img_n are known
//             STBI__SCAN_load   : decode the image
//   req_comp  number of channels the caller wants (0 = as stored); used to
//             pick the decode channel count
//
// Returns 1 on success, 0 on failure (normally via stbi__err).
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   stbi_uc palette[1024], pal_img_n=0;
   stbi_uc has_trans=0, tc[3];
   stbi__uint16 tc16[3];
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // Apple-specific chunk: marks the "iPhone" PNG variant
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            // The file may supply fewer than 256 entries while pixel data can
            // still index any of them. Pad the remainder with opaque black so
            // an out-of-range index never copies uninitialized stack bytes
            // into the decoded image.
            for (; i < 256; ++i) {
               palette[i*4+0] = 0;
               palette[i*4+1] = 0;
               palette[i*4+2] = 0;
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies per-entry alpha
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // grey / rgb: tRNS supplies a single colour key
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject accumulated sizes that no longer fit in a positive int
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow the compressed-data buffer geometrically
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with one extra (alpha) channel when the caller asked for
            // exactly that, or when a colour key has to be applied
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4856 
// Decode a complete PNG through `p` and hand the pixel buffer to the caller.
//
//   p         decoder state with p->s already set
//   x, y      receive the image dimensions (written unconditionally on success)
//   n         if non-NULL, receives the channel count recorded in img_n
//   req_comp  0 to keep the decoded channel count, or 1..4 to convert to it
//   ri        receives bits_per_channel: 8 for depths below 8, else the depth
//
// Returns the pixel buffer (ownership passes to the caller) or NULL on
// failure. Buffers still held by `p` are freed before a normal return.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *ctx = p->s;

      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take the image away from the decoder so the cleanup below skips it
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8)
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n) *n = ctx->img_n;
   }

   STBI_FREE(p->out);
   p->out = NULL;
   STBI_FREE(p->expanded);
   p->expanded = NULL;
   STBI_FREE(p->idata);
   p->idata = NULL;

   return pixels;
}
4886 
// PNG loader entry point: bind a stack-local decoder state to the stream `s`
// and forward every argument to stbi__do_png, returning its result.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;
   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
4893 
// Report whether `s` starts with a PNG signature, as judged by
// stbi__check_png_header. The stream is rewound whatever the outcome.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
4901 
// Parse just enough of the PNG in p->s to learn its dimensions and channel
// count. Each of x, y and comp may be NULL to skip that value.
//
// Returns 1 on success. On failure the stream is rewound and 0 is returned.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      if (x) *x = p->s->img_x;
      if (y) *y = p->s->img_y;
      if (comp) *comp = p->s->img_n;
      return 1;
   }
   stbi__rewind( p->s );
   return 0;
}
4913 
// PNG info entry point: bind a stack-local decoder state to the stream `s`
// and forward to stbi__png_info_raw, returning its result.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;
   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
4920 #endif
4921 
4922 // Microsoft/Windows BMP image
4923 
4924 #ifndef STBI_NO_BMP
// Check whether the stream looks like a BMP: the bytes 'B','M', twelve bytes
// that are read and ignored, then a header size that is one of the supported
// values. Returns 1 if so, 0 otherwise. Does not rewind the stream.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;

   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
      return 0;

   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset

   header_size = stbi__get32le(s);
   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4939 
// Report whether `s` looks like a BMP, as judged by stbi__bmp_test_raw.
// The stream is rewound whatever the outcome.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
4946 
4947 
// Index (0..31) of the most significant set bit of z, or -1 when z is 0.
// Found by binary search: test the upper half of the remaining width and
// shift it down when it is non-empty.
static int stbi__high_bit(unsigned int z)
{
   int index = 0;
   int step;

   if (z == 0) return -1;

   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         index += step;
         z >>= step;
      }
   }
   return index;
}
4960 
// Number of set bits in a (0..32).
static int stbi__bitcount(unsigned int a)
{
   int count = 0;
   // each pass clears the lowest set bit
   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
4970 
// Move a masked channel value into the 0..255 range and widen it to 8 bits.
//
//   v      the channel bits, still in their original position (pixel & mask)
//   shift  right-shift that puts the channel's top bit at bit 7; negative
//          values mean shift left by that amount
//   bits   width of the channel in bits
//
// Channels narrower than 8 bits are widened by repeating their bit pattern
// into the low bits, so an all-ones channel maps to 255. Returns 0 when
// bits <= 0.
//
// The arithmetic is done on an unsigned copy of v. A mask that includes
// bit 31 makes v negative as an int; shifting that right would sign-extend
// and corrupt the repeated copies, and shifting it left is undefined.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   // a zero-width channel carries no data (and would never advance the loop)
   if (bits <= 0) return 0;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   // replicate the pattern downward until all 8 bits are covered
   for (z = bits; z < 8; z += bits)
      result += u >> z;

   return (int) result;
}
4987 
// Values gathered from a BMP header by stbi__bmp_parse_header.
typedef struct
{
   int bpp;             // bits per pixel
   int offset;          // data offset field read from the file header
   int hsz;             // size of the info header (12, 40, 56, 108 or 124)
   unsigned int mr;     // red channel bit mask
   unsigned int mg;     // green channel bit mask
   unsigned int mb;     // blue channel bit mask
   unsigned int ma;     // alpha channel bit mask
   unsigned int all_a;  // if 0 at end, the alpha channel was loaded but was all 0
} stbi__bmp_data;
4993 
// Read a BMP file header and info header from `s`.
//
// Fills s->img_x / s->img_y (height is stored as read; the caller interprets
// its sign) and info->offset, hsz, bpp and the four channel masks.
// info->all_a is only written for uncompressed 32-bit images with a 40- or
// 56-byte header, so callers should preset it.
//
// 1-bit and RLE-compressed images are rejected.
//
// Returns a non-NULL dummy pointer on success; on failure returns the result
// of stbi__errpuc, which callers compare against NULL.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int header_size, compression, k;

   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   header_size = stbi__get32le(s);
   info->hsz = header_size;
   info->mr = 0;
   info->mg = 0;
   info->mb = 0;
   info->ma = 0;

   switch (header_size) {
      case 12: case 40: case 56: case 108: case 124:
         break;
      default:
         return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   }

   // the 12-byte header stores 16-bit dimensions, all others 32-bit
   if (header_size == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }

   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");

   // the 12-byte header ends here
   if (header_size == 12)
      return (void *) 1;

   compression = stbi__get32le(s);
   if (compression == 1 || compression == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
   // discard sizeof, hres, vres, colorsused, max important
   for (k = 0; k < 5; ++k)
      stbi__get32le(s);

   if (header_size == 108 || header_size == 124) {
      // these headers always carry explicit masks
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      info->ma = stbi__get32le(s);
      stbi__get32le(s); // discard color space
      for (k = 0; k < 12; ++k)
         stbi__get32le(s); // discard color space parameters
      if (header_size == 124) {
         stbi__get32le(s); // discard rendering intent
         stbi__get32le(s); // discard offset of profile data
         stbi__get32le(s); // discard size of profile data
         stbi__get32le(s); // discard reserved
      }
      return (void *) 1;
   }

   // header_size is 40 or 56 from here on
   if (header_size == 56) {
      for (k = 0; k < 4; ++k)
         stbi__get32le(s);
   }

   if (info->bpp != 16 && info->bpp != 32)
      return (void *) 1;

   if (compression == 0) {
      // no masks in the file: use the default layouts
      if (info->bpp == 32) {
         info->mr = 0xffu << 16;
         info->mg = 0xffu <<  8;
         info->mb = 0xffu <<  0;
         info->ma = 0xffu << 24;
         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
      } else {
         info->mr = 31u << 10;
         info->mg = 31u <<  5;
         info->mb = 31u <<  0;
      }
   } else if (compression == 3) {
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      // not documented, but generated by photoshop and handled by mspaint
      if (info->mr == info->mg && info->mg == info->mb) {
         // three identical masks cannot describe distinct channels
         return stbi__errpuc("bad BMP", "bad BMP");
      }
   } else {
      return stbi__errpuc("bad BMP", "bad BMP");
   }

   return (void *) 1;
}
5077 
5078 
// Decode a BMP image from `s`.
//
//   s         input stream, positioned at the start of the file
//   x, y      receive the image width and height
//   comp      if non-NULL, receives the channel count of the file: 4 when the
//             header supplies an alpha mask, otherwise 3
//   req_comp  0 to keep the file's channel count, or 1..4 to convert to it
//   ri        unused
//
// Returns a newly allocated buffer of x*y pixels, or NULL on failure (via
// stbi__errpuc or the header parser).
//
// Palette handling:
//   - the palette size is derived from the data offset and the actual header
//     size for every header type, including the 12-byte header whose entries
//     are 3 bytes each;
//   - a data offset that lands inside the headers (negative palette size) is
//     rejected as corrupt;
//   - palette slots the file does not supply are set to opaque black, so a
//     pixel value beyond the stored palette decodes deterministically
//     instead of reading uninitialized memory.
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // a positive stored height means the rows need flipping after decode
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // number of palette entries = bytes between the headers and the pixel data,
   // divided by the entry size (3 bytes for the 12-byte header, else 4)
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - info.hsz) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      int z=0;
      if (psize <= 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      // slots the file did not supply become opaque black
      for (; i < 256; ++i) {
         pal[i][0] = 0;
         pal[i][1] = 0;
         pal[i][2] = 0;
         pal[i][3] = 255;
      }
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
      for (j=0; j < (int) s->img_y; ++j) {
         // two pixels per iteration: one byte holds both at 4 bpp
         for (i=0; i < (int) s->img_x; i += 2) {
            int v=stbi__get8(s),v2=0;
            if (info.bpp == 4) {
               v2 = v & 15;
               v >>= 4;
            }
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
            if (i+1 == (int) s->img_x) break;
            v = (info.bpp == 8) ? stbi__get8(s) : v2;
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
         }
         stbi__skip(s, pad);
      }
   } else {
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      // "easy" layouts are read byte by byte; anything else goes through masks
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               // file order is b,g,r; output order is r,g,b
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      stbi_uc t;
      // swap row j with its mirror row, byte by byte
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
5235 #endif
5236 
5237 // Targa Truevision - TGA
5238 // by Jonathan Dummer
5239 #ifndef STBI_NO_TGA
5240 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5241 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5242 {
5243    // only RGB or RGBA (incl. 16bit) or grey allowed
5244    if(is_rgb16) *is_rgb16 = 0;
5245    switch(bits_per_pixel) {
5246       case 8:  return STBI_grey;
5247       case 16: if(is_grey) return STBI_grey_alpha;
5248             // else: fall-through
5249       case 15: if(is_rgb16) *is_rgb16 = 1;
5250             return STBI_rgb;
5251       case 24: // fall-through
5252       case 32: return bits_per_pixel/8;
5253       default: return 0;
5254    }
5255 }
5256 
// Read a TGA header from `s` and report the image's width, height and
// channel count. Each of x, y and comp may be NULL to skip that value.
//
// Returns 1 on success (the stream is left where parsing stopped). On any
// validation failure the stream is rewound and 0 is returned.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, channels, image_type, pixel_bits, colormap_bits;
    int entry_bits, colormap_type;

    stbi__get8(s);                   // discard Offset
    colormap_type = stbi__get8(s);   // colormap type
    if (colormap_type > 1) goto fail; // only RGB or indexed allowed

    image_type = stbi__get8(s);      // image type
    if (colormap_type == 1) {        // colormapped (paletted) image
        if (image_type != 1 && image_type != 9) goto fail;
        stbi__skip(s,4);             // skip index of first colormap entry and number of entries
        entry_bits = stbi__get8(s);  //   check bits per palette color entry
        if ( (entry_bits != 8) && (entry_bits != 15) && (entry_bits != 16) && (entry_bits != 24) && (entry_bits != 32) ) goto fail;
        stbi__skip(s,4);             // skip image x and y origin
        colormap_bits = entry_bits;
    } else {                         // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
        if ( (image_type != 2) && (image_type != 3) && (image_type != 10) && (image_type != 11) ) goto fail;
        stbi__skip(s,9);             // skip colormap specification and image x/y origin
        colormap_bits = 0;
    }

    width = stbi__get16le(s);
    if (width < 1) goto fail;        // test width
    height = stbi__get16le(s);
    if (height < 1) goto fail;       // test height

    pixel_bits = stbi__get8(s);      // bits per pixel
    stbi__get8(s);                   // ignore alpha bits

    if (colormap_bits != 0) {
        // when using a colormap, pixel_bits is the size of the indexes
        // I don't think anything but 8 or 16bit indexes makes sense
        if ((pixel_bits != 8) && (pixel_bits != 16)) goto fail;
        channels = stbi__tga_get_comp(colormap_bits, 0, NULL);
    } else {
        channels = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
    }
    if (!channels) goto fail;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = channels;
    return 1;                   // seems to have passed everything

fail:
    stbi__rewind(s);
    return 0;
}
5321 
// Report whether the header at the start of `s` is a TGA layout this loader
// accepts. Returns 1 if every check passes, 0 otherwise. The stream is
// rewound whatever the outcome.
static int stbi__tga_test(stbi__context *s)
{
   int ok = 0;

   do {
      int color_type, image_type, entry_bits, pixel_bits;

      stbi__get8(s);                    //   discard Offset
      color_type = stbi__get8(s);       //   color type
      if (color_type > 1) break;        //   only RGB or indexed allowed

      image_type = stbi__get8(s);       //   image type
      if (color_type == 1) {            // colormapped (paletted) image
         if (image_type != 1 && image_type != 9) break; // colortype 1 demands image type 1 or 9
         stbi__skip(s,4);               // skip index of first colormap entry and number of entries
         entry_bits = stbi__get8(s);    //   check bits per palette color entry
         if ( (entry_bits != 8) && (entry_bits != 15) && (entry_bits != 16) && (entry_bits != 24) && (entry_bits != 32) ) break;
         stbi__skip(s,4);               // skip image x and y origin
      } else {                          // "normal" image w/o colormap
         if ( (image_type != 2) && (image_type != 3) && (image_type != 10) && (image_type != 11) ) break; // only RGB or grey allowed, +/- RLE
         stbi__skip(s,9);               // skip colormap specification and image x/y origin
      }

      if (stbi__get16le(s) < 1) break;  //   test width
      if (stbi__get16le(s) < 1) break;  //   test height

      pixel_bits = stbi__get8(s);       //   bits per pixel
      if ( (color_type == 1) && (pixel_bits != 8) && (pixel_bits != 16) ) break; // for colormapped images, bpp is size of an index
      if ( (pixel_bits != 8) && (pixel_bits != 15) && (pixel_bits != 16) && (pixel_bits != 24) && (pixel_bits != 32) ) break;

      ok = 1; // if we got this far, everything's good and we can return 1 instead of 0
   } while (0);

   stbi__rewind(s);
   return ok;
}
5352 
5353 // read 16bit value and convert to 24bit RGB
stbi__tga_read_rgb16(stbi__context * s,stbi_uc * out)5354 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5355 {
5356    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5357    stbi__uint16 fiveBitMask = 31;
5358    // we have 3 channels with 5bits each
5359    int r = (px >> 10) & fiveBitMask;
5360    int g = (px >> 5) & fiveBitMask;
5361    int b = px & fiveBitMask;
5362    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5363    out[0] = (stbi_uc)((r * 255)/31);
5364    out[1] = (stbi_uc)((g * 255)/31);
5365    out[2] = (stbi_uc)((b * 255)/31);
5366 
5367    // some people claim that the most significant bit might be used for alpha
5368    // (possibly if an alpha-bit is set in the "image descriptor byte")
5369    // but that only made 16bit test images completely translucent..
5370    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5371 }
5372 
// Decodes a TGA image from s.
//
//   x, y      receive the width and height read from the header
//   comp      if non-NULL, receives the component count of the file's data
//   req_comp  if non-zero and different from the file's component count, the
//             decoded pixels are passed through stbi__convert_format
//   ri        not used by this loader
//
// Returns the decoded pixel buffer, or the result of stbi__errpuc on failure
// (unsupported pixel format, size overflow, out of memory, bad palette).
// Rows are flipped vertically unless bit 5 of the image descriptor is set.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of preprocessing: image types >= 8 are the RLE variants
   if ( tga_image_type >= 8 )
   {
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   //   bit 5 of the descriptor set means no vertical flip is needed
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, unpaletted, 8-bit-per-channel data is read a row at a
      // time directly into its final (possibly flipped) position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // An empty palette cannot be indexed: every lookup below would be
         // clamped to entry 0 and read past a zero-byte allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }
         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit palette entries are expanded to 8-bit RGB one by one
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a new RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               //   raw packet: every pixel in it is stored explicitly
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE packet this reuses the last pixel read)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5566 #endif
5567 
5568 // *************************************************************************************************
5569 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5570 
5571 #ifndef STBI_NO_PSD
// Returns 1 if the next four bytes, read big-endian, equal 0x38425053,
// else 0. The stream is rewound afterwards either way.
static int stbi__psd_test(stbi__context *s)
{
   int is_psd = (stbi__get32be(s) == 0x38425053);
   stbi__rewind(s);
   return is_psd;
}
5578 
// Decodes RLE data for one channel into p, writing pixelCount bytes with a
// stride of 4 (p, p+4, p+8, ...).
// Returns 1 on success, 0 if a run would write more than pixelCount values.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int written = 0;

   while (written < pixelCount) {
      int remaining = pixelCount - written;
      int ctrl = stbi__get8(s);
      int run, k;

      if (ctrl == 128)
         continue;                      // no-op control byte

      if (ctrl < 128) {
         // literal run: the next ctrl+1 bytes are copied as-is
         run = ctrl + 1;
         if (run > remaining) return 0; // corrupt data
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicate run: ctrl is a negative 8-bit count, so the next source
         // byte is repeated 257-ctrl times
         stbi_uc fill;
         run = 257 - ctrl;
         if (run > remaining) return 0; // corrupt data
         fill = stbi__get8(s);
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
      written += run;
   }

   return 1;
}
5616 
// Decodes the composited image of a PSD file into a 4-channel buffer.
//
//   x, y      receive the image width and height
//   comp      if non-NULL, receives 4
//   req_comp  if non-zero and not 4, the result is converted to that many
//             components before being returned
//   ri        ri->bits_per_channel is set to 16 when 16-bit output is produced
//   bpc       requested bits per channel; 16-bit output is only produced for
//             uncompressed 16-bit files when bpc == 16
//
// Returns the pixel buffer, or the result of stbi__errpuc on failure.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int num_pixels;
   int num_channels, packing;
   int ch, k;
   int depth;
   int width, height;
   stbi_uc *pixels;
   STBI_NOTUSED(ri);

   // Signature must be "8BPS".
   if (stbi__get32be(s) != 0x38425053)
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Only file version 1 is handled.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Six reserved bytes follow.
   stbi__skip(s, 6 );

   // Channel count (R, G, B, A, etc).
   num_channels = stbi__get16be(s);
   if (num_channels < 0 || num_channels > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Rows first, then columns.
   height = stbi__get32be(s);
   width  = stbi__get32be(s);

   // Only 8 and 16 bits per channel are handled.
   depth = stbi__get16be(s);
   if (depth != 8 && depth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Color mode must be 3 (RGB). Other values:
   //   0: Bitmap        1: Grayscale     2: Indexed color
   //   4: CMYK color    7: Multichannel  8: Duotone       9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Three length-prefixed sections are skipped in turn: mode data (palette
   // for indexed color etc.), image resources, and the reserved data.
   stbi__skip(s,stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );

   // Compression: 0 = none, 1 = RLE; anything larger is rejected.
   packing = stbi__get16be(s);
   if (packing > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Make sure 4*w*h does not overflow.
   if (!stbi__mad3sizes_valid(4, width, height, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Allocate the destination: 8 bytes per pixel for 16-bit output,
   // 4 bytes per pixel otherwise.
   if (!packing && depth == 16 && bpc == 16) {
      pixels = (stbi_uc *) stbi__malloc_mad3(8, width, height, 0);
      ri->bits_per_channel = 16;
   } else
      pixels = (stbi_uc *) stbi__malloc(4 * width*height);

   if (!pixels) return stbi__errpuc("outofmem", "Out of memory");
   num_pixels = width*height;

   if (packing) {
      // RLE as used by .PSD and .TIFF; see stbi__psd_decode_rle.
      // The RLE-compressed data is preceded by a 2-byte data count for each
      // row of each channel, which is simply skipped.
      stbi__skip(s, height * num_channels * 2 );

      // Channels are stored one after another; only the first four are used.
      for (ch = 0; ch < 4; ch++) {
         stbi_uc *dst = pixels + ch;

         if (ch < num_channels) {
            if (!stbi__psd_decode_rle(s, dst, num_pixels)) {
               STBI_FREE(pixels);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         } else {
            // Channel absent from the file: alpha defaults to 255, color to 0.
            for (k = 0; k < num_pixels; k++, dst += 4)
               *dst = (ch == 3 ? 255 : 0);
         }
      }

   } else {
      // Raw image data: each channel in order (Red, Green, Blue, Alpha, ...),
      // one 8-bit (or 16-bit big-endian) value per pixel.
      for (ch = 0; ch < 4; ch++) {
         if (ch < num_channels) {
            if (ri->bits_per_channel == 16) {    // 16-bit output
               stbi__uint16 *dst16 = ((stbi__uint16 *) pixels) + ch;
               for (k = 0; k < num_pixels; k++, dst16 += 4)
                  *dst16 = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *dst = pixels+ch;
               if (depth == 16) {  // 16-bit input reduced to its high byte
                  for (k = 0; k < num_pixels; k++, dst += 4)
                     *dst = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (k = 0; k < num_pixels; k++, dst += 4)
                     *dst = stbi__get8(s);
               }
            }
         } else {
            // Channel absent from the file: fill with the default value.
            if (depth == 16 && bpc == 16) {
               stbi__uint16 *dst16 = ((stbi__uint16 *) pixels) + ch;
               stbi__uint16 fill16 = ch == 3 ? 65535 : 0;
               for (k = 0; k < num_pixels; k++, dst16 += 4)
                  *dst16 = fill16;
            } else {
               stbi_uc *dst = pixels+ch;
               stbi_uc fill = ch == 3 ? 255 : 0;
               for (k = 0; k < num_pixels; k++, dst += 4)
                  *dst = fill;
            }
         }
      }
   }

   // remove weird white matte from PSD: applied only to partially
   // transparent pixels, and only when the file has an alpha channel
   if (num_channels >= 4) {
      if (ri->bits_per_channel == 16) {
         for (k = 0; k < num_pixels; ++k) {
            stbi__uint16 *px = (stbi__uint16 *) pixels + 4*k;
            if (px[3] != 0 && px[3] != 65535) {
               float a = px[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               int c;
               for (c = 0; c < 3; ++c)
                  px[c] = (stbi__uint16) (px[c]*ra + inv_a);
            }
         }
      } else {
         for (k = 0; k < num_pixels; ++k) {
            unsigned char *px = pixels + 4*k;
            if (px[3] != 0 && px[3] != 255) {
               float a = px[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               int c;
               for (c = 0; c < 3; ++c)
                  px[c] = (unsigned char) (px[c]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         pixels = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) pixels, 4, req_comp, width, height);
      else
         pixels = stbi__convert_format(pixels, 4, req_comp, width, height);
      if (pixels == NULL) return pixels; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = height;
   *x = width;

   return pixels;
}
5814 #endif
5815 
5816 // *************************************************************************************************
5817 // Softimage PIC loader
5818 // by Tom Seddon
5819 //
5820 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5821 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5822 
5823 #ifndef STBI_NO_PIC
// Reads up to four bytes from s and compares them with str[0..3].
// Stops reading at the first mismatch. Returns 1 if all four match, else 0.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   const char *expected = str;
   const char *stop = str + 4;

   while (expected != stop) {
      if (stbi__get8(s) != (stbi_uc) *expected)
         return 0;
      ++expected;
   }

   return 1;
}
5833 
// Checks for the PIC signature: the four magic bytes 53 80 F6 34, then 84
// bytes that are ignored, then the text "PICT".
// Returns 1 on a match, 0 otherwise. Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int remaining;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   for (remaining = 84; remaining > 0; --remaining)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5849 
5850 typedef struct
5851 {
5852    stbi_uc size,type,channel;
5853 } stbi__pic_packet;
5854 
// Reads one byte from s for each channel selected in the mask `channel`
// (bit 0x80 >> i selects dest[i], for i in 0..3); unselected bytes of dest are
// left untouched.
// Returns dest, or the result of stbi__errpuc if the stream ends first.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      if (!(channel & (0x80 >> slot)))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[slot] = stbi__get8(s);
   }

   return dest;
}
5868 
// Copies src[i] to dest[i] for each i in 0..3 whose bit (0x80 >> i) is set in
// the mask `channel`; other bytes of dest are left untouched.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int slot = 0;
   int bit = 0x80;

   while (slot < 4) {
      if (channel & bit)
         dest[slot] = src[slot];
      bit >>= 1;
      ++slot;
   }
}
5877 
// Reads the PIC packet descriptors and then the pixel data into `result`,
// which is treated as width*height pixels of 4 bytes each.
// *comp is set to 4 if any packet carries channel bit 0x10 (alpha), else 3.
// Returns result on success; on failure returns 0 or the result of
// stbi__errpuc. The caller keeps ownership of result either way.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channel_union = 0, packet_count = 0, row, more;

   // Collect the chained packet descriptors. Several packets may carry data
   // for the same channel.
   do {
      stbi__pic_packet *pk;

      if (packet_count==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = &packets[packet_count];
      ++packet_count;

      more        = stbi__get8(s);   // non-zero: another descriptor follows
      pk->size    = stbi__get8(s);
      pk->type    = stbi__get8(s);
      pk->channel = stbi__get8(s);

      channel_union |= pk->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more);

   *comp = (channel_union & 0x10 ? 4 : 3); // has alpha channel?

   // Each scanline stores the data of every packet in turn.
   for (row = 0; row < height; ++row) {
      int pidx;

      for (pidx = 0; pidx < packet_count; ++pidx) {
         const stbi__pic_packet *pk = &packets[pidx];
         stbi_uc *dest = result+row*width*4;

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            int col;

            for (col = 0; col < width; ++col, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;

         } else if (pk->type == 1) {
            // pure RLE: (count, value) pairs; an overlong count is clamped
            int left = width, i;

            while (left > 0) {
               stbi_uc count,value[4];

               count=stbi__get8(s);
               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (count > left)
                  count = (stbi_uc) left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (i = 0; i < count; ++i, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= count;
            }

         } else if (pk->type == 2) {
            // mixed RLE: a count byte >= 128 starts a repeated run, a smaller
            // one starts a run of count+1 raw values
            int left = width;

            while (left > 0) {
               int count = stbi__get8(s), i;
               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (count < 128) {
                  // raw run
                  ++count;
                  if (count>left) return stbi__errpuc("bad file","scanline overrun");

                  for (i = 0; i < count; ++i, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               } else {
                  // repeated run; 128 means a 16-bit count follows
                  stbi_uc value[4];

                  if (count==128)
                     count = stbi__get16be(s);
                  else
                     count -= 127;
                  if (count > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (i = 0; i < count; ++i, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               }
               left -= count;
            }

         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
5987 
// Decodes a Softimage PIC image from s.
//
//   px, py    receive the image width and height
//   comp      if non-NULL, receives the component count reported by
//             stbi__pic_load_core (3, or 4 when an alpha channel is present)
//   req_comp  desired component count; 0 means "same as the file"
//   ri        not used by this loader
//
// Returns the pixel buffer converted to req_comp components, or NULL / the
// result of stbi__errpuc on failure.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   if (!comp) comp = &internal_comp;

   // skip the first 92 header bytes
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation can fail; memset on a NULL buffer would crash
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   // pre-fill with 0xff so channels absent from the file default to 255
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Stop here: *comp may not have been set, and handing a NULL buffer to
      // stbi__convert_format would make it read through a null pointer.
      STBI_FREE(result);
      return NULL;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6023 
// Returns the result of stbi__pic_test_core on s, rewinding the stream
// afterwards so the probe leaves it where it started.
static int stbi__pic_test(stbi__context *s)
{
   int is_pic;

   is_pic = stbi__pic_test_core(s);
   stbi__rewind(s);

   return is_pic;
}
6030 #endif
6031 
6032 // *************************************************************************************************
6033 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6034 
6035 #ifndef STBI_NO_GIF
6036 typedef struct
6037 {
6038    stbi__int16 prefix;
6039    stbi_uc first;
6040    stbi_uc suffix;
6041 } stbi__gif_lzw;
6042 
6043 typedef struct
6044 {
6045    int w,h;
6046    stbi_uc *out, *old_out;             // output buffer (always 4 components)
6047    int flags, bgindex, ratio, transparent, eflags, delay;
6048    stbi_uc  pal[256][4];
6049    stbi_uc lpal[256][4];
6050    stbi__gif_lzw codes[4096];
6051    stbi_uc *color_table;
6052    int parse, step;
6053    int lflags;
6054    int start_x, start_y;
6055    int max_x, max_y;
6056    int cur_x, cur_y;
6057    int line_size;
6058 } stbi__gif;
6059 
// Checks whether the next six bytes are "GIF87a" or "GIF89a".
// Stops reading at the first mismatching byte and does not rewind.
// Returns 1 on a match, 0 otherwise.
static int stbi__gif_test_raw(stbi__context *s)
{
   const char *magic = "GIF8";
   int k, version;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k]) return 0;

   version = stbi__get8(s);
   if (!(version == '9' || version == '7')) return 0;

   return (stbi__get8(s) == 'a') ? 1 : 0;
}
6069 
// Returns the result of stbi__gif_test_raw on s, rewinding the stream
// afterwards so the probe leaves it where it started.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);

   return is_gif;
}
6076 
// Reads num_entries 3-byte colors from s into pal.
// The three bytes of each entry are stored in reverse order (the first byte
// read lands in [2], the last in [0]). [3] is set to 255, except for entry
// number `transp`, which gets 0; pass -1 for no such entry.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int idx;

   for (idx = 0; idx < num_entries; ++idx) {
      stbi_uc *entry = pal[idx];
      int k;

      for (k = 2; k >= 0; --k)
         entry[k] = stbi__get8(s);

      entry[3] = (idx == transp) ? 0 : 255;
   }
}
6087 
// Parses the GIF signature and the header fields that follow into g.
//
//   comp     if non-NULL, receives 4
//   is_info  non-zero: stop after the fixed header fields, without reading
//            the color table
//
// Returns 1 on success, or the result of stbi__err if the signature is not
// "GIF87a" / "GIF89a".
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   const char *magic = "GIF8";
   stbi_uc version;
   int k;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k])
         return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (!(version == '7' || version == '9')) return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   // signature accepted: clear any failure reason recorded so far
   stbi__g_failure_reason = "";

   g->w       = stbi__get16le(s);
   g->h       = stbi__get16le(s);
   g->flags   = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio   = stbi__get8(s);
   g->transparent = -1;

   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   if (!is_info && (g->flags & 0x80)) {
      // a color table follows; the low three flag bits encode its size
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
   }

   return 1;
}
6115 
// Reads just enough of a GIF header to report the image dimensions.
//
//   x, y   if non-NULL, receive the width and height
//   comp   if non-NULL, receives the component count set by stbi__gif_header
//
// Returns 1 on success. Returns 0 (after rewinding the stream) if the header
// is not a valid GIF header, or the result of stbi__err if the temporary
// decoder state cannot be allocated.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // stbi__gif_header writes through g, so the allocation has to be checked
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6129 
// Writes the pixels for LZW code `code` to g->out at the current position,
// advancing cur_x / cur_y (and the interlace pass state) as it goes.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   const stbi__gif_lzw *entry = &g->codes[code];
   stbi_uc *dst, *color;

   // The chain is linked backwards, so recurse to emit the prefix codes
   // first; working backwards through an interleaved image would be nasty.
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   // past the end of the image: drop the pixel
   if (g->cur_y >= g->max_y) return;

   dst   = &g->out[g->cur_x + g->cur_y];
   color = &g->color_table[entry->suffix * 4];

   // only colors with alpha >= 128 are written; others leave the output
   // untouched. Bytes 0 and 2 are swapped on the way out.
   if (color[3] >= 128) {
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }
   g->cur_x += 4;

   if (g->cur_x < g->max_x)
      return;

   // end of row: return to the left edge and step down
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom while passes remain: start the next pass with a
   // smaller step
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6163 
stbi__process_gif_raster(stbi__context * s,stbi__gif * g)6164 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
6165 {
6166    stbi_uc lzw_cs;
6167    stbi__int32 len, init_code;
6168    stbi__uint32 first;
6169    stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
6170    stbi__gif_lzw *p;
6171 
6172    lzw_cs = stbi__get8(s);
6173    if (lzw_cs > 12) return NULL;
6174    clear = 1 << lzw_cs;
6175    first = 1;
6176    codesize = lzw_cs + 1;
6177    codemask = (1 << codesize) - 1;
6178    bits = 0;
6179    valid_bits = 0;
6180    for (init_code = 0; init_code < clear; init_code++) {
6181       g->codes[init_code].prefix = -1;
6182       g->codes[init_code].first = (stbi_uc) init_code;
6183       g->codes[init_code].suffix = (stbi_uc) init_code;
6184    }
6185 
6186    // support no starting clear code
6187    avail = clear+2;
6188    oldcode = -1;
6189 
6190    len = 0;
6191    for(;;) {
6192       if (valid_bits < codesize) {
6193          if (len == 0) {
6194             len = stbi__get8(s); // start new block
6195             if (len == 0)
6196                return g->out;
6197          }
6198          --len;
6199          bits |= (stbi__int32) stbi__get8(s) << valid_bits;
6200          valid_bits += 8;
6201       } else {
6202          stbi__int32 code = bits & codemask;
6203          bits >>= codesize;
6204          valid_bits -= codesize;
6205          // @OPTIMIZE: is there some way we can accelerate the non-clear path?
6206          if (code == clear) {  // clear code
6207             codesize = lzw_cs + 1;
6208             codemask = (1 << codesize) - 1;
6209             avail = clear + 2;
6210             oldcode = -1;
6211             first = 0;
6212          } else if (code == clear + 1) { // end of stream code
6213             stbi__skip(s, len);
6214             while ((len = stbi__get8(s)) > 0)
6215                stbi__skip(s,len);
6216             return g->out;
6217          } else if (code <= avail) {
6218             if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
6219 
6220             if (oldcode >= 0) {
6221                p = &g->codes[avail++];
6222                if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
6223                p->prefix = (stbi__int16) oldcode;
6224                p->first = g->codes[oldcode].first;
6225                p->suffix = (code == avail) ? p->first : g->codes[code].first;
6226             } else if (code == avail)
6227                return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6228 
6229             stbi__out_gif_code(g, (stbi__uint16) code);
6230 
6231             if ((avail & codemask) == 0 && avail <= 0x0FFF) {
6232                codesize++;
6233                codemask = (1 << codesize) - 1;
6234             }
6235 
6236             oldcode = code;
6237          } else {
6238             return stbi__errpuc("illegal code in raster", "Corrupt GIF");
6239          }
6240       }
6241    }
6242 }
6243 
// Fills a rectangle of g->out with the palette entry g->pal[g->bgindex],
// writing alpha 0 for every pixel.
//
// All coordinates are byte offsets, not pixels: x0/x1 are offsets within a
// row (advanced 4 bytes per pixel), y0/y1 are row-start offsets (advanced
// 4 * g->w bytes per row). The end offsets are exclusive.
static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
{
   int row, col;
   int stride = 4 * g->w;
   stbi_uc *bg = g->pal[g->bgindex];

   for (row = y0; row < y1; row += stride) {
      col = x0;
      while (col < x1) {
         stbi_uc *px = &g->out[row + col];
         // palette channels 0 and 2 are swapped on output
         px[0] = bg[2];
         px[1] = bg[1];
         px[2] = bg[0];
         px[3] = 0;
         col += 4;
      }
   }
}
6258 
6259 // this function is designed to support animated gifs, although stb_image doesn't support it
stbi__gif_load_next(stbi__context * s,stbi__gif * g,int * comp,int req_comp)6260 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
6261 {
6262    int i;
6263    stbi_uc *prev_out = 0;
6264 
6265    if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
6266       return 0; // stbi__g_failure_reason set by stbi__gif_header
6267 
6268    if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
6269       return stbi__errpuc("too large", "GIF too large");
6270 
6271    prev_out = g->out;
6272    g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
6273    if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
6274 
6275    switch ((g->eflags & 0x1C) >> 2) {
6276       case 0: // unspecified (also always used on 1st frame)
6277          stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
6278          break;
6279       case 1: // do not dispose
6280          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6281          g->old_out = prev_out;
6282          break;
6283       case 2: // dispose to background
6284          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6285          stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
6286          break;
6287       case 3: // dispose to previous
6288          if (g->old_out) {
6289             for (i = g->start_y; i < g->max_y; i += 4 * g->w)
6290                memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
6291          }
6292          break;
6293    }
6294 
6295    for (;;) {
6296       switch (stbi__get8(s)) {
6297          case 0x2C: /* Image Descriptor */
6298          {
6299             int prev_trans = -1;
6300             stbi__int32 x, y, w, h;
6301             stbi_uc *o;
6302 
6303             x = stbi__get16le(s);
6304             y = stbi__get16le(s);
6305             w = stbi__get16le(s);
6306             h = stbi__get16le(s);
6307             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6308                return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6309 
6310             g->line_size = g->w * 4;
6311             g->start_x = x * 4;
6312             g->start_y = y * g->line_size;
6313             g->max_x   = g->start_x + w * 4;
6314             g->max_y   = g->start_y + h * g->line_size;
6315             g->cur_x   = g->start_x;
6316             g->cur_y   = g->start_y;
6317 
6318             g->lflags = stbi__get8(s);
6319 
6320             if (g->lflags & 0x40) {
6321                g->step = 8 * g->line_size; // first interlaced spacing
6322                g->parse = 3;
6323             } else {
6324                g->step = g->line_size;
6325                g->parse = 0;
6326             }
6327 
6328             if (g->lflags & 0x80) {
6329                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6330                g->color_table = (stbi_uc *) g->lpal;
6331             } else if (g->flags & 0x80) {
6332                if (g->transparent >= 0 && (g->eflags & 0x01)) {
6333                   prev_trans = g->pal[g->transparent][3];
6334                   g->pal[g->transparent][3] = 0;
6335                }
6336                g->color_table = (stbi_uc *) g->pal;
6337             } else
6338                return stbi__errpuc("missing color table", "Corrupt GIF");
6339 
6340             o = stbi__process_gif_raster(s, g);
6341             if (o == NULL) return NULL;
6342 
6343             if (prev_trans != -1)
6344                g->pal[g->transparent][3] = (stbi_uc) prev_trans;
6345 
6346             return o;
6347          }
6348 
6349          case 0x21: // Comment Extension.
6350          {
6351             int len;
6352             if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
6353                len = stbi__get8(s);
6354                if (len == 4) {
6355                   g->eflags = stbi__get8(s);
6356                   g->delay = stbi__get16le(s);
6357                   g->transparent = stbi__get8(s);
6358                } else {
6359                   stbi__skip(s, len);
6360                   break;
6361                }
6362             }
6363             while ((len = stbi__get8(s)) != 0)
6364                stbi__skip(s, len);
6365             break;
6366          }
6367 
6368          case 0x3B: // gif stream termination code
6369             return (stbi_uc *) s; // using '1' causes warning on some compilers
6370 
6371          default:
6372             return stbi__errpuc("unknown code", "Corrupt GIF");
6373       }
6374    }
6375 
6376    STBI_NOTUSED(req_comp);
6377 }
6378 
// Loads the first image of a GIF stream.
//
// Allocates a temporary stbi__gif decoder state on the heap, decodes one
// frame with stbi__gif_load_next(), stores the dimensions in *x / *y and, if
// req_comp is non-zero and not 4, converts the 4-channel result to req_comp
// channels. Returns the pixel buffer, or 0 on failure (including allocation
// failure of the decoder state). ri is unused.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   STBI_NOTUSED(ri);

   // the decoder state is large; don't memset through a failed allocation
   if (g == NULL) return stbi__errpuc("outofmem", "Out of memory");
   memset(g, 0, sizeof(*g));

   u = stbi__gif_load_next(s, g, comp, req_comp);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g->w;
      *y = g->h;
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
   }
   else if (g->out)
      STBI_FREE(g->out); // decode failed after the output buffer was allocated
   STBI_FREE(g);
   return u;
}
6399 
// Reports GIF dimensions/components; forwards directly to stbi__gif_info_raw().
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int found = stbi__gif_info_raw(s, x, y, comp);
   return found;
}
6404 #endif
6405 
6406 // *************************************************************************************************
6407 // Radiance RGBE HDR loader
6408 // originally by Nicolas Schulz
6409 #ifndef STBI_NO_HDR
// Compares the next bytes of the stream against 'signature'.
// Returns 0 at the first mismatching byte (stream left where it stopped);
// on a full match the stream is rewound and 1 is returned.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expect = signature;

   while (*expect) {
      if (stbi__get8(s) != *expect)
          return 0;
      ++expect;
   }

   stbi__rewind(s);
   return 1;
}
6419 
// Returns non-zero if the stream starts with either accepted HDR signature
// ("#?RADIANCE\n" or "#?RGBE\n"). The stream is rewound after each attempt.
static int stbi__hdr_test(stbi__context* s)
{
   int matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   // fall back to the alternative signature
   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6430 
6431 #define STBI__HDR_BUFLEN  1024
// Reads one '\n'-terminated line of the HDR header into 'buffer' (which must
// hold STBI__HDR_BUFLEN bytes) and NUL-terminates it; the newline is not
// stored. Lines longer than STBI__HDR_BUFLEN-1 characters are truncated and
// the remainder of the line is discarded. Returns 'buffer'.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int n = 0;
   char ch = (char) stbi__get8(z);

   for (;;) {
      if (stbi__at_eof(z) || ch == '\n')
         break;

      buffer[n] = ch;
      ++n;

      if (n == STBI__HDR_BUFLEN-1) {
         // buffer full: flush to end of line
         for (;;) {
            if (stbi__at_eof(z)) break;
            if (stbi__get8(z) == '\n') break;
         }
         break;
      }

      ch = (char) stbi__get8(z);
   }

   buffer[n] = 0;
   return buffer;
}
6453 
// Converts one 4-byte RGBE pixel ('input') to req_comp floats in 'output'.
//
// A zero exponent byte yields zero color. Otherwise each mantissa byte is
// scaled by 2^(exponent - 136). For req_comp <= 2 the three channels are
// averaged into output[0]; for req_comp 2 and 4 the last component is set to 1.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   if (input[3] == 0) {
      // zero exponent: emit zero color, keeping the extra component at 1
      if (req_comp == 4) {
         output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 3) {
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2) {
         output[1] = 1;
         output[0] = 0;
      } else if (req_comp == 1) {
         output[0] = 0;
      }
      return;
   }

   {
      // shared scale factor derived from the exponent byte
      float scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

      if (req_comp <= 2) {
         output[0] = (input[0] + input[1] + input[2]) * scale / 3;
      } else {
         output[0] = input[0] * scale;
         output[1] = input[1] * scale;
         output[2] = input[2] * scale;
      }

      if (req_comp == 2) output[1] = 1;
      if (req_comp == 4) output[3] = 1;
   }
}
6480 
// Decodes a Radiance RGBE (.hdr) image into floating-point pixels.
//
// The header must start with "#?RADIANCE" or "#?RGBE", contain the line
// "FORMAT=32-bit_rle_rgbe", and be followed by a "-Y <height> +X <width>"
// resolution line. *x / *y receive the dimensions, *comp (if non-NULL) is set
// to 3, and req_comp == 0 is treated as 3.
//
// Returns a buffer obtained from stbi__malloc_mad4(width, height, req_comp,
// sizeof(float)) on success, or the result of stbi__errpf() on failure.
// ri is unused.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header (ends at the first empty line)
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // continue in the flat-data loop above, starting at the second pixel
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         // scanline header: 2, 2, then the 16-bit big-endian scanline width
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the 4 components (R, G, B, E) is run-length coded separately
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero-length dump makes no progress; rejecting it keeps
                  // corrupt or truncated input from stalling this loop
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6608 
// Reads the dimensions of a Radiance HDR image without decoding pixels.
//
// Any of x / y / comp may be NULL. On success stores height and width parsed
// from the "-Y <h> +X <w>" line, sets *comp to 3 and returns 1. On any
// failure the stream is rewound and 0 is returned.
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int have_format = 0;
   int dummy;

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (stbi__hdr_test(s) == 0)
      goto not_hdr;

   // scan header lines up to the first empty one, looking for the format tag
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) have_format = 1;
   }
   if (!have_format)
      goto not_hdr;

   // resolution line: "-Y <height> +X <width>"
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))
      goto not_hdr;
   token += 3;
   *y = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))
      goto not_hdr;
   token += 3;
   *x = (int) strtol(token, NULL, 10);
   *comp = 3;
   return 1;

not_hdr:
   stbi__rewind( s );
   return 0;
}
6652 #endif // STBI_NO_HDR
6653 
6654 #ifndef STBI_NO_BMP
// Reports BMP dimensions and channel count by running the shared header
// parser. The stream is always rewound afterwards. Returns 0 if the header
// parser fails; otherwise fills the non-NULL outputs from s->img_x / s->img_y
// and reports 4 components when info.ma is non-zero, else 3.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data info;
   void *header;

   info.all_a = 255;
   header = stbi__bmp_parse_header(s, &info);
   stbi__rewind( s );

   if (header == NULL) {
      return 0;
   }

   if (x) *x = s->img_x;
   if (y) *y = s->img_y;
   if (comp) {
      if (info.ma)
         *comp = 4;
      else
         *comp = 3;
   }
   return 1;
}
6670 #endif
6671 
6672 #ifndef STBI_NO_PSD
// Reads the dimensions of a PSD file from its header without decoding pixels.
//
// Any of x / y / comp may be NULL. Returns 1 and stores height/width (and
// *comp = 4) when the header has the "8BPS" signature, version 1, a channel
// count in 0..16, a depth of 8 or 16 bits per channel, and color mode 3.
// On any mismatch the stream is rewound and 0 is returned.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channelCount, depth, dummy;
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;
   if (stbi__get32be(s) != 0x38425053) {   // "8BPS"
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 1) {            // version
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6);
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   *y = stbi__get32be(s);
   *x = stbi__get32be(s);
   // the documented PSD support covers both 8 and 16 bits per channel, so
   // info must not reject 16-bit files
   depth = stbi__get16be(s);
   if (depth != 8 && depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 3) {            // color mode (presumably 3 == RGB; see the PSD spec)
       stbi__rewind( s );
       return 0;
   }
   *comp = 4;
   return 1;
}
6706 #endif
6707 
6708 #ifndef STBI_NO_PIC
// Reads the dimensions and channel layout of a Softimage PIC file.
//
// Any of x / y / comp may be NULL. Returns 1 on success with *comp set to 4
// if any packet's channel mask has bit 0x10 set, else 3. On every failure the
// stream is rewound and 0 is returned.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images whose pixel count would exceed 1 << 28
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   // walk the chain of packet descriptors
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         // too many packets: rewind like every other failure path so the next
         // format probe starts from the beginning of the stream
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6765 #endif
6766 
6767 // *************************************************************************************************
6768 // Portable Gray Map and Portable Pixel Map loader
6769 // by Ken Miller
6770 //
6771 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6772 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6773 //
6774 // Known limitations:
6775 //    Does not support comments in the header section
6776 //    Does not support ASCII image data (formats P2 and P3)
6777 //    Does not support 16-bit-per-channel
6778 
6779 #ifndef STBI_NO_PNM
6780 
// Returns 1 if the stream starts with the binary PGM ("P5") or PPM ("P6")
// magic. On a mismatch the stream is rewound and 0 is returned; on a match
// the two magic bytes stay consumed.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic = (char) stbi__get8(s);
   char kind  = (char) stbi__get8(s);

   if (magic == 'P' && (kind == '5' || kind == '6')) {
      return 1;
   }

   stbi__rewind( s );
   return 0;
}
6792 
// Loads a binary PGM/PPM image.
//
// Parses the header via stbi__pnm_info() into s->img_x / img_y / img_n,
// stores the dimensions in *x / *y (and the channel count in *comp if
// non-NULL), reads img_n * img_x * img_y raw bytes, and converts to req_comp
// channels when req_comp is non-zero and differs from the file's count.
// Returns the pixel buffer, or 0 on failure. ri is unused.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *pixels;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   pixels = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!pixels) return stbi__errpuc("outofmem", "Out of memory");
   stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y);

   // no conversion requested, or already in the requested layout
   if (req_comp == 0 || req_comp == s->img_n)
      return pixels;

   // stbi__convert_format frees input on failure, so its result (possibly
   // NULL) is returned as-is
   pixels = stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
   return pixels;
}
6818 
// Returns 1 if c is one of the whitespace characters accepted in a PNM header
// (space, tab, newline, vertical tab, form feed, carriage return), else 0.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6823 
// Advances past whitespace and '#' comments in a PNM header.
//
// *c holds the current look-ahead character on entry and the first
// character that is neither whitespace nor part of a comment on exit (or
// whatever was last read if the end of the stream is hit).
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   do {
      // eat a run of whitespace
      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
         *c = (char) stbi__get8(s);

      if (stbi__at_eof(s))
         return;
      if (*c != '#')
         return;

      // '#' starts a comment: discard up to the end of the line
      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
         *c = (char) stbi__get8(s);
   } while (1);
}
6837 
// Returns 1 if c is an ASCII decimal digit ('0'..'9'), else 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0' || c > '9') {
      return 0;
   }
   return 1;
}
6842 
// Parses an unsigned decimal integer from a PNM header.
//
// *c holds the current look-ahead character on entry; digits are consumed
// until a non-digit or the end of the stream, and *c is left on the first
// character after the number. Returns 0 if no digit was present. Values too
// large for an int saturate at the largest int instead of overflowing
// (signed overflow is undefined behaviour and the header is untrusted input).
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   const int int_max = (int) (~0u >> 1);
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      if (value > (int_max - digit) / 10)
         value = int_max;              // value*10 + digit would not fit
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
6854 
// Parses the header of a binary PGM ("P5") or PPM ("P6") file.
//
// Any of x / y / comp may be NULL. The stream is rewound before parsing.
// Stores 1 (PGM) or 3 (PPM) in *comp, then width and height. Returns 1 on
// success, 0 (after rewinding) if the magic does not match, and the result of
// stbi__err() if the maximum sample value exceeds 255.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int max_value, dummy;
   char look, magic, kind;

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   magic = (char) stbi__get8(s);
   kind  = (char) stbi__get8(s);
   if (!(magic == 'P' && (kind == '5' || kind == '6'))) {
       stbi__rewind(s);
       return 0;
   }

   // '5' is 1-component .pgm; '6' is 3-component .ppm
   if (kind == '6')
      *comp = 3;
   else
      *comp = 1;

   look = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &look);

   *x = stbi__pnm_getinteger(s, &look); // read width
   stbi__pnm_skip_whitespace(s, &look);

   *y = stbi__pnm_getinteger(s, &look); // read height
   stbi__pnm_skip_whitespace(s, &look);

   max_value = stbi__pnm_getinteger(s, &look);  // read max value

   if (max_value > 255)
      return stbi__err("max value > 255", "PPM image not 8-bit");

   return 1;
}
6892 #endif
6893 
// Tries each compiled-in format's info function in turn and returns 1 as
// soon as one recognizes the stream. If none does, records the "unknown
// image type" error via stbi__err() and returns its result.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#ifndef STBI_NO_JPEG
   if (stbi__jpeg_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_PNG
   if (stbi__png_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_GIF
   if (stbi__gif_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_BMP
   if (stbi__bmp_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_PSD
   if (stbi__psd_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_PIC
   if (stbi__pic_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_PNM
   if (stbi__pnm_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

#ifndef STBI_NO_HDR
   if (stbi__hdr_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

   // test tga last because it's a crappy test!
#ifndef STBI_NO_TGA
   if (stbi__tga_info(s, x, y, comp) != 0) {
      return 1;
   }
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
6935 
6936 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)6937 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6938 {
6939     FILE *f = stbi__fopen(filename, "rb");
6940     int result;
6941     if (!f) return stbi__err("can't fopen", "Unable to open file");
6942     result = stbi_info_from_file(f, x, y, comp);
6943     fclose(f);
6944     return result;
6945 }
6946 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)6947 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6948 {
6949    int r;
6950    stbi__context s;
6951    long pos = ftell(f);
6952    stbi__start_file(&s, f);
6953    r = stbi__info_main(&s,x,y,comp);
6954    fseek(f,pos,SEEK_SET);
6955    return r;
6956 }
6957 #endif // !STBI_NO_STDIO
6958 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)6959 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6960 {
6961    stbi__context s;
6962    stbi__start_mem(&s,buffer,len);
6963    return stbi__info_main(&s,x,y,comp);
6964 }
6965 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)6966 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6967 {
6968    stbi__context s;
6969    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6970    return stbi__info_main(&s,x,y,comp);
6971 }
6972 
6973 #endif // STB_IMAGE_IMPLEMENTATION
6974 
6975 /*
6976    revision history:
6977       2.16  (2017-07-23) all functions have 16-bit variants;
6978                          STBI_NO_STDIO works again;
6979                          compilation fixes;
6980                          fix rounding in unpremultiply;
6981                          optimize vertical flip;
6982                          disable raw_len validation;
6983                          documentation fixes
6984       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
6985                          warning fixes; disable run-time SSE detection on gcc;
6986                          uniform handling of optional "return" values;
6987                          thread-safe initialization of zlib tables
6988       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
6989       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
6990       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
6991       2.11  (2016-04-02) allocate large structures on the stack
6992                          remove white matting for transparent PSD
6993                          fix reported channel count for PNG & BMP
6994                          re-enable SSE2 in non-gcc 64-bit
6995                          support RGB-formatted JPEG
6996                          read 16-bit PNGs (only as 8-bit)
6997       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
6998       2.09  (2016-01-16) allow comments in PNM files
6999                          16-bit-per-pixel TGA (not bit-per-component)
7000                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
7002                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7003                          code cleanup
7004       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7005       2.07  (2015-09-13) fix compiler warnings
7006                          partial animated GIF support
7007                          limited 16-bpc PSD support
7008                          #ifdef unused functions
7009                          bug with < 92 byte PIC,PNM,HDR,TGA
7010       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7011       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7012       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7013       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7014                          stbi_set_flip_vertically_on_load (nguillemot)
7015                          fix NEON support; fix mingw support
7016       2.02  (2015-01-19) fix incorrect assert, fix warning
7017       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7018       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7019       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7020                          progressive JPEG (stb)
7021                          PGM/PPM support (Ken Miller)
7022                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7023                          GIF bugfix -- seemingly never worked
7024                          STBI_NO_*, STBI_ONLY_*
7025       1.48  (2014-12-14) fix incorrectly-named assert()
7026       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7027                          optimize PNG (ryg)
7028                          fix bug in interlaced PNG with user-specified channel count (stb)
7029       1.46  (2014-08-26)
7030               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7031       1.45  (2014-08-16)
7032               fix MSVC-ARM internal compiler error by wrapping malloc
7033       1.44  (2014-08-07)
7034               various warning fixes from Ronny Chevalier
7035       1.43  (2014-07-15)
7036               fix MSVC-only compiler problem in code changed in 1.42
7037       1.42  (2014-07-09)
7038               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7039               fixes to stbi__cleanup_jpeg path
7040               added STBI_ASSERT to avoid requiring assert.h
7041       1.41  (2014-06-25)
7042               fix search&replace from 1.36 that messed up comments/error messages
7043       1.40  (2014-06-22)
7044               fix gcc struct-initialization warning
7045       1.39  (2014-06-15)
7046               fix to TGA optimization when req_comp != number of components in TGA;
7047               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7048               add support for BMP version 5 (more ignored fields)
7049       1.38  (2014-06-06)
7050               suppress MSVC warnings on integer casts truncating values
7051               fix accidental rename of 'skip' field of I/O
7052       1.37  (2014-06-04)
7053               remove duplicate typedef
7054       1.36  (2014-06-03)
7055               convert to header file single-file library
7056               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7057       1.35  (2014-05-27)
7058               various warnings
7059               fix broken STBI_SIMD path
7060               fix bug where stbi_load_from_file no longer left file pointer in correct place
7061               fix broken non-easy path for 32-bit BMP (possibly never used)
7062               TGA optimization by Arseny Kapoulkine
7063       1.34  (unknown)
7064               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7065       1.33  (2011-07-14)
7066               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7067       1.32  (2011-07-13)
7068               support for "info" function for all supported filetypes (SpartanJ)
7069       1.31  (2011-06-20)
7070               a few more leak fixes, bug in PNG handling (SpartanJ)
7071       1.30  (2011-06-11)
7072               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7073               removed deprecated format-specific test/load functions
7074               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7075               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7076               fix inefficiency in decoding 32-bit BMP (David Woo)
7077       1.29  (2010-08-16)
7078               various warning fixes from Aurelien Pocheville
7079       1.28  (2010-08-01)
7080               fix bug in GIF palette transparency (SpartanJ)
7081       1.27  (2010-08-01)
7082               cast-to-stbi_uc to fix warnings
7083       1.26  (2010-07-24)
7084               fix bug in file buffering for PNG reported by SpartanJ
7085       1.25  (2010-07-17)
7086               refix trans_data warning (Won Chun)
7087       1.24  (2010-07-12)
7088               perf improvements reading from files on platforms with lock-heavy fgetc()
7089               minor perf improvements for jpeg
7090               deprecated type-specific functions so we'll get feedback if they're needed
7091               attempt to fix trans_data warning (Won Chun)
7092       1.23    fixed bug in iPhone support
7093       1.22  (2010-07-10)
7094               removed image *writing* support
7095               stbi_info support from Jetro Lauha
7096               GIF support from Jean-Marc Lienher
7097               iPhone PNG-extensions from James Brown
7098               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7099       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7100       1.20    added support for Softimage PIC, by Tom Seddon
7101       1.19    bug in interlaced PNG corruption check (found by ryg)
7102       1.18  (2008-08-02)
7103               fix a threading bug (local mutable static)
7104       1.17    support interlaced PNG
7105       1.16    major bugfix - stbi__convert_format converted one too many pixels
7106       1.15    initialize some fields for thread safety
7107       1.14    fix threadsafe conversion bug
7108               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7109       1.13    threadsafe
7110       1.12    const qualifiers in the API
7111       1.11    Support installable IDCT, colorspace conversion routines
7112       1.10    Fixes for 64-bit (don't use "unsigned long")
7113               optimized upsampling by Fabian "ryg" Giesen
7114       1.09    Fix format-conversion for PSD code (bad global variables!)
7115       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7116       1.07    attempt to fix C++ warning/errors again
7117       1.06    attempt to fix C++ warning/errors again
7118       1.05    fix TGA loading to return correct *comp and use good luminance calc
7119       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7120       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7121       1.02    support for (subset of) HDR files, float interface for preferred access to them
7122       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7123               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7124       1.00    interface to zlib that skips zlib header
7125       0.99    correct handling of alpha in palette
7126       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7127       0.97    jpeg errors on too large a file; also catch another malloc failure
7128       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7129       0.95    during header scan, seek to markers in case of padding
7130       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7131       0.93    handle jpegtran output; verbose errors
7132       0.92    read 4,8,16,24,32-bit BMP files of several formats
7133       0.91    output 24-bit Windows 3.0 BMP files
7134       0.90    fix a few more warnings; bump version number to approach 1.0
7135       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7136       0.60    fix compiling as c++
7137       0.59    fix warnings: merge Dave Moore's -Wall fixes
7138       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7139       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7140       0.56    fix bug: zlib uncompressed mode len vs. nlen
7141       0.55    fix bug: restart_interval not initialized to 0
7142       0.54    allow NULL for 'int *comp'
7143       0.53    fix bug in png 3->4; speedup png decoding
7144       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7145       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7146               on 'test' only check type, not whether we support this variant
7147       0.50  (2006-11-19)
7148               first released version
7149 */
7150 
7151 
7152 /*
7153 ------------------------------------------------------------------------------
7154 MIT License
7155 Copyright (c) 2017 Sean Barrett
7156 Permission is hereby granted, free of charge, to any person obtaining a copy of
7157 this software and associated documentation files (the "Software"), to deal in
7158 the Software without restriction, including without limitation the rights to
7159 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7160 of the Software, and to permit persons to whom the Software is furnished to do
7161 so, subject to the following conditions:
7162 The above copyright notice and this permission notice shall be included in all
7163 copies or substantial portions of the Software.
7164 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7165 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7166 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7167 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7168 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7169 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7170 SOFTWARE.
7171 ------------------------------------------------------------------------------
7172 */
7173