xref: /aosp_15_r20/external/piex/src/image_type_recognition/image_type_recognition_lite.cc (revision 4d671364a067eb4f124488347677d916765212d1)
1 // Copyright 2015 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 ////////////////////////////////////////////////////////////////////////////////
16 //
17 // This file implements the image type recognition algorithm. Functions, which
18 // will check each single image type, are implemented based on the comparisons
19 // of magic numbers or signature strings. Other checks (e.g endianness, general
20 // tiff magic number "42", etc.) could also be used in some of those functions
21 // to make the type recognition more stable. Those checks are designed
// according to the format specifications and our own experiments. Notice that
23 // the magic numbers and signature strings may have different binary values
24 // according to different endiannesses.
25 #include "src/image_type_recognition/image_type_recognition_lite.h"
26 
27 #include <algorithm>
28 #include <cassert>
29 #include <string>
30 #include <vector>
31 
32 #include "src/binary_parse/range_checked_byte_ptr.h"
33 
34 namespace piex {
35 namespace image_type_recognition {
36 namespace {
37 
38 using std::string;
39 using binary_parse::MemoryStatus;
40 using binary_parse::RangeCheckedBytePtr;
41 
42 // Base class for checking image type. For each image type, one should create an
43 // inherited class and do the implementation.
44 class TypeChecker {
45  public:
46   // Comparing function, whihc is used for sorting.
Compare(const TypeChecker * a,const TypeChecker * b)47   static bool Compare(const TypeChecker* a, const TypeChecker* b) {
48     assert(a);
49     assert(b);
50     return a->RequestedSize() < b->RequestedSize();
51   }
52 
~TypeChecker()53   virtual ~TypeChecker() {}
54 
55   // Returns the type of current checker.
56   virtual RawImageTypes Type() const = 0;
57 
58   // Returns the requested data size (in bytes) for current checker. The checker
59   // guarantees that it will not read more than this size.
60   virtual size_t RequestedSize() const = 0;
61 
62   // Checks if source data belongs to current checker type.
63   virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
64 
65  protected:
66   // Limits the source length to the RequestedSize(), using it guarantees that
67   // we will not read more than this size from the source.
LimitSource(const RangeCheckedBytePtr & source) const68   RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
69     return source.pointerToSubArray(0 /* pos */, RequestedSize());
70   }
71 };
72 
73 // Check if the uint16 value at (source + offset) is equal to the target value.
CheckUInt16Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned short target_value)74 bool CheckUInt16Value(const RangeCheckedBytePtr& source,
75                       const size_t source_offset, const bool use_big_endian,
76                       const unsigned short target_value) {  // NOLINT
77   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
78   const unsigned short value = binary_parse::Get16u(  // NOLINT
79       source + source_offset, use_big_endian, &status);
80   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
81     return false;
82   }
83   return (target_value == value);
84 }
85 
86 // Check if the uint32 value at (source + offset) is equal to the target value.
CheckUInt32Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned int target_value)87 bool CheckUInt32Value(const RangeCheckedBytePtr& source,
88                       const size_t source_offset, const bool use_big_endian,
89                       const unsigned int target_value) {
90   MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
91   const unsigned int value =
92       binary_parse::Get32u(source + source_offset, use_big_endian, &status);
93   if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
94     return false;
95   }
96   return (target_value == value);
97 }
98 
99 // Determine the endianness. The return value is NOT the endianness indicator,
100 // it's just that this function was successful.
DetermineEndianness(const RangeCheckedBytePtr & source,bool * is_big_endian)101 bool DetermineEndianness(const RangeCheckedBytePtr& source,
102                          bool* is_big_endian) {
103   if (source.remainingLength() < 2) {
104     return false;
105   }
106 
107   if (source[0] == 0x49 && source[1] == 0x49) {
108     *is_big_endian = false;
109   } else if (source[0] == 0x4D && source[1] == 0x4D) {
110     *is_big_endian = true;
111   } else {
112     return false;
113   }
114   return true;
115 }
116 
117 // Check if signature string can match to the same length string start from
118 // (source + offset). The signature string will be used as longer magic number
119 // series.
IsSignatureMatched(const RangeCheckedBytePtr & source,const size_t source_offset,const string & signature)120 bool IsSignatureMatched(const RangeCheckedBytePtr& source,
121                         const size_t source_offset, const string& signature) {
122   return source.substr(source_offset, signature.size()) == signature;
123 }
124 
125 // Check if signature is found in [source + offset, source + offset + range].
IsSignatureFound(const RangeCheckedBytePtr & source,const size_t search_offset,const size_t search_range,const string & signature,size_t * first_matched)126 bool IsSignatureFound(const RangeCheckedBytePtr& source,
127                       const size_t search_offset, const size_t search_range,
128                       const string& signature, size_t* first_matched) {
129   if (source.remainingLength() < search_offset + search_range) {
130     return false;
131   }
132 
133   // The index must be in range [offset, offset + range - sizeof(signature)], so
134   // that it can guarantee that it will not read outside of range.
135   for (size_t i = search_offset;
136        i < search_offset + search_range - signature.size(); ++i) {
137     if (IsSignatureMatched(source, i, signature)) {
138       if (first_matched) {
139         *first_matched = i;
140       }
141       return true;
142     }
143   }
144   return false;
145 }
146 
147 // Sony RAW format.
148 class ArwTypeChecker : public TypeChecker {
149  public:
Type() const150   virtual RawImageTypes Type() const { return kArwImage; }
151 
RequestedSize() const152   virtual size_t RequestedSize() const { return 10000; }
153 
154   // Check multiple points:
155   // 1. valid endianness at the beginning of the file;
156   // 2. correct tiff magic number at the (offset == 8) position of the file;
157   // 3. signature "SONY" in first requested bytes;
158   // 4. correct signature for (section + version) in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const159   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
160     RangeCheckedBytePtr limited_source = LimitSource(source);
161 
162     bool use_big_endian;
163     if (!DetermineEndianness(limited_source, &use_big_endian)) {
164       return false;
165     }
166 
167     const unsigned short kTiffMagic = 0x2A;  // NOLINT
168     const unsigned int kTiffOffset = 8;
169     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
170                           kTiffMagic) ||
171         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
172                           kTiffOffset)) {
173       return false;
174     }
175 
176     // Search for kSignatureSony in first requested bytes
177     const string kSignatureSony("SONY");
178     if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
179                           kSignatureSony, NULL)) {
180       return false;
181     }
182 
183     // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
184     // requested bytes
185     const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
186     const int kSignatureVersionsSize = 6;
187     const string kSignatureVersions[kSignatureVersionsSize] = {
188         string("\x02\x00", 2),  // ARW 1.0
189         string("\x03\x00", 2),  // ARW 2.0
190         string("\x03\x01", 2),  // ARW 2.1
191         string("\x03\x02", 2),  // ARW 2.2
192         string("\x03\x03", 2),  // ARW 2.3
193         string("\x04\x00", 2),  // ARW 4.0
194     };
195     bool matched = false;
196     for (int i = 0; i < kSignatureVersionsSize; ++i) {
197       matched = matched || IsSignatureFound(
198                                limited_source, 0 /* offset */, RequestedSize(),
199                                kSignatureSection + kSignatureVersions[i], NULL);
200     }
201     return matched;
202   }
203 };
204 
205 // Canon RAW (CR3 extension).
206 class Cr3TypeChecker : public TypeChecker {
207  public:
208   static constexpr size_t kSignatureOffset = 4;
209   static constexpr const char* kSignature = "ftypcrx ";
210 
Type() const211   virtual RawImageTypes Type() const { return kCr3Image; }
212 
RequestedSize() const213   virtual size_t RequestedSize() const {
214     return kSignatureOffset + strlen(kSignature);
215   }
216 
217   // Checks for the ftyp box w/ brand 'crx '.
IsMyType(const RangeCheckedBytePtr & source) const218   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
219     RangeCheckedBytePtr limited_source = LimitSource(source);
220     return IsSignatureMatched(limited_source, kSignatureOffset, kSignature);
221   }
222 };
223 
224 // Canon RAW (CR2 extension).
225 class Cr2TypeChecker : public TypeChecker {
226  public:
Type() const227   virtual RawImageTypes Type() const { return kCr2Image; }
228 
RequestedSize() const229   virtual size_t RequestedSize() const { return 16; }
230 
231   // Check multiple points:
232   // 1. valid endianness at the beginning of the file;
233   // 2. magic number "42" at the (offset == 2) position of the file;
234   // 3. signature "CR2" at the (offset == 8) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const235   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
236     RangeCheckedBytePtr limited_source = LimitSource(source);
237 
238     bool use_big_endian;
239     if (!DetermineEndianness(limited_source, &use_big_endian)) {
240       return false;
241     }
242 
243     const unsigned short kTag = 42;  // NOLINT
244     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
245                           kTag)) {
246       return false;
247     }
248 
249     const string kSignature("CR\2\0", 4);
250     return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
251   }
252 };
253 
254 // Canon RAW (CRW extension).
255 class CrwTypeChecker : public TypeChecker {
256  public:
Type() const257   virtual RawImageTypes Type() const { return kCrwImage; }
258 
RequestedSize() const259   virtual size_t RequestedSize() const { return 14; }
260 
261   // Check only the signature at the (offset == 6) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const262   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
263     RangeCheckedBytePtr limited_source = LimitSource(source);
264 
265     bool use_big_endian;
266     if (!DetermineEndianness(limited_source, &use_big_endian)) {
267       return false;
268     }
269 
270     string signature;
271     if (use_big_endian) {
272       signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
273     } else {
274       signature = string("HEAPCCDR");
275     }
276     return IsSignatureMatched(limited_source, 6 /* offset */, signature);
277   }
278 };
279 
280 // Kodak RAW.
281 class DcrTypeChecker : public TypeChecker {
282  public:
Type() const283   virtual RawImageTypes Type() const { return kDcrImage; }
284 
RequestedSize() const285   virtual size_t RequestedSize() const { return 5000; }
286 
287   // Check two different cases, only need to fulfill one of the two:
288   // 1. signature at the (offset == 16) position of the file;
289   // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
290   // first requested bytes of the file.
IsMyType(const RangeCheckedBytePtr & source) const291   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
292     RangeCheckedBytePtr limited_source = LimitSource(source);
293 
294     bool use_big_endian;
295     if (!DetermineEndianness(limited_source, &use_big_endian)) {
296       return false;
297     }
298 
299     // Case 1: has signature
300     const string kSignature(
301         "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
302     if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
303       return true;
304     }
305 
306     // Case 2: search for tags in first requested bytes
307     string kIfdTags[2];
308     if (use_big_endian) {
309       kIfdTags[0] = string("\x03\xe9\x00\x02", 4);  // OriginalFileName
310       kIfdTags[1] = string("\x0c\xe5\x00\x02", 4);  // FirmwareVersion
311     } else {
312       kIfdTags[0] = string("\xe9\x03\x02\x00", 4);  // OriginalFileName
313       kIfdTags[1] = string("\xe5\x0c\x02\x00", 4);  // FirmwareVersion
314     }
315     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
316                             kIfdTags[0], NULL) &&
317            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
318                             kIfdTags[1], NULL);
319   }
320 };
321 
322 // Digital Negative RAW.
323 class DngTypeChecker : public TypeChecker {
324  public:
Type() const325   virtual RawImageTypes Type() const { return kDngImage; }
326 
RequestedSize() const327   virtual size_t RequestedSize() const { return 1024; }
328 
329   // Check multiple points:
330   // 1. valid endianness at the beginning of the file;
331   // 2. at least two dng specific tags in the first requested bytes of the
332   // file
IsMyType(const RangeCheckedBytePtr & source) const333   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
334     RangeCheckedBytePtr limited_source = LimitSource(source);
335 
336     bool use_big_endian;
337     if (!DetermineEndianness(limited_source, &use_big_endian)) {
338       return false;
339     }
340 
341     // Search tags in first requested bytes and verify the order of them.
342     const int kTagsCount = 5;
343     string dng_tags[kTagsCount];
344     if (use_big_endian) {
345       dng_tags[0] =
346           string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8);  // tag: 50706
347       dng_tags[1] =
348           string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8);  // tag: 50707
349       dng_tags[2] = string("\xc6\x14\x00\x02", 4);        // tag: 50708
350       dng_tags[3] = string("\xc6\x20", 2);                // tag: 50720
351       dng_tags[4] =
352           string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8);  // tag: 50733
353     } else {
354       dng_tags[0] =
355           string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50706
356       dng_tags[1] =
357           string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50707
358       dng_tags[2] = string("\x14\xc6\x02\x00", 4);        // tag: 50708
359       dng_tags[3] = string("\x20\xc6", 2);                // tag: 50720
360       dng_tags[4] =
361           string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8);  // tag: 50733
362     }
363     int tags_found = 0;
364     for (int i = 0; i < kTagsCount; ++i) {
365       if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
366                            dng_tags[i], NULL)) {
367         tags_found++;
368       }
369     }
370     return tags_found >= 2;
371   }
372 };
373 
374 // Kodak RAW.
375 class KdcTypeChecker : public TypeChecker {
376  public:
Type() const377   virtual RawImageTypes Type() const { return kKdcImage; }
378 
RequestedSize() const379   virtual size_t RequestedSize() const { return 5000; }
380 
381   // Check two points:
382   // 1. valid endianness at the beginning of the file;
383   // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const384   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
385     RangeCheckedBytePtr limited_source = LimitSource(source);
386 
387     bool use_big_endian;
388     if (!DetermineEndianness(limited_source, &use_big_endian)) {
389       return false;
390     }
391 
392     // Search in first requested bytes
393     const size_t kIfdTagsSize = 2;
394     string kIfdTags[kIfdTagsSize];
395     if (use_big_endian) {
396       kIfdTags[0] = string("\xfa\x0d\x00\x01", 4);  // WhiteBalance
397       kIfdTags[1] = string("\xfa\x00\x00\x02", 4);  // SerialNumber
398     } else {
399       kIfdTags[0] = string("\x0d\xfa\x01\x00", 4);  // WhiteBalance
400       kIfdTags[1] = string("\x00\xfa\x02\x00", 4);  // SerialNumber
401     }
402 
403     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
404                             kIfdTags[0], NULL) &&
405            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
406                             kIfdTags[1], NULL);
407   }
408 };
409 
410 // Leaf RAW.
411 class MosTypeChecker : public TypeChecker {
412  public:
Type() const413   virtual RawImageTypes Type() const { return kMosImage; }
414 
RequestedSize() const415   virtual size_t RequestedSize() const { return 5000; }
416 
417   // Check two points:
418   // 1. valid endianness at the beginning of the file;
419   // 2. signature "PKTS    " in the first requested bytes. Note the
420   // "whitespace". It's important as they are special binary values.
IsMyType(const RangeCheckedBytePtr & source) const421   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
422     RangeCheckedBytePtr limited_source = LimitSource(source);
423 
424     bool use_big_endian;
425     if (!DetermineEndianness(source, &use_big_endian)) {
426       return false;
427     }
428 
429     // Search kSignaturePKTS in first requested bytes
430     const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
431     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
432                             kSignaturePKTS, NULL);
433   }
434 };
435 
436 // Minolta RAW.
437 class MrwTypeChecker : public TypeChecker {
438  public:
Type() const439   virtual RawImageTypes Type() const { return kMrwImage; }
440 
RequestedSize() const441   virtual size_t RequestedSize() const { return 4; }
442 
443   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const444   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
445     // Limits the source length to the RequestedSize(), using it guarantees that
446     // we will not read more than this size from the source.
447     RangeCheckedBytePtr limited_source =
448         source.pointerToSubArray(0 /* pos */, RequestedSize());
449 
450     const string kSignature("\0MRM", 4);
451     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
452   }
453 };
454 
455 // Check if the file contains a NRW signature "NRW   " in the first requested
456 // bytes. Note the "whitespace". It's important as they are special binary
457 // values.
458 const size_t kRequestedSizeForNrwSignature = 4000;
ContainsNrwSignature(const RangeCheckedBytePtr & source)459 bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
460   // Search for kSignatureNrw.
461   const string kSignatureNrw("NRW\x20\x20\x20", 6);
462   return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
463                           kSignatureNrw, NULL);
464 }
465 
466 // Checks if the file contains the signatures for Nikon formats:
467 // * the general Nikon singature "NIKON" string.
468 // * the ReferenceBlackWhite tag.
469 const size_t kRequestedSizeForNikonSignatures = 4000;
ContainsNikonSignatures(const RangeCheckedBytePtr & source,const bool use_big_endian)470 bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
471                              const bool use_big_endian) {
472   const string kSignatureNikon("NIKON");
473   const string kReferenceBlackWhiteTag = use_big_endian
474                                              ? string("\x02\x14\x00\x05", 4)
475                                              : string("\x14\x02\x05\x00", 4);
476   const std::vector<string> kSignatures = {kSignatureNikon,
477                                            kReferenceBlackWhiteTag};
478   for (auto const& signature : kSignatures) {
479     if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
480                           signature, NULL)) {
481       return false;
482     }
483   }
484   return true;
485 }
486 
487 // Nikon RAW (NEF extension).
488 class NefTypeChecker : public TypeChecker {
489  public:
Type() const490   virtual RawImageTypes Type() const { return kNefImage; }
491 
RequestedSize() const492   virtual size_t RequestedSize() const {
493     return std::max(kRequestedSizeForNikonSignatures,
494                     kRequestedSizeForNrwSignature);
495   }
496 
497   // Check multiple points:
498   // 1. valid endianness at the beginning of the file;
499   // 2. magic number at the (offset == 2) position of the file;
500   // 3. the signature "NIKON" in the requested bytes of the file;
501   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
502   // 5. does not contain the NRW signature. We may also check a special
503   // signature "RAW   " similar to the NRW case, but we got issues in some
504   // special images that the signature locates in the middle of the file, and it
505   // costs too  long time to check;
IsMyType(const RangeCheckedBytePtr & source) const506   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
507     RangeCheckedBytePtr limited_source = LimitSource(source);
508 
509     bool use_big_endian;
510     if (!DetermineEndianness(limited_source, &use_big_endian)) {
511       return false;
512     }
513 
514     const unsigned short kTiffMagic = 0x2A;  // NOLINT
515     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
516                           kTiffMagic)) {
517       return false;
518     }
519 
520     return ContainsNikonSignatures(limited_source, use_big_endian) &&
521            !ContainsNrwSignature(limited_source);  // not NRW
522   }
523 };
524 
525 // Nikon RAW (NRW extension).
526 class NrwTypeChecker : public TypeChecker {
527  public:
Type() const528   virtual RawImageTypes Type() const { return kNrwImage; }
529 
RequestedSize() const530   virtual size_t RequestedSize() const {
531     return std::max(kRequestedSizeForNikonSignatures,
532                     kRequestedSizeForNrwSignature);
533   }
534 
535   // Check multiple points:
536   // 1. valid endianness at the beginning of the file;
537   // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
538   // file;
539   // 3. the signature "NIKON" in the first requested bytes of the file;
540   // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
541   // 5. contains the NRW signature;
IsMyType(const RangeCheckedBytePtr & source) const542   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
543     RangeCheckedBytePtr limited_source = LimitSource(source);
544 
545     bool use_big_endian;
546     if (!DetermineEndianness(limited_source, &use_big_endian)) {
547       return false;
548     }
549 
550     const unsigned short kTiffMagic = 0x2A;  // NOLINT
551     const unsigned int kTiffOffset = 8;
552     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
553                           kTiffMagic) ||
554         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
555                           kTiffOffset)) {
556       return false;
557     }
558 
559     return ContainsNikonSignatures(limited_source, use_big_endian) &&
560            ContainsNrwSignature(limited_source);
561   }
562 };
563 
564 // Olympus RAW.
565 class OrfTypeChecker : public TypeChecker {
566  public:
Type() const567   virtual RawImageTypes Type() const { return kOrfImage; }
568 
RequestedSize() const569   virtual size_t RequestedSize() const { return 3000; }
570 
571   // Check multiple points:
572   // 1. valid endianness at the beginning of the file;
573   // 2. tag at the (offset == 2) position of the file;
574   // 3. signature "OLYMP" in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const575   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
576     RangeCheckedBytePtr limited_source = LimitSource(source);
577 
578     bool use_big_endian;
579     if (!DetermineEndianness(limited_source, &use_big_endian)) {
580       return false;
581     }
582 
583     const size_t kTagSize = 2;
584     const unsigned short kTag[kTagSize] = {0x4F52, 0x5352};  // NOLINT
585     if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
586                            kTag[0]) ||
587           CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
588                            kTag[1]))) {
589       return false;
590     }
591 
592     // Search for kSignatureOlymp in first requested bytes
593     const string kSignatureOlymp("OLYMP");
594     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
595                             kSignatureOlymp, NULL);
596   }
597 };
598 
599 // Pentax RAW.
600 class PefTypeChecker : public TypeChecker {
601  public:
Type() const602   virtual RawImageTypes Type() const { return kPefImage; }
603 
RequestedSize() const604   virtual size_t RequestedSize() const { return 1280; }
605 
606   // Check multiple points:
607   // 1. valid big endianness at the beginning of the file;
608   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
609   // 3. signature "AOC   " or "PENTAX  " in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const610   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
611     RangeCheckedBytePtr limited_source = LimitSource(source);
612 
613     bool use_big_endian;
614     if (!DetermineEndianness(limited_source, &use_big_endian)) {
615       return false;
616     }
617 
618     const unsigned short kTiffMagic = 0x2A;  // NOLINT
619     const unsigned int kTiffOffset = 8;
620     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
621                           kTiffMagic) ||
622         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
623                           kTiffOffset)) {
624       return false;
625     }
626 
627     // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
628     const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
629     const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
630     return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
631                             kSignatureAOC, NULL) ||
632            IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
633                             kSignaturePENTAX, NULL);
634   }
635 };
636 
637 // Apple format.
638 class QtkTypeChecker : public TypeChecker {
639  public:
Type() const640   virtual RawImageTypes Type() const { return kQtkImage; }
641 
RequestedSize() const642   virtual size_t RequestedSize() const { return 8; }
643 
644   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const645   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
646     RangeCheckedBytePtr limited_source = LimitSource(source);
647 
648     const size_t kSignatureSize = 2;
649     const string kSignature[kSignatureSize] = {
650         string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
651     };
652     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
653            IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
654   }
655 };
656 
657 // Fuji RAW.
658 class RafTypeChecker : public TypeChecker {
659  public:
Type() const660   virtual RawImageTypes Type() const { return kRafImage; }
661 
RequestedSize() const662   virtual size_t RequestedSize() const { return 8; }
663 
664   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const665   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
666     RangeCheckedBytePtr limited_source = LimitSource(source);
667 
668     const string kSignature("FUJIFILM");
669     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
670   }
671 };
672 
673 // Contax N RAW.
674 class RawContaxNTypeChecker : public TypeChecker {
675  public:
Type() const676   virtual RawImageTypes Type() const { return kRawContaxNImage; }
677 
RequestedSize() const678   virtual size_t RequestedSize() const { return 36; }
679 
680   // Check only the signature at the (offset == 25) position of the
681   // file.
IsMyType(const RangeCheckedBytePtr & source) const682   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
683     RangeCheckedBytePtr limited_source = LimitSource(source);
684 
685     const string kSignature("ARECOYK");
686     return IsSignatureMatched(limited_source, 25, kSignature);
687   }
688 };
689 
690 // Panasonic RAW.
691 class Rw2TypeChecker : public TypeChecker {
692  public:
Type() const693   virtual RawImageTypes Type() const { return kRw2Image; }
694 
RequestedSize() const695   virtual size_t RequestedSize() const { return 4; }
696 
697   // Check two points: 1. valid endianness at the beginning of the
698   // file; 2. tag at the (offset == 2) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const699   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
700     RangeCheckedBytePtr limited_source = LimitSource(source);
701 
702     bool use_big_endian;
703     if (!DetermineEndianness(source, &use_big_endian)) {
704       return false;
705     }
706 
707     const unsigned short kTag = 0x55;  // NOLINT
708     return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
709                             kTag);
710   }
711 };
712 
713 // Samsung RAW.
714 class SrwTypeChecker : public TypeChecker {
715  public:
Type() const716   virtual RawImageTypes Type() const { return kSrwImage; }
717 
RequestedSize() const718   virtual size_t RequestedSize() const { return 256; }
719 
720   // Check multiple points:
721   // 1. valid big endianness at the beginning of the file;
722   // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
723   // 3. the signature "SAMSUNG" in the requested bytes of the file;
IsMyType(const RangeCheckedBytePtr & source) const724   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
725     RangeCheckedBytePtr limited_source = LimitSource(source);
726 
727     bool use_big_endian;
728     if (!DetermineEndianness(source, &use_big_endian)) {
729       return false;
730     }
731 
732     const unsigned short kTiffMagic = 0x2A;  // NOLINT
733     const unsigned int kTiffOffset = 8;
734     if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
735                           kTiffMagic) ||
736         !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
737                           kTiffOffset)) {
738       return false;
739     }
740 
741     const string kSignature("SAMSUNG");
742     if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
743       return false;
744     }
745     return true;
746   }
747 };
748 
749 // Sigma / Polaroid RAW.
750 class X3fTypeChecker : public TypeChecker {
751  public:
Type() const752   virtual RawImageTypes Type() const { return kX3fImage; }
753 
RequestedSize() const754   virtual size_t RequestedSize() const { return 4; }
755 
756   // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const757   virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
758     RangeCheckedBytePtr limited_source = LimitSource(source);
759 
760     const string kSignature("FOVb", 4);
761     return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
762   }
763 };
764 
765 // This class contains the list of all type checkers. One should used this list
766 // as a whole to execute the image type recognition.
767 class TypeCheckerList {
768  public:
TypeCheckerList()769   TypeCheckerList() {
770     // Add all supported RAW type checkers here.
771     checkers_.push_back(new ArwTypeChecker());
772     checkers_.push_back(new Cr3TypeChecker());
773     checkers_.push_back(new Cr2TypeChecker());
774     checkers_.push_back(new CrwTypeChecker());
775     checkers_.push_back(new DcrTypeChecker());
776     checkers_.push_back(new DngTypeChecker());
777     checkers_.push_back(new KdcTypeChecker());
778     checkers_.push_back(new MosTypeChecker());
779     checkers_.push_back(new MrwTypeChecker());
780     checkers_.push_back(new NefTypeChecker());
781     checkers_.push_back(new NrwTypeChecker());
782     checkers_.push_back(new OrfTypeChecker());
783     checkers_.push_back(new PefTypeChecker());
784     checkers_.push_back(new QtkTypeChecker());
785     checkers_.push_back(new RafTypeChecker());
786     checkers_.push_back(new RawContaxNTypeChecker());
787     checkers_.push_back(new Rw2TypeChecker());
788     checkers_.push_back(new SrwTypeChecker());
789     checkers_.push_back(new X3fTypeChecker());
790 
791     // Sort the checkers by the ascending RequestedSize() to get better
792     // performance when checking type.
793     std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
794   }
795 
~TypeCheckerList()796   ~TypeCheckerList() {
797     for (size_t i = 0; i < checkers_.size(); ++i) {
798       delete checkers_[i];
799       checkers_[i] = NULL;
800     }
801   }
802 
803   // Returns the type of source data. If it can not be identified, returns
804   // kNonRawImage.
GetType(const RangeCheckedBytePtr & source) const805   RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
806     for (size_t i = 0; i < checkers_.size(); ++i) {
807       if (checkers_[i]->IsMyType(source)) {
808         return checkers_[i]->Type();
809       }
810     }
811     return kNonRawImage;
812   }
813 
814   // Returns the maximum size of requested size of data for identifying image
815   // type using this class. The class guarantees that it will not read more than
816   // this size.
RequestedSize() const817   size_t RequestedSize() const {
818     assert(!checkers_.empty());
819     // The checkers_ is ascending sorted. The last element is the maximum.
820     return checkers_.back()->RequestedSize();
821   }
822 
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)823   bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
824     const TypeChecker* type_checker = GetTypeCheckerForType(type);
825     if (type_checker) {
826       return type_checker->IsMyType(source);
827     } else {
828       return false;
829     }
830   }
831 
RequestedSizeForType(const RawImageTypes type)832   size_t RequestedSizeForType(const RawImageTypes type) {
833     const TypeChecker* type_checker = GetTypeCheckerForType(type);
834     if (type_checker) {
835       return type_checker->RequestedSize();
836     } else {
837       return 0;
838     }
839   }
840 
841  private:
GetTypeCheckerForType(const RawImageTypes type)842   const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
843     for (const auto* type_checker : checkers_) {
844       if (type_checker->Type() == type) {
845         return type_checker;
846       }
847     }
848     return nullptr;
849   }
850 
851   std::vector<TypeChecker*> checkers_;
852 };
853 
854 }  // namespace
855 
IsRaw(const RawImageTypes type)856 bool IsRaw(const RawImageTypes type) {
857   switch (type) {
858     // Non-RAW-image type
859     case kNonRawImage: {
860       return false;
861     }
862 
863     // Raw image types
864     case kArwImage:
865     case kCr3Image:
866     case kCr2Image:
867     case kCrwImage:
868     case kDcrImage:
869     case kDngImage:
870     case kKdcImage:
871     case kMosImage:
872     case kMrwImage:
873     case kNefImage:
874     case kNrwImage:
875     case kOrfImage:
876     case kPefImage:
877     case kQtkImage:
878     case kRafImage:
879     case kRawContaxNImage:
880     case kRw2Image:
881     case kSrwImage:
882     case kX3fImage: {
883       return true;
884     }
885 
886     default: {
887       // Unsupported type!
888       assert(false);
889     }
890   }
891   return false;
892 }
893 
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)894 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
895   return TypeCheckerList().IsOfType(source, type);
896 }
897 
RecognizeRawImageTypeLite(const RangeCheckedBytePtr & source)898 RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
899   return TypeCheckerList().GetType(source);
900 }
901 
GetNumberOfBytesForIsRawLite()902 size_t GetNumberOfBytesForIsRawLite() {
903   return TypeCheckerList().RequestedSize();
904 }
905 
GetNumberOfBytesForIsOfType(const RawImageTypes type)906 size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
907   return TypeCheckerList().RequestedSizeForType(type);
908 }
909 
IsRawLite(const RangeCheckedBytePtr & source)910 bool IsRawLite(const RangeCheckedBytePtr& source) {
911   return IsRaw(RecognizeRawImageTypeLite(source));
912 }
913 
914 }  // namespace image_type_recognition
915 }  // namespace piex
916