1 // Copyright 2015 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 ////////////////////////////////////////////////////////////////////////////////
16 //
// This file implements the image type recognition algorithm. Functions, which
// will check each single image type, are implemented based on the comparisons
// of magic numbers or signature strings. Other checks (e.g. endianness, general
// tiff magic number "42", etc.) could also be used in some of those functions
// to make the type recognition more stable. Those checks are designed
// according to the format specifications and our own experiments. Notice that
// the magic numbers and signature strings may have different binary values
// according to different endiannesses.
25 #include "src/image_type_recognition/image_type_recognition_lite.h"
26
#include <algorithm>
#include <cassert>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
31
32 #include "src/binary_parse/range_checked_byte_ptr.h"
33
34 namespace piex {
35 namespace image_type_recognition {
36 namespace {
37
38 using std::string;
39 using binary_parse::MemoryStatus;
40 using binary_parse::RangeCheckedBytePtr;
41
42 // Base class for checking image type. For each image type, one should create an
43 // inherited class and do the implementation.
44 class TypeChecker {
45 public:
46 // Comparing function, whihc is used for sorting.
Compare(const TypeChecker * a,const TypeChecker * b)47 static bool Compare(const TypeChecker* a, const TypeChecker* b) {
48 assert(a);
49 assert(b);
50 return a->RequestedSize() < b->RequestedSize();
51 }
52
~TypeChecker()53 virtual ~TypeChecker() {}
54
55 // Returns the type of current checker.
56 virtual RawImageTypes Type() const = 0;
57
58 // Returns the requested data size (in bytes) for current checker. The checker
59 // guarantees that it will not read more than this size.
60 virtual size_t RequestedSize() const = 0;
61
62 // Checks if source data belongs to current checker type.
63 virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
64
65 protected:
66 // Limits the source length to the RequestedSize(), using it guarantees that
67 // we will not read more than this size from the source.
LimitSource(const RangeCheckedBytePtr & source) const68 RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
69 return source.pointerToSubArray(0 /* pos */, RequestedSize());
70 }
71 };
72
73 // Check if the uint16 value at (source + offset) is equal to the target value.
CheckUInt16Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned short target_value)74 bool CheckUInt16Value(const RangeCheckedBytePtr& source,
75 const size_t source_offset, const bool use_big_endian,
76 const unsigned short target_value) { // NOLINT
77 MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
78 const unsigned short value = binary_parse::Get16u( // NOLINT
79 source + source_offset, use_big_endian, &status);
80 if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
81 return false;
82 }
83 return (target_value == value);
84 }
85
86 // Check if the uint32 value at (source + offset) is equal to the target value.
CheckUInt32Value(const RangeCheckedBytePtr & source,const size_t source_offset,const bool use_big_endian,const unsigned int target_value)87 bool CheckUInt32Value(const RangeCheckedBytePtr& source,
88 const size_t source_offset, const bool use_big_endian,
89 const unsigned int target_value) {
90 MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
91 const unsigned int value =
92 binary_parse::Get32u(source + source_offset, use_big_endian, &status);
93 if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
94 return false;
95 }
96 return (target_value == value);
97 }
98
99 // Determine the endianness. The return value is NOT the endianness indicator,
100 // it's just that this function was successful.
DetermineEndianness(const RangeCheckedBytePtr & source,bool * is_big_endian)101 bool DetermineEndianness(const RangeCheckedBytePtr& source,
102 bool* is_big_endian) {
103 if (source.remainingLength() < 2) {
104 return false;
105 }
106
107 if (source[0] == 0x49 && source[1] == 0x49) {
108 *is_big_endian = false;
109 } else if (source[0] == 0x4D && source[1] == 0x4D) {
110 *is_big_endian = true;
111 } else {
112 return false;
113 }
114 return true;
115 }
116
117 // Check if signature string can match to the same length string start from
118 // (source + offset). The signature string will be used as longer magic number
119 // series.
IsSignatureMatched(const RangeCheckedBytePtr & source,const size_t source_offset,const string & signature)120 bool IsSignatureMatched(const RangeCheckedBytePtr& source,
121 const size_t source_offset, const string& signature) {
122 return source.substr(source_offset, signature.size()) == signature;
123 }
124
125 // Check if signature is found in [source + offset, source + offset + range].
IsSignatureFound(const RangeCheckedBytePtr & source,const size_t search_offset,const size_t search_range,const string & signature,size_t * first_matched)126 bool IsSignatureFound(const RangeCheckedBytePtr& source,
127 const size_t search_offset, const size_t search_range,
128 const string& signature, size_t* first_matched) {
129 if (source.remainingLength() < search_offset + search_range) {
130 return false;
131 }
132
133 // The index must be in range [offset, offset + range - sizeof(signature)], so
134 // that it can guarantee that it will not read outside of range.
135 for (size_t i = search_offset;
136 i < search_offset + search_range - signature.size(); ++i) {
137 if (IsSignatureMatched(source, i, signature)) {
138 if (first_matched) {
139 *first_matched = i;
140 }
141 return true;
142 }
143 }
144 return false;
145 }
146
147 // Sony RAW format.
148 class ArwTypeChecker : public TypeChecker {
149 public:
Type() const150 virtual RawImageTypes Type() const { return kArwImage; }
151
RequestedSize() const152 virtual size_t RequestedSize() const { return 10000; }
153
154 // Check multiple points:
155 // 1. valid endianness at the beginning of the file;
156 // 2. correct tiff magic number at the (offset == 8) position of the file;
157 // 3. signature "SONY" in first requested bytes;
158 // 4. correct signature for (section + version) in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const159 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
160 RangeCheckedBytePtr limited_source = LimitSource(source);
161
162 bool use_big_endian;
163 if (!DetermineEndianness(limited_source, &use_big_endian)) {
164 return false;
165 }
166
167 const unsigned short kTiffMagic = 0x2A; // NOLINT
168 const unsigned int kTiffOffset = 8;
169 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
170 kTiffMagic) ||
171 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
172 kTiffOffset)) {
173 return false;
174 }
175
176 // Search for kSignatureSony in first requested bytes
177 const string kSignatureSony("SONY");
178 if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
179 kSignatureSony, NULL)) {
180 return false;
181 }
182
183 // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
184 // requested bytes
185 const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
186 const int kSignatureVersionsSize = 6;
187 const string kSignatureVersions[kSignatureVersionsSize] = {
188 string("\x02\x00", 2), // ARW 1.0
189 string("\x03\x00", 2), // ARW 2.0
190 string("\x03\x01", 2), // ARW 2.1
191 string("\x03\x02", 2), // ARW 2.2
192 string("\x03\x03", 2), // ARW 2.3
193 string("\x04\x00", 2), // ARW 4.0
194 };
195 bool matched = false;
196 for (int i = 0; i < kSignatureVersionsSize; ++i) {
197 matched = matched || IsSignatureFound(
198 limited_source, 0 /* offset */, RequestedSize(),
199 kSignatureSection + kSignatureVersions[i], NULL);
200 }
201 return matched;
202 }
203 };
204
205 // Canon RAW (CR3 extension).
206 class Cr3TypeChecker : public TypeChecker {
207 public:
208 static constexpr size_t kSignatureOffset = 4;
209 static constexpr const char* kSignature = "ftypcrx ";
210
Type() const211 virtual RawImageTypes Type() const { return kCr3Image; }
212
RequestedSize() const213 virtual size_t RequestedSize() const {
214 return kSignatureOffset + strlen(kSignature);
215 }
216
217 // Checks for the ftyp box w/ brand 'crx '.
IsMyType(const RangeCheckedBytePtr & source) const218 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
219 RangeCheckedBytePtr limited_source = LimitSource(source);
220 return IsSignatureMatched(limited_source, kSignatureOffset, kSignature);
221 }
222 };
223
224 // Canon RAW (CR2 extension).
225 class Cr2TypeChecker : public TypeChecker {
226 public:
Type() const227 virtual RawImageTypes Type() const { return kCr2Image; }
228
RequestedSize() const229 virtual size_t RequestedSize() const { return 16; }
230
231 // Check multiple points:
232 // 1. valid endianness at the beginning of the file;
233 // 2. magic number "42" at the (offset == 2) position of the file;
234 // 3. signature "CR2" at the (offset == 8) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const235 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
236 RangeCheckedBytePtr limited_source = LimitSource(source);
237
238 bool use_big_endian;
239 if (!DetermineEndianness(limited_source, &use_big_endian)) {
240 return false;
241 }
242
243 const unsigned short kTag = 42; // NOLINT
244 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
245 kTag)) {
246 return false;
247 }
248
249 const string kSignature("CR\2\0", 4);
250 return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
251 }
252 };
253
254 // Canon RAW (CRW extension).
255 class CrwTypeChecker : public TypeChecker {
256 public:
Type() const257 virtual RawImageTypes Type() const { return kCrwImage; }
258
RequestedSize() const259 virtual size_t RequestedSize() const { return 14; }
260
261 // Check only the signature at the (offset == 6) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const262 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
263 RangeCheckedBytePtr limited_source = LimitSource(source);
264
265 bool use_big_endian;
266 if (!DetermineEndianness(limited_source, &use_big_endian)) {
267 return false;
268 }
269
270 string signature;
271 if (use_big_endian) {
272 signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
273 } else {
274 signature = string("HEAPCCDR");
275 }
276 return IsSignatureMatched(limited_source, 6 /* offset */, signature);
277 }
278 };
279
280 // Kodak RAW.
281 class DcrTypeChecker : public TypeChecker {
282 public:
Type() const283 virtual RawImageTypes Type() const { return kDcrImage; }
284
RequestedSize() const285 virtual size_t RequestedSize() const { return 5000; }
286
287 // Check two different cases, only need to fulfill one of the two:
288 // 1. signature at the (offset == 16) position of the file;
289 // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
290 // first requested bytes of the file.
IsMyType(const RangeCheckedBytePtr & source) const291 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
292 RangeCheckedBytePtr limited_source = LimitSource(source);
293
294 bool use_big_endian;
295 if (!DetermineEndianness(limited_source, &use_big_endian)) {
296 return false;
297 }
298
299 // Case 1: has signature
300 const string kSignature(
301 "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
302 if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
303 return true;
304 }
305
306 // Case 2: search for tags in first requested bytes
307 string kIfdTags[2];
308 if (use_big_endian) {
309 kIfdTags[0] = string("\x03\xe9\x00\x02", 4); // OriginalFileName
310 kIfdTags[1] = string("\x0c\xe5\x00\x02", 4); // FirmwareVersion
311 } else {
312 kIfdTags[0] = string("\xe9\x03\x02\x00", 4); // OriginalFileName
313 kIfdTags[1] = string("\xe5\x0c\x02\x00", 4); // FirmwareVersion
314 }
315 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
316 kIfdTags[0], NULL) &&
317 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
318 kIfdTags[1], NULL);
319 }
320 };
321
322 // Digital Negative RAW.
323 class DngTypeChecker : public TypeChecker {
324 public:
Type() const325 virtual RawImageTypes Type() const { return kDngImage; }
326
RequestedSize() const327 virtual size_t RequestedSize() const { return 1024; }
328
329 // Check multiple points:
330 // 1. valid endianness at the beginning of the file;
331 // 2. at least two dng specific tags in the first requested bytes of the
332 // file
IsMyType(const RangeCheckedBytePtr & source) const333 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
334 RangeCheckedBytePtr limited_source = LimitSource(source);
335
336 bool use_big_endian;
337 if (!DetermineEndianness(limited_source, &use_big_endian)) {
338 return false;
339 }
340
341 // Search tags in first requested bytes and verify the order of them.
342 const int kTagsCount = 5;
343 string dng_tags[kTagsCount];
344 if (use_big_endian) {
345 dng_tags[0] =
346 string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8); // tag: 50706
347 dng_tags[1] =
348 string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8); // tag: 50707
349 dng_tags[2] = string("\xc6\x14\x00\x02", 4); // tag: 50708
350 dng_tags[3] = string("\xc6\x20", 2); // tag: 50720
351 dng_tags[4] =
352 string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8); // tag: 50733
353 } else {
354 dng_tags[0] =
355 string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50706
356 dng_tags[1] =
357 string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50707
358 dng_tags[2] = string("\x14\xc6\x02\x00", 4); // tag: 50708
359 dng_tags[3] = string("\x20\xc6", 2); // tag: 50720
360 dng_tags[4] =
361 string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8); // tag: 50733
362 }
363 int tags_found = 0;
364 for (int i = 0; i < kTagsCount; ++i) {
365 if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
366 dng_tags[i], NULL)) {
367 tags_found++;
368 }
369 }
370 return tags_found >= 2;
371 }
372 };
373
374 // Kodak RAW.
375 class KdcTypeChecker : public TypeChecker {
376 public:
Type() const377 virtual RawImageTypes Type() const { return kKdcImage; }
378
RequestedSize() const379 virtual size_t RequestedSize() const { return 5000; }
380
381 // Check two points:
382 // 1. valid endianness at the beginning of the file;
383 // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const384 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
385 RangeCheckedBytePtr limited_source = LimitSource(source);
386
387 bool use_big_endian;
388 if (!DetermineEndianness(limited_source, &use_big_endian)) {
389 return false;
390 }
391
392 // Search in first requested bytes
393 const size_t kIfdTagsSize = 2;
394 string kIfdTags[kIfdTagsSize];
395 if (use_big_endian) {
396 kIfdTags[0] = string("\xfa\x0d\x00\x01", 4); // WhiteBalance
397 kIfdTags[1] = string("\xfa\x00\x00\x02", 4); // SerialNumber
398 } else {
399 kIfdTags[0] = string("\x0d\xfa\x01\x00", 4); // WhiteBalance
400 kIfdTags[1] = string("\x00\xfa\x02\x00", 4); // SerialNumber
401 }
402
403 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
404 kIfdTags[0], NULL) &&
405 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
406 kIfdTags[1], NULL);
407 }
408 };
409
410 // Leaf RAW.
411 class MosTypeChecker : public TypeChecker {
412 public:
Type() const413 virtual RawImageTypes Type() const { return kMosImage; }
414
RequestedSize() const415 virtual size_t RequestedSize() const { return 5000; }
416
417 // Check two points:
418 // 1. valid endianness at the beginning of the file;
419 // 2. signature "PKTS " in the first requested bytes. Note the
420 // "whitespace". It's important as they are special binary values.
IsMyType(const RangeCheckedBytePtr & source) const421 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
422 RangeCheckedBytePtr limited_source = LimitSource(source);
423
424 bool use_big_endian;
425 if (!DetermineEndianness(source, &use_big_endian)) {
426 return false;
427 }
428
429 // Search kSignaturePKTS in first requested bytes
430 const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
431 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
432 kSignaturePKTS, NULL);
433 }
434 };
435
436 // Minolta RAW.
437 class MrwTypeChecker : public TypeChecker {
438 public:
Type() const439 virtual RawImageTypes Type() const { return kMrwImage; }
440
RequestedSize() const441 virtual size_t RequestedSize() const { return 4; }
442
443 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const444 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
445 // Limits the source length to the RequestedSize(), using it guarantees that
446 // we will not read more than this size from the source.
447 RangeCheckedBytePtr limited_source =
448 source.pointerToSubArray(0 /* pos */, RequestedSize());
449
450 const string kSignature("\0MRM", 4);
451 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
452 }
453 };
454
455 // Check if the file contains a NRW signature "NRW " in the first requested
456 // bytes. Note the "whitespace". It's important as they are special binary
457 // values.
458 const size_t kRequestedSizeForNrwSignature = 4000;
ContainsNrwSignature(const RangeCheckedBytePtr & source)459 bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
460 // Search for kSignatureNrw.
461 const string kSignatureNrw("NRW\x20\x20\x20", 6);
462 return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
463 kSignatureNrw, NULL);
464 }
465
466 // Checks if the file contains the signatures for Nikon formats:
467 // * the general Nikon singature "NIKON" string.
468 // * the ReferenceBlackWhite tag.
469 const size_t kRequestedSizeForNikonSignatures = 4000;
ContainsNikonSignatures(const RangeCheckedBytePtr & source,const bool use_big_endian)470 bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
471 const bool use_big_endian) {
472 const string kSignatureNikon("NIKON");
473 const string kReferenceBlackWhiteTag = use_big_endian
474 ? string("\x02\x14\x00\x05", 4)
475 : string("\x14\x02\x05\x00", 4);
476 const std::vector<string> kSignatures = {kSignatureNikon,
477 kReferenceBlackWhiteTag};
478 for (auto const& signature : kSignatures) {
479 if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
480 signature, NULL)) {
481 return false;
482 }
483 }
484 return true;
485 }
486
487 // Nikon RAW (NEF extension).
488 class NefTypeChecker : public TypeChecker {
489 public:
Type() const490 virtual RawImageTypes Type() const { return kNefImage; }
491
RequestedSize() const492 virtual size_t RequestedSize() const {
493 return std::max(kRequestedSizeForNikonSignatures,
494 kRequestedSizeForNrwSignature);
495 }
496
497 // Check multiple points:
498 // 1. valid endianness at the beginning of the file;
499 // 2. magic number at the (offset == 2) position of the file;
500 // 3. the signature "NIKON" in the requested bytes of the file;
501 // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
502 // 5. does not contain the NRW signature. We may also check a special
503 // signature "RAW " similar to the NRW case, but we got issues in some
504 // special images that the signature locates in the middle of the file, and it
505 // costs too long time to check;
IsMyType(const RangeCheckedBytePtr & source) const506 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
507 RangeCheckedBytePtr limited_source = LimitSource(source);
508
509 bool use_big_endian;
510 if (!DetermineEndianness(limited_source, &use_big_endian)) {
511 return false;
512 }
513
514 const unsigned short kTiffMagic = 0x2A; // NOLINT
515 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
516 kTiffMagic)) {
517 return false;
518 }
519
520 return ContainsNikonSignatures(limited_source, use_big_endian) &&
521 !ContainsNrwSignature(limited_source); // not NRW
522 }
523 };
524
525 // Nikon RAW (NRW extension).
526 class NrwTypeChecker : public TypeChecker {
527 public:
Type() const528 virtual RawImageTypes Type() const { return kNrwImage; }
529
RequestedSize() const530 virtual size_t RequestedSize() const {
531 return std::max(kRequestedSizeForNikonSignatures,
532 kRequestedSizeForNrwSignature);
533 }
534
535 // Check multiple points:
536 // 1. valid endianness at the beginning of the file;
537 // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
538 // file;
539 // 3. the signature "NIKON" in the first requested bytes of the file;
540 // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
541 // 5. contains the NRW signature;
IsMyType(const RangeCheckedBytePtr & source) const542 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
543 RangeCheckedBytePtr limited_source = LimitSource(source);
544
545 bool use_big_endian;
546 if (!DetermineEndianness(limited_source, &use_big_endian)) {
547 return false;
548 }
549
550 const unsigned short kTiffMagic = 0x2A; // NOLINT
551 const unsigned int kTiffOffset = 8;
552 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
553 kTiffMagic) ||
554 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
555 kTiffOffset)) {
556 return false;
557 }
558
559 return ContainsNikonSignatures(limited_source, use_big_endian) &&
560 ContainsNrwSignature(limited_source);
561 }
562 };
563
564 // Olympus RAW.
565 class OrfTypeChecker : public TypeChecker {
566 public:
Type() const567 virtual RawImageTypes Type() const { return kOrfImage; }
568
RequestedSize() const569 virtual size_t RequestedSize() const { return 3000; }
570
571 // Check multiple points:
572 // 1. valid endianness at the beginning of the file;
573 // 2. tag at the (offset == 2) position of the file;
574 // 3. signature "OLYMP" in the first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const575 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
576 RangeCheckedBytePtr limited_source = LimitSource(source);
577
578 bool use_big_endian;
579 if (!DetermineEndianness(limited_source, &use_big_endian)) {
580 return false;
581 }
582
583 const size_t kTagSize = 2;
584 const unsigned short kTag[kTagSize] = {0x4F52, 0x5352}; // NOLINT
585 if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
586 kTag[0]) ||
587 CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
588 kTag[1]))) {
589 return false;
590 }
591
592 // Search for kSignatureOlymp in first requested bytes
593 const string kSignatureOlymp("OLYMP");
594 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
595 kSignatureOlymp, NULL);
596 }
597 };
598
599 // Pentax RAW.
600 class PefTypeChecker : public TypeChecker {
601 public:
Type() const602 virtual RawImageTypes Type() const { return kPefImage; }
603
RequestedSize() const604 virtual size_t RequestedSize() const { return 1280; }
605
606 // Check multiple points:
607 // 1. valid big endianness at the beginning of the file;
608 // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
609 // 3. signature "AOC " or "PENTAX " in first requested bytes.
IsMyType(const RangeCheckedBytePtr & source) const610 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
611 RangeCheckedBytePtr limited_source = LimitSource(source);
612
613 bool use_big_endian;
614 if (!DetermineEndianness(limited_source, &use_big_endian)) {
615 return false;
616 }
617
618 const unsigned short kTiffMagic = 0x2A; // NOLINT
619 const unsigned int kTiffOffset = 8;
620 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
621 kTiffMagic) ||
622 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
623 kTiffOffset)) {
624 return false;
625 }
626
627 // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
628 const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
629 const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
630 return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
631 kSignatureAOC, NULL) ||
632 IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
633 kSignaturePENTAX, NULL);
634 }
635 };
636
637 // Apple format.
638 class QtkTypeChecker : public TypeChecker {
639 public:
Type() const640 virtual RawImageTypes Type() const { return kQtkImage; }
641
RequestedSize() const642 virtual size_t RequestedSize() const { return 8; }
643
644 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const645 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
646 RangeCheckedBytePtr limited_source = LimitSource(source);
647
648 const size_t kSignatureSize = 2;
649 const string kSignature[kSignatureSize] = {
650 string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
651 };
652 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
653 IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
654 }
655 };
656
657 // Fuji RAW.
658 class RafTypeChecker : public TypeChecker {
659 public:
Type() const660 virtual RawImageTypes Type() const { return kRafImage; }
661
RequestedSize() const662 virtual size_t RequestedSize() const { return 8; }
663
664 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const665 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
666 RangeCheckedBytePtr limited_source = LimitSource(source);
667
668 const string kSignature("FUJIFILM");
669 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
670 }
671 };
672
673 // Contax N RAW.
674 class RawContaxNTypeChecker : public TypeChecker {
675 public:
Type() const676 virtual RawImageTypes Type() const { return kRawContaxNImage; }
677
RequestedSize() const678 virtual size_t RequestedSize() const { return 36; }
679
680 // Check only the signature at the (offset == 25) position of the
681 // file.
IsMyType(const RangeCheckedBytePtr & source) const682 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
683 RangeCheckedBytePtr limited_source = LimitSource(source);
684
685 const string kSignature("ARECOYK");
686 return IsSignatureMatched(limited_source, 25, kSignature);
687 }
688 };
689
690 // Panasonic RAW.
691 class Rw2TypeChecker : public TypeChecker {
692 public:
Type() const693 virtual RawImageTypes Type() const { return kRw2Image; }
694
RequestedSize() const695 virtual size_t RequestedSize() const { return 4; }
696
697 // Check two points: 1. valid endianness at the beginning of the
698 // file; 2. tag at the (offset == 2) position of the file.
IsMyType(const RangeCheckedBytePtr & source) const699 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
700 RangeCheckedBytePtr limited_source = LimitSource(source);
701
702 bool use_big_endian;
703 if (!DetermineEndianness(source, &use_big_endian)) {
704 return false;
705 }
706
707 const unsigned short kTag = 0x55; // NOLINT
708 return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
709 kTag);
710 }
711 };
712
713 // Samsung RAW.
714 class SrwTypeChecker : public TypeChecker {
715 public:
Type() const716 virtual RawImageTypes Type() const { return kSrwImage; }
717
RequestedSize() const718 virtual size_t RequestedSize() const { return 256; }
719
720 // Check multiple points:
721 // 1. valid big endianness at the beginning of the file;
722 // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
723 // 3. the signature "SAMSUNG" in the requested bytes of the file;
IsMyType(const RangeCheckedBytePtr & source) const724 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
725 RangeCheckedBytePtr limited_source = LimitSource(source);
726
727 bool use_big_endian;
728 if (!DetermineEndianness(source, &use_big_endian)) {
729 return false;
730 }
731
732 const unsigned short kTiffMagic = 0x2A; // NOLINT
733 const unsigned int kTiffOffset = 8;
734 if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
735 kTiffMagic) ||
736 !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
737 kTiffOffset)) {
738 return false;
739 }
740
741 const string kSignature("SAMSUNG");
742 if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
743 return false;
744 }
745 return true;
746 }
747 };
748
749 // Sigma / Polaroid RAW.
750 class X3fTypeChecker : public TypeChecker {
751 public:
Type() const752 virtual RawImageTypes Type() const { return kX3fImage; }
753
RequestedSize() const754 virtual size_t RequestedSize() const { return 4; }
755
756 // Check only the signature at the beginning of the file.
IsMyType(const RangeCheckedBytePtr & source) const757 virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
758 RangeCheckedBytePtr limited_source = LimitSource(source);
759
760 const string kSignature("FOVb", 4);
761 return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
762 }
763 };
764
765 // This class contains the list of all type checkers. One should used this list
766 // as a whole to execute the image type recognition.
767 class TypeCheckerList {
768 public:
TypeCheckerList()769 TypeCheckerList() {
770 // Add all supported RAW type checkers here.
771 checkers_.push_back(new ArwTypeChecker());
772 checkers_.push_back(new Cr3TypeChecker());
773 checkers_.push_back(new Cr2TypeChecker());
774 checkers_.push_back(new CrwTypeChecker());
775 checkers_.push_back(new DcrTypeChecker());
776 checkers_.push_back(new DngTypeChecker());
777 checkers_.push_back(new KdcTypeChecker());
778 checkers_.push_back(new MosTypeChecker());
779 checkers_.push_back(new MrwTypeChecker());
780 checkers_.push_back(new NefTypeChecker());
781 checkers_.push_back(new NrwTypeChecker());
782 checkers_.push_back(new OrfTypeChecker());
783 checkers_.push_back(new PefTypeChecker());
784 checkers_.push_back(new QtkTypeChecker());
785 checkers_.push_back(new RafTypeChecker());
786 checkers_.push_back(new RawContaxNTypeChecker());
787 checkers_.push_back(new Rw2TypeChecker());
788 checkers_.push_back(new SrwTypeChecker());
789 checkers_.push_back(new X3fTypeChecker());
790
791 // Sort the checkers by the ascending RequestedSize() to get better
792 // performance when checking type.
793 std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
794 }
795
~TypeCheckerList()796 ~TypeCheckerList() {
797 for (size_t i = 0; i < checkers_.size(); ++i) {
798 delete checkers_[i];
799 checkers_[i] = NULL;
800 }
801 }
802
803 // Returns the type of source data. If it can not be identified, returns
804 // kNonRawImage.
GetType(const RangeCheckedBytePtr & source) const805 RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
806 for (size_t i = 0; i < checkers_.size(); ++i) {
807 if (checkers_[i]->IsMyType(source)) {
808 return checkers_[i]->Type();
809 }
810 }
811 return kNonRawImage;
812 }
813
814 // Returns the maximum size of requested size of data for identifying image
815 // type using this class. The class guarantees that it will not read more than
816 // this size.
RequestedSize() const817 size_t RequestedSize() const {
818 assert(!checkers_.empty());
819 // The checkers_ is ascending sorted. The last element is the maximum.
820 return checkers_.back()->RequestedSize();
821 }
822
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)823 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
824 const TypeChecker* type_checker = GetTypeCheckerForType(type);
825 if (type_checker) {
826 return type_checker->IsMyType(source);
827 } else {
828 return false;
829 }
830 }
831
RequestedSizeForType(const RawImageTypes type)832 size_t RequestedSizeForType(const RawImageTypes type) {
833 const TypeChecker* type_checker = GetTypeCheckerForType(type);
834 if (type_checker) {
835 return type_checker->RequestedSize();
836 } else {
837 return 0;
838 }
839 }
840
841 private:
GetTypeCheckerForType(const RawImageTypes type)842 const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
843 for (const auto* type_checker : checkers_) {
844 if (type_checker->Type() == type) {
845 return type_checker;
846 }
847 }
848 return nullptr;
849 }
850
851 std::vector<TypeChecker*> checkers_;
852 };
853
854 } // namespace
855
IsRaw(const RawImageTypes type)856 bool IsRaw(const RawImageTypes type) {
857 switch (type) {
858 // Non-RAW-image type
859 case kNonRawImage: {
860 return false;
861 }
862
863 // Raw image types
864 case kArwImage:
865 case kCr3Image:
866 case kCr2Image:
867 case kCrwImage:
868 case kDcrImage:
869 case kDngImage:
870 case kKdcImage:
871 case kMosImage:
872 case kMrwImage:
873 case kNefImage:
874 case kNrwImage:
875 case kOrfImage:
876 case kPefImage:
877 case kQtkImage:
878 case kRafImage:
879 case kRawContaxNImage:
880 case kRw2Image:
881 case kSrwImage:
882 case kX3fImage: {
883 return true;
884 }
885
886 default: {
887 // Unsupported type!
888 assert(false);
889 }
890 }
891 return false;
892 }
893
IsOfType(const RangeCheckedBytePtr & source,const RawImageTypes type)894 bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
895 return TypeCheckerList().IsOfType(source, type);
896 }
897
RecognizeRawImageTypeLite(const RangeCheckedBytePtr & source)898 RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
899 return TypeCheckerList().GetType(source);
900 }
901
GetNumberOfBytesForIsRawLite()902 size_t GetNumberOfBytesForIsRawLite() {
903 return TypeCheckerList().RequestedSize();
904 }
905
GetNumberOfBytesForIsOfType(const RawImageTypes type)906 size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
907 return TypeCheckerList().RequestedSizeForType(type);
908 }
909
IsRawLite(const RangeCheckedBytePtr & source)910 bool IsRawLite(const RangeCheckedBytePtr& source) {
911 return IsRaw(RecognizeRawImageTypeLite(source));
912 }
913
914 } // namespace image_type_recognition
915 } // namespace piex
916