xref: /aosp_15_r20/frameworks/av/media/module/metadatautils/MetaDataUtils.cpp (revision ec779b8e0859a360c3d303172224686826e6e0e1)
1 /*
2  * Copyright 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MetaDataUtils"
19 #include <utils/Log.h>
20 
21 #include <media/stagefright/foundation/avc_utils.h>
22 #include <media/stagefright/foundation/base64.h>
23 #include <media/stagefright/foundation/ABitReader.h>
24 #include <media/stagefright/foundation/ABuffer.h>
25 #include <media/stagefright/foundation/ByteUtils.h>
26 #include <media/stagefright/MediaDefs.h>
27 #include <media/stagefright/MetaDataUtils.h>
28 #include <media/NdkMediaFormat.h>
29 
30 namespace android {
31 
MakeAVCCodecSpecificData(MetaDataBase & meta,const uint8_t * data,size_t size)32 bool MakeAVCCodecSpecificData(MetaDataBase &meta, const uint8_t *data, size_t size) {
33     if (data == nullptr || size == 0) {
34         return false;
35     }
36 
37     int32_t width;
38     int32_t height;
39     int32_t sarWidth;
40     int32_t sarHeight;
41     sp<ABuffer> accessUnit = new ABuffer((void*)data,  size);
42     sp<ABuffer> csd = MakeAVCCodecSpecificData(accessUnit, &width, &height, &sarWidth, &sarHeight);
43     if (csd == nullptr) {
44         return false;
45     }
46     meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
47 
48     meta.setData(kKeyAVCC, kTypeAVCC, csd->data(), csd->size());
49     meta.setInt32(kKeyWidth, width);
50     meta.setInt32(kKeyHeight, height);
51     if (sarWidth > 0 && sarHeight > 0) {
52         meta.setInt32(kKeySARWidth, sarWidth);
53         meta.setInt32(kKeySARHeight, sarHeight);
54     }
55     return true;
56 }
57 
MakeAVCCodecSpecificData(AMediaFormat * meta,const uint8_t * data,size_t size)58 bool MakeAVCCodecSpecificData(AMediaFormat *meta, const uint8_t *data, size_t size) {
59     if (meta == nullptr || data == nullptr || size == 0) {
60         return false;
61     }
62 
63     int32_t width;
64     int32_t height;
65     int32_t sarWidth;
66     int32_t sarHeight;
67     sp<ABuffer> accessUnit = new ABuffer((void*)data,  size);
68     sp<ABuffer> csd = MakeAVCCodecSpecificData(accessUnit, &width, &height, &sarWidth, &sarHeight);
69     if (csd == nullptr) {
70         return false;
71     }
72     AMediaFormat_setString(meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
73 
74     AMediaFormat_setBuffer(meta, AMEDIAFORMAT_KEY_CSD_AVC, csd->data(), csd->size());
75     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_WIDTH, width);
76     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_HEIGHT, height);
77     if (sarWidth > 0 && sarHeight > 0) {
78         AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_SAR_WIDTH, sarWidth);
79         AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, sarHeight);
80     }
81     return true;
82 }
83 
84 // Check if the next 24 bits are VP9 SYNC_CODE
isVp9SyncCode(ABitReader & bits)85 static bool isVp9SyncCode(ABitReader &bits) {
86     if (bits.numBitsLeft() < 24) {
87         return false;
88     }
89     return bits.getBits(24) == 0x498342;
90 }
91 
92 // This parses bitdepth and subsampling in a VP9 uncompressed header
93 // (refer section bitdepth_colorspace_sampling in 6.2 of the VP9 bitstream spec)
getVp9BitdepthChromaSubSampling(ABitReader & bits,int32_t profile,int32_t * bitDepth,int32_t * chromaSubsampling)94 static bool getVp9BitdepthChromaSubSampling(ABitReader &bits,
95         int32_t profile,
96         int32_t *bitDepth,
97         int32_t *chromaSubsampling) {
98     if (profile >= 2) {
99         if (bits.numBitsLeft() < 1) {
100             return false;
101         }
102         *bitDepth = bits.getBits(1) ? 12 : 10;
103     } else {
104         *bitDepth = 8;
105     }
106 
107     uint32_t colorspace;
108     if (!bits.getBitsGraceful(3, &colorspace)) {
109         return false;
110     }
111 
112     *chromaSubsampling = -1;
113     if (colorspace != 7 /*SRGB*/) {
114         // Skip yuv_range_flag
115         if (!bits.skipBits(1)) {
116             return false;
117         }
118         // Check for subsampling only for profiles 1 and 3.
119         if (profile == 1 || profile == 3) {
120             uint32_t ss_x;
121             uint32_t ss_y;
122             if (bits.getBitsGraceful(1, &ss_x) && bits.getBitsGraceful(1, &ss_y)) {
123                 *chromaSubsampling = ss_x << 1 & ss_y;
124             } else {
125                 return false;
126             }
127         } else {
128             *chromaSubsampling = 3;
129         }
130     } else {
131         if (profile == 1 || profile == 3) {
132             *chromaSubsampling = 0;
133         }
134     }
135     return true;
136 }
137 
138 /**
139  * Build VP9 Codec Feature Metadata (CodecPrivate) to set CSD for VP9 codec.
140  * For reference:
141  * https://www.webmproject.org/docs/container/#vp9-codec-feature-metadata-codecprivate.
142  *
143  * @param meta          A pointer to AMediaFormat object.
144  * @param profile       The profile value of the VP9 stream.
145  * @param level         The VP9 codec level. If the level is unknown, pass -1 to this parameter.
146  * @param bitDepth      The bit depth of the luma and color components of the VP9 stream.
147  * @param chromaSubsampling  The chroma subsampling of the VP9 stream. If chromaSubsampling is
148  *                           unknown, pass -1 to this parameter.
149  * @return true if CodecPrivate is set as CSD of AMediaFormat object.
150  *
151  */
MakeVP9CodecPrivate(AMediaFormat * meta,int32_t profile,int32_t level,int32_t bitDepth,int32_t chromaSubsampling)152 static bool MakeVP9CodecPrivate(AMediaFormat* meta, int32_t profile, int32_t level,
153                                 int32_t bitDepth, int32_t chromaSubsampling) {
154     if (meta == nullptr) {
155         return false;
156     }
157 
158     std::vector<uint8_t> codecPrivate;
159     // Construct CodecPrivate in WebM format (ID | Length | Data).
160     // Helper lambda to add a field to the codec private data
161     auto addField = [&codecPrivate](uint8_t id, uint8_t value) {
162         codecPrivate.push_back(id);
163         codecPrivate.push_back(0x01);  // Length is always 1
164         codecPrivate.push_back(value);
165     };
166 
167     // Add fields
168     addField(0x01, static_cast<uint8_t>(profile));
169     if (level >= 0) {
170         addField(0x02, static_cast<uint8_t>(level));
171     }
172     addField(0x03, static_cast<uint8_t>(bitDepth));
173     if (chromaSubsampling >= 0) {
174         addField(0x04, static_cast<uint8_t>(chromaSubsampling));
175     }
176     // Set CSD in the meta format
177     AMediaFormat_setBuffer(meta, AMEDIAFORMAT_KEY_CSD_0, codecPrivate.data(), codecPrivate.size());
178     return true;
179 }
180 
181 // The param data contains the first frame data, starting with the uncompressed frame
182 // header. This uncompressed header (refer section 6.2 of the VP9 bitstream spec) is
183 // used to parse profile, bitdepth and subsampling.
MakeVP9CodecSpecificDataFromFirstFrame(AMediaFormat * meta,const uint8_t * data,size_t size)184 bool MakeVP9CodecSpecificDataFromFirstFrame(AMediaFormat* meta, const uint8_t* data, size_t size) {
185     if (meta == nullptr || data == nullptr || size == 0) {
186         return false;
187     }
188 
189     ABitReader bits(data, size);
190 
191     // First 2 bits of the uncompressed header should be the frame_marker.
192     if (bits.getBits(2) != 0b10) {
193         return false;
194     }
195 
196     int32_t profileLowBit = bits.getBits(1);
197     int32_t profileHighBit = bits.getBits(1);
198     int32_t profile = profileHighBit * 2 + profileLowBit;
199 
200     // One reserved '0' bit if profile is 3.
201     if (profile == 3 && bits.getBits(1) != 0) {
202         return false;
203     }
204 
205     // If show_existing_frame is set, we get no more data. Since this is
206     // expected to be the first frame, we can return false which will cascade
207     // into ERROR_MALFORMED.
208     if (bits.getBits(1)) {
209         return false;
210     }
211 
212     int32_t frame_type = bits.getBits(1);
213 
214     // Upto 7 bits could be read till now, which were guaranteed to be available
215     // since size > 0. Check for bits available before reading them from now on.
216     if (bits.numBitsLeft() < 2) {
217         return false;
218     }
219 
220     int32_t show_frame = bits.getBits(1);
221     int32_t error_resilient_mode = bits.getBits(1);
222     int32_t bitDepth = 8;
223     int32_t chromaSubsampling = -1;
224 
225     if (frame_type == 0 /* KEY_FRAME */) {
226         // Check for sync code.
227         if (!isVp9SyncCode(bits)) {
228             return false;
229         }
230 
231         if (!getVp9BitdepthChromaSubSampling(bits, profile, &bitDepth, &chromaSubsampling)) {
232             return false;
233         }
234     } else {
235         int32_t intra_only = 0;
236         if (!show_frame) {
237             if (bits.numBitsLeft() < 1) {
238                 return false;
239             }
240             intra_only = bits.getBits(1);
241         }
242 
243         if (!error_resilient_mode) {
244             if (bits.numBitsLeft() < 2) {
245                 return false;
246             }
247             // ignore reset_frame_context
248             bits.skipBits(2);
249         }
250 
251         if (!intra_only) {
252             // Require first frame to be either KEY_FRAME or INTER_FRAME with intra_only set to true
253             return false;
254         }
255 
256         // Check for sync code.
257         if (!isVp9SyncCode(bits)) {
258             return false;
259         }
260 
261         if (profile > 0) {
262             if (!getVp9BitdepthChromaSubSampling(bits, profile, &bitDepth, &chromaSubsampling)) {
263                 return false;
264             }
265         } else {
266             bitDepth = 8;
267             chromaSubsampling = 3;
268         }
269     }
270     int32_t csdSize = 6;
271     if (chromaSubsampling != -1) {
272         csdSize += 3;
273     }
274     // As level is not present in first frame build CodecPrivate without it.
275     return MakeVP9CodecPrivate(meta, profile, -1, bitDepth, chromaSubsampling);
276 }
277 
MakeVP9CodecPrivateFromVpcC(AMediaFormat * meta,const uint8_t * csdData,size_t size)278 bool MakeVP9CodecPrivateFromVpcC(AMediaFormat* meta, const uint8_t* csdData, size_t size) {
279     if (meta == nullptr || csdData == nullptr || size < 12) {
280         return false;
281     }
282 
283     // Check the first 4 bytes (VersionAndFlags) if they match the required value.
284     if (csdData[0] != 0x01 || csdData[1] != 0x00 || csdData[2] != 0x00 || csdData[3] != 0x00) {
285         return false;
286     }
287 
288     // Create VP9 Codec Feature Metadata (CodecPrivate) that can be parsed.
289     // https://www.webmproject.org/docs/container/#vp9-codec-feature-metadata-codecprivate
290     const uint8_t* vpcCData = csdData + 4;  // Skip the first 4 bytes (VersionAndFlags)
291 
292     int32_t profile = vpcCData[0];
293     int32_t level = vpcCData[1];
294     int32_t bitDepth = (vpcCData[2] >> 4) & 0x0F;           // Bit Depth (4 bits).
295     int32_t chromaSubsampling = (vpcCData[2] >> 1) & 0x07;  // Chroma Subsampling (3 bits).
296     return MakeVP9CodecPrivate(meta, profile, level, bitDepth, chromaSubsampling);
297 }
298 
MakeAACCodecSpecificData(MetaDataBase & meta,const uint8_t * data,size_t size)299 bool MakeAACCodecSpecificData(MetaDataBase &meta, const uint8_t *data, size_t size) {
300     if (data == nullptr || size < 7) {
301         return false;
302     }
303 
304     ABitReader bits(data, size);
305 
306     // adts_fixed_header
307 
308     if (bits.getBits(12) != 0xfffu) {
309         ALOGE("Wrong atds_fixed_header");
310         return false;
311     }
312 
313     bits.skipBits(4);  // ID, layer, protection_absent
314 
315     unsigned profile = bits.getBits(2);
316     if (profile == 3u) {
317         ALOGE("profile should not be 3");
318         return false;
319     }
320     unsigned sampling_freq_index = bits.getBits(4);
321     bits.getBits(1);  // private_bit
322     unsigned channel_configuration = bits.getBits(3);
323     if (channel_configuration == 0u) {
324         ALOGE("channel_config should not be 0");
325         return false;
326     }
327 
328     if (!MakeAACCodecSpecificData(
329             meta, profile, sampling_freq_index, channel_configuration)) {
330         return false;
331     }
332 
333     meta.setInt32(kKeyIsADTS, true);
334     return true;
335 }
336 
/**
 * Writes an ESDS blob for AAC into a caller-provided buffer.
 *
 * The blob is a fixed 22-byte descriptor prefix followed by the two-byte
 * AudioSpecificInfo built from the arguments.
 *
 * @param csd                    Out: destination buffer.
 * @param esds_size              In: capacity of |csd| in bytes. Out: number of bytes
 *                               written (only updated on success).
 * @param profile                In: profile value; profile + 1 is stored as the
 *                               audioObjectType.
 * @param sampling_freq_index    In: index into the sampling frequency table (0..11).
 * @param channel_configuration  In: channel configuration.
 * @param sampling_rate          Out: sampling rate in Hz for |sampling_freq_index|. It is
 *                               written even when the buffer later proves too small.
 * @return false if |sampling_freq_index| is above 11 or |csd| is too small, true otherwise.
 */
bool MakeAACCodecSpecificData(
        uint8_t *csd, /* out */
        size_t *esds_size, /* in/out */
        unsigned profile, /* in */
        unsigned sampling_freq_index, /* in */
        unsigned channel_configuration, /* in */
        int32_t *sampling_rate /* out */
) {
    static const int32_t kSampleRatesHz[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000
    };
    const size_t kNumSampleRates = sizeof(kSampleRatesHz) / sizeof(kSampleRatesHz[0]);

    if (sampling_freq_index >= kNumSampleRates) {
        return false;
    }
    *sampling_rate = kSampleRatesHz[sampling_freq_index];

    static const uint8_t kEsdsPrefix[] = {
        0x03, 22,
        0x00, 0x00,     // ES_ID
        0x00,           // streamDependenceFlag, URL_Flag, OCRstreamFlag

        0x04, 17,
        0x40,                       // Audio ISO/IEC 14496-3
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,

        0x05, 2,
        // AudioSpecificInfo follows
    };
    const size_t prefixLen = sizeof(kEsdsPrefix);
    const size_t totalLen = prefixLen + 2;

    if (*esds_size < totalLen) {
        return false;
    }

    memcpy(csd, kEsdsPrefix, prefixLen);

    // AudioSpecificInfo bit layout: oooo offf fccc c000
    // o - audioObjectType
    // f - samplingFreqIndex
    // c - channelConfig
    uint8_t *audioSpecificInfo = csd + prefixLen;
    audioSpecificInfo[0] = ((profile + 1) << 3) | (sampling_freq_index >> 1);
    audioSpecificInfo[1] = ((sampling_freq_index & 1u) << 7) | (channel_configuration << 3);

    *esds_size = totalLen;
    return true;
}
389 
MakeAACCodecSpecificData(AMediaFormat * meta,unsigned profile,unsigned sampling_freq_index,unsigned channel_configuration)390 bool MakeAACCodecSpecificData(AMediaFormat *meta, unsigned profile, unsigned sampling_freq_index,
391         unsigned channel_configuration) {
392 
393     if(sampling_freq_index > 11u) {
394         return false;
395     }
396 
397     uint8_t csd[2];
398     csd[0] = ((profile + 1) << 3) | (sampling_freq_index >> 1);
399     csd[1] = ((sampling_freq_index << 7) & 0x80) | (channel_configuration << 3);
400 
401     static const int32_t kSamplingFreq[] = {
402         96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
403         16000, 12000, 11025, 8000
404     };
405     int32_t sampleRate = kSamplingFreq[sampling_freq_index];
406 
407     AMediaFormat_setBuffer(meta, AMEDIAFORMAT_KEY_CSD_0, csd, sizeof(csd));
408     AMediaFormat_setString(meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AAC);
409     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
410     AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channel_configuration);
411 
412     return true;
413 }
414 
MakeAACCodecSpecificData(MetaDataBase & meta,unsigned profile,unsigned sampling_freq_index,unsigned channel_configuration)415 bool MakeAACCodecSpecificData(
416         MetaDataBase &meta,
417         unsigned profile, unsigned sampling_freq_index,
418         unsigned channel_configuration) {
419 
420     uint8_t csd[24];
421     size_t csdSize = sizeof(csd);
422     int32_t sampleRate;
423 
424     if (!MakeAACCodecSpecificData(csd, &csdSize, profile, sampling_freq_index,
425             channel_configuration, &sampleRate)) {
426         return false;
427     }
428 
429     meta.setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AAC);
430 
431     meta.setInt32(kKeySampleRate, sampleRate);
432     meta.setInt32(kKeyChannelCount, channel_configuration);
433     meta.setData(kKeyESDS, 0, csd, csdSize);
434     return true;
435 }
436 
437 
extractAlbumArt(AMediaFormat * fileMeta,const void * data,size_t size)438 static void extractAlbumArt(
439         AMediaFormat *fileMeta, const void *data, size_t size) {
440     ALOGV("extractAlbumArt from '%s'", (const char *)data);
441 
442     size_t inLen = strnlen((const char *)data, size);
443     size_t flacSize = inLen / 4 * 3;
444     uint8_t *flac = new uint8_t[flacSize];
445     if (!decodeBase64(flac, &flacSize, (const char*)data)) {
446         ALOGE("malformed base64 encoded data.");
447         delete[] flac;
448         return;
449     }
450 
451     ALOGV("got flac of size %zu", flacSize);
452 
453     uint32_t picType;
454     uint32_t typeLen;
455     uint32_t descLen;
456     uint32_t dataLen;
457     char type[128];
458 
459     if (flacSize < 8) {
460         delete[] flac;
461         return;
462     }
463 
464     picType = U32_AT(flac);
465 
466     if (picType != 3) {
467         // This is not a front cover.
468         delete[] flac;
469         return;
470     }
471 
472     typeLen = U32_AT(&flac[4]);
473     if (typeLen > sizeof(type) - 1) {
474         delete[] flac;
475         return;
476     }
477 
478     // we've already checked above that flacSize >= 8
479     if (flacSize - 8 < typeLen) {
480         delete[] flac;
481         return;
482     }
483 
484     memcpy(type, &flac[8], typeLen);
485     type[typeLen] = '\0';
486 
487     ALOGV("picType = %d, type = '%s'", picType, type);
488 
489     if (!strcmp(type, "-->")) {
490         // This is not inline cover art, but an external url instead.
491         delete[] flac;
492         return;
493     }
494 
495     if (flacSize < 32 || flacSize - 32 < typeLen) {
496         delete[] flac;
497         return;
498     }
499 
500     descLen = U32_AT(&flac[8 + typeLen]);
501     if (flacSize - 32 - typeLen < descLen) {
502         delete[] flac;
503         return;
504     }
505 
506     dataLen = U32_AT(&flac[8 + typeLen + 4 + descLen + 16]);
507 
508     // we've already checked above that (flacSize - 32 - typeLen - descLen) >= 0
509     if (flacSize - 32 - typeLen - descLen < dataLen) {
510         delete[] flac;
511         return;
512     }
513 
514     ALOGV("got image data, %zu trailing bytes",
515          flacSize - 32 - typeLen - descLen - dataLen);
516 
517     AMediaFormat_setBuffer(fileMeta, AMEDIAFORMAT_KEY_ALBUMART,
518             &flac[8 + typeLen + 4 + descLen + 20], dataLen);
519 
520     delete[] flac;
521 }
522 
parseVorbisComment(AMediaFormat * fileMeta,const char * comment,size_t commentLength)523 void parseVorbisComment(
524         AMediaFormat *fileMeta, const char *comment, size_t commentLength) {
525     // Haptic tag is only kept here as it will only be used in extractor to generate channel mask.
526     struct {
527         const char *const mTag;
528         const char *mKey;
529     } kMap[] = {
530         { "TITLE", AMEDIAFORMAT_KEY_TITLE },
531         { "ARTIST", AMEDIAFORMAT_KEY_ARTIST },
532         { "ALBUMARTIST", AMEDIAFORMAT_KEY_ALBUMARTIST },
533         { "ALBUM ARTIST", AMEDIAFORMAT_KEY_ALBUMARTIST },
534         { "COMPILATION", AMEDIAFORMAT_KEY_COMPILATION },
535         { "ALBUM", AMEDIAFORMAT_KEY_ALBUM },
536         { "COMPOSER", AMEDIAFORMAT_KEY_COMPOSER },
537         { "GENRE", AMEDIAFORMAT_KEY_GENRE },
538         { "AUTHOR", AMEDIAFORMAT_KEY_AUTHOR },
539         { "TRACKNUMBER", AMEDIAFORMAT_KEY_CDTRACKNUMBER },
540         { "DISCNUMBER", AMEDIAFORMAT_KEY_DISCNUMBER },
541         { "DATE", AMEDIAFORMAT_KEY_DATE },
542         { "YEAR", AMEDIAFORMAT_KEY_YEAR },
543         { "LYRICIST", AMEDIAFORMAT_KEY_LYRICIST },
544         { "METADATA_BLOCK_PICTURE", AMEDIAFORMAT_KEY_ALBUMART },
545         { "ANDROID_LOOP", AMEDIAFORMAT_KEY_LOOP },
546         { "ANDROID_HAPTIC", AMEDIAFORMAT_KEY_HAPTIC_CHANNEL_COUNT },
547     };
548 
549         for (size_t j = 0; j < sizeof(kMap) / sizeof(kMap[0]); ++j) {
550             size_t tagLen = strlen(kMap[j].mTag);
551             if (!strncasecmp(kMap[j].mTag, comment, tagLen)
552                     && comment[tagLen] == '=') {
553                 if (kMap[j].mKey == AMEDIAFORMAT_KEY_ALBUMART) {
554                     extractAlbumArt(
555                             fileMeta,
556                             &comment[tagLen + 1],
557                             commentLength - tagLen - 1);
558                 } else if (kMap[j].mKey == AMEDIAFORMAT_KEY_LOOP) {
559                     if (!strcasecmp(&comment[tagLen + 1], "true")) {
560                         AMediaFormat_setInt32(fileMeta, AMEDIAFORMAT_KEY_LOOP, 1);
561                     }
562                 } else if (kMap[j].mKey == AMEDIAFORMAT_KEY_HAPTIC_CHANNEL_COUNT) {
563                     char *end;
564                     errno = 0;
565                     const int hapticChannelCount = strtol(&comment[tagLen + 1], &end, 10);
566                     if (errno == 0) {
567                         AMediaFormat_setInt32(fileMeta, kMap[j].mKey, hapticChannelCount);
568                     } else {
569                         ALOGE("Error(%d) when parsing haptic channel count", errno);
570                     }
571                 } else {
572                     AMediaFormat_setString(fileMeta, kMap[j].mKey, &comment[tagLen + 1]);
573                 }
574             }
575         }
576 
577 }
578 
579 }  // namespace android
580