xref: /aosp_15_r20/external/perfetto/src/trace_processor/util/zip_reader.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/util/zip_reader.h"
18 
19 #include <cstdint>
20 #include <cstring>
21 #include <ctime>
22 #include <limits>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "perfetto/base/build_config.h"
29 #include "perfetto/base/logging.h"
30 #include "perfetto/base/status.h"
31 #include "perfetto/base/time.h"
32 #include "perfetto/ext/base/status_or.h"
33 #include "perfetto/ext/base/string_view.h"
34 #include "perfetto/ext/base/utils.h"
35 #include "perfetto/trace_processor/trace_blob_view.h"
36 #include "src/trace_processor/util/gzip_utils.h"
37 #include "src/trace_processor/util/status_macros.h"
38 #include "src/trace_processor/util/streaming_line_reader.h"
39 
40 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
41 #include <zconf.h>
42 #include <zlib.h>
43 #endif
44 
45 namespace perfetto::trace_processor::util {
46 
47 namespace {
48 
// Magic numbers that open each kind of record. When stored little-endian the
// first two bytes are 0x50 0x4b, i.e. the ASCII letters "PK".
constexpr uint32_t kFileHeaderSig = 0x04034B50u;
constexpr uint32_t kCentralDirectorySig = 0x02014B50u;
constexpr uint32_t kDataDescriptorSig = 0x08074B50u;

// A data descriptor is made of four 32-bit fields: signature, crc,
// compressed size and uncompressed size.
constexpr uint32_t kDataDescriptorSize =
    static_cast<uint32_t>(4 * sizeof(uint32_t));
57 
// Bits of the "general purpose bit flag" field of a local file header.
// Every bit that is not explicitly named here is folded into kUnknown, which
// the header parser uses to reject archives relying on features it does not
// understand.
enum GeneralPurposeBitFlag : uint32_t {
  kEncrypted = 1u << 0,
  k8kSlidingDictionary = 1u << 1,
  kShannonFano = 1u << 2,
  kDataDescriptor = 1u << 3,
  kLanguageEncoding = 1u << 11,
  kUnknown = ~(kEncrypted | k8kSlidingDictionary | kShannonFano |
               kDataDescriptor | kLanguageEncoding),
};
67 
// Values of the header's "compression" field that this reader accepts.
// Declared constexpr for consistency with the other file-local constants.
constexpr uint16_t kNoCompression = 0;
constexpr uint16_t kDeflate = 8;
71 
72 template <typename T>
ReadAndAdvance(const uint8_t ** ptr)73 T ReadAndAdvance(const uint8_t** ptr) {
74   T res{};
75   memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
76   *ptr += sizeof(T);
77   return res;
78 }
79 
80 }  // namespace
81 
82 ZipReader::ZipReader() = default;
83 ZipReader::~ZipReader() = default;
84 
Parse(TraceBlobView tbv)85 base::Status ZipReader::Parse(TraceBlobView tbv) {
86   reader_.PushBack(std::move(tbv));
87 
88   // .zip file sequence:
89   // [ File 1 header (30 bytes) ]
90   // [ File 1 name ]
91   // [ File 1 extra fields (optional) ]
92   // [ File 1 compressed payload ]
93   // [ File 1 data descriptor (optional) ]
94   //
95   // [ File 2 header (30 bytes) ]
96   // [ File 2 name ]
97   // [ File 2 extra fields (optional) ]
98   // [ File 2 compressed payload ]
99   // [ File 2 data descriptor (optional) ]
100   //
101   // [ Central directory (ignored) ]
102 
103   for (;;) {
104     auto state = cur_.parse_state;
105     switch (state) {
106       case FileParseState::kHeader:
107         RETURN_IF_ERROR(TryParseHeader());
108         break;
109       case FileParseState::kFilename:
110         RETURN_IF_ERROR(TryParseFilename());
111         break;
112       case FileParseState::kSkipBytes:
113         RETURN_IF_ERROR(TrySkipBytes());
114         break;
115       case FileParseState::kCompressedData:
116         RETURN_IF_ERROR(TryParseCompressedData());
117         break;
118     }
119     if (state == cur_.parse_state) {
120       return base::OkStatus();
121     }
122   }
123 }
124 
TryParseHeader()125 base::Status ZipReader::TryParseHeader() {
126   PERFETTO_CHECK(cur_.hdr.signature == 0);
127 
128   std::optional<TraceBlobView> hdr =
129       reader_.SliceOff(reader_.start_offset(), kZipFileHdrSize);
130   if (!hdr) {
131     return base::OkStatus();
132   }
133   PERFETTO_CHECK(reader_.PopFrontBytes(kZipFileHdrSize));
134 
135   const uint8_t* hdr_it = hdr->data();
136   cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
137   if (cur_.hdr.signature == kCentralDirectorySig) {
138     // We reached the central directory at the end of file.
139     // We don't make any use here of the central directory, so we just
140     // ignore everything else after this point.
141     // Here we abuse the ZipFile class a bit. The Central Directory header
142     // has a different layout. The first 4 bytes (signature) match, the
143     // rest don't but the sizeof(central dir) is >> sizeof(file header) so
144     // we are fine.
145     // We do this rather than retuning because we could have further
146     // Parse() calls (imagine parsing bytes one by one), and we need a way
147     // to keep track of the "keep eating input without doing anything".
148     cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
149     cur_.parse_state = FileParseState::kSkipBytes;
150     return base::OkStatus();
151   }
152   if (cur_.hdr.signature != kFileHeaderSig) {
153     return base::ErrStatus(
154         "Invalid signature found at offset 0x%zx. Actual=0x%x, "
155         "expected=0x%x",
156         reader_.start_offset(), cur_.hdr.signature, kFileHeaderSig);
157   }
158 
159   cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
160   cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
161   cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
162   cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
163   cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
164   cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
165   cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
166   cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
167   cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
168   cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
169   PERFETTO_DCHECK(static_cast<size_t>(hdr_it - hdr->data()) == kZipFileHdrSize);
170 
171   // We support only up to version 2.0 (20). Higher versions define
172   // more advanced features that we don't support (zip64 extensions,
173   // encryption).
174   // Disallow encryption or any flags we don't know how to handle.
175   if ((cur_.hdr.version > 20) || (cur_.hdr.flags & kEncrypted) ||
176       (cur_.hdr.flags & kUnknown)) {
177     return base::ErrStatus(
178         "Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
179         reader_.start_offset(), cur_.hdr.version, cur_.hdr.flags);
180   }
181   if (cur_.hdr.compression != kNoCompression &&
182       cur_.hdr.compression != kDeflate) {
183     return base::ErrStatus(
184         "Unsupported compression type at offset 0x%zx. type=%x. Only "
185         "deflate and no compression are supported.",
186         reader_.start_offset(), cur_.hdr.compression);
187   }
188   if (cur_.hdr.flags & kDataDescriptor && cur_.hdr.compression != kDeflate) {
189     return base::ErrStatus(
190         "Unsupported compression type at offset 0x%zx. type=%x. Only "
191         "deflate supported for ZIPs compressed in a streaming fashion.",
192         reader_.start_offset(), cur_.hdr.compression);
193   }
194   cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
195   cur_.parse_state = FileParseState::kFilename;
196   return base::OkStatus();
197 }
198 
TryParseFilename()199 base::Status ZipReader::TryParseFilename() {
200   if (cur_.hdr.fname_len == 0) {
201     cur_.parse_state = FileParseState::kSkipBytes;
202     return base::OkStatus();
203   }
204   PERFETTO_CHECK(cur_.hdr.fname.empty());
205 
206   std::optional<TraceBlobView> fname_tbv =
207       reader_.SliceOff(reader_.start_offset(), cur_.hdr.fname_len);
208   if (!fname_tbv) {
209     return base::OkStatus();
210   }
211   PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.fname_len));
212   cur_.hdr.fname = std::string(reinterpret_cast<const char*>(fname_tbv->data()),
213                                cur_.hdr.fname_len);
214   cur_.parse_state = FileParseState::kSkipBytes;
215   return base::OkStatus();
216 }
217 
TrySkipBytes()218 base::Status ZipReader::TrySkipBytes() {
219   if (cur_.ignore_bytes_after_fname == 0) {
220     cur_.parse_state = FileParseState::kCompressedData;
221     return base::OkStatus();
222   }
223 
224   size_t avail = reader_.avail();
225   if (avail < cur_.ignore_bytes_after_fname) {
226     PERFETTO_CHECK(reader_.PopFrontBytes(avail));
227     cur_.ignore_bytes_after_fname -= avail;
228     return base::OkStatus();
229   }
230   PERFETTO_CHECK(reader_.PopFrontBytes(cur_.ignore_bytes_after_fname));
231   cur_.ignore_bytes_after_fname = 0;
232   cur_.parse_state = FileParseState::kCompressedData;
233   return base::OkStatus();
234 }
235 
TryParseCompressedData()236 base::Status ZipReader::TryParseCompressedData() {
237   // Build up the compressed payload
238   if (cur_.hdr.flags & kDataDescriptor) {
239     if (!cur_.compressed) {
240       ASSIGN_OR_RETURN(auto compressed, TryParseUnsizedCompressedData());
241       if (!compressed) {
242         return base::OkStatus();
243       }
244       cur_.compressed = std::move(compressed);
245     }
246 
247     std::optional<TraceBlobView> data_descriptor =
248         reader_.SliceOff(reader_.start_offset(), kDataDescriptorSize);
249     if (!data_descriptor) {
250       return base::OkStatus();
251     }
252     PERFETTO_CHECK(reader_.PopFrontBytes(kDataDescriptorSize));
253 
254     const auto* desc_it = data_descriptor->data();
255     auto desc_sig = ReadAndAdvance<uint32_t>(&desc_it);
256     if (desc_sig != kDataDescriptorSig) {
257       return base::ErrStatus(
258           "Invalid signature found at offset 0x%zx. Actual=0x%x, "
259           "expected=0x%x",
260           reader_.start_offset(), desc_sig, kDataDescriptorSig);
261     }
262     cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&desc_it);
263     cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&desc_it);
264     cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&desc_it);
265   } else {
266     PERFETTO_CHECK(!cur_.compressed);
267     std::optional<TraceBlobView> raw_compressed =
268         reader_.SliceOff(reader_.start_offset(), cur_.hdr.compressed_size);
269     if (!raw_compressed) {
270       return base::OkStatus();
271     }
272     cur_.compressed = *std::move(raw_compressed);
273     PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.compressed_size));
274   }
275 
276   // We have accumulated the whole header, file name and compressed payload.
277   PERFETTO_CHECK(cur_.compressed);
278   PERFETTO_CHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
279   PERFETTO_CHECK(cur_.compressed->size() == cur_.hdr.compressed_size);
280   PERFETTO_CHECK(cur_.ignore_bytes_after_fname == 0);
281 
282   files_.emplace_back();
283   files_.back().hdr_ = std::move(cur_.hdr);
284   files_.back().compressed_data_ = *std::move(cur_.compressed);
285   cur_ = FileParseState();  // Reset the parsing state for the next file.
286   return base::OkStatus();
287 }  // namespace perfetto::trace_processor::util
288 
289 base::StatusOr<std::optional<TraceBlobView>>
TryParseUnsizedCompressedData()290 ZipReader::TryParseUnsizedCompressedData() {
291   PERFETTO_CHECK(cur_.hdr.compression == kDeflate);
292 
293   auto start = reader_.start_offset() + cur_.decompressor_bytes_fed;
294   auto end = reader_.end_offset();
295   auto slice = reader_.SliceOff(start, end - start);
296   PERFETTO_CHECK(slice);
297   auto res_code = cur_.decompressor.FeedAndExtract(slice->data(), slice->size(),
298                                                    [](const uint8_t*, size_t) {
299                                                      // Intentionally do
300                                                      // nothing: we are only
301                                                      // looking for the bounds
302                                                      // of the deflate stream,
303                                                      // we are not actually
304                                                      // interested in the
305                                                      // output.
306                                                    });
307   switch (res_code) {
308     case GzipDecompressor::ResultCode::kNeedsMoreInput:
309       cur_.decompressor_bytes_fed += slice->size();
310       return {std::nullopt};
311     case GzipDecompressor::ResultCode::kError:
312       return base::ErrStatus(
313           "Failed decompressing stream in ZIP file at offset 0x%zx",
314           reader_.start_offset());
315     case GzipDecompressor::ResultCode::kOk:
316       PERFETTO_FATAL("Unexpected result code");
317     case GzipDecompressor::ResultCode::kEof:
318       break;
319   }
320   cur_.decompressor_bytes_fed += slice->size() - cur_.decompressor.AvailIn();
321   auto raw_compressed =
322       reader_.SliceOff(reader_.start_offset(), cur_.decompressor_bytes_fed);
323   PERFETTO_CHECK(raw_compressed);
324   PERFETTO_CHECK(reader_.PopFrontBytes(cur_.decompressor_bytes_fed));
325   return {std::move(raw_compressed)};
326 }
327 
Find(const std::string & path)328 ZipFile* ZipReader::Find(const std::string& path) {
329   for (ZipFile& zf : files_) {
330     if (zf.name() == path)
331       return &zf;
332   }
333   return nullptr;
334 }
335 
336 ZipFile::ZipFile() = default;
337 ZipFile::~ZipFile() = default;
338 ZipFile::ZipFile(ZipFile&& other) noexcept = default;
339 ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;
340 
Decompress(std::vector<uint8_t> * out_data) const341 base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
342   out_data->clear();
343   RETURN_IF_ERROR(DoDecompressionChecks());
344 
345   if (hdr_.compression == kNoCompression) {
346     const uint8_t* data = compressed_data_.data();
347     out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
348     return base::OkStatus();
349   }
350 
351   if (hdr_.uncompressed_size == 0) {
352     return base::OkStatus();
353   }
354 
355   PERFETTO_DCHECK(hdr_.compression == kDeflate);
356   GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
357   dec.Feed(compressed_data_.data(), hdr_.compressed_size);
358 
359   out_data->resize(hdr_.uncompressed_size);
360   auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
361   if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
362     return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
363                            static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
364                            hdr_.compressed_size, hdr_.uncompressed_size);
365   }
366   out_data->resize(dec_res.bytes_written);
367 
368 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
369   const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
370   auto crc_len = static_cast<::uInt>(out_data->size());
371   auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
372   if (actual_crc32 != hdr_.checksum) {
373     return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
374                            hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
375   }
376 #endif
377 
378   return base::OkStatus();
379 }
380 
DecompressLines(LinesCallback callback) const381 base::Status ZipFile::DecompressLines(LinesCallback callback) const {
382   using ResultCode = GzipDecompressor::ResultCode;
383   RETURN_IF_ERROR(DoDecompressionChecks());
384 
385   StreamingLineReader line_reader(std::move(callback));
386 
387   if (hdr_.compression == kNoCompression) {
388     line_reader.Tokenize(
389         base::StringView(reinterpret_cast<const char*>(compressed_data_.data()),
390                          hdr_.compressed_size));
391     return base::OkStatus();
392   }
393 
394   PERFETTO_DCHECK(hdr_.compression == kDeflate);
395   GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
396   dec.Feed(compressed_data_.data(), hdr_.compressed_size);
397 
398   static constexpr size_t kChunkSize = 32768;
399   GzipDecompressor::Result dec_res;
400   do {
401     auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
402     dec_res = dec.ExtractOutput(wptr, kChunkSize);
403     if (dec_res.ret == ResultCode::kError ||
404         dec_res.ret == ResultCode::kNeedsMoreInput) {
405       return base::ErrStatus("zlib decompression error on %s (%d)",
406                              name().c_str(), static_cast<int>(dec_res.ret));
407     }
408     PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
409     line_reader.EndWrite(dec_res.bytes_written);
410   } while (dec_res.ret == ResultCode::kOk);
411   return base::OkStatus();
412 }
413 
414 // Common logic for both Decompress() and DecompressLines().
DoDecompressionChecks() const415 base::Status ZipFile::DoDecompressionChecks() const {
416   if (hdr_.compression == kNoCompression) {
417     PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size);
418     return base::OkStatus();
419   }
420   if (hdr_.compression != kDeflate) {
421     return base::ErrStatus("Zip compression mode not supported (%u)",
422                            hdr_.compression);
423   }
424   if (!IsGzipSupported()) {
425     return base::ErrStatus(
426         "Cannot open zip file. Gzip is not enabled in the current build. "
427         "Rebuild with enable_perfetto_zlib=true");
428   }
429   return base::OkStatus();
430 }
431 
432 // Returns a 64-bit version of time_t, that is, the num seconds since the
433 // Epoch.
GetDatetime() const434 int64_t ZipFile::GetDatetime() const {
435   // Date: 7 bits year, 4 bits month, 5 bits day.
436   // Time: 5 bits hour, 6 bits minute, 5 bits second.
437   struct tm mdt {};
438   // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
439   mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;
440 
441   // As per the man page, the month ranges 0 to 11 (Jan = 0).
442   mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;
443 
444   // However, still according to the same man page, the day starts from 1.
445   mdt.tm_mday = hdr_.mdate & 0x1f;
446 
447   mdt.tm_hour = hdr_.mtime >> (16 - 5);
448   mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;
449 
450   // Seconds in the DOS format have only 5 bits, so they lose the last bit of
451   // resolution, hence the * 2.
452   mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
453   return base::TimeGm(&mdt);
454 }
455 
GetDatetimeStr() const456 std::string ZipFile::GetDatetimeStr() const {
457   char buf[32]{};
458   time_t secs = static_cast<time_t>(GetDatetime());
459   strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs));
460   buf[sizeof(buf) - 1] = '\0';
461   return buf;
462 }
463 
464 }  // namespace perfetto::trace_processor::util
465