1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/util/zip_reader.h"
18
19 #include <cstdint>
20 #include <cstring>
21 #include <ctime>
22 #include <limits>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27
28 #include "perfetto/base/build_config.h"
29 #include "perfetto/base/logging.h"
30 #include "perfetto/base/status.h"
31 #include "perfetto/base/time.h"
32 #include "perfetto/ext/base/status_or.h"
33 #include "perfetto/ext/base/string_view.h"
34 #include "perfetto/ext/base/utils.h"
35 #include "perfetto/trace_processor/trace_blob_view.h"
36 #include "src/trace_processor/util/gzip_utils.h"
37 #include "src/trace_processor/util/status_macros.h"
38 #include "src/trace_processor/util/streaming_line_reader.h"
39
40 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
41 #include <zconf.h>
42 #include <zlib.h>
43 #endif
44
45 namespace perfetto::trace_processor::util {
46
47 namespace {
48
// Signatures that open each kind of record. They are decoded as little-endian
// 32-bit values, so on disk each one starts with the bytes 0x50 0x4b ("PK").
constexpr uint32_t kFileHeaderSig = 0x04034b50;        // Per-file header.
constexpr uint32_t kCentralDirectorySig = 0x02014b50;  // Central directory.
constexpr uint32_t kDataDescriptorSize =
    static_cast<uint32_t>(4 * sizeof(uint32_t));
constexpr uint32_t kDataDescriptorSig = 0x08074b50;  // Data descriptor.

// kDataDescriptorSize (above) is the byte size of a data descriptor: four
// 32-bit words holding 1) signature, 2) crc, 3) compressed size and
// 4) uncompressed size.
57
// Bit masks for the 16-bit `flags` field of a file header. Each named
// enumerator is a single bit; kUnknown is the complement of all of them, i.e.
// a mask selecting every bit this file has no name for.
enum GeneralPurposeBitFlag : uint32_t {
  kEncrypted = 0x0001u,            // Bit 0.
  k8kSlidingDictionary = 0x0002u,  // Bit 1.
  kShannonFaro = 0x0004u,          // Bit 2.
  kDataDescriptor = 0x0008u,       // Bit 3.
  kLangageEncoding = 0x0800u,      // Bit 11.
  kUnknown = ~(kEncrypted | k8kSlidingDictionary | kShannonFaro |
               kDataDescriptor | kLangageEncoding),
};
67
// Values of the header `compression` field that this reader accepts.
constexpr uint16_t kNoCompression = 0;  // Payload is copied out as-is.
constexpr uint16_t kDeflate = 8;        // Payload is a raw deflate stream.
71
72 template <typename T>
ReadAndAdvance(const uint8_t ** ptr)73 T ReadAndAdvance(const uint8_t** ptr) {
74 T res{};
75 memcpy(base::AssumeLittleEndian(&res), *ptr, sizeof(T));
76 *ptr += sizeof(T);
77 return res;
78 }
79
80 } // namespace
81
82 ZipReader::ZipReader() = default;
83 ZipReader::~ZipReader() = default;
84
Parse(TraceBlobView tbv)85 base::Status ZipReader::Parse(TraceBlobView tbv) {
86 reader_.PushBack(std::move(tbv));
87
88 // .zip file sequence:
89 // [ File 1 header (30 bytes) ]
90 // [ File 1 name ]
91 // [ File 1 extra fields (optional) ]
92 // [ File 1 compressed payload ]
93 // [ File 1 data descriptor (optional) ]
94 //
95 // [ File 2 header (30 bytes) ]
96 // [ File 2 name ]
97 // [ File 2 extra fields (optional) ]
98 // [ File 2 compressed payload ]
99 // [ File 2 data descriptor (optional) ]
100 //
101 // [ Central directory (ignored) ]
102
103 for (;;) {
104 auto state = cur_.parse_state;
105 switch (state) {
106 case FileParseState::kHeader:
107 RETURN_IF_ERROR(TryParseHeader());
108 break;
109 case FileParseState::kFilename:
110 RETURN_IF_ERROR(TryParseFilename());
111 break;
112 case FileParseState::kSkipBytes:
113 RETURN_IF_ERROR(TrySkipBytes());
114 break;
115 case FileParseState::kCompressedData:
116 RETURN_IF_ERROR(TryParseCompressedData());
117 break;
118 }
119 if (state == cur_.parse_state) {
120 return base::OkStatus();
121 }
122 }
123 }
124
TryParseHeader()125 base::Status ZipReader::TryParseHeader() {
126 PERFETTO_CHECK(cur_.hdr.signature == 0);
127
128 std::optional<TraceBlobView> hdr =
129 reader_.SliceOff(reader_.start_offset(), kZipFileHdrSize);
130 if (!hdr) {
131 return base::OkStatus();
132 }
133 PERFETTO_CHECK(reader_.PopFrontBytes(kZipFileHdrSize));
134
135 const uint8_t* hdr_it = hdr->data();
136 cur_.hdr.signature = ReadAndAdvance<uint32_t>(&hdr_it);
137 if (cur_.hdr.signature == kCentralDirectorySig) {
138 // We reached the central directory at the end of file.
139 // We don't make any use here of the central directory, so we just
140 // ignore everything else after this point.
141 // Here we abuse the ZipFile class a bit. The Central Directory header
142 // has a different layout. The first 4 bytes (signature) match, the
143 // rest don't but the sizeof(central dir) is >> sizeof(file header) so
144 // we are fine.
145 // We do this rather than retuning because we could have further
146 // Parse() calls (imagine parsing bytes one by one), and we need a way
147 // to keep track of the "keep eating input without doing anything".
148 cur_.ignore_bytes_after_fname = std::numeric_limits<size_t>::max();
149 cur_.parse_state = FileParseState::kSkipBytes;
150 return base::OkStatus();
151 }
152 if (cur_.hdr.signature != kFileHeaderSig) {
153 return base::ErrStatus(
154 "Invalid signature found at offset 0x%zx. Actual=0x%x, "
155 "expected=0x%x",
156 reader_.start_offset(), cur_.hdr.signature, kFileHeaderSig);
157 }
158
159 cur_.hdr.version = ReadAndAdvance<uint16_t>(&hdr_it);
160 cur_.hdr.flags = ReadAndAdvance<uint16_t>(&hdr_it);
161 cur_.hdr.compression = ReadAndAdvance<uint16_t>(&hdr_it);
162 cur_.hdr.mtime = ReadAndAdvance<uint16_t>(&hdr_it);
163 cur_.hdr.mdate = ReadAndAdvance<uint16_t>(&hdr_it);
164 cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&hdr_it);
165 cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
166 cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&hdr_it);
167 cur_.hdr.fname_len = ReadAndAdvance<uint16_t>(&hdr_it);
168 cur_.hdr.extra_field_len = ReadAndAdvance<uint16_t>(&hdr_it);
169 PERFETTO_DCHECK(static_cast<size_t>(hdr_it - hdr->data()) == kZipFileHdrSize);
170
171 // We support only up to version 2.0 (20). Higher versions define
172 // more advanced features that we don't support (zip64 extensions,
173 // encryption).
174 // Disallow encryption or any flags we don't know how to handle.
175 if ((cur_.hdr.version > 20) || (cur_.hdr.flags & kEncrypted) ||
176 (cur_.hdr.flags & kUnknown)) {
177 return base::ErrStatus(
178 "Unsupported zip features at offset 0x%zx. version=%x, flags=%x",
179 reader_.start_offset(), cur_.hdr.version, cur_.hdr.flags);
180 }
181 if (cur_.hdr.compression != kNoCompression &&
182 cur_.hdr.compression != kDeflate) {
183 return base::ErrStatus(
184 "Unsupported compression type at offset 0x%zx. type=%x. Only "
185 "deflate and no compression are supported.",
186 reader_.start_offset(), cur_.hdr.compression);
187 }
188 if (cur_.hdr.flags & kDataDescriptor && cur_.hdr.compression != kDeflate) {
189 return base::ErrStatus(
190 "Unsupported compression type at offset 0x%zx. type=%x. Only "
191 "deflate supported for ZIPs compressed in a streaming fashion.",
192 reader_.start_offset(), cur_.hdr.compression);
193 }
194 cur_.ignore_bytes_after_fname = cur_.hdr.extra_field_len;
195 cur_.parse_state = FileParseState::kFilename;
196 return base::OkStatus();
197 }
198
TryParseFilename()199 base::Status ZipReader::TryParseFilename() {
200 if (cur_.hdr.fname_len == 0) {
201 cur_.parse_state = FileParseState::kSkipBytes;
202 return base::OkStatus();
203 }
204 PERFETTO_CHECK(cur_.hdr.fname.empty());
205
206 std::optional<TraceBlobView> fname_tbv =
207 reader_.SliceOff(reader_.start_offset(), cur_.hdr.fname_len);
208 if (!fname_tbv) {
209 return base::OkStatus();
210 }
211 PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.fname_len));
212 cur_.hdr.fname = std::string(reinterpret_cast<const char*>(fname_tbv->data()),
213 cur_.hdr.fname_len);
214 cur_.parse_state = FileParseState::kSkipBytes;
215 return base::OkStatus();
216 }
217
TrySkipBytes()218 base::Status ZipReader::TrySkipBytes() {
219 if (cur_.ignore_bytes_after_fname == 0) {
220 cur_.parse_state = FileParseState::kCompressedData;
221 return base::OkStatus();
222 }
223
224 size_t avail = reader_.avail();
225 if (avail < cur_.ignore_bytes_after_fname) {
226 PERFETTO_CHECK(reader_.PopFrontBytes(avail));
227 cur_.ignore_bytes_after_fname -= avail;
228 return base::OkStatus();
229 }
230 PERFETTO_CHECK(reader_.PopFrontBytes(cur_.ignore_bytes_after_fname));
231 cur_.ignore_bytes_after_fname = 0;
232 cur_.parse_state = FileParseState::kCompressedData;
233 return base::OkStatus();
234 }
235
TryParseCompressedData()236 base::Status ZipReader::TryParseCompressedData() {
237 // Build up the compressed payload
238 if (cur_.hdr.flags & kDataDescriptor) {
239 if (!cur_.compressed) {
240 ASSIGN_OR_RETURN(auto compressed, TryParseUnsizedCompressedData());
241 if (!compressed) {
242 return base::OkStatus();
243 }
244 cur_.compressed = std::move(compressed);
245 }
246
247 std::optional<TraceBlobView> data_descriptor =
248 reader_.SliceOff(reader_.start_offset(), kDataDescriptorSize);
249 if (!data_descriptor) {
250 return base::OkStatus();
251 }
252 PERFETTO_CHECK(reader_.PopFrontBytes(kDataDescriptorSize));
253
254 const auto* desc_it = data_descriptor->data();
255 auto desc_sig = ReadAndAdvance<uint32_t>(&desc_it);
256 if (desc_sig != kDataDescriptorSig) {
257 return base::ErrStatus(
258 "Invalid signature found at offset 0x%zx. Actual=0x%x, "
259 "expected=0x%x",
260 reader_.start_offset(), desc_sig, kDataDescriptorSig);
261 }
262 cur_.hdr.checksum = ReadAndAdvance<uint32_t>(&desc_it);
263 cur_.hdr.compressed_size = ReadAndAdvance<uint32_t>(&desc_it);
264 cur_.hdr.uncompressed_size = ReadAndAdvance<uint32_t>(&desc_it);
265 } else {
266 PERFETTO_CHECK(!cur_.compressed);
267 std::optional<TraceBlobView> raw_compressed =
268 reader_.SliceOff(reader_.start_offset(), cur_.hdr.compressed_size);
269 if (!raw_compressed) {
270 return base::OkStatus();
271 }
272 cur_.compressed = *std::move(raw_compressed);
273 PERFETTO_CHECK(reader_.PopFrontBytes(cur_.hdr.compressed_size));
274 }
275
276 // We have accumulated the whole header, file name and compressed payload.
277 PERFETTO_CHECK(cur_.compressed);
278 PERFETTO_CHECK(cur_.hdr.fname.size() == cur_.hdr.fname_len);
279 PERFETTO_CHECK(cur_.compressed->size() == cur_.hdr.compressed_size);
280 PERFETTO_CHECK(cur_.ignore_bytes_after_fname == 0);
281
282 files_.emplace_back();
283 files_.back().hdr_ = std::move(cur_.hdr);
284 files_.back().compressed_data_ = *std::move(cur_.compressed);
285 cur_ = FileParseState(); // Reset the parsing state for the next file.
286 return base::OkStatus();
287 } // namespace perfetto::trace_processor::util
288
289 base::StatusOr<std::optional<TraceBlobView>>
TryParseUnsizedCompressedData()290 ZipReader::TryParseUnsizedCompressedData() {
291 PERFETTO_CHECK(cur_.hdr.compression == kDeflate);
292
293 auto start = reader_.start_offset() + cur_.decompressor_bytes_fed;
294 auto end = reader_.end_offset();
295 auto slice = reader_.SliceOff(start, end - start);
296 PERFETTO_CHECK(slice);
297 auto res_code = cur_.decompressor.FeedAndExtract(slice->data(), slice->size(),
298 [](const uint8_t*, size_t) {
299 // Intentionally do
300 // nothing: we are only
301 // looking for the bounds
302 // of the deflate stream,
303 // we are not actually
304 // interested in the
305 // output.
306 });
307 switch (res_code) {
308 case GzipDecompressor::ResultCode::kNeedsMoreInput:
309 cur_.decompressor_bytes_fed += slice->size();
310 return {std::nullopt};
311 case GzipDecompressor::ResultCode::kError:
312 return base::ErrStatus(
313 "Failed decompressing stream in ZIP file at offset 0x%zx",
314 reader_.start_offset());
315 case GzipDecompressor::ResultCode::kOk:
316 PERFETTO_FATAL("Unexpected result code");
317 case GzipDecompressor::ResultCode::kEof:
318 break;
319 }
320 cur_.decompressor_bytes_fed += slice->size() - cur_.decompressor.AvailIn();
321 auto raw_compressed =
322 reader_.SliceOff(reader_.start_offset(), cur_.decompressor_bytes_fed);
323 PERFETTO_CHECK(raw_compressed);
324 PERFETTO_CHECK(reader_.PopFrontBytes(cur_.decompressor_bytes_fed));
325 return {std::move(raw_compressed)};
326 }
327
Find(const std::string & path)328 ZipFile* ZipReader::Find(const std::string& path) {
329 for (ZipFile& zf : files_) {
330 if (zf.name() == path)
331 return &zf;
332 }
333 return nullptr;
334 }
335
336 ZipFile::ZipFile() = default;
337 ZipFile::~ZipFile() = default;
338 ZipFile::ZipFile(ZipFile&& other) noexcept = default;
339 ZipFile& ZipFile::operator=(ZipFile&& other) noexcept = default;
340
Decompress(std::vector<uint8_t> * out_data) const341 base::Status ZipFile::Decompress(std::vector<uint8_t>* out_data) const {
342 out_data->clear();
343 RETURN_IF_ERROR(DoDecompressionChecks());
344
345 if (hdr_.compression == kNoCompression) {
346 const uint8_t* data = compressed_data_.data();
347 out_data->insert(out_data->end(), data, data + hdr_.compressed_size);
348 return base::OkStatus();
349 }
350
351 if (hdr_.uncompressed_size == 0) {
352 return base::OkStatus();
353 }
354
355 PERFETTO_DCHECK(hdr_.compression == kDeflate);
356 GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
357 dec.Feed(compressed_data_.data(), hdr_.compressed_size);
358
359 out_data->resize(hdr_.uncompressed_size);
360 auto dec_res = dec.ExtractOutput(out_data->data(), out_data->size());
361 if (dec_res.ret != GzipDecompressor::ResultCode::kEof) {
362 return base::ErrStatus("Zip decompression error (%d) on %s (c=%u, u=%u)",
363 static_cast<int>(dec_res.ret), hdr_.fname.c_str(),
364 hdr_.compressed_size, hdr_.uncompressed_size);
365 }
366 out_data->resize(dec_res.bytes_written);
367
368 #if PERFETTO_BUILDFLAG(PERFETTO_ZLIB)
369 const auto* crc_data = reinterpret_cast<const ::Bytef*>(out_data->data());
370 auto crc_len = static_cast<::uInt>(out_data->size());
371 auto actual_crc32 = static_cast<uint32_t>(::crc32(0u, crc_data, crc_len));
372 if (actual_crc32 != hdr_.checksum) {
373 return base::ErrStatus("Zip CRC32 failure on %s (actual: %x, expected: %x)",
374 hdr_.fname.c_str(), actual_crc32, hdr_.checksum);
375 }
376 #endif
377
378 return base::OkStatus();
379 }
380
DecompressLines(LinesCallback callback) const381 base::Status ZipFile::DecompressLines(LinesCallback callback) const {
382 using ResultCode = GzipDecompressor::ResultCode;
383 RETURN_IF_ERROR(DoDecompressionChecks());
384
385 StreamingLineReader line_reader(std::move(callback));
386
387 if (hdr_.compression == kNoCompression) {
388 line_reader.Tokenize(
389 base::StringView(reinterpret_cast<const char*>(compressed_data_.data()),
390 hdr_.compressed_size));
391 return base::OkStatus();
392 }
393
394 PERFETTO_DCHECK(hdr_.compression == kDeflate);
395 GzipDecompressor dec(GzipDecompressor::InputMode::kRawDeflate);
396 dec.Feed(compressed_data_.data(), hdr_.compressed_size);
397
398 static constexpr size_t kChunkSize = 32768;
399 GzipDecompressor::Result dec_res;
400 do {
401 auto* wptr = reinterpret_cast<uint8_t*>(line_reader.BeginWrite(kChunkSize));
402 dec_res = dec.ExtractOutput(wptr, kChunkSize);
403 if (dec_res.ret == ResultCode::kError ||
404 dec_res.ret == ResultCode::kNeedsMoreInput) {
405 return base::ErrStatus("zlib decompression error on %s (%d)",
406 name().c_str(), static_cast<int>(dec_res.ret));
407 }
408 PERFETTO_DCHECK(dec_res.bytes_written <= kChunkSize);
409 line_reader.EndWrite(dec_res.bytes_written);
410 } while (dec_res.ret == ResultCode::kOk);
411 return base::OkStatus();
412 }
413
414 // Common logic for both Decompress() and DecompressLines().
DoDecompressionChecks() const415 base::Status ZipFile::DoDecompressionChecks() const {
416 if (hdr_.compression == kNoCompression) {
417 PERFETTO_CHECK(hdr_.compressed_size == hdr_.uncompressed_size);
418 return base::OkStatus();
419 }
420 if (hdr_.compression != kDeflate) {
421 return base::ErrStatus("Zip compression mode not supported (%u)",
422 hdr_.compression);
423 }
424 if (!IsGzipSupported()) {
425 return base::ErrStatus(
426 "Cannot open zip file. Gzip is not enabled in the current build. "
427 "Rebuild with enable_perfetto_zlib=true");
428 }
429 return base::OkStatus();
430 }
431
432 // Returns a 64-bit version of time_t, that is, the num seconds since the
433 // Epoch.
GetDatetime() const434 int64_t ZipFile::GetDatetime() const {
435 // Date: 7 bits year, 4 bits month, 5 bits day.
436 // Time: 5 bits hour, 6 bits minute, 5 bits second.
437 struct tm mdt {};
438 // As per man 3 mktime, `tm_year` is relative to 1900 not Epoch. Go figure.
439 mdt.tm_year = 1980 + (hdr_.mdate >> (16 - 7)) - 1900;
440
441 // As per the man page, the month ranges 0 to 11 (Jan = 0).
442 mdt.tm_mon = ((hdr_.mdate >> (16 - 7 - 4)) & 0x0f) - 1;
443
444 // However, still according to the same man page, the day starts from 1.
445 mdt.tm_mday = hdr_.mdate & 0x1f;
446
447 mdt.tm_hour = hdr_.mtime >> (16 - 5);
448 mdt.tm_min = (hdr_.mtime >> (16 - 5 - 6)) & 0x3f;
449
450 // Seconds in the DOS format have only 5 bits, so they lose the last bit of
451 // resolution, hence the * 2.
452 mdt.tm_sec = (hdr_.mtime & 0x1f) * 2;
453 return base::TimeGm(&mdt);
454 }
455
GetDatetimeStr() const456 std::string ZipFile::GetDatetimeStr() const {
457 char buf[32]{};
458 time_t secs = static_cast<time_t>(GetDatetime());
459 strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", gmtime(&secs));
460 buf[sizeof(buf) - 1] = '\0';
461 return buf;
462 }
463
464 } // namespace perfetto::trace_processor::util
465