1*adcb0a62SAndroid Build Coastguard Worker /*
2*adcb0a62SAndroid Build Coastguard Worker * Copyright (C) 2020 The Android Open Source Project
3*adcb0a62SAndroid Build Coastguard Worker *
4*adcb0a62SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*adcb0a62SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*adcb0a62SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*adcb0a62SAndroid Build Coastguard Worker *
8*adcb0a62SAndroid Build Coastguard Worker * http://www.apache.org/licenses/LICENSE-2.0
9*adcb0a62SAndroid Build Coastguard Worker *
10*adcb0a62SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*adcb0a62SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*adcb0a62SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*adcb0a62SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*adcb0a62SAndroid Build Coastguard Worker * limitations under the License.
15*adcb0a62SAndroid Build Coastguard Worker */
16*adcb0a62SAndroid Build Coastguard Worker
17*adcb0a62SAndroid Build Coastguard Worker #pragma once
18*adcb0a62SAndroid Build Coastguard Worker
19*adcb0a62SAndroid Build Coastguard Worker #include <stdint.h>
20*adcb0a62SAndroid Build Coastguard Worker
21*adcb0a62SAndroid Build Coastguard Worker #include <map>
22*adcb0a62SAndroid Build Coastguard Worker #include <memory>
23*adcb0a62SAndroid Build Coastguard Worker #include <string_view>
24*adcb0a62SAndroid Build Coastguard Worker #include <utility>
25*adcb0a62SAndroid Build Coastguard Worker
26*adcb0a62SAndroid Build Coastguard Worker #include <android-base/logging.h>
27*adcb0a62SAndroid Build Coastguard Worker #include <log/log.h>
28*adcb0a62SAndroid Build Coastguard Worker
29*adcb0a62SAndroid Build Coastguard Worker #include "zip_error.h"
30*adcb0a62SAndroid Build Coastguard Worker
31*adcb0a62SAndroid Build Coastguard Worker /*
32*adcb0a62SAndroid Build Coastguard Worker * Round up to the next highest power of 2.
33*adcb0a62SAndroid Build Coastguard Worker *
34*adcb0a62SAndroid Build Coastguard Worker * Found on http://graphics.stanford.edu/~seander/bithacks.html.
35*adcb0a62SAndroid Build Coastguard Worker *
36*adcb0a62SAndroid Build Coastguard Worker * TODO: could switch to use std::bit_ceil() once ready
37*adcb0a62SAndroid Build Coastguard Worker */
RoundUpPower2(uint32_t val)38*adcb0a62SAndroid Build Coastguard Worker static constexpr uint32_t RoundUpPower2(uint32_t val) {
39*adcb0a62SAndroid Build Coastguard Worker val--;
40*adcb0a62SAndroid Build Coastguard Worker val |= val >> 1;
41*adcb0a62SAndroid Build Coastguard Worker val |= val >> 2;
42*adcb0a62SAndroid Build Coastguard Worker val |= val >> 4;
43*adcb0a62SAndroid Build Coastguard Worker val |= val >> 8;
44*adcb0a62SAndroid Build Coastguard Worker val |= val >> 16;
45*adcb0a62SAndroid Build Coastguard Worker val++;
46*adcb0a62SAndroid Build Coastguard Worker
47*adcb0a62SAndroid Build Coastguard Worker return val;
48*adcb0a62SAndroid Build Coastguard Worker }
49*adcb0a62SAndroid Build Coastguard Worker
50*adcb0a62SAndroid Build Coastguard Worker // This class is the interface of the central directory entries map. The map
51*adcb0a62SAndroid Build Coastguard Worker // helps to locate a particular cd entry based on the filename.
52*adcb0a62SAndroid Build Coastguard Worker class CdEntryMapInterface {
53*adcb0a62SAndroid Build Coastguard Worker public:
54*adcb0a62SAndroid Build Coastguard Worker virtual ~CdEntryMapInterface() = default;
55*adcb0a62SAndroid Build Coastguard Worker // Adds an entry to the map. The |name| should internally points to the
56*adcb0a62SAndroid Build Coastguard Worker // filename field of a cd entry. And |start| points to the beginning of the
57*adcb0a62SAndroid Build Coastguard Worker // central directory. Returns 0 on success.
58*adcb0a62SAndroid Build Coastguard Worker virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
59*adcb0a62SAndroid Build Coastguard Worker // For the zip entry |entryName|, finds the offset of its filename field in
60*adcb0a62SAndroid Build Coastguard Worker // the central directory. Returns a pair of [status, offset]. The value of
61*adcb0a62SAndroid Build Coastguard Worker // the status is 0 on success.
62*adcb0a62SAndroid Build Coastguard Worker virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
63*adcb0a62SAndroid Build Coastguard Worker const uint8_t* cd_start) const = 0;
64*adcb0a62SAndroid Build Coastguard Worker // Resets the iterator to the beginning of the map.
65*adcb0a62SAndroid Build Coastguard Worker virtual void ResetIteration() = 0;
66*adcb0a62SAndroid Build Coastguard Worker // Returns the [name, cd offset] of the current element. Also increments the
67*adcb0a62SAndroid Build Coastguard Worker // iterator to points to the next element. Returns an empty pair we have read
68*adcb0a62SAndroid Build Coastguard Worker // past boundary.
69*adcb0a62SAndroid Build Coastguard Worker virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
70*adcb0a62SAndroid Build Coastguard Worker
71*adcb0a62SAndroid Build Coastguard Worker static std::unique_ptr<CdEntryMapInterface> Create(uint64_t num_entries,
72*adcb0a62SAndroid Build Coastguard Worker size_t cd_length, uint16_t max_file_name_length);
73*adcb0a62SAndroid Build Coastguard Worker };
74*adcb0a62SAndroid Build Coastguard Worker
75*adcb0a62SAndroid Build Coastguard Worker /**
76*adcb0a62SAndroid Build Coastguard Worker * More space efficient string representation of strings in an mmaped zipped
77*adcb0a62SAndroid Build Coastguard Worker * file than std::string_view. Using std::string_view as an entry in the
78*adcb0a62SAndroid Build Coastguard Worker * ZipArchive hash table wastes space. std::string_view stores a pointer to a
79*adcb0a62SAndroid Build Coastguard Worker * string (on 64 bit, 8 bytes) and the length to read from that pointer,
80*adcb0a62SAndroid Build Coastguard Worker * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting
81*adcb0a62SAndroid Build Coastguard Worker * 6 bytes.
82*adcb0a62SAndroid Build Coastguard Worker */
83*adcb0a62SAndroid Build Coastguard Worker
84*adcb0a62SAndroid Build Coastguard Worker /**
85*adcb0a62SAndroid Build Coastguard Worker * ZipStringOffset20 stores 20-bit for offset from a fixed location in the memory
86*adcb0a62SAndroid Build Coastguard Worker * mapped file instead of the entire address, with 12-bit for filename length (i.e.
87*adcb0a62SAndroid Build Coastguard Worker * typical PATH_MAX), consuming 4 bytes in total
88*adcb0a62SAndroid Build Coastguard Worker */
89*adcb0a62SAndroid Build Coastguard Worker struct ZipStringOffset20 {
90*adcb0a62SAndroid Build Coastguard Worker static constexpr size_t offset_max = (1u << 20) - 1;
91*adcb0a62SAndroid Build Coastguard Worker static constexpr size_t length_max = (1u << 12) - 1;
92*adcb0a62SAndroid Build Coastguard Worker uint32_t name_offset : 20;
93*adcb0a62SAndroid Build Coastguard Worker uint16_t name_length : 12;
94*adcb0a62SAndroid Build Coastguard Worker };
95*adcb0a62SAndroid Build Coastguard Worker
96*adcb0a62SAndroid Build Coastguard Worker static_assert(sizeof(struct ZipStringOffset20) == 4);
97*adcb0a62SAndroid Build Coastguard Worker
98*adcb0a62SAndroid Build Coastguard Worker /**
99*adcb0a62SAndroid Build Coastguard Worker * ZipStringOffset32 stores a 4 byte offset from a fixed location in the memory
100*adcb0a62SAndroid Build Coastguard Worker * mapped file instead of the entire address, consuming 8 bytes with alignment.
101*adcb0a62SAndroid Build Coastguard Worker */
102*adcb0a62SAndroid Build Coastguard Worker struct ZipStringOffset32 {
103*adcb0a62SAndroid Build Coastguard Worker uint32_t name_offset;
104*adcb0a62SAndroid Build Coastguard Worker uint16_t name_length;
105*adcb0a62SAndroid Build Coastguard Worker };
106*adcb0a62SAndroid Build Coastguard Worker
107*adcb0a62SAndroid Build Coastguard Worker // This implementation of CdEntryMap uses an array hash table. It uses less
108*adcb0a62SAndroid Build Coastguard Worker // memory than std::map; and it's used as the default implementation for zip
109*adcb0a62SAndroid Build Coastguard Worker // archives without zip64 extension.
110*adcb0a62SAndroid Build Coastguard Worker template <typename ZipStringOffset>
111*adcb0a62SAndroid Build Coastguard Worker class CdEntryMapZip32 : public CdEntryMapInterface {
112*adcb0a62SAndroid Build Coastguard Worker public:
113*adcb0a62SAndroid Build Coastguard Worker ZipError AddToMap(std::string_view name, const uint8_t* start) override;
114*adcb0a62SAndroid Build Coastguard Worker std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
115*adcb0a62SAndroid Build Coastguard Worker const uint8_t* cd_start) const override;
116*adcb0a62SAndroid Build Coastguard Worker void ResetIteration() override;
117*adcb0a62SAndroid Build Coastguard Worker std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
118*adcb0a62SAndroid Build Coastguard Worker
CdEntryMapZip32(uint16_t num_entries)119*adcb0a62SAndroid Build Coastguard Worker explicit CdEntryMapZip32(uint16_t num_entries) {
120*adcb0a62SAndroid Build Coastguard Worker /*
121*adcb0a62SAndroid Build Coastguard Worker * Create hash table. We have a minimum 75% load factor, possibly as
122*adcb0a62SAndroid Build Coastguard Worker * low as 50% after we round off to a power of 2. There must be at
123*adcb0a62SAndroid Build Coastguard Worker * least one unused entry to avoid an infinite loop during creation.
124*adcb0a62SAndroid Build Coastguard Worker */
125*adcb0a62SAndroid Build Coastguard Worker hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
126*adcb0a62SAndroid Build Coastguard Worker hash_table_.reset(static_cast<ZipStringOffset*>(
127*adcb0a62SAndroid Build Coastguard Worker calloc(hash_table_size_, sizeof(ZipStringOffset))));
128*adcb0a62SAndroid Build Coastguard Worker
129*adcb0a62SAndroid Build Coastguard Worker CHECK(hash_table_ != nullptr)
130*adcb0a62SAndroid Build Coastguard Worker << "Zip: unable to allocate the " << hash_table_size_
131*adcb0a62SAndroid Build Coastguard Worker << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
132*adcb0a62SAndroid Build Coastguard Worker }
133*adcb0a62SAndroid Build Coastguard Worker
134*adcb0a62SAndroid Build Coastguard Worker // We know how many entries are in the Zip archive, so we can have a
135*adcb0a62SAndroid Build Coastguard Worker // fixed-size hash table. We define a load factor of 0.75 and over
136*adcb0a62SAndroid Build Coastguard Worker // allocate so the maximum number entries can never be higher than
137*adcb0a62SAndroid Build Coastguard Worker // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
138*adcb0a62SAndroid Build Coastguard Worker struct FreeDeleter {
operatorFreeDeleter139*adcb0a62SAndroid Build Coastguard Worker void operator()(void* ptr) const { ::free(ptr); }
140*adcb0a62SAndroid Build Coastguard Worker };
141*adcb0a62SAndroid Build Coastguard Worker std::unique_ptr<ZipStringOffset[], FreeDeleter> hash_table_;
142*adcb0a62SAndroid Build Coastguard Worker uint32_t hash_table_size_{0};
143*adcb0a62SAndroid Build Coastguard Worker
144*adcb0a62SAndroid Build Coastguard Worker private:
145*adcb0a62SAndroid Build Coastguard Worker // The position of element for the current iteration.
146*adcb0a62SAndroid Build Coastguard Worker uint32_t current_position_{0};
147*adcb0a62SAndroid Build Coastguard Worker };
148*adcb0a62SAndroid Build Coastguard Worker
149*adcb0a62SAndroid Build Coastguard Worker // This implementation of CdEntryMap uses a std::map
150*adcb0a62SAndroid Build Coastguard Worker class CdEntryMapZip64 : public CdEntryMapInterface {
151*adcb0a62SAndroid Build Coastguard Worker public:
152*adcb0a62SAndroid Build Coastguard Worker ZipError AddToMap(std::string_view name, const uint8_t* start) override;
153*adcb0a62SAndroid Build Coastguard Worker std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
154*adcb0a62SAndroid Build Coastguard Worker const uint8_t* cd_start) const override;
155*adcb0a62SAndroid Build Coastguard Worker void ResetIteration() override;
156*adcb0a62SAndroid Build Coastguard Worker std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
157*adcb0a62SAndroid Build Coastguard Worker
158*adcb0a62SAndroid Build Coastguard Worker CdEntryMapZip64() = default;
159*adcb0a62SAndroid Build Coastguard Worker private:
160*adcb0a62SAndroid Build Coastguard Worker
161*adcb0a62SAndroid Build Coastguard Worker std::map<std::string_view, uint64_t> entry_table_;
162*adcb0a62SAndroid Build Coastguard Worker
163*adcb0a62SAndroid Build Coastguard Worker std::map<std::string_view, uint64_t>::iterator iterator_;
164*adcb0a62SAndroid Build Coastguard Worker };
165