1*ec63e07aSXin Li // Copyright 2019 Google LLC
2*ec63e07aSXin Li //
3*ec63e07aSXin Li // Licensed under the Apache License, Version 2.0 (the "License");
4*ec63e07aSXin Li // you may not use this file except in compliance with the License.
5*ec63e07aSXin Li // You may obtain a copy of the License at
6*ec63e07aSXin Li //
7*ec63e07aSXin Li // https://www.apache.org/licenses/LICENSE-2.0
8*ec63e07aSXin Li //
9*ec63e07aSXin Li // Unless required by applicable law or agreed to in writing, software
10*ec63e07aSXin Li // distributed under the License is distributed on an "AS IS" BASIS,
11*ec63e07aSXin Li // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*ec63e07aSXin Li // See the License for the specific language governing permissions and
13*ec63e07aSXin Li // limitations under the License.
14*ec63e07aSXin Li
15*ec63e07aSXin Li // Simple utility to wrap a binary file in a C++ source file.
16*ec63e07aSXin Li
17*ec63e07aSXin Li #include <algorithm>
18*ec63e07aSXin Li #include <cstdio>
19*ec63e07aSXin Li #include <cstdlib>
20*ec63e07aSXin Li #include <cstring>
21*ec63e07aSXin Li #include <string>
22*ec63e07aSXin Li #include <utility>
23*ec63e07aSXin Li #include <vector>
24*ec63e07aSXin Li
25*ec63e07aSXin Li #include "absl/strings/ascii.h"
26*ec63e07aSXin Li #include "absl/strings/str_cat.h"
27*ec63e07aSXin Li #include "absl/strings/str_format.h"
28*ec63e07aSXin Li #include "absl/strings/str_replace.h"
29*ec63e07aSXin Li #include "sandboxed_api/util/fileops.h"
30*ec63e07aSXin Li #include "sandboxed_api/util/raw_logging.h"
31*ec63e07aSXin Li #include "sandboxed_api/util/strerror.h"
32*ec63e07aSXin Li
// C-escapes a single byte and writes it to a file stream, so that the output
// can be placed inside a double-quoted C/C++ string literal.
//
// Only the low 8 bits of `c` are used (the value is converted to
// `unsigned char`), so the function is well defined for a plain, possibly
// negative, `char` value as well as for the result of fgetc().
//
// Encoding (ASCII is assumed):
//   - '\t', '\n', '\r'      -> "\t", "\n", "\r"
//   - '"', '\\', '?'        -> a backslash followed by the character itself.
//                              The question mark is escaped so that the output
//                              can never form a trigraph.
//   - other printable ASCII -> written unchanged (this includes '\'')
//   - everything else       -> three-digit octal escape such as "\000".
//                              Always emitting three digits keeps a following
//                              digit from being absorbed into the escape.
//
// Stream errors are not reported here; callers are expected to inspect the
// stream afterwards (for example with ferror()).
void FWriteCEscapedC(int c, FILE* out) {
  // Normalize to 0..255 so that out-of-range arguments cannot misbehave.
  const int byte = static_cast<unsigned char>(c);

  switch (byte) {
    case '\t':
      fputc('\\', out);
      fputc('t', out);
      return;
    case '\n':
      fputc('\\', out);
      fputc('n', out);
      return;
    case '\r':
      fputc('\\', out);
      fputc('r', out);
      return;
    case '"':
    case '\\':
    case '?':
      fputc('\\', out);
      fputc(byte, out);
      return;
    default:
      break;
  }

  // Printable ASCII (space through '~') needs no escaping.
  if (byte >= 0x20 && byte < 0x7F) {
    fputc(byte, out);
    return;
  }

  // Control characters, DEL and all bytes >= 0x80: fixed-width octal escape.
  fputc('\\', out);
  fputc('0' + byte / 64, out);
  fputc('0' + (byte % 64) / 8, out);
  fputc('0' + byte % 8, out);
}
88*ec63e07aSXin Li
89*ec63e07aSXin Li // Small RAII class that wraps C-style FILE streams and sets up buffering.
90*ec63e07aSXin Li class File {
91*ec63e07aSXin Li public:
File(const char * name,const char * mode)92*ec63e07aSXin Li File(const char* name, const char* mode)
93*ec63e07aSXin Li : name_{name}, stream_{fopen(name, mode)}, buf_(4096, '\0') {
94*ec63e07aSXin Li SAPI_RAW_PCHECK(stream_ != nullptr, "Open %s", name_);
95*ec63e07aSXin Li std::setvbuf(stream_, &buf_[0], _IOFBF, buf_.size());
96*ec63e07aSXin Li Check();
97*ec63e07aSXin Li }
~File()98*ec63e07aSXin Li ~File() { fclose(stream_); }
99*ec63e07aSXin Li
Check()100*ec63e07aSXin Li void Check() {
101*ec63e07aSXin Li if (ferror(stream_)) {
102*ec63e07aSXin Li SAPI_RAW_PLOG(ERROR, "I/O on %s", name_);
103*ec63e07aSXin Li _Exit(EXIT_FAILURE);
104*ec63e07aSXin Li }
105*ec63e07aSXin Li }
106*ec63e07aSXin Li
get() const107*ec63e07aSXin Li FILE* get() const { return stream_; }
108*ec63e07aSXin Li
109*ec63e07aSXin Li private:
110*ec63e07aSXin Li const char* name_;
111*ec63e07aSXin Li FILE* stream_;
112*ec63e07aSXin Li std::string buf_;
113*ec63e07aSXin Li };
114*ec63e07aSXin Li
115*ec63e07aSXin Li // Format literals for generating the .h file
// Each literal below is passed to absl::FPrintF() by main(), so the conversion
// specifiers inside the raw strings must stay in sync with the arguments that
// main() supplies. The text of the raw strings is emitted verbatim into the
// generated header.
//
// kHFileHeaderFmt: top of the generated header. Emits the FileToc struct
// definition inside its own SANDBOXED_API_FILE_TOC_H_ guard and then opens the
// per-output include guard. %1$s is the header guard macro name.
116*ec63e07aSXin Li constexpr const char kHFileHeaderFmt[] =
117*ec63e07aSXin Li R"(// Automatically generated by sapi_cc_embed_data() Bazel rule
118*ec63e07aSXin Li
119*ec63e07aSXin Li #ifndef SANDBOXED_API_FILE_TOC_H_
120*ec63e07aSXin Li #define SANDBOXED_API_FILE_TOC_H_
121*ec63e07aSXin Li
122*ec63e07aSXin Li #include <cstddef>
123*ec63e07aSXin Li
124*ec63e07aSXin Li struct FileToc {
125*ec63e07aSXin Li const char* name;
126*ec63e07aSXin Li const char* data;
127*ec63e07aSXin Li size_t size;
128*ec63e07aSXin Li // Not actually used/computed by sapi_cc_embed_data(), this is for
129*ec63e07aSXin Li // compatibility with legacy code.
130*ec63e07aSXin Li unsigned char md5digest[16];
131*ec63e07aSXin Li };
132*ec63e07aSXin Li
133*ec63e07aSXin Li #endif // SANDBOXED_API_FILE_TOC_H_
134*ec63e07aSXin Li
135*ec63e07aSXin Li #ifndef %1$s
136*ec63e07aSXin Li #define %1$s
137*ec63e07aSXin Li
138*ec63e07aSXin Li )";
// kHNamespaceBeginFmt: opens the namespace around the declarations. %s is the
// namespace name; main() writes this only when a non-empty namespace was
// given.
139*ec63e07aSXin Li constexpr const char kHNamespaceBeginFmt[] =
140*ec63e07aSXin Li R"(namespace %s {
141*ec63e07aSXin Li )";
// kHFileTocDefsFmt: declares the two accessor functions, <ident>_create() and
// <ident>_size(). %1$s is the TOC identifier derived from the NAME argument.
142*ec63e07aSXin Li constexpr const char kHFileTocDefsFmt[] =
143*ec63e07aSXin Li R"(
144*ec63e07aSXin Li const FileToc* %1$s_create();
145*ec63e07aSXin Li size_t %1$s_size();
146*ec63e07aSXin Li )";
// kHNamespaceEndFmt: closes the namespace opened by kHNamespaceBeginFmt. %s is
// the namespace name, repeated in the trailing comment.
147*ec63e07aSXin Li constexpr const char kHNamespaceEndFmt[] =
148*ec63e07aSXin Li R"(
149*ec63e07aSXin Li } // namespace %s
150*ec63e07aSXin Li )";
// kHFileFooterFmt: closes the per-output include guard opened by
// kHFileHeaderFmt. %s is the header guard macro name.
151*ec63e07aSXin Li constexpr const char kHFileFooterFmt[] =
152*ec63e07aSXin Li R"(
153*ec63e07aSXin Li #endif // %s
154*ec63e07aSXin Li )";
155*ec63e07aSXin Li
156*ec63e07aSXin Li // Format literals for generating the .cc file out of the input files.
// As with the header literals, each of these is passed to absl::FPrintF() by
// main(); the raw-string text is emitted verbatim into the generated source.
//
// kCcFileHeaderFmt: top of the generated source file. %s is the include path
// of the generated header without the ".h" suffix (main() builds it from the
// PACKAGE and NAME arguments).
157*ec63e07aSXin Li constexpr const char kCcFileHeaderFmt[] =
158*ec63e07aSXin Li R"(// Automatically generated by sapi_cc_embed_data() build rule
159*ec63e07aSXin Li
160*ec63e07aSXin Li #include "%s.h"
161*ec63e07aSXin Li #include "absl/base/macros.h"
162*ec63e07aSXin Li #include "absl/strings/string_view.h"
163*ec63e07aSXin Li
164*ec63e07aSXin Li )";
// kCcNamespaceBeginFmt: opens the namespace around the definitions. %s is the
// namespace name; written only when a non-empty namespace was given.
165*ec63e07aSXin Li constexpr const char kCcNamespaceBeginFmt[] =
166*ec63e07aSXin Li R"(namespace %s {
167*ec63e07aSXin Li
168*ec63e07aSXin Li )";
// kCcDataBeginFmt: starts the definition of one embedded file and opens its
// string literal. %s is the C++ identifier for the file. There is deliberately
// no trailing newline: the escaped file contents follow immediately.
169*ec63e07aSXin Li constexpr const char kCcDataBeginFmt[] =
170*ec63e07aSXin Li R"(constexpr absl::string_view %s = {")";
// kCcDataEndFmt: closes the string literal opened by kCcDataBeginFmt and
// supplies the explicit length for the string_view. %d is the length value
// that main() passes.
171*ec63e07aSXin Li constexpr const char kCcDataEndFmt[] =
172*ec63e07aSXin Li R"(", %d};
173*ec63e07aSXin Li )";
// kCcFileTocDefsBegin: opens the kToc array. Contains no conversion
// specifiers.
174*ec63e07aSXin Li constexpr const char kCcFileTocDefsBegin[] =
175*ec63e07aSXin Li R"(
176*ec63e07aSXin Li constexpr FileToc kToc[] = {
177*ec63e07aSXin Li )";
// kCcFileTocDefsEntryFmt: one kToc entry. %1$s is the file's base name (stored
// as the entry's name string) and %2$s is the identifier of the string_view
// holding its data. The md5digest member is left value-initialized.
178*ec63e07aSXin Li constexpr const char kCcFileTocDefsEntryFmt[] =
179*ec63e07aSXin Li R"( {"%1$s", %2$s.data(), %2$s.size(), {}},
180*ec63e07aSXin Li )";
// kCcFileTocDefsEndFmt: appends the all-null terminator entry, closes kToc and
// defines the two accessors. %1$s is the TOC identifier. The generated
// <ident>_size() subtracts one so that the terminator is not counted.
181*ec63e07aSXin Li constexpr const char kCcFileTocDefsEndFmt[] =
182*ec63e07aSXin Li R"(
183*ec63e07aSXin Li // Terminate array
184*ec63e07aSXin Li {nullptr, nullptr, 0, {}},
185*ec63e07aSXin Li };
186*ec63e07aSXin Li
187*ec63e07aSXin Li const FileToc* %1$s_create() {
188*ec63e07aSXin Li return kToc;
189*ec63e07aSXin Li }
190*ec63e07aSXin Li
191*ec63e07aSXin Li size_t %1$s_size() {
192*ec63e07aSXin Li return ABSL_ARRAYSIZE(kToc) - 1;
193*ec63e07aSXin Li }
194*ec63e07aSXin Li )";
// kCcNamespaceEndFmt: closes the namespace opened by kCcNamespaceBeginFmt. %s
// is the namespace name, repeated in the trailing comment.
195*ec63e07aSXin Li constexpr const char kCcNamespaceEndFmt[] =
196*ec63e07aSXin Li R"(
197*ec63e07aSXin Li } // namespace %s
198*ec63e07aSXin Li )";
199*ec63e07aSXin Li
main(int argc,char * argv[])200*ec63e07aSXin Li int main(int argc, char* argv[]) {
201*ec63e07aSXin Li if (argc < 7) {
202*ec63e07aSXin Li // We're not aiming for human usability here, as this tool is always run as
203*ec63e07aSXin Li // part of the build.
204*ec63e07aSXin Li absl::FPrintF(stderr,
205*ec63e07aSXin Li "%s PACKAGE NAME NAMESPACE OUTPUT_H OUTPUT_CC INPUT...\n",
206*ec63e07aSXin Li argv[0]);
207*ec63e07aSXin Li return EXIT_FAILURE;
208*ec63e07aSXin Li }
209*ec63e07aSXin Li char** arg = &argv[1];
210*ec63e07aSXin Li
211*ec63e07aSXin Li const char* package = *arg++;
212*ec63e07aSXin Li --argc;
213*ec63e07aSXin Li const char* name = *arg++;
214*ec63e07aSXin Li std::string toc_ident = absl::StrReplaceAll(name, {{"-", "_"}});
215*ec63e07aSXin Li --argc;
216*ec63e07aSXin Li
217*ec63e07aSXin Li const char* ns = *arg++;
218*ec63e07aSXin Li const bool have_ns = strlen(ns) > 0;
219*ec63e07aSXin Li --argc;
220*ec63e07aSXin Li
221*ec63e07aSXin Li { // Write header file first.
222*ec63e07aSXin Li File out_h(*arg++, "wb");
223*ec63e07aSXin Li --argc;
224*ec63e07aSXin Li std::string header_guard = absl::StrFormat("%s_%s_H_", package, toc_ident);
225*ec63e07aSXin Li std::replace_if(
226*ec63e07aSXin Li header_guard.begin(), header_guard.end(),
227*ec63e07aSXin Li [](char c) { return !absl::ascii_isalnum(c); }, '_');
228*ec63e07aSXin Li absl::FPrintF(out_h.get(), kHFileHeaderFmt, header_guard);
229*ec63e07aSXin Li if (have_ns) {
230*ec63e07aSXin Li absl::FPrintF(out_h.get(), kHNamespaceBeginFmt, ns);
231*ec63e07aSXin Li }
232*ec63e07aSXin Li absl::FPrintF(out_h.get(), kHFileTocDefsFmt, toc_ident);
233*ec63e07aSXin Li if (have_ns) {
234*ec63e07aSXin Li absl::FPrintF(out_h.get(), kHNamespaceEndFmt, ns);
235*ec63e07aSXin Li }
236*ec63e07aSXin Li absl::FPrintF(out_h.get(), kHFileFooterFmt, header_guard);
237*ec63e07aSXin Li out_h.Check();
238*ec63e07aSXin Li }
239*ec63e07aSXin Li
240*ec63e07aSXin Li // Write actual translation unit with the data.
241*ec63e07aSXin Li File out_cc(*arg++, "wb");
242*ec63e07aSXin Li --argc;
243*ec63e07aSXin Li
244*ec63e07aSXin Li std::string package_name = package;
245*ec63e07aSXin Li if (!package_name.empty()) {
246*ec63e07aSXin Li absl::StrAppend(&package_name, "/");
247*ec63e07aSXin Li }
248*ec63e07aSXin Li absl::StrAppend(&package_name, name);
249*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcFileHeaderFmt, package_name);
250*ec63e07aSXin Li if (have_ns) {
251*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcNamespaceBeginFmt, ns);
252*ec63e07aSXin Li }
253*ec63e07aSXin Li
254*ec63e07aSXin Li std::vector<std::pair<std::string, std::string>> toc_entries;
255*ec63e07aSXin Li while (argc > 1) {
256*ec63e07aSXin Li const char* in_filename = *arg++;
257*ec63e07aSXin Li --argc;
258*ec63e07aSXin Li File in(in_filename, "rb");
259*ec63e07aSXin Li
260*ec63e07aSXin Li std::string basename = sapi::file_util::fileops::Basename(in_filename);
261*ec63e07aSXin Li std::string ident = absl::StrCat("k", basename);
262*ec63e07aSXin Li std::replace_if(
263*ec63e07aSXin Li ident.begin(), ident.end(),
264*ec63e07aSXin Li [](char c) { return !absl::ascii_isalnum(c); }, '_');
265*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcDataBeginFmt, ident);
266*ec63e07aSXin Li // Remember identifiers, they are needed in the kToc array.
267*ec63e07aSXin Li toc_entries.emplace_back(std::move(basename), std::move(ident));
268*ec63e07aSXin Li
269*ec63e07aSXin Li int c;
270*ec63e07aSXin Li while ((c = fgetc(in.get())) != EOF) {
271*ec63e07aSXin Li FWriteCEscapedC(c, out_cc.get());
272*ec63e07aSXin Li }
273*ec63e07aSXin Li in.Check();
274*ec63e07aSXin Li
275*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcDataEndFmt, ftell(in.get()));
276*ec63e07aSXin Li }
277*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcFileTocDefsBegin);
278*ec63e07aSXin Li for (const auto& entry : toc_entries) {
279*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcFileTocDefsEntryFmt, entry.first,
280*ec63e07aSXin Li entry.second);
281*ec63e07aSXin Li }
282*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcFileTocDefsEndFmt, toc_ident);
283*ec63e07aSXin Li
284*ec63e07aSXin Li if (have_ns) {
285*ec63e07aSXin Li absl::FPrintF(out_cc.get(), kCcNamespaceEndFmt, ns);
286*ec63e07aSXin Li }
287*ec63e07aSXin Li
288*ec63e07aSXin Li out_cc.Check();
289*ec63e07aSXin Li return EXIT_SUCCESS;
290*ec63e07aSXin Li }
291