1 // Copyright (c) 2024 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "io.h"
16
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20
// Helpers for switching the standard streams between text and binary mode.
//
// Every macro expands to a single expression WITHOUT a trailing semicolon, so
// that `MACRO();` is exactly one statement and can be used safely as the body
// of an unbraced if/else. The SET_STDOUT_TO_* macros yield an int that the
// caller keeps and later hands back to SET_STDOUT_MODE().
#if defined(SPIRV_WINDOWS)
#include <fcntl.h>
#include <io.h>

#define SET_STDIN_TO_BINARY_MODE() _setmode(_fileno(stdin), O_BINARY)
#define SET_STDIN_TO_TEXT_MODE() _setmode(_fileno(stdin), O_TEXT)
#define SET_STDOUT_TO_BINARY_MODE() _setmode(_fileno(stdout), O_BINARY)
#define SET_STDOUT_TO_TEXT_MODE() _setmode(_fileno(stdout), O_TEXT)
#define SET_STDOUT_MODE(mode) _setmode(_fileno(stdout), (mode))
#else
// Without SPIRV_WINDOWS the macros do nothing. The SET_STDOUT_TO_* variants
// still evaluate to 0 because their result is stored by the caller.
#define SET_STDIN_TO_BINARY_MODE() static_cast<void>(0)
#define SET_STDIN_TO_TEXT_MODE() static_cast<void>(0)
#define SET_STDOUT_TO_BINARY_MODE() 0
#define SET_STDOUT_TO_TEXT_MODE() 0
#define SET_STDOUT_MODE(mode) static_cast<void>(mode)
#endif
37
38 namespace {
// Reads |file| until fread() returns no more elements and appends everything
// that was read to the end of |data|. The file is treated as a sequence of
// elements of type |T|. Does nothing when |file| is null.
template <typename T>
void ReadFile(FILE* file, std::vector<T>* data) {
  if (file != nullptr) {
    // Staging buffer of 4096 bytes, expressed as a count of |T| elements.
    const int chunk_elements = 4096 / sizeof(T);
    T chunk[chunk_elements];

    for (;;) {
      const size_t got = fread(chunk, sizeof(T), chunk_elements, file);
      if (got == 0) {
        break;
      }
      data->insert(data->end(), chunk, chunk + got);
    }
  }
}
51
// Returns true if |file| was opened and read without error, and false
// otherwise. On failure an error message mentioning |filename| is written to
// standard error.
//
// |file| is the handle returned by fopen() (null when opening failed) after
// the caller has finished reading from it. |filename| is only used in the
// diagnostics and must not be null.
bool WasFileCorrectlyRead(FILE* file, const char* filename) {
  if (file == nullptr) {
    fprintf(stderr, "error: file does not exist '%s'\n", filename);
    return false;
  }

  // Consult the stream's error indicator directly. Gating this check on
  // ftell() failing would overlook read errors on ordinary seekable files,
  // for which ftell() still succeeds.
  if (ferror(file)) {
    fprintf(stderr, "error: error reading file '%s'\n", filename);
    return false;
  }
  return true;
}
68
// Checks that the file contained a whole number of elements.
//
// |read_size| is the number of bytes that were read and |alignment| is the
// size in bytes of one element; it must be greater than 1. Returns true when
// |read_size| is a multiple of |alignment|. Otherwise writes an error message
// mentioning |filename| to standard error and returns false.
bool WasFileSizeAligned(const char* filename, size_t read_size,
                        size_t alignment) {
  // An alignment of 1 makes the check pointless and 0 would divide by zero.
  assert(alignment > 1);
  if ((read_size % alignment) != 0) {
    // |alignment| is a size_t, so it is printed with %zu.
    fprintf(stderr,
            "error: file size should be a multiple of %zu; file '%s' corrupt\n",
            alignment, filename);
    return false;
  }
  return true;
}
82
// The textual layouts a hex stream is expected to use. The example next to
// each enumerator shows how the word 0x07230203 is written in that layout.
enum class HexMode {
  Words,               // 0x07230203, ...
  BytesBigEndian,      // 0x07, 0x23, 0x02, 0x03, ...
  BytesLittleEndian,   // 0x03, 0x02, 0x23, 0x07, ...
  StreamBigEndian,     // 07 23 02 03 ...
  StreamLittleEndian,  // 03 02 23 07 ...
};
96
// Returns true if |c| should be skipped between tokens: whitespace, a comma
// separator, or the NUL character (which Next() in HexTokenizer returns at
// end of input).
bool IsSpace(char c) {
  // isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative char is undefined behaviour, so convert first.
  return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0';
}
100
IsHexStream(const std::vector<char> & stream)101 bool IsHexStream(const std::vector<char>& stream) {
102 for (char c : stream) {
103 if (IsSpace(c)) {
104 continue;
105 }
106
107 // Every possible case of a SPIR-V hex stream starts with either '0' or 'x'
108 // (see |HexMode| values). Make a decision upon inspecting the first
109 // non-space character.
110 return c == '0' || c == 'x' || c == 'X';
111 }
112
113 return false;
114 }
115
// Returns true if the first |len| characters of |token| and |expect| are equal
// when compared case-insensitively. Both buffers must hold at least |len|
// characters; no NUL terminator is looked for. A |len| of 0 always matches.
bool MatchIgnoreCase(const char* token, const char* expect, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    // tolower() is only defined for values representable as unsigned char (or
    // EOF), so convert before calling it.
    const int lhs = tolower(static_cast<unsigned char>(token[i]));
    const int rhs = tolower(static_cast<unsigned char>(expect[i]));
    if (lhs != rhs) {
      return false;
    }
  }

  return true;
}
125
126 // Helper class to tokenize a hex stream
127 class HexTokenizer {
128 public:
HexTokenizer(const char * filename,const std::vector<char> & stream,std::vector<uint32_t> * data)129 HexTokenizer(const char* filename, const std::vector<char>& stream,
130 std::vector<uint32_t>* data)
131 : filename_(filename), stream_(stream), data_(data) {
132 DetermineMode();
133 }
134
Parse()135 bool Parse() {
136 while (current_ < stream_.size() && !encountered_error_) {
137 data_->push_back(GetNextWord());
138
139 // Make sure trailing space does not lead to parse error by skipping it
140 // and exiting the loop.
141 SkipSpace();
142 }
143
144 return !encountered_error_;
145 }
146
147 private:
ParseError(const char * reason)148 void ParseError(const char* reason) {
149 if (!encountered_error_) {
150 fprintf(stderr,
151 "error: hex stream parse error at character %zu: %s in '%s'\n",
152 current_, reason, filename_);
153 encountered_error_ = true;
154 }
155 }
156
157 // Skip whitespace until the next non-whitespace non-comma character.
SkipSpace()158 void SkipSpace() {
159 while (current_ < stream_.size()) {
160 char c = stream_[current_];
161 if (!IsSpace(c)) {
162 return;
163 }
164
165 ++current_;
166 }
167 }
168
169 // Skip the 0x or x at the beginning of a hex value.
Skip0x()170 void Skip0x() {
171 // The first character must be 0 or x.
172 const char first = Next();
173 if (first != '0' && first != 'x' && first != 'X') {
174 ParseError("expected 0x or x");
175 } else if (first == '0') {
176 const char second = Next();
177 if (second != 'x' && second != 'X') {
178 ParseError("expected 0x");
179 }
180 }
181 }
182
183 // Consume the next character.
Next()184 char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; }
185
186 // Determine how to read the hex stream based on the first token.
DetermineMode()187 void DetermineMode() {
188 SkipSpace();
189
190 // Read 11 bytes, that is the size of the biggest token (10) + one more.
191 char first_token[11];
192 for (uint32_t i = 0; i < 11; ++i) {
193 first_token[i] = Next();
194 }
195
196 // Table of how to match the first token with a mode.
197 struct {
198 const char* expect;
199 bool must_have_delimiter;
200 HexMode mode;
201 } parse_info[] = {
202 {"0x07230203", true, HexMode::Words},
203 {"0x7230203", true, HexMode::Words},
204 {"x07230203", true, HexMode::Words},
205 {"x7230203", true, HexMode::Words},
206
207 {"0x07", true, HexMode::BytesBigEndian},
208 {"0x7", true, HexMode::BytesBigEndian},
209 {"x07", true, HexMode::BytesBigEndian},
210 {"x7", true, HexMode::BytesBigEndian},
211
212 {"0x03", true, HexMode::BytesLittleEndian},
213 {"0x3", true, HexMode::BytesLittleEndian},
214 {"x03", true, HexMode::BytesLittleEndian},
215 {"x3", true, HexMode::BytesLittleEndian},
216
217 {"07", false, HexMode::StreamBigEndian},
218 {"03", false, HexMode::StreamLittleEndian},
219 };
220
221 // Check to see if any of the possible first tokens are matched. If not,
222 // this is not a recognized hex stream.
223 encountered_error_ = true;
224 for (const auto& info : parse_info) {
225 const size_t expect_len = strlen(info.expect);
226 const bool matches_expect =
227 MatchIgnoreCase(first_token, info.expect, expect_len);
228 const bool satisfies_delimeter =
229 !info.must_have_delimiter || IsSpace(first_token[expect_len]);
230 if (matches_expect && satisfies_delimeter) {
231 mode_ = info.mode;
232 encountered_error_ = false;
233 break;
234 }
235 }
236
237 if (encountered_error_) {
238 fprintf(stderr,
239 "error: hex format detected, but pattern '%.11s' is not "
240 "recognized '%s'\n",
241 first_token, filename_);
242 }
243
244 // Reset the position to restart parsing with the determined mode.
245 current_ = 0;
246 }
247
248 // Consume up to |max_len| characters and put them in |token_chars|. A
249 // delimiter is expected. The resulting string is NUL-terminated.
NextN(char token_chars[9],size_t max_len)250 void NextN(char token_chars[9], size_t max_len) {
251 assert(max_len < 9);
252
253 for (size_t i = 0; i <= max_len; ++i) {
254 char c = Next();
255 if (IsSpace(c)) {
256 token_chars[i] = '\0';
257 return;
258 }
259
260 token_chars[i] = c;
261 if (!isxdigit(c)) {
262 ParseError("encountered non-hex character");
263 }
264 }
265
266 // If space is not reached before the maximum number of characters where
267 // consumed, that's an error.
268 ParseError("expected delimiter (space or comma)");
269 token_chars[max_len] = '\0';
270 }
271
272 // Consume one hex digit.
NextHexDigit()273 char NextHexDigit() {
274 char c = Next();
275 if (!isxdigit(c)) {
276 ParseError("encountered non-hex character");
277 }
278 return c;
279 }
280
281 // Extract a token out of the stream. It could be either a word or a byte,
282 // based on |mode_|.
GetNextToken()283 uint32_t GetNextToken() {
284 SkipSpace();
285
286 // The longest token can be 8 chars (for |HexMode::Words|), add one for
287 // '\0'.
288 char token_chars[9];
289
290 switch (mode_) {
291 case HexMode::Words:
292 case HexMode::BytesBigEndian:
293 case HexMode::BytesLittleEndian:
294 // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*)
295 // digits.
296 Skip0x();
297 NextN(token_chars, mode_ == HexMode::Words ? 8 : 2);
298 break;
299 case HexMode::StreamBigEndian:
300 case HexMode::StreamLittleEndian:
301 // Always expected to see two consecutive hex digits.
302 token_chars[0] = NextHexDigit();
303 token_chars[1] = NextHexDigit();
304 token_chars[2] = '\0';
305 break;
306 }
307
308 if (encountered_error_) {
309 return 0;
310 }
311
312 // Parse the hex value that was just read.
313 return static_cast<uint32_t>(strtol(token_chars, nullptr, 16));
314 }
315
316 // Construct a word out of tokens
GetNextWord()317 uint32_t GetNextWord() {
318 if (mode_ == HexMode::Words) {
319 return GetNextToken();
320 }
321
322 uint32_t tokens[4] = {
323 GetNextToken(),
324 GetNextToken(),
325 GetNextToken(),
326 GetNextToken(),
327 };
328
329 switch (mode_) {
330 case HexMode::BytesBigEndian:
331 case HexMode::StreamBigEndian:
332 return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3];
333 case HexMode::BytesLittleEndian:
334 case HexMode::StreamLittleEndian:
335 return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0];
336 default:
337 assert(false);
338 return 0;
339 }
340 }
341
342 const char* filename_;
343 const std::vector<char>& stream_;
344 std::vector<uint32_t>* data_;
345
346 HexMode mode_ = HexMode::Words;
347 size_t current_ = 0;
348 bool encountered_error_ = false;
349 };
350 } // namespace
351
ReadBinaryFile(const char * filename,std::vector<uint32_t> * data)352 bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
353 assert(data->empty());
354
355 const bool use_file = filename && strcmp("-", filename);
356 FILE* fp = nullptr;
357 if (use_file) {
358 fp = fopen(filename, "rb");
359 } else {
360 SET_STDIN_TO_BINARY_MODE();
361 fp = stdin;
362 }
363
364 // Read into a char vector first. If this is a hex stream, it needs to be
365 // processed as such.
366 std::vector<char> data_raw;
367 ReadFile(fp, &data_raw);
368 bool succeeded = WasFileCorrectlyRead(fp, filename);
369 if (use_file && fp) fclose(fp);
370
371 if (!succeeded) {
372 return false;
373 }
374
375 if (IsHexStream(data_raw)) {
376 // If a hex stream, parse it and fill |data|.
377 HexTokenizer tokenizer(filename, data_raw, data);
378 succeeded = tokenizer.Parse();
379 } else {
380 // If not a hex stream, convert it to uint32_t via memcpy.
381 succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t));
382 if (succeeded) {
383 data->resize(data_raw.size() / sizeof(uint32_t), 0);
384 memcpy(data->data(), data_raw.data(), data_raw.size());
385 }
386 }
387
388 return succeeded;
389 }
390
ConvertHexToBinary(const std::vector<char> & stream,std::vector<uint32_t> * data)391 bool ConvertHexToBinary(const std::vector<char>& stream,
392 std::vector<uint32_t>* data) {
393 HexTokenizer tokenizer("<input string>", stream, data);
394 return tokenizer.Parse();
395 }
396
ReadTextFile(const char * filename,std::vector<char> * data)397 bool ReadTextFile(const char* filename, std::vector<char>* data) {
398 assert(data->empty());
399
400 const bool use_file = filename && strcmp("-", filename);
401 FILE* fp = nullptr;
402 if (use_file) {
403 fp = fopen(filename, "r");
404 } else {
405 SET_STDIN_TO_TEXT_MODE();
406 fp = stdin;
407 }
408
409 ReadFile(fp, data);
410 bool succeeded = WasFileCorrectlyRead(fp, filename);
411 if (use_file && fp) fclose(fp);
412 return succeeded;
413 }
414
415 namespace {
416 // A class to create and manage a file for outputting data.
417 class OutputFile {
418 public:
419 // Opens |filename| in the given mode. If |filename| is nullptr, the empty
420 // string or "-", stdout will be set to the given mode.
OutputFile(const char * filename,const char * mode)421 OutputFile(const char* filename, const char* mode) : old_mode_(0) {
422 const bool use_stdout =
423 !filename || (filename[0] == '-' && filename[1] == '\0');
424 if (use_stdout) {
425 if (strchr(mode, 'b')) {
426 old_mode_ = SET_STDOUT_TO_BINARY_MODE();
427 } else {
428 old_mode_ = SET_STDOUT_TO_TEXT_MODE();
429 }
430 fp_ = stdout;
431 } else {
432 fp_ = fopen(filename, mode);
433 }
434 }
435
~OutputFile()436 ~OutputFile() {
437 if (fp_ == stdout) {
438 fflush(stdout);
439 SET_STDOUT_MODE(old_mode_);
440 } else if (fp_ != nullptr) {
441 fclose(fp_);
442 }
443 }
444
445 // Returns a file handle to the file.
GetFileHandle() const446 FILE* GetFileHandle() const { return fp_; }
447
448 private:
449 FILE* fp_;
450 int old_mode_;
451 };
452 } // namespace
453
454 template <typename T>
WriteFile(const char * filename,const char * mode,const T * data,size_t count)455 bool WriteFile(const char* filename, const char* mode, const T* data,
456 size_t count) {
457 OutputFile file(filename, mode);
458 FILE* fp = file.GetFileHandle();
459 if (fp == nullptr) {
460 fprintf(stderr, "error: could not open file '%s'\n", filename);
461 return false;
462 }
463
464 size_t written = fwrite(data, sizeof(T), count, fp);
465 if (count != written) {
466 fprintf(stderr, "error: could not write to file '%s'\n", filename);
467 return false;
468 }
469
470 return true;
471 }
472
473 template bool WriteFile<uint32_t>(const char* filename, const char* mode,
474 const uint32_t* data, size_t count);
475 template bool WriteFile<char>(const char* filename, const char* mode,
476 const char* data, size_t count);
477