xref: /aosp_15_r20/external/angle/third_party/spirv-tools/src/tools/io.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 // Copyright (c) 2024 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "io.h"
16 
17 #include <assert.h>
18 #include <ctype.h>
19 #include <stdlib.h>
20 
// Helpers for switching the standard streams between binary and text mode.
// Only the SPIRV_WINDOWS build calls _setmode(); elsewhere the macros are
// no-ops.  Every macro expands to a single expression with no trailing
// semicolon, so a call written as `MACRO();` is exactly one statement on every
// platform (and is therefore safe inside an unbraced if/else).
#if defined(SPIRV_WINDOWS)
#include <fcntl.h>
#include <io.h>

#define SET_STDIN_TO_BINARY_MODE() _setmode(_fileno(stdin), O_BINARY)
#define SET_STDIN_TO_TEXT_MODE() _setmode(_fileno(stdin), O_TEXT)
#define SET_STDOUT_TO_BINARY_MODE() _setmode(_fileno(stdout), O_BINARY)
#define SET_STDOUT_TO_TEXT_MODE() _setmode(_fileno(stdout), O_TEXT)
#define SET_STDOUT_MODE(mode) _setmode(_fileno(stdout), (mode))
#else
#define SET_STDIN_TO_BINARY_MODE() ((void)0)
#define SET_STDIN_TO_TEXT_MODE() ((void)0)
// The stdout setters yield the "previous mode"; there is none here, so use 0.
#define SET_STDOUT_TO_BINARY_MODE() 0
#define SET_STDOUT_TO_TEXT_MODE() 0
#define SET_STDOUT_MODE(mode) ((void)(mode))
#endif
37 
38 namespace {
// Reads |file| until fread() returns no more elements, treating the contents
// as consecutive elements of type |T|, and appends them to the end of |data|.
// A null |file| is silently ignored.
template <typename T>
void ReadFile(FILE* file, std::vector<T>* data) {
  if (!file) return;

  // Stage the input through a stack buffer holding 4096 bytes' worth of |T|.
  const int chunk_elements = 4096 / sizeof(T);
  T chunk[chunk_elements];
  for (;;) {
    const size_t got = fread(chunk, sizeof(T), chunk_elements, file);
    if (got == 0) break;
    data->insert(data->end(), chunk, chunk + got);
  }
}
51 
// Returns true if |file| was opened successfully and no read error has been
// recorded on it.  Otherwise writes an error message mentioning |filename| to
// standard error and returns false.
bool WasFileCorrectlyRead(FILE* file, const char* filename) {
  if (file == nullptr) {
    fprintf(stderr, "error: file does not exist '%s'\n", filename);
    return false;
  }

  // Consult the stream's error indicator directly.  Checking it only when
  // ftell() fails would miss read errors on streams that are still seekable.
  if (ferror(file)) {
    fprintf(stderr, "error: error reading file '%s'\n", filename);
    return false;
  }
  return true;
}
68 
// Checks that |read_size| (the number of bytes read from |filename|) is an
// exact multiple of |alignment|, the size in bytes of one element.  On a
// mismatch, writes an error message to standard error and returns false.
// |alignment| must not be 1, for which the check would be vacuous.
bool WasFileSizeAligned(const char* filename, size_t read_size,
                        size_t alignment) {
  assert(alignment != 1);
  if ((read_size % alignment) != 0) {
    // |alignment| is a size_t, so it is printed with %zu.
    fprintf(stderr,
            "error: file size should be a multiple of %zu; file '%s' corrupt\n",
            alignment, filename);
    return false;
  }
  return true;
}
82 
// The textual layouts in which a hex-encoded module may be supplied.  Each
// example shows how the first word would be written in that layout.
enum class HexMode {
  Words,               // One "0x"-prefixed word per token: 0x07230203, ...
  BytesBigEndian,      // Prefixed bytes, high byte first: 0x07, 0x23, 0x02, 0x03, ...
  BytesLittleEndian,   // Prefixed bytes, low byte first: 0x03, 0x02, 0x23, 0x07, ...
  StreamBigEndian,     // Bare byte pairs, high byte first: 07 23 02 03 ...
  StreamLittleEndian,  // Bare byte pairs, low byte first: 03 02 23 07 ...
};
96 
// Whether a character should be skipped as whitespace / separator /
// end-of-file: any isspace() character, a comma, or NUL.
bool IsSpace(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); a negative plain char must be converted first.
  return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0';
}
100 
IsHexStream(const std::vector<char> & stream)101 bool IsHexStream(const std::vector<char>& stream) {
102   for (char c : stream) {
103     if (IsSpace(c)) {
104       continue;
105     }
106 
107     // Every possible case of a SPIR-V hex stream starts with either '0' or 'x'
108     // (see |HexMode| values).  Make a decision upon inspecting the first
109     // non-space character.
110     return c == '0' || c == 'x' || c == 'X';
111   }
112 
113   return false;
114 }
115 
// Returns true if the first |len| characters of |token| and |expect| are equal
// when compared case-insensitively.  Both buffers must hold at least |len|
// characters; no NUL terminator is looked for.
bool MatchIgnoreCase(const char* token, const char* expect, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    // tolower() requires a value representable as unsigned char, so convert
    // before calling it to stay defined for bytes >= 0x80.
    const int lhs = tolower(static_cast<unsigned char>(token[i]));
    const int rhs = tolower(static_cast<unsigned char>(expect[i]));
    if (lhs != rhs) {
      return false;
    }
  }

  return true;
}
125 
126 // Helper class to tokenize a hex stream
127 class HexTokenizer {
128  public:
HexTokenizer(const char * filename,const std::vector<char> & stream,std::vector<uint32_t> * data)129   HexTokenizer(const char* filename, const std::vector<char>& stream,
130                std::vector<uint32_t>* data)
131       : filename_(filename), stream_(stream), data_(data) {
132     DetermineMode();
133   }
134 
Parse()135   bool Parse() {
136     while (current_ < stream_.size() && !encountered_error_) {
137       data_->push_back(GetNextWord());
138 
139       // Make sure trailing space does not lead to parse error by skipping it
140       // and exiting the loop.
141       SkipSpace();
142     }
143 
144     return !encountered_error_;
145   }
146 
147  private:
ParseError(const char * reason)148   void ParseError(const char* reason) {
149     if (!encountered_error_) {
150       fprintf(stderr,
151               "error: hex stream parse error at character %zu: %s in '%s'\n",
152               current_, reason, filename_);
153       encountered_error_ = true;
154     }
155   }
156 
157   // Skip whitespace until the next non-whitespace non-comma character.
SkipSpace()158   void SkipSpace() {
159     while (current_ < stream_.size()) {
160       char c = stream_[current_];
161       if (!IsSpace(c)) {
162         return;
163       }
164 
165       ++current_;
166     }
167   }
168 
169   // Skip the 0x or x at the beginning of a hex value.
Skip0x()170   void Skip0x() {
171     // The first character must be 0 or x.
172     const char first = Next();
173     if (first != '0' && first != 'x' && first != 'X') {
174       ParseError("expected 0x or x");
175     } else if (first == '0') {
176       const char second = Next();
177       if (second != 'x' && second != 'X') {
178         ParseError("expected 0x");
179       }
180     }
181   }
182 
183   // Consume the next character.
Next()184   char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; }
185 
186   // Determine how to read the hex stream based on the first token.
DetermineMode()187   void DetermineMode() {
188     SkipSpace();
189 
190     // Read 11 bytes, that is the size of the biggest token (10) + one more.
191     char first_token[11];
192     for (uint32_t i = 0; i < 11; ++i) {
193       first_token[i] = Next();
194     }
195 
196     // Table of how to match the first token with a mode.
197     struct {
198       const char* expect;
199       bool must_have_delimiter;
200       HexMode mode;
201     } parse_info[] = {
202         {"0x07230203", true, HexMode::Words},
203         {"0x7230203", true, HexMode::Words},
204         {"x07230203", true, HexMode::Words},
205         {"x7230203", true, HexMode::Words},
206 
207         {"0x07", true, HexMode::BytesBigEndian},
208         {"0x7", true, HexMode::BytesBigEndian},
209         {"x07", true, HexMode::BytesBigEndian},
210         {"x7", true, HexMode::BytesBigEndian},
211 
212         {"0x03", true, HexMode::BytesLittleEndian},
213         {"0x3", true, HexMode::BytesLittleEndian},
214         {"x03", true, HexMode::BytesLittleEndian},
215         {"x3", true, HexMode::BytesLittleEndian},
216 
217         {"07", false, HexMode::StreamBigEndian},
218         {"03", false, HexMode::StreamLittleEndian},
219     };
220 
221     // Check to see if any of the possible first tokens are matched.  If not,
222     // this is not a recognized hex stream.
223     encountered_error_ = true;
224     for (const auto& info : parse_info) {
225       const size_t expect_len = strlen(info.expect);
226       const bool matches_expect =
227           MatchIgnoreCase(first_token, info.expect, expect_len);
228       const bool satisfies_delimeter =
229           !info.must_have_delimiter || IsSpace(first_token[expect_len]);
230       if (matches_expect && satisfies_delimeter) {
231         mode_ = info.mode;
232         encountered_error_ = false;
233         break;
234       }
235     }
236 
237     if (encountered_error_) {
238       fprintf(stderr,
239               "error: hex format detected, but pattern '%.11s' is not "
240               "recognized '%s'\n",
241               first_token, filename_);
242     }
243 
244     // Reset the position to restart parsing with the determined mode.
245     current_ = 0;
246   }
247 
248   // Consume up to |max_len| characters and put them in |token_chars|.  A
249   // delimiter is expected. The resulting string is NUL-terminated.
NextN(char token_chars[9],size_t max_len)250   void NextN(char token_chars[9], size_t max_len) {
251     assert(max_len < 9);
252 
253     for (size_t i = 0; i <= max_len; ++i) {
254       char c = Next();
255       if (IsSpace(c)) {
256         token_chars[i] = '\0';
257         return;
258       }
259 
260       token_chars[i] = c;
261       if (!isxdigit(c)) {
262         ParseError("encountered non-hex character");
263       }
264     }
265 
266     // If space is not reached before the maximum number of characters where
267     // consumed, that's an error.
268     ParseError("expected delimiter (space or comma)");
269     token_chars[max_len] = '\0';
270   }
271 
272   // Consume one hex digit.
NextHexDigit()273   char NextHexDigit() {
274     char c = Next();
275     if (!isxdigit(c)) {
276       ParseError("encountered non-hex character");
277     }
278     return c;
279   }
280 
281   // Extract a token out of the stream.  It could be either a word or a byte,
282   // based on |mode_|.
GetNextToken()283   uint32_t GetNextToken() {
284     SkipSpace();
285 
286     // The longest token can be 8 chars (for |HexMode::Words|), add one for
287     // '\0'.
288     char token_chars[9];
289 
290     switch (mode_) {
291       case HexMode::Words:
292       case HexMode::BytesBigEndian:
293       case HexMode::BytesLittleEndian:
294         // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*)
295         // digits.
296         Skip0x();
297         NextN(token_chars, mode_ == HexMode::Words ? 8 : 2);
298         break;
299       case HexMode::StreamBigEndian:
300       case HexMode::StreamLittleEndian:
301         // Always expected to see two consecutive hex digits.
302         token_chars[0] = NextHexDigit();
303         token_chars[1] = NextHexDigit();
304         token_chars[2] = '\0';
305         break;
306     }
307 
308     if (encountered_error_) {
309       return 0;
310     }
311 
312     // Parse the hex value that was just read.
313     return static_cast<uint32_t>(strtol(token_chars, nullptr, 16));
314   }
315 
316   // Construct a word out of tokens
GetNextWord()317   uint32_t GetNextWord() {
318     if (mode_ == HexMode::Words) {
319       return GetNextToken();
320     }
321 
322     uint32_t tokens[4] = {
323         GetNextToken(),
324         GetNextToken(),
325         GetNextToken(),
326         GetNextToken(),
327     };
328 
329     switch (mode_) {
330       case HexMode::BytesBigEndian:
331       case HexMode::StreamBigEndian:
332         return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3];
333       case HexMode::BytesLittleEndian:
334       case HexMode::StreamLittleEndian:
335         return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0];
336       default:
337         assert(false);
338         return 0;
339     }
340   }
341 
342   const char* filename_;
343   const std::vector<char>& stream_;
344   std::vector<uint32_t>* data_;
345 
346   HexMode mode_ = HexMode::Words;
347   size_t current_ = 0;
348   bool encountered_error_ = false;
349 };
350 }  // namespace
351 
ReadBinaryFile(const char * filename,std::vector<uint32_t> * data)352 bool ReadBinaryFile(const char* filename, std::vector<uint32_t>* data) {
353   assert(data->empty());
354 
355   const bool use_file = filename && strcmp("-", filename);
356   FILE* fp = nullptr;
357   if (use_file) {
358     fp = fopen(filename, "rb");
359   } else {
360     SET_STDIN_TO_BINARY_MODE();
361     fp = stdin;
362   }
363 
364   // Read into a char vector first.  If this is a hex stream, it needs to be
365   // processed as such.
366   std::vector<char> data_raw;
367   ReadFile(fp, &data_raw);
368   bool succeeded = WasFileCorrectlyRead(fp, filename);
369   if (use_file && fp) fclose(fp);
370 
371   if (!succeeded) {
372     return false;
373   }
374 
375   if (IsHexStream(data_raw)) {
376     // If a hex stream, parse it and fill |data|.
377     HexTokenizer tokenizer(filename, data_raw, data);
378     succeeded = tokenizer.Parse();
379   } else {
380     // If not a hex stream, convert it to uint32_t via memcpy.
381     succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t));
382     if (succeeded) {
383       data->resize(data_raw.size() / sizeof(uint32_t), 0);
384       memcpy(data->data(), data_raw.data(), data_raw.size());
385     }
386   }
387 
388   return succeeded;
389 }
390 
ConvertHexToBinary(const std::vector<char> & stream,std::vector<uint32_t> * data)391 bool ConvertHexToBinary(const std::vector<char>& stream,
392                         std::vector<uint32_t>* data) {
393   HexTokenizer tokenizer("<input string>", stream, data);
394   return tokenizer.Parse();
395 }
396 
ReadTextFile(const char * filename,std::vector<char> * data)397 bool ReadTextFile(const char* filename, std::vector<char>* data) {
398   assert(data->empty());
399 
400   const bool use_file = filename && strcmp("-", filename);
401   FILE* fp = nullptr;
402   if (use_file) {
403     fp = fopen(filename, "r");
404   } else {
405     SET_STDIN_TO_TEXT_MODE();
406     fp = stdin;
407   }
408 
409   ReadFile(fp, data);
410   bool succeeded = WasFileCorrectlyRead(fp, filename);
411   if (use_file && fp) fclose(fp);
412   return succeeded;
413 }
414 
415 namespace {
416 // A class to create and manage a file for outputting data.
417 class OutputFile {
418  public:
419   // Opens |filename| in the given mode.  If |filename| is nullptr, the empty
420   // string or "-", stdout will be set to the given mode.
OutputFile(const char * filename,const char * mode)421   OutputFile(const char* filename, const char* mode) : old_mode_(0) {
422     const bool use_stdout =
423         !filename || (filename[0] == '-' && filename[1] == '\0');
424     if (use_stdout) {
425       if (strchr(mode, 'b')) {
426         old_mode_ = SET_STDOUT_TO_BINARY_MODE();
427       } else {
428         old_mode_ = SET_STDOUT_TO_TEXT_MODE();
429       }
430       fp_ = stdout;
431     } else {
432       fp_ = fopen(filename, mode);
433     }
434   }
435 
~OutputFile()436   ~OutputFile() {
437     if (fp_ == stdout) {
438       fflush(stdout);
439       SET_STDOUT_MODE(old_mode_);
440     } else if (fp_ != nullptr) {
441       fclose(fp_);
442     }
443   }
444 
445   // Returns a file handle to the file.
GetFileHandle() const446   FILE* GetFileHandle() const { return fp_; }
447 
448  private:
449   FILE* fp_;
450   int old_mode_;
451 };
452 }  // namespace
453 
454 template <typename T>
WriteFile(const char * filename,const char * mode,const T * data,size_t count)455 bool WriteFile(const char* filename, const char* mode, const T* data,
456                size_t count) {
457   OutputFile file(filename, mode);
458   FILE* fp = file.GetFileHandle();
459   if (fp == nullptr) {
460     fprintf(stderr, "error: could not open file '%s'\n", filename);
461     return false;
462   }
463 
464   size_t written = fwrite(data, sizeof(T), count, fp);
465   if (count != written) {
466     fprintf(stderr, "error: could not write to file '%s'\n", filename);
467     return false;
468   }
469 
470   return true;
471 }
472 
473 template bool WriteFile<uint32_t>(const char* filename, const char* mode,
474                                   const uint32_t* data, size_t count);
475 template bool WriteFile<char>(const char* filename, const char* mode,
476                               const char* data, size_t count);
477