xref: /aosp_15_r20/external/cronet/net/tools/transport_security_state_generator/input_file_parsers.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2017 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/tools/transport_security_state_generator/input_file_parsers.h"
6 
7 #include <set>
8 #include <sstream>
9 #include <string_view>
10 #include <vector>
11 
12 #include "base/containers/contains.h"
13 #include "base/containers/fixed_flat_set.h"
14 #include "base/json/json_reader.h"
15 #include "base/logging.h"
16 #include "base/strings/strcat.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_split.h"
19 #include "base/strings/string_util.h"
20 #include "base/time/time.h"
21 #include "base/values.h"
22 #include "net/tools/transport_security_state_generator/cert_util.h"
23 #include "net/tools/transport_security_state_generator/pinset.h"
24 #include "net/tools/transport_security_state_generator/pinsets.h"
25 #include "net/tools/transport_security_state_generator/spki_hash.h"
26 #include "third_party/boringssl/src/include/openssl/x509v3.h"
27 
28 namespace net::transport_security_state {
29 
30 namespace {
31 
// Returns true if |name| is exactly (case-sensitively) one of the words that
// must also show up in a pin's variable name when it appears in the
// certificate's subject name.
bool IsImportantWordInCertificateName(std::string_view name) {
  static constexpr std::string_view kImportantWords[] = {
      "Universal", "Global", "EV", "G1", "G2", "G3", "G4", "G5",
  };
  for (std::string_view candidate : kImportantWords) {
    if (candidate == name) {
      return true;
    }
  }
  return false;
}
42 
43 // Strips all characters not matched by the RegEx [A-Za-z0-9_] from |name| and
44 // returns the result.
FilterName(std::string_view name)45 std::string FilterName(std::string_view name) {
46   std::string filtered;
47   for (const char& character : name) {
48     if ((character >= '0' && character <= '9') ||
49         (character >= 'a' && character <= 'z') ||
50         (character >= 'A' && character <= 'Z') || character == '_') {
51       filtered += character;
52     }
53   }
54   return base::ToLowerASCII(filtered);
55 }
56 
57 // Returns true if |pin_name| is a reasonable match for the certificate name
58 // |name|.
MatchCertificateName(std::string_view name,std::string_view pin_name)59 bool MatchCertificateName(std::string_view name, std::string_view pin_name) {
60   std::vector<std::string_view> words = base::SplitStringPiece(
61       name, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
62   if (words.empty()) {
63     LOG(ERROR) << "No words in certificate name for pin " << pin_name;
64     return false;
65   }
66   std::string_view first_word = words[0];
67 
68   if (first_word.ends_with(",")) {
69     first_word = first_word.substr(0, first_word.size() - 1);
70   }
71 
72   if (first_word.starts_with("*.")) {
73     first_word = first_word.substr(2, first_word.size() - 2);
74   }
75 
76   size_t pos = first_word.find('.');
77   if (pos != std::string::npos) {
78     first_word = first_word.substr(0, first_word.size() - pos);
79   }
80 
81   pos = first_word.find('-');
82   if (pos != std::string::npos) {
83     first_word = first_word.substr(0, first_word.size() - pos);
84   }
85 
86   if (first_word.empty()) {
87     LOG(ERROR) << "First word of certificate name (" << name << ") is empty";
88     return false;
89   }
90 
91   std::string filtered_word = FilterName(first_word);
92   first_word = filtered_word;
93   if (!base::EqualsCaseInsensitiveASCII(pin_name.substr(0, first_word.size()),
94                                         first_word)) {
95     LOG(ERROR) << "The first word of the certificate name (" << first_word
96                << ") isn't a prefix of the variable name (" << pin_name << ")";
97     return false;
98   }
99 
100   for (size_t i = 0; i < words.size(); ++i) {
101     std::string_view word = words[i];
102     if (word == "Class" && (i + 1) < words.size()) {
103       std::string class_name = base::StrCat({word, words[i + 1]});
104 
105       pos = pin_name.find(class_name);
106       if (pos == std::string::npos) {
107         LOG(ERROR)
108             << "Certficate class specification doesn't appear in the variable "
109                "name ("
110             << pin_name << ")";
111         return false;
112       }
113     } else if (word.size() == 1 && word[0] >= '0' && word[0] <= '9') {
114       pos = pin_name.find(word);
115       if (pos == std::string::npos) {
116         LOG(ERROR) << "Number doesn't appear in the certificate variable name ("
117                    << pin_name << ")";
118         return false;
119       }
120     } else if (IsImportantWordInCertificateName(word)) {
121       pos = pin_name.find(word);
122       if (pos == std::string::npos) {
123         LOG(ERROR) << std::string(word) +
124                           " doesn't appear in the certificate variable name ("
125                    << pin_name << ")";
126         return false;
127       }
128     }
129   }
130 
131   return true;
132 }
133 
// Returns true iff |candidate| is not empty, its first character is in the
// range A-Z, and every remaining character is in one of the ranges a-z, A-Z,
// 0-9, or is '_'.
bool IsValidName(std::string_view candidate) {
  if (candidate.empty()) {
    return false;
  }

  const char first = candidate.front();
  if (first < 'A' || first > 'Z') {
    return false;
  }

  // The first character is already known to be an upper-case letter, so only
  // the tail still needs to be inspected.
  for (const char c : candidate.substr(1)) {
    const bool is_digit = c >= '0' && c <= '9';
    const bool is_lower = c >= 'a' && c <= 'z';
    const bool is_upper = c >= 'A' && c <= 'Z';
    if (!is_digit && !is_lower && !is_upper && c != '_') {
      return false;
    }
  }
  return true;
}
152 
// Line prefixes recognised in the pins file. Lines are compared against these
// with a prefix match, so the trailing dashes of the PEM armour lines are
// intentionally left out.
constexpr char kStartOfCert[] = "-----BEGIN CERTIFICATE";
constexpr char kEndOfCert[] = "-----END CERTIFICATE";
constexpr char kStartOfPublicKey[] = "-----BEGIN PUBLIC KEY";
constexpr char kEndOfPublicKey[] = "-----END PUBLIC KEY";
// Prefix of a line that carries an SPKI hash value directly.
constexpr char kStartOfSHA256[] = "sha256/";
158 
// States of the line-based parser in ParseCertificatesFile().
enum class CertificateParserState {
  // Between entries: the next meaningful line is either a pin name or the
  // timestamp marker.
  PRE_NAME,
  // A pin name was read: the next line must be a sha256 hash or the first
  // line of a PEM certificate / public key.
  POST_NAME,
  // Collecting the lines of a PEM certificate until its end line.
  IN_CERTIFICATE,
  // Collecting the lines of a PEM public key until its end line.
  IN_PUBLIC_KEY,
  // The timestamp marker was read: the next line holds the timestamp value.
  PRE_TIMESTAMP,
};
166 
// Valid keys for entries in the input JSON. These fields will be included in
// the output.
constexpr char kNameJSONKey[] = "name";
constexpr char kIncludeSubdomainsJSONKey[] = "include_subdomains";
constexpr char kModeJSONKey[] = "mode";
constexpr char kPinsJSONKey[] = "pins";

// Additional valid keys for entries in the input JSON that will not be included
// in the output and contain metadata (e.g., for list maintenance).
constexpr char kPolicyJSONKey[] = "policy";

// Not a JSON key: the marker line in the pins file which announces that the
// following line contains the timestamp of the pins list.
constexpr char kTimestampName[] = "PinsListTimestamp";
178 
179 }  // namespace
180 
ParseCertificatesFile(std::string_view certs_input,Pinsets * pinsets,base::Time * timestamp)181 bool ParseCertificatesFile(std::string_view certs_input,
182                            Pinsets* pinsets,
183                            base::Time* timestamp) {
184   if (certs_input.find("\r\n") != std::string_view::npos) {
185     LOG(ERROR) << "CRLF line-endings found in the pins file. All files must "
186                   "use LF (unix style) line-endings.";
187     return false;
188   }
189 
190   CertificateParserState current_state = CertificateParserState::PRE_NAME;
191   bool timestamp_parsed = false;
192 
193   const base::CompareCase& compare_mode = base::CompareCase::INSENSITIVE_ASCII;
194   std::string name;
195   std::string buffer;
196   std::string subject_name;
197   bssl::UniquePtr<X509> certificate;
198   SPKIHash hash;
199 
200   for (std::string_view line : SplitStringPiece(
201            certs_input, "\n", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL)) {
202     if (!line.empty() && line[0] == '#') {
203       continue;
204     }
205 
206     if (line.empty() && current_state == CertificateParserState::PRE_NAME) {
207       continue;
208     }
209 
210     switch (current_state) {
211       case CertificateParserState::PRE_NAME:
212         if (line == kTimestampName) {
213           current_state = CertificateParserState::PRE_TIMESTAMP;
214           break;
215         }
216         if (!IsValidName(line)) {
217           LOG(ERROR) << "Invalid name in pins file: " << line;
218           return false;
219         }
220         name = std::string(line);
221         current_state = CertificateParserState::POST_NAME;
222         break;
223       case CertificateParserState::POST_NAME:
224         if (base::StartsWith(line, kStartOfSHA256, compare_mode)) {
225           if (!hash.FromString(line)) {
226             LOG(ERROR) << "Invalid hash value in pins file for " << name;
227             return false;
228           }
229 
230           pinsets->RegisterSPKIHash(name, hash);
231           current_state = CertificateParserState::PRE_NAME;
232         } else if (base::StartsWith(line, kStartOfCert, compare_mode)) {
233           buffer = std::string(line) + '\n';
234           current_state = CertificateParserState::IN_CERTIFICATE;
235         } else if (base::StartsWith(line, kStartOfPublicKey, compare_mode)) {
236           buffer = std::string(line) + '\n';
237           current_state = CertificateParserState::IN_PUBLIC_KEY;
238         } else {
239           LOG(ERROR) << "Invalid value in pins file for " << name;
240           return false;
241         }
242         break;
243       case CertificateParserState::IN_CERTIFICATE:
244         buffer += std::string(line) + '\n';
245         if (!base::StartsWith(line, kEndOfCert, compare_mode)) {
246           continue;
247         }
248 
249         certificate = GetX509CertificateFromPEM(buffer);
250         if (!certificate) {
251           LOG(ERROR) << "Could not parse certificate " << name;
252           return false;
253         }
254 
255         if (!CalculateSPKIHashFromCertificate(certificate.get(), &hash)) {
256           LOG(ERROR) << "Could not extract SPKI from certificate " << name;
257           return false;
258         }
259 
260         if (!ExtractSubjectNameFromCertificate(certificate.get(),
261                                                &subject_name)) {
262           LOG(ERROR) << "Could not extract name from certificate " << name;
263           return false;
264         }
265 
266         if (!MatchCertificateName(subject_name, name)) {
267           LOG(ERROR) << name << " is not a reasonable name for "
268                      << subject_name;
269           return false;
270         }
271 
272         pinsets->RegisterSPKIHash(name, hash);
273         current_state = CertificateParserState::PRE_NAME;
274         break;
275       case CertificateParserState::IN_PUBLIC_KEY:
276         buffer += std::string(line) + '\n';
277         if (!base::StartsWith(line, kEndOfPublicKey, compare_mode)) {
278           continue;
279         }
280 
281         if (!CalculateSPKIHashFromKey(buffer, &hash)) {
282           LOG(ERROR) << "Could not parse the public key for " << name;
283           return false;
284         }
285 
286         pinsets->RegisterSPKIHash(name, hash);
287         current_state = CertificateParserState::PRE_NAME;
288         break;
289       case CertificateParserState::PRE_TIMESTAMP:
290         uint64_t timestamp_epoch;
291         if (!base::StringToUint64(line, &timestamp_epoch) ||
292             !base::IsValueInRangeForNumericType<time_t>(timestamp_epoch)) {
293           LOG(ERROR) << "Could not parse the timestamp value";
294           return false;
295         }
296         *timestamp = base::Time::FromTimeT(timestamp_epoch);
297         if (timestamp_parsed) {
298           LOG(ERROR) << "File contains multiple timestamps";
299           return false;
300         }
301         timestamp_parsed = true;
302         current_state = CertificateParserState::PRE_NAME;
303         break;
304       default:
305         DCHECK(false) << "Unknown parser state";
306     }
307   }
308 
309   if (!timestamp_parsed) {
310     LOG(ERROR) << "Timestamp is missing";
311     return false;
312   }
313   return true;
314 }
315 
ParseJSON(std::string_view hsts_json,std::string_view pins_json,TransportSecurityStateEntries * entries,Pinsets * pinsets)316 bool ParseJSON(std::string_view hsts_json,
317                std::string_view pins_json,
318                TransportSecurityStateEntries* entries,
319                Pinsets* pinsets) {
320   static constexpr auto valid_hsts_keys =
321       base::MakeFixedFlatSet<std::string_view>({
322           kNameJSONKey,
323           kPolicyJSONKey,
324           kIncludeSubdomainsJSONKey,
325           kModeJSONKey,
326           kPinsJSONKey,
327       });
328 
329   static constexpr auto valid_pins_keys =
330       base::MakeFixedFlatSet<std::string_view>({
331           kNameJSONKey,
332           kIncludeSubdomainsJSONKey,
333           kPinsJSONKey,
334       });
335 
336   // See the comments in net/http/transport_security_state_static.json for more
337   // info on these policies.
338   std::set<std::string> valid_policies = {
339       "test",        "public-suffix", "google",      "custom",
340       "bulk-legacy", "bulk-18-weeks", "bulk-1-year", "public-suffix-requested"};
341 
342   std::optional<base::Value> hsts_value = base::JSONReader::Read(hsts_json);
343   if (!hsts_value.has_value() || !hsts_value->is_dict()) {
344     LOG(ERROR) << "Could not parse the input HSTS JSON file";
345     return false;
346   }
347 
348   std::optional<base::Value> pins_value = base::JSONReader::Read(pins_json);
349   if (!pins_value.has_value()) {
350     LOG(ERROR) << "Could not parse the input pins JSON file";
351     return false;
352   }
353   base::Value::Dict* pins_dict = pins_value->GetIfDict();
354   if (!pins_dict) {
355     LOG(ERROR) << "Input pins JSON file does not contain a dictionary";
356     return false;
357   }
358 
359   const base::Value::List* pinning_entries_list =
360       pins_dict->FindList("entries");
361   if (!pinning_entries_list) {
362     LOG(ERROR) << "Could not parse the entries in the input pins JSON";
363     return false;
364   }
365   std::map<std::string, std::pair<std::string, bool>> pins_map;
366   for (size_t i = 0; i < pinning_entries_list->size(); ++i) {
367     const base::Value::Dict* parsed = (*pinning_entries_list)[i].GetIfDict();
368     if (!parsed) {
369       LOG(ERROR) << "Could not parse entry " << base::NumberToString(i)
370                  << " in the input pins JSON";
371       return false;
372     }
373     const std::string* maybe_hostname = parsed->FindString(kNameJSONKey);
374     if (!maybe_hostname) {
375       LOG(ERROR) << "Could not extract the hostname for entry "
376                  << base::NumberToString(i) << " from the input pins JSON";
377       return false;
378     }
379 
380     if (maybe_hostname->empty()) {
381       LOG(ERROR) << "The hostname for entry " << base::NumberToString(i)
382                  << " is empty";
383       return false;
384     }
385 
386     for (auto entry_value : *parsed) {
387       if (!base::Contains(valid_pins_keys, entry_value.first)) {
388         LOG(ERROR) << "The entry for " << *maybe_hostname
389                    << " contains an unknown " << entry_value.first << " field";
390         return false;
391       }
392     }
393 
394     const std::string* maybe_pinset = parsed->FindString(kPinsJSONKey);
395     if (!maybe_pinset) {
396       LOG(ERROR) << "Could not extract the pinset for entry "
397                  << base::NumberToString(i) << " from the input pins JSON";
398       return false;
399     }
400 
401     if (pins_map.find(*maybe_hostname) != pins_map.end()) {
402       LOG(ERROR) << *maybe_hostname
403                  << " has duplicate entries in the input pins JSON";
404       return false;
405     }
406 
407     pins_map[*maybe_hostname] =
408         std::pair(*maybe_pinset,
409                   parsed->FindBool(kIncludeSubdomainsJSONKey).value_or(false));
410   }
411 
412   const base::Value::List* preload_entries_list =
413       hsts_value->GetDict().FindList("entries");
414   if (!preload_entries_list) {
415     LOG(ERROR) << "Could not parse the entries in the input HSTS JSON";
416     return false;
417   }
418 
419   for (size_t i = 0; i < preload_entries_list->size(); ++i) {
420     const base::Value::Dict* parsed = (*preload_entries_list)[i].GetIfDict();
421     if (!parsed) {
422       LOG(ERROR) << "Could not parse entry " << base::NumberToString(i)
423                  << " in the input HSTS JSON";
424       return false;
425     }
426 
427     auto entry = std::make_unique<TransportSecurityStateEntry>();
428     const std::string* maybe_hostname = parsed->FindString(kNameJSONKey);
429     if (!maybe_hostname) {
430       LOG(ERROR) << "Could not extract the hostname for entry "
431                  << base::NumberToString(i) << " from the input HSTS JSON";
432       return false;
433     }
434     entry->hostname = *maybe_hostname;
435 
436     if (entry->hostname.empty()) {
437       LOG(ERROR) << "The hostname for entry " << base::NumberToString(i)
438                  << " is empty";
439       return false;
440     }
441 
442     for (auto entry_value : *parsed) {
443       if (!base::Contains(valid_hsts_keys, entry_value.first)) {
444         LOG(ERROR) << "The entry for " << entry->hostname
445                    << " contains an unknown " << entry_value.first << " field";
446         return false;
447       }
448     }
449 
450     const std::string* policy = parsed->FindString(kPolicyJSONKey);
451     if (!policy || !base::Contains(valid_policies, *policy)) {
452       LOG(ERROR) << "The entry for " << entry->hostname
453                  << " does not have a valid policy";
454       return false;
455     }
456 
457     const std::string* maybe_mode = parsed->FindString(kModeJSONKey);
458     std::string mode = maybe_mode ? *maybe_mode : std::string();
459     entry->force_https = false;
460     if (mode == "force-https") {
461       entry->force_https = true;
462     } else if (!mode.empty()) {
463       LOG(ERROR) << "An unknown mode is set for entry " << entry->hostname;
464       return false;
465     }
466 
467     entry->include_subdomains =
468         parsed->FindBool(kIncludeSubdomainsJSONKey).value_or(false);
469 
470     auto pins_it = pins_map.find(entry->hostname);
471     if (pins_it != pins_map.end()) {
472       entry->pinset = pins_it->second.first;
473       entry->hpkp_include_subdomains = pins_it->second.second;
474       pins_map.erase(entry->hostname);
475     }
476 
477     entries->push_back(std::move(entry));
478   }
479 
480   // Any remaining entries in pins_map have pinning information, but are not
481   // HSTS preloaded.
482   for (auto const& pins_entry : pins_map) {
483     auto entry = std::make_unique<TransportSecurityStateEntry>();
484     entry->hostname = pins_entry.first;
485     entry->force_https = false;
486     entry->pinset = pins_entry.second.first;
487     entry->hpkp_include_subdomains = pins_entry.second.second;
488     entries->push_back(std::move(entry));
489   }
490 
491   base::Value::List* pinsets_list = pins_dict->FindList("pinsets");
492   if (!pinsets_list) {
493     LOG(ERROR) << "Could not parse the pinsets in the input JSON";
494     return false;
495   }
496 
497   for (size_t i = 0; i < pinsets_list->size(); ++i) {
498     const base::Value::Dict* parsed = (*pinsets_list)[i].GetIfDict();
499     if (!parsed) {
500       LOG(ERROR) << "Could not parse pinset " << base::NumberToString(i)
501                  << " in the input JSON";
502       return false;
503     }
504 
505     const std::string* maybe_name = parsed->FindString("name");
506     if (!maybe_name) {
507       LOG(ERROR) << "Could not extract the name for pinset "
508                  << base::NumberToString(i) << " from the input JSON";
509       return false;
510     }
511     std::string name = *maybe_name;
512 
513     const std::string* maybe_report_uri = parsed->FindString("report_uri");
514     std::string report_uri =
515         maybe_report_uri ? *maybe_report_uri : std::string();
516 
517     auto pinset = std::make_unique<Pinset>(name, report_uri);
518 
519     const base::Value::List* pinset_static_hashes_list =
520         parsed->FindList("static_spki_hashes");
521     if (pinset_static_hashes_list) {
522       for (const auto& hash : *pinset_static_hashes_list) {
523         if (!hash.is_string()) {
524           LOG(ERROR) << "Could not parse static spki hash "
525                      << hash.DebugString() << " in the input JSON";
526           return false;
527         }
528         pinset->AddStaticSPKIHash(hash.GetString());
529       }
530     }
531 
532     const base::Value::List* pinset_bad_static_hashes_list =
533         parsed->FindList("bad_static_spki_hashes");
534     if (pinset_bad_static_hashes_list) {
535       for (const auto& hash : *pinset_bad_static_hashes_list) {
536         if (!hash.is_string()) {
537           LOG(ERROR) << "Could not parse bad static spki hash "
538                      << hash.DebugString() << " in the input JSON";
539           return false;
540         }
541         pinset->AddBadStaticSPKIHash(hash.GetString());
542       }
543     }
544 
545     pinsets->RegisterPinset(std::move(pinset));
546   }
547 
548   return true;
549 }
550 
551 }  // namespace net::transport_security_state
552