xref: /aosp_15_r20/external/cronet/net/tools/cache_transparency/generate_checksums.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2022 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""Generates checksums for URLs in the Pervasive Payload list."""
5
6import argparse
7import csv
8import requests
9import urllib.parse
10import pervasive_checksum
11
12
def generate_list_with_checksums(data, timeout=60):
  """Fetches each URL listed in `data` and computes its checksum.

  Args:
    data: Sequence of CSV rows (sequences of cells). Only the first cell of
      each row is used, as the URL to fetch. Rows that are empty or whose
      first cell is blank are skipped instead of aborting the run.
    timeout: Value forwarded to `requests.get(..., timeout=...)` so that an
      unresponsive server cannot block the script forever.

  Returns:
    A `(pairs_list, flat_list)` tuple. `pairs_list` contains one
    `[url, checksum]` entry per fetched URL. `flat_list` contains the same
    values flattened and converted to strings:
    `[url0, checksum0, url1, checksum1, ...]`.

  Raises:
    requests.exceptions.RequestException: propagated from `requests.get` if
      a fetch fails or times out.
  """
  # csv.reader yields [] for blank lines; indexing such a row would raise
  # IndexError, so drop rows that carry no URL before fetching anything.
  urls = [row[0] for row in data if row and str(row[0]).strip()]
  total = len(urls)

  pairs_list = []
  flat_list = []
  # Count from 1 so the progress line ends at [total/total].
  for position, url in enumerate(urls, start=1):
    print(f"[{position}/{total}] Fetching {url}")

    with requests.get(url,
                      headers={"Accept-Encoding": "gzip, deflate, br"},
                      stream=True,
                      timeout=timeout) as response:
      headers = list(response.headers.items())
      # The body is taken from the underlying raw response rather than
      # `response.content` -- presumably so the checksum covers the bytes
      # exactly as received (still content-encoded); confirm against
      # pervasive_checksum.calculate_checksum.
      raw_body = response.raw.data

    checksum = pervasive_checksum.calculate_checksum(headers, raw_body)
    pairs_list.append([url, checksum])
    flat_list.extend((str(url), str(checksum)))

  return pairs_list, flat_list
33
34
def main():
  """Command-line entry point.

  Parses the arguments, reads the input CSV, obtains checksums for its rows
  via generate_list_with_checksums(), and writes the result to the output
  path in the format selected with --format:

    csv:             one "url,checksum" row per entry.
    comma_separated: the list version followed by alternating URLs and
                     checksums, joined with ",\n".
    url_encoded:     the list version followed by alternating URLs and
                     checksums, joined with "," and then URL-quoted. A usage
                     note is also printed to stdout.
  """
  cli = argparse.ArgumentParser(
      formatter_class=argparse.RawTextHelpFormatter, description=__doc__)

  # Registration order is kept: it fixes both the positional order
  # (input, then output) and the order of entries in --help.
  cli.add_argument(
      "input",
      nargs=1,
      type=str,
      help="path for input csv file containing pervasive payloads list")
  cli.add_argument(
      "-v", "--list-version", "--version",
      default="1",
      dest="list_version",
      help="version of pervasive payloads list")
  cli.add_argument(
      "-f", "--format",
      choices=["csv", "comma_separated", "url_encoded"],
      default="csv",
      dest="format",
      help="output format to use. Default: csv")
  cli.add_argument(
      "output",
      nargs=1,
      type=str,
      help="path for output file for URLs and checksums")

  options = cli.parse_args()

  # nargs=1 makes argparse store each positional as a one-element list.
  source_path = options.input[0]

  with open(source_path, mode="r", newline="") as source:
    rows = list(csv.reader(source))

  url_checksum_pairs, flattened = generate_list_with_checksums(rows)

  chosen_format = options.format

  if chosen_format == "csv":
    with open(options.output[0], mode="w", newline="") as sink:
      csv.writer(sink).writerows(url_checksum_pairs)
    return

  if chosen_format == "comma_separated":
    # The list version goes first, ahead of the URL/checksum values.
    entries = [str(options.list_version)] + flattened
    with open(options.output[0], mode="w") as sink:
      sink.write(",\n".join(entries))
    return

  if chosen_format == "url_encoded":
    payload = urllib.parse.quote_plus(
        str(options.list_version) + "," + ",".join(flattened))
    with open(options.output[0], mode="w") as sink:
      sink.write(payload)
    usage_note = (
        "NOTE: To run the feature via commandline, use the following command:\n"
        "out/Default/chrome --enable-features='PervasivePayloadsList:pervasive-payloads/(url_encoded_list),CacheTransparency,SplitCacheByNetworkIsolationKey'"
    )
    print(usage_note)
96
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
  main()
99