xref: /aosp_15_r20/external/cronet/testing/libfuzzer/proto/url.proto (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker// Copyright 2017 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker// Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker// This file contains the definition of the Url protobuf used in the
6*6777b538SAndroid Build Coastguard Worker// url_parse_proto_fuzzer that is meant to serve as an example for future
7*6777b538SAndroid Build Coastguard Worker// Chromium fuzzers that use libprotobuf-mutator. We will consider the format of
8*6777b538SAndroid Build Coastguard Worker// a URL for this fuzzer, to be
9*6777b538SAndroid Build Coastguard Worker// [scheme:][//[user[:password]@]host[:port]][/path][?query][#fragment]. There
10*6777b538SAndroid Build Coastguard Worker// may be some URLs Chromium treats as valid that this syntax does not capture.
11*6777b538SAndroid Build Coastguard Worker// However, we will ignore them for the sake of simplicity. It is recommended to
12*6777b538SAndroid Build Coastguard Worker// read this file in conjunction with Convert() in url_proto_converter.cc as
13*6777b538SAndroid Build Coastguard Worker// logic in this function is sometimes used to ensure that the Url Protocol
14*6777b538SAndroid Build Coastguard Worker// Buffer obeys the syntax we have defined for URLs. Though reading it is
15*6777b538SAndroid Build Coastguard Worker// completely unnecessary for understanding this fuzzer, we have roughly
16*6777b538SAndroid Build Coastguard Worker// followed RFC 3986 (https://tools.ietf.org/html/rfc3986) which defines the
17*6777b538SAndroid Build Coastguard Worker// syntax of URIs (which URLs are a subset of).
18*6777b538SAndroid Build Coastguard Worker
19*6777b538SAndroid Build Coastguard Workersyntax = "proto2";
20*6777b538SAndroid Build Coastguard Worker
21*6777b538SAndroid Build Coastguard Workerpackage url_proto;
22*6777b538SAndroid Build Coastguard Worker
23*6777b538SAndroid Build Coastguard Worker// Here we define the format for a Url Protocol Buffer. This will be passed to
24*6777b538SAndroid Build Coastguard Worker// our fuzzer function.
25*6777b538SAndroid Build Coastguard Workermessage Url {
26*6777b538SAndroid Build Coastguard Worker  // If there is a scheme, then it must be followed by a colon. A scheme is in
27*6777b538SAndroid Build Coastguard Worker  // practice not required in a URL. Therefore, we will define the scheme as
28*6777b538SAndroid Build Coastguard Worker  // optional but ensure it is followed by a colon in our conversion code if it
29*6777b538SAndroid Build Coastguard Worker  // is included.
30*6777b538SAndroid Build Coastguard Worker  optional string scheme = 1;
31*6777b538SAndroid Build Coastguard Worker
32*6777b538SAndroid Build Coastguard Worker  enum Slash {
33*6777b538SAndroid Build Coastguard Worker    NONE = 0;      // Separate path segments using ""
34*6777b538SAndroid Build Coastguard Worker    FORWARD = 1;   // Separate path segments using /
35*6777b538SAndroid Build Coastguard Worker    BACKWARD = 2;  // Separate path segments using \
36*6777b538SAndroid Build Coastguard Worker  }
37*6777b538SAndroid Build Coastguard Worker    // The syntax rules of the two slashes that precede the host in a URL are
38*6777b538SAndroid Build Coastguard Worker    // surprisingly complex. They are not required, even if a scheme is included
39*6777b538SAndroid Build Coastguard Worker    // (http:example.com is treated as valid), and are valid even if a scheme is
40*6777b538SAndroid Build Coastguard Worker    // not included (//example.com is treated as file:///example.com). They can
41*6777b538SAndroid Build Coastguard Worker    // even be backslashes (http:\\example.com and http:\/example.com are both
42*6777b538SAndroid Build Coastguard Worker    // valid) and there can be any number of them (http:/example.com and
43*6777b538SAndroid Build Coastguard Worker    // http://////example.com are both valid).
44*6777b538SAndroid Build Coastguard Worker    // We will therefore define slashes as a list of enum values (repeated
45*6777b538SAndroid Build Coastguard Worker    // Slash). In our conversion code, this will be read to append the
46*6777b538SAndroid Build Coastguard Worker    // appropriate kind and appropriate number of slashes to the URL.
47*6777b538SAndroid Build Coastguard Worker    repeated Slash slashes = 2 [packed = true];
48*6777b538SAndroid Build Coastguard Worker
49*6777b538SAndroid Build Coastguard Worker    // The [user:password@] part of the URL shown above is called the userinfo.
50*6777b538SAndroid Build Coastguard Worker    // Userinfo is not mandatory, but if it is included in a URL, then it must
51*6777b538SAndroid Build Coastguard Worker    // contain a string called user. There is another optional field in userinfo
52*6777b538SAndroid Build Coastguard Worker    // called the password. If a password is included, the user must be
53*6777b538SAndroid Build Coastguard Worker    // separated from it by ":". In either case, the userinfo must be separated
54*6777b538SAndroid Build Coastguard Worker    // from the host by "@". A URL must have a host if it has a userinfo. These
55*6777b538SAndroid Build Coastguard Worker    // requirements will be ensured by the conversion code.
56*6777b538SAndroid Build Coastguard Worker    message Userinfo {
57*6777b538SAndroid Build Coastguard Worker      required string user = 1;
58*6777b538SAndroid Build Coastguard Worker      optional string password = 2;
59*6777b538SAndroid Build Coastguard Worker    }
60*6777b538SAndroid Build Coastguard Worker    optional Userinfo userinfo = 3;
61*6777b538SAndroid Build Coastguard Worker
62*6777b538SAndroid Build Coastguard Worker    // Hosts, like most else in our Url definition, are optional (there are
63*6777b538SAndroid Build Coastguard Worker    // URLs such as data URLs that do not have hosts).
64*6777b538SAndroid Build Coastguard Worker    optional string host = 4;
65*6777b538SAndroid Build Coastguard Worker
66*6777b538SAndroid Build Coastguard Worker    // Ports are unsigned integers between 1-2^16. The closest type to this in
67*6777b538SAndroid Build Coastguard Worker    // the proto2 format is uint32. Also if a port number is specified it must
68*6777b538SAndroid Build Coastguard Worker    // be preceded by a colon (consider "google.com80": 80 will be interpreted as
69*6777b538SAndroid Build Coastguard Worker    // part of the host). The conversion code will ensure this is the case.
70*6777b538SAndroid Build Coastguard Worker    optional uint32 port = 5;
71*6777b538SAndroid Build Coastguard Worker
72*6777b538SAndroid Build Coastguard Worker    // The rules for the path are somewhat complex. A path is not required,
73*6777b538SAndroid Build Coastguard Worker    // however if it follows a port or host, it must start with "/" according
74*6777b538SAndroid Build Coastguard Worker    // to the RFC, though Chromium accepts "\" as it converts all backslashes to
75*6777b538SAndroid Build Coastguard Worker    // slashes. It does not need to start with "/" if there is no host (in data
76*6777b538SAndroid Build Coastguard Worker    // URLs for example). Thus we will define path as a repeated string where
77*6777b538SAndroid Build Coastguard Worker    // each member contains a segment of the path and will be preceded by the
78*6777b538SAndroid Build Coastguard Worker    // path_separator. The one exception to this is for the first segment if
79*6777b538SAndroid Build Coastguard Worker    // path_separator == NONE and there is a non empty path and host, then the
80*6777b538SAndroid Build Coastguard Worker    // first segment will be preceded by "/".
81*6777b538SAndroid Build Coastguard Worker    repeated string path = 6;
82*6777b538SAndroid Build Coastguard Worker    required Slash path_separator = 7 [default = FORWARD];
83*6777b538SAndroid Build Coastguard Worker
84*6777b538SAndroid Build Coastguard Worker    // A query must be preceded by "?". This will be ensured in the conversion
85*6777b538SAndroid Build Coastguard Worker    // code. Queries can have many components which the converter will separate
86*6777b538SAndroid Build Coastguard Worker    // using "&", as is the convention.
87*6777b538SAndroid Build Coastguard Worker    repeated string query = 8;
88*6777b538SAndroid Build Coastguard Worker
89*6777b538SAndroid Build Coastguard Worker    // A fragment must be preceded by "#". This will be ensured in the
90*6777b538SAndroid Build Coastguard Worker    // conversion code.
91*6777b538SAndroid Build Coastguard Worker    optional string fragment = 9;
92*6777b538SAndroid Build Coastguard Worker  }
93