xref: /aosp_15_r20/external/libwebm/webvtt/webvttparser.h (revision 103e46e4cd4b6efcf6001f23fa8665fb110abf8d)
// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
8*103e46e4SHarish Mahendrakar 
9*103e46e4SHarish Mahendrakar #ifndef WEBVTT_WEBVTTPARSER_H_
10*103e46e4SHarish Mahendrakar #define WEBVTT_WEBVTTPARSER_H_
11*103e46e4SHarish Mahendrakar 
12*103e46e4SHarish Mahendrakar #include <list>
13*103e46e4SHarish Mahendrakar #include <string>
14*103e46e4SHarish Mahendrakar 
15*103e46e4SHarish Mahendrakar namespace libwebvtt {
16*103e46e4SHarish Mahendrakar 
// Abstract character source.  The only operation declared on a Reader is
// GetChar(), which concrete readers must implement.
class Reader {
 public:
  // Fetch a character from the stream. Return
  // negative if error, positive if end-of-stream,
  // and 0 if a character is available.
  // NOTE(review): presumably the fetched character is written through |c|
  // when 0 is returned -- confirm against the concrete readers.
  virtual int GetChar(char* c) = 0;

 protected:
  // The destructor is protected, so code outside the class hierarchy
  // cannot delete an object through a Reader pointer.
  virtual ~Reader();
};
27*103e46e4SHarish Mahendrakar 
28*103e46e4SHarish Mahendrakar class LineReader : protected Reader {
29*103e46e4SHarish Mahendrakar  public:
30*103e46e4SHarish Mahendrakar   // Consume a line of text from the stream, stripping off
31*103e46e4SHarish Mahendrakar   // the line terminator characters.  Returns negative if error,
32*103e46e4SHarish Mahendrakar   // 0 on success, and positive at end-of-stream.
33*103e46e4SHarish Mahendrakar   int GetLine(std::string* line);
34*103e46e4SHarish Mahendrakar 
35*103e46e4SHarish Mahendrakar  protected:
36*103e46e4SHarish Mahendrakar   virtual ~LineReader();
37*103e46e4SHarish Mahendrakar 
38*103e46e4SHarish Mahendrakar   // Puts a character back into the stream.
39*103e46e4SHarish Mahendrakar   virtual void UngetChar(char c) = 0;
40*103e46e4SHarish Mahendrakar };
41*103e46e4SHarish Mahendrakar 
// Integral time value, as measured in thousandths of a second,
// e.g. a duration of 1 equals 0.001 seconds,
// and a duration of 1000 equals 1 second.
typedef long long presentation_t;  // NOLINT
46*103e46e4SHarish Mahendrakar 
47*103e46e4SHarish Mahendrakar struct Time {
48*103e46e4SHarish Mahendrakar   int hours;
49*103e46e4SHarish Mahendrakar   int minutes;
50*103e46e4SHarish Mahendrakar   int seconds;
51*103e46e4SHarish Mahendrakar   int milliseconds;
52*103e46e4SHarish Mahendrakar 
53*103e46e4SHarish Mahendrakar   bool operator==(const Time& rhs) const;
54*103e46e4SHarish Mahendrakar   bool operator<(const Time& rhs) const;
55*103e46e4SHarish Mahendrakar   bool operator>(const Time& rhs) const;
56*103e46e4SHarish Mahendrakar   bool operator<=(const Time& rhs) const;
57*103e46e4SHarish Mahendrakar   bool operator>=(const Time& rhs) const;
58*103e46e4SHarish Mahendrakar 
59*103e46e4SHarish Mahendrakar   presentation_t presentation() const;
60*103e46e4SHarish Mahendrakar   Time& presentation(presentation_t);
61*103e46e4SHarish Mahendrakar 
62*103e46e4SHarish Mahendrakar   Time& operator+=(presentation_t);
63*103e46e4SHarish Mahendrakar   Time operator+(presentation_t) const;
64*103e46e4SHarish Mahendrakar 
65*103e46e4SHarish Mahendrakar   Time& operator-=(presentation_t);
66*103e46e4SHarish Mahendrakar   presentation_t operator-(const Time&) const;
67*103e46e4SHarish Mahendrakar };
68*103e46e4SHarish Mahendrakar 
// One cue setting, held as a name/value pair of strings (see
// Cue::settings_t, which is a list of these).
struct Setting {
  std::string name;
  std::string value;
};
73*103e46e4SHarish Mahendrakar 
74*103e46e4SHarish Mahendrakar struct Cue {
75*103e46e4SHarish Mahendrakar   std::string identifier;
76*103e46e4SHarish Mahendrakar 
77*103e46e4SHarish Mahendrakar   Time start_time;
78*103e46e4SHarish Mahendrakar   Time stop_time;
79*103e46e4SHarish Mahendrakar 
80*103e46e4SHarish Mahendrakar   typedef std::list<Setting> settings_t;
81*103e46e4SHarish Mahendrakar   settings_t settings;
82*103e46e4SHarish Mahendrakar 
83*103e46e4SHarish Mahendrakar   typedef std::list<std::string> payload_t;
84*103e46e4SHarish Mahendrakar   payload_t payload;
85*103e46e4SHarish Mahendrakar };
86*103e46e4SHarish Mahendrakar 
87*103e46e4SHarish Mahendrakar class Parser : private LineReader {
88*103e46e4SHarish Mahendrakar  public:
89*103e46e4SHarish Mahendrakar   explicit Parser(Reader* r);
90*103e46e4SHarish Mahendrakar   virtual ~Parser();
91*103e46e4SHarish Mahendrakar 
92*103e46e4SHarish Mahendrakar   // Pre-parse enough of the stream to determine whether
93*103e46e4SHarish Mahendrakar   // this is really a WEBVTT file. Returns 0 on success,
94*103e46e4SHarish Mahendrakar   // negative if error.
95*103e46e4SHarish Mahendrakar   int Init();
96*103e46e4SHarish Mahendrakar 
97*103e46e4SHarish Mahendrakar   // Parse the next WebVTT cue from the stream. Returns 0 if
98*103e46e4SHarish Mahendrakar   // an entire cue was parsed, negative if error, and positive
99*103e46e4SHarish Mahendrakar   // at end-of-stream.
100*103e46e4SHarish Mahendrakar   int Parse(Cue* cue);
101*103e46e4SHarish Mahendrakar 
102*103e46e4SHarish Mahendrakar  private:
103*103e46e4SHarish Mahendrakar   // Returns the next character in the stream, using the look-back character
104*103e46e4SHarish Mahendrakar   // if present (as per Reader::GetChar).
105*103e46e4SHarish Mahendrakar   virtual int GetChar(char* c);
106*103e46e4SHarish Mahendrakar 
107*103e46e4SHarish Mahendrakar   // Puts a character back into the stream (as per LineReader::UngetChar).
108*103e46e4SHarish Mahendrakar   virtual void UngetChar(char c);
109*103e46e4SHarish Mahendrakar 
110*103e46e4SHarish Mahendrakar   // Check for presence of a UTF-8 BOM in the stream.  Returns
111*103e46e4SHarish Mahendrakar   // negative if error, 0 on success, and positive at end-of-stream.
112*103e46e4SHarish Mahendrakar   int ParseBOM();
113*103e46e4SHarish Mahendrakar 
114*103e46e4SHarish Mahendrakar   // Parse the distinguished "cue timings" line, which includes the start
115*103e46e4SHarish Mahendrakar   // and stop times and settings.  Argument |line| contains the complete
116*103e46e4SHarish Mahendrakar   // line of text (as returned by ParseLine()), which the function is free
117*103e46e4SHarish Mahendrakar   // to modify as it sees fit, to facilitate scanning.  Argument |arrow_pos|
118*103e46e4SHarish Mahendrakar   // is the offset of the arrow token ("-->"), which indicates that this is
119*103e46e4SHarish Mahendrakar   // the timings line.  Returns negative if error, 0 on success.
120*103e46e4SHarish Mahendrakar   //
121*103e46e4SHarish Mahendrakar   static int ParseTimingsLine(std::string* line,
122*103e46e4SHarish Mahendrakar                               std::string::size_type arrow_pos,
123*103e46e4SHarish Mahendrakar                               Time* start_time, Time* stop_time,
124*103e46e4SHarish Mahendrakar                               Cue::settings_t* settings);
125*103e46e4SHarish Mahendrakar 
126*103e46e4SHarish Mahendrakar   // Parse a single time specifier (from the timings line), starting
127*103e46e4SHarish Mahendrakar   // at the given offset; lexical scanning stops when a NUL character
128*103e46e4SHarish Mahendrakar   // is detected. The function modifies offset |off| by the number of
129*103e46e4SHarish Mahendrakar   // characters consumed.  Returns negative if error, 0 on success.
130*103e46e4SHarish Mahendrakar   //
131*103e46e4SHarish Mahendrakar   static int ParseTime(const std::string& line, std::string::size_type* off,
132*103e46e4SHarish Mahendrakar                        Time* time);
133*103e46e4SHarish Mahendrakar 
134*103e46e4SHarish Mahendrakar   // Parse the cue settings from the timings line, starting at the
135*103e46e4SHarish Mahendrakar   // given offset.  Returns negative if error, 0 on success.
136*103e46e4SHarish Mahendrakar   //
137*103e46e4SHarish Mahendrakar   static int ParseSettings(const std::string& line, std::string::size_type off,
138*103e46e4SHarish Mahendrakar                            Cue::settings_t* settings);
139*103e46e4SHarish Mahendrakar 
140*103e46e4SHarish Mahendrakar   // Parse a non-negative integer from the characters in |line| beginning
141*103e46e4SHarish Mahendrakar   // at offset |off|.  The function increments |off| by the number
142*103e46e4SHarish Mahendrakar   // of characters consumed.  Returns the value, or negative if error.
143*103e46e4SHarish Mahendrakar   //
144*103e46e4SHarish Mahendrakar   static int ParseNumber(const std::string& line, std::string::size_type* off);
145*103e46e4SHarish Mahendrakar 
146*103e46e4SHarish Mahendrakar   Reader* const reader_;
147*103e46e4SHarish Mahendrakar 
148*103e46e4SHarish Mahendrakar   // Provides one character's worth of look-back, to facilitate scanning.
149*103e46e4SHarish Mahendrakar   int unget_;
150*103e46e4SHarish Mahendrakar 
151*103e46e4SHarish Mahendrakar   // Disable copy ctor and copy assign for Parser.
152*103e46e4SHarish Mahendrakar   Parser(const Parser&);
153*103e46e4SHarish Mahendrakar   Parser& operator=(const Parser&);
154*103e46e4SHarish Mahendrakar };
155*103e46e4SHarish Mahendrakar 
156*103e46e4SHarish Mahendrakar }  // namespace libwebvtt
157*103e46e4SHarish Mahendrakar 
158*103e46e4SHarish Mahendrakar #endif  // WEBVTT_WEBVTTPARSER_H_
159