// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.

#ifndef WEBVTT_WEBVTTPARSER_H_
#define WEBVTT_WEBVTTPARSER_H_

#include <list>
#include <string>

namespace libwebvtt {

// Abstract source of characters. Concrete readers implement GetChar().
class Reader {
 public:
  // Fetch a character from the stream. Return
  // negative if error, positive if end-of-stream,
  // and 0 if a character is available.
  virtual int GetChar(char* c) = 0;

 protected:
  // Protected so that code outside the class hierarchy cannot destroy an
  // object through a Reader pointer.
  virtual ~Reader();
};

// Adds line-oriented reading on top of Reader. The Reader base is inherited
// as protected, so GetChar() is not part of this class's public interface.
class LineReader : protected Reader {
 public:
  // Consume a line of text from the stream, stripping off
  // the line terminator characters.  Returns negative if error,
  // 0 on success, and positive at end-of-stream.
  int GetLine(std::string* line);

 protected:
  virtual ~LineReader();

  // Puts a character back into the stream.
  virtual void UngetChar(char c) = 0;
};

// As measured in thousandths of a second,
// e.g. a duration of 1 equals 0.001 seconds,
// and a duration of 1000 equals 1 second.
typedef long long presentation_t;  // NOLINT

// A time value broken down into hours, minutes, seconds and milliseconds.
// The struct declares no constructors, so the fields are not initialized
// unless the caller does so (e.g. Time t = {0, 0, 0, 0};).
struct Time {
  int hours;
  int minutes;
  int seconds;
  int milliseconds;

  // Comparison operators between two time values.
  bool operator==(const Time& rhs) const;
  bool operator<(const Time& rhs) const;
  bool operator>(const Time& rhs) const;
  bool operator<=(const Time& rhs) const;
  bool operator>=(const Time& rhs) const;

  // Getter/setter pair converting between the broken-down fields and a
  // single presentation_t value (see the units note on presentation_t).
  // The setter returns *this by reference.
  presentation_t presentation() const;
  Time& presentation(presentation_t);

  // Arithmetic with a presentation_t offset.
  Time& operator+=(presentation_t);
  Time operator+(presentation_t) const;

  Time& operator-=(presentation_t);
  // Difference between two times, expressed as a presentation_t.
  presentation_t operator-(const Time&) const;
};

// A single name/value cue setting taken from the cue timings line.
struct Setting {
  std::string name;
  std::string value;
};

// One parsed WebVTT cue: identifier, timings, settings and payload lines.
struct Cue {
  std::string identifier;

  Time start_time;
  Time stop_time;

  typedef std::list<Setting> settings_t;
  settings_t settings;

  typedef std::list<std::string> payload_t;
  payload_t payload;
};

// WebVTT parser that pulls characters from a caller-supplied Reader.
// LineReader is a private base, so only the members declared public below
// are available to users of Parser.
class Parser : private LineReader {
 public:
  // NOTE(review): whether Parser takes ownership of |r| is not visible from
  // this header -- confirm against the implementation.
  explicit Parser(Reader* r);
  virtual ~Parser();

  // Pre-parse enough of the stream to determine whether
  // this is really a WEBVTT file. Returns 0 on success,
  // negative if error.
  int Init();

  // Parse the next WebVTT cue from the stream. Returns 0 if
  // an entire cue was parsed, negative if error, and positive
  // at end-of-stream.
  int Parse(Cue* cue);

 private:
  // Returns the next character in the stream, using the look-back character
  // if present (as per Reader::GetChar).
  virtual int GetChar(char* c);

  // Puts a character back into the stream (as per LineReader::UngetChar).
  virtual void UngetChar(char c);

  // Check for presence of a UTF-8 BOM in the stream.  Returns
  // negative if error, 0 on success, and positive at end-of-stream.
  int ParseBOM();

  // Parse the distinguished "cue timings" line, which includes the start
  // and stop times and settings.  Argument |line| contains the complete
  // line of text (as returned by ParseLine()), which the function is free
  // to modify as it sees fit, to facilitate scanning.  Argument |arrow_pos|
  // is the offset of the arrow token ("-->"), which indicates that this is
  // the timings line.  Returns negative if error, 0 on success.
  //
  // NOTE(review): no ParseLine() is declared in this header; presumably
  // LineReader::GetLine() is meant -- confirm against the implementation.
  //
  static int ParseTimingsLine(std::string* line,
                              std::string::size_type arrow_pos,
                              Time* start_time, Time* stop_time,
                              Cue::settings_t* settings);

  // Parse a single time specifier (from the timings line), starting
  // at the given offset; lexical scanning stops when a NUL character
  // is detected. The function modifies offset |off| by the number of
  // characters consumed.  Returns negative if error, 0 on success.
  //
  static int ParseTime(const std::string& line, std::string::size_type* off,
                       Time* time);

  // Parse the cue settings from the timings line, starting at the
  // given offset.  Returns negative if error, 0 on success.
  //
  static int ParseSettings(const std::string& line, std::string::size_type off,
                           Cue::settings_t* settings);

  // Parse a non-negative integer from the characters in |line| beginning
  // at offset |off|.  The function increments |off| by the number
  // of characters consumed.  Returns the value, or negative if error.
  //
  static int ParseNumber(const std::string& line, std::string::size_type* off);

  // The character source handed to the constructor; the pointer itself is
  // const and cannot be reseated after construction.
  Reader* const reader_;

  // Provides one character's worth of look-back, to facilitate scanning.
  int unget_;

  // Disable copy ctor and copy assign for Parser.
  Parser(const Parser&);
  Parser& operator=(const Parser&);
};

}  // namespace libwebvtt

#endif  // WEBVTT_WEBVTTPARSER_H_