xref: /aosp_15_r20/external/cronet/third_party/libxml/chromium/xml_reader.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
6 #define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
7 
8 #include <map>
9 #include <string>
10 
extern "C" {
// Opaque libxml reader type.  A forward declaration is sufficient because
// XmlReader only stores a pointer to it, so this header does not need to
// include any libxml headers.
struct _xmlTextReader;
}

// XmlReader is a wrapper class around libxml's xmlReader,
// providing a simplified C++ API.
//
// An XmlReader holds a raw pointer to the underlying libxml reader, so it is
// deliberately non-copyable: a compiler-generated copy would leave two
// objects sharing the same reader (and, presumably, both trying to release it
// on destruction -- the destructor is defined out of line).
class XmlReader {
 public:
  XmlReader();
  ~XmlReader();

  // Copying is disabled; see the class comment.
  XmlReader(const XmlReader&) = delete;
  XmlReader& operator=(const XmlReader&) = delete;

  // Load a document into the reader from memory.  |input| must be UTF-8 and
  // exist for the lifetime of this object.  Returns false on error.
  // TODO(evanm): handle encodings other than UTF-8?
  bool Load(const std::string& input);

  // Load a document into the reader from a file.  Returns false on error.
  bool LoadFile(const std::string& file_path);

  // Wrappers around libxml functions -----------------------------------------

  // Read() advances to the next node.  Returns false on EOF or error.
  bool Read();

  // Next(), when pointing at an opening tag, advances to the node after
  // the matching closing tag.  Returns false on EOF or error.
  bool Next();

  // Return the depth in the tree of the current node.
  int Depth();

  // Returns the "local" name of the current node.
  // For a tag like <foo:bar>, this is the string "bar".
  std::string NodeName();

  // Returns the name of the current node.
  // For a tag like <foo:bar>, this is the string "foo:bar".
  std::string NodeFullName();

  // When pointing at a tag, retrieves the value of an attribute.
  // Returns false on failure.
  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
  // returns true and |value| is set to "a".
  bool NodeAttribute(const char* name, std::string* value);

  // Populates |attributes| with all the attributes of the current tag and
  // returns true. Note that namespace declarations are not reported.
  // Returns false if there are no attributes in the current tag.
  bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

  // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
  // in the current tag and returns true. Note that the default namespace, if
  // declared in the tag, is populated with an empty prefix.
  // Returns false if there are no namespaces declared in the current tag.
  bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

  // Sets |content| to the content of the current node if it is a
  // text, cdata, or significant-whitespace node, respectively.
  // Returns true if the current node is a node of the corresponding type,
  // false otherwise.
  bool GetTextIfTextElement(std::string* content);
  bool GetTextIfCDataElement(std::string* content);
  bool GetTextIfSignificantWhitespaceElement(std::string* content);

  // Returns true if the node is an element (e.g. <foo>). Note this returns
  // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
  // check for those.
  bool IsElement();

  // Returns true if the node is a closing element (e.g. </foo>).
  bool IsClosingElement();

  // Returns true if the current node is an empty (self-closing) element (e.g.
  // <foo/>).
  bool IsEmptyElement();

  // Helper functions not provided by libxml ----------------------------------

  // Return the string content within an element.
  // "<foo>bar</foo>" is a sequence of three nodes:
  // (1) open tag, (2) text, (3) close tag.
  // With the reader currently at (1), this returns the text of (2),
  // and advances past (3).
  // Returns false on error.
  bool ReadElementContent(std::string* content);

  // Skip to the next opening tag, returning false if we reach a closing
  // tag or EOF first.
  // If currently on an opening tag, doesn't advance at all.
  bool SkipToElement();

 private:
  // Returns the libxml node type of the current node.
  int NodeType();

  // A helper function for GetTextIf*Element() functions above.
  // Checks if the node is the specified `node_type`, and, if so, populates
  // `content` and returns true.
  bool GetTextFromNodeIfType(int node_type, std::string* content);

  // The underlying libxml xmlTextReader.  Null-initialized here so the pointer
  // never holds an indeterminate value, whatever the out-of-line constructor
  // does.
  _xmlTextReader* reader_ = nullptr;
};
114 
115 #endif  // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
116