// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_
#define THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_

#include <map>
#include <string>

extern "C" {
struct _xmlTextReader;
}

// XmlReader is a wrapper class around libxml's xmlReader,
// providing a simplified C++ API.
class XmlReader {
 public:
  XmlReader();
  ~XmlReader();

  // Not copyable. The class holds a raw libxml reader handle and declares a
  // destructor; a member-wise copy would leave two objects sharing the same
  // handle (and, presumably, releasing it twice -- the destructor is defined
  // outside this header). Deleting the copy operations also means no move
  // operations are implicitly generated.
  XmlReader(const XmlReader&) = delete;
  XmlReader& operator=(const XmlReader&) = delete;

  // Load a document into the reader from memory.  |input| must be UTF-8 and
  // exist for the lifetime of this object.  Returns false on error.
  // TODO(evanm): handle encodings other than UTF-8?
  bool Load(const std::string& input);

  // Load a document into the reader from a file.  Returns false on error.
  bool LoadFile(const std::string& file_path);

  // Wrappers around libxml functions -----------------------------------------

  // Read() advances to the next node.  Returns false on EOF or error.
  bool Read();

  // Next(), when pointing at an opening tag, advances to the node after
  // the matching closing tag.  Returns false on EOF or error.
  bool Next();

  // Return the depth in the tree of the current node.
  int Depth();

  // Returns the "local" name of the current node.
  // For a tag like <foo:bar>, this is the string "bar".
  std::string NodeName();

  // Returns the name of the current node.
  // For a tag like <foo:bar>, this is the string "foo:bar".
  std::string NodeFullName();

  // When pointing at a tag, retrieves the value of an attribute.
  // Returns false on failure.
  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
  // returns true and |value| is set to "a".
  bool NodeAttribute(const char* name, std::string* value);

  // Populates |attributes| with all the attributes of the current tag and
  // returns true. Note that namespace declarations are not reported.
  // Returns false if there are no attributes in the current tag.
  bool GetAllNodeAttributes(std::map<std::string, std::string>* attributes);

  // Populates |namespaces| with all the namespaces (prefix/URI pairs) declared
  // in the current tag and returns true. Note that the default namespace, if
  // declared in the tag, is populated with an empty prefix.
  // Returns false if there are no namespaces declared in the current tag.
  bool GetAllDeclaredNamespaces(std::map<std::string, std::string>* namespaces);

  // Sets |content| to the content of the current node if it is a
  // text, cdata, or significant-whitespace node, respectively.
  // Returns true if the current node is a node of the corresponding type,
  // false otherwise.
  bool GetTextIfTextElement(std::string* content);
  bool GetTextIfCDataElement(std::string* content);
  bool GetTextIfSignificantWhitespaceElement(std::string* content);

  // Returns true if the node is an element (e.g. <foo>). Note this returns
  // false for self-closing elements (e.g. <foo/>). Use IsEmptyElement() to
  // check for those.
  bool IsElement();

  // Returns true if the node is a closing element (e.g. </foo>).
  bool IsClosingElement();

  // Returns true if the current node is an empty (self-closing) element (e.g.
  // <foo/>).
  bool IsEmptyElement();

  // Helper functions not provided by libxml ----------------------------------

  // Return the string content within an element.
  // "<foo>bar</foo>" is a sequence of three nodes:
  // (1) open tag, (2) text, (3) close tag.
  // With the reader currently at (1), this returns the text of (2),
  // and advances past (3).
  // Returns false on error.
  bool ReadElementContent(std::string* content);

  // Skip to the next opening tag, returning false if we reach a closing
  // tag or EOF first.
  // If currently on an opening tag, doesn't advance at all.
  bool SkipToElement();

 private:
  // Returns the libxml node type of the current node.
  int NodeType();

  // A helper function for GetTextIf*Element() functions above.
  // Checks if the node is the specified `node_type`, and, if so, populates
  // `content` and returns true.
  bool GetTextFromNodeIfType(int node_type, std::string* content);

  // The underlying libxml xmlTextReader. Defaults to null so the member is
  // never left indeterminate, whatever the constructor does.
  _xmlTextReader* reader_ = nullptr;
};

#endif  // THIRD_PARTY_LIBXML_CHROMIUM_XML_READER_H_