1 #ifndef _XEXMLPARSER_HPP
2 #define _XEXMLPARSER_HPP
3 /*-------------------------------------------------------------------------
4 * drawElements Quality Program Test Executor
5 * ------------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief XML Parser.
24 *
25 * \todo [2012-06-07 pyry] Not supported / handled properly:
26 * - xml namespaces (<ns:Element>)
27 * - backslash escapes in strings
 * - &quot; -style escapes
29 * - utf-8
30 *//*--------------------------------------------------------------------*/
31
32 #include "xeDefs.hpp"
33 #include "deRingBuffer.hpp"
34
35 #include <string>
36 #include <map>
37
38 namespace xe
39 {
40 namespace xml
41 {
42
//! Token kinds reported by Tokenizer::getToken().
enum Token
{
    //! Not enough data to determine token.
    TOKEN_INCOMPLETE = 0,
    //! End of document string.
    TOKEN_END_OF_STRING,
    //! Block of data (anything outside tags).
    TOKEN_DATA,
    //! <!-- comment -->
    TOKEN_COMMENT,
    //! Identifier (in tags).
    TOKEN_IDENTIFIER,
    //! String (in tags).
    TOKEN_STRING,
    //! <
    TOKEN_TAG_START,
    //! >
    TOKEN_TAG_END,
    //! </
    TOKEN_END_TAG_START,
    //! />
    TOKEN_EMPTY_ELEMENT_END,
    //! <?
    TOKEN_PROCESSING_INSTRUCTION_START,
    //! ?>
    TOKEN_PROCESSING_INSTRUCTION_END,
    //! =
    TOKEN_EQUAL,
    //! Entity reference, such as &amp;
    TOKEN_ENTITY,

    //! Number of token kinds; not a token itself.
    TOKEN_LAST
};
62
//! Element kinds reported by Parser::getElement().
enum Element
{
    //! Incomplete element.
    ELEMENT_INCOMPLETE = 0,
    //! Element start.
    ELEMENT_START,
    //! Element end.
    ELEMENT_END,
    //! Data element.
    ELEMENT_DATA,
    //! End of document string.
    ELEMENT_END_OF_STRING,

    //! Number of element kinds; not an element itself.
    ELEMENT_LAST
};
73
//! Returns a C string naming \p token. Only the declaration is visible in
//! this header; the definition lives elsewhere.
const char *getTokenName(Token token);
75
76 // \todo [2012-10-17 pyry] Add line number etc.
77 class ParseError : public xe::ParseError
78 {
79 public:
ParseError(const std::string & message)80 ParseError(const std::string &message) : xe::ParseError(message)
81 {
82 }
83 };
84
85 class Tokenizer
86 {
87 public:
88 Tokenizer(void);
89 ~Tokenizer(void);
90
91 void clear(void); //!< Resets tokenizer to initial state.
92
93 void feed(const uint8_t *bytes, int numBytes);
94 void advance(void);
95
getToken(void) const96 Token getToken(void) const
97 {
98 return m_curToken;
99 }
getTokenLen(void) const100 int getTokenLen(void) const
101 {
102 return m_curTokenLen;
103 }
getTokenByte(int offset) const104 uint8_t getTokenByte(int offset) const
105 {
106 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
107 return m_buf.peekBack(offset);
108 }
109 void getTokenStr(std::string &dst) const;
110 void appendTokenStr(std::string &dst) const;
111
112 void getString(std::string &dst) const;
113
114 private:
115 Tokenizer(const Tokenizer &other);
116 Tokenizer &operator=(const Tokenizer &other);
117
118 int getChar(int offset) const;
119
120 void error(const std::string &what);
121
122 enum State
123 {
124 STATE_DATA = 0,
125 STATE_TAG,
126 STATE_IDENTIFIER,
127 STATE_VALUE,
128 STATE_COMMENT,
129 STATE_ENTITY,
130
131 STATE_LAST
132 };
133
134 enum
135 {
136 END_OF_STRING = 0, //!< End of string (0).
137 END_OF_BUFFER = 0xffffffff //!< End of current data buffer.
138 };
139
140 Token m_curToken; //!< Current token.
141 int m_curTokenLen; //!< Length of current token.
142
143 State m_state; //!< Tokenization state.
144
145 de::RingBuffer<uint8_t> m_buf;
146 };
147
148 class Parser
149 {
150 public:
151 typedef std::map<std::string, std::string> AttributeMap;
152 typedef AttributeMap::const_iterator AttributeIter;
153
154 Parser(void);
155 ~Parser(void);
156
157 void clear(void); //!< Resets parser to initial state.
158
159 void feed(const uint8_t *bytes, int numBytes);
160 void advance(void);
161
getElement(void) const162 Element getElement(void) const
163 {
164 return m_element;
165 }
166
167 // For ELEMENT_START / ELEMENT_END.
getElementName(void) const168 const char *getElementName(void) const
169 {
170 return m_elementName.c_str();
171 }
172
173 // For ELEMENT_START.
hasAttribute(const char * name) const174 bool hasAttribute(const char *name) const
175 {
176 return m_attributes.find(name) != m_attributes.end();
177 }
getAttribute(const char * name) const178 const char *getAttribute(const char *name) const
179 {
180 return m_attributes.find(name)->second.c_str();
181 }
attributes(void) const182 const AttributeMap &attributes(void) const
183 {
184 return m_attributes;
185 }
186
187 // For ELEMENT_DATA.
188 int getDataSize(void) const;
189 uint8_t getDataByte(int offset) const;
190 void getDataStr(std::string &dst) const;
191 void appendDataStr(std::string &dst) const;
192
193 private:
194 Parser(const Parser &other);
195 Parser &operator=(const Parser &other);
196
197 void parseEntityValue(void);
198
199 void error(const std::string &what);
200
201 enum State
202 {
203 STATE_DATA = 0, //!< Initial state - assuming data or tag open.
204 STATE_ENTITY, //!< Parsed entity is stored - overrides data.
205 STATE_IN_PROCESSING_INSTRUCTION, //!< In processing instruction.
206 STATE_START_TAG_OPEN, //!< Start tag open.
207 STATE_END_TAG_OPEN, //!< End tag open.
208 STATE_EXPECTING_END_TAG_CLOSE, //!< Expecting end tag close.
209 STATE_ATTRIBUTE_LIST, //!< Expecting attribute list.
210 STATE_EXPECTING_ATTRIBUTE_EQ, //!< Got attribute name, expecting =.
211 STATE_EXPECTING_ATTRIBUTE_VALUE, //!< Expecting attribute value.
212 STATE_YIELD_EMPTY_ELEMENT_END, //!< Empty element: start has been reported but not end.
213
214 STATE_LAST
215 };
216
217 Tokenizer m_tokenizer;
218
219 Element m_element;
220 std::string m_elementName;
221 AttributeMap m_attributes;
222
223 State m_state;
224 std::string m_attribName;
225 std::string m_entityValue; //!< Data override, such as entity value.
226 };
227
228 // Inline implementations
229
getTokenStr(std::string & dst) const230 inline void Tokenizer::getTokenStr(std::string &dst) const
231 {
232 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
233 dst.resize(m_curTokenLen);
234 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
235 dst[ndx] = m_buf.peekBack(ndx);
236 }
237
appendTokenStr(std::string & dst) const238 inline void Tokenizer::appendTokenStr(std::string &dst) const
239 {
240 DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
241
242 size_t oldLen = dst.size();
243 dst.resize(oldLen + m_curTokenLen);
244
245 for (int ndx = 0; ndx < m_curTokenLen; ndx++)
246 dst[oldLen + ndx] = m_buf.peekBack(ndx);
247 }
248
getDataSize(void) const249 inline int Parser::getDataSize(void) const
250 {
251 if (m_state != STATE_ENTITY)
252 return m_tokenizer.getTokenLen();
253 else
254 return (int)m_entityValue.size();
255 }
256
getDataByte(int offset) const257 inline uint8_t Parser::getDataByte(int offset) const
258 {
259 if (m_state != STATE_ENTITY)
260 return m_tokenizer.getTokenByte(offset);
261 else
262 return (uint8_t)m_entityValue[offset];
263 }
264
getDataStr(std::string & dst) const265 inline void Parser::getDataStr(std::string &dst) const
266 {
267 if (m_state != STATE_ENTITY)
268 return m_tokenizer.getTokenStr(dst);
269 else
270 dst = m_entityValue;
271 }
272
appendDataStr(std::string & dst) const273 inline void Parser::appendDataStr(std::string &dst) const
274 {
275 if (m_state != STATE_ENTITY)
276 return m_tokenizer.appendTokenStr(dst);
277 else
278 dst += m_entityValue;
279 }
280
281 } // namespace xml
282 } // namespace xe
283
284 #endif // _XEXMLPARSER_HPP
285