1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_XML_CFX_XMLPARSER_H_ 8 #define CORE_FXCRT_XML_CFX_XMLPARSER_H_ 9 10 #include <memory> 11 12 #include "core/fxcrt/data_vector.h" 13 #include "core/fxcrt/retain_ptr.h" 14 #include "core/fxcrt/unowned_ptr.h" 15 #include "core/fxcrt/widestring.h" 16 #include "third_party/abseil-cpp/absl/types/optional.h" 17 18 class CFX_SeekableStreamProxy; 19 class CFX_XMLDocument; 20 class CFX_XMLNode; 21 class IFX_SeekableReadStream; 22 23 class CFX_XMLParser final { 24 public: 25 static bool IsXMLNameChar(wchar_t ch, bool bFirstChar); 26 27 explicit CFX_XMLParser(const RetainPtr<IFX_SeekableReadStream>& pStream); 28 ~CFX_XMLParser(); 29 30 std::unique_ptr<CFX_XMLDocument> Parse(); 31 32 private: 33 enum class FDE_XmlSyntaxState { 34 Text, 35 Node, 36 Target, 37 Tag, 38 AttriName, 39 AttriEqualSign, 40 AttriQuotation, 41 AttriValue, 42 CloseInstruction, 43 BreakElement, 44 CloseElement, 45 SkipDeclNode, 46 SkipComment, 47 SkipCommentOrDecl, 48 SkipCData, 49 TargetData 50 }; 51 52 bool DoSyntaxParse(CFX_XMLDocument* doc); 53 WideString GetTextData(); 54 void ProcessTextChar(wchar_t ch); 55 void ProcessTargetData(); 56 57 UnownedPtr<CFX_XMLNode> current_node_; 58 RetainPtr<CFX_SeekableStreamProxy> stream_; 59 DataVector<wchar_t> current_text_; 60 size_t xml_plane_size_ = 1024; 61 absl::optional<size_t> entity_start_; 62 }; 63 64 #endif // CORE_FXCRT_XML_CFX_XMLPARSER_H_ 65