xref: /aosp_15_r20/external/pdfium/core/fxcrt/xml/cfx_xmlparser.h (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_XML_CFX_XMLPARSER_H_
8 #define CORE_FXCRT_XML_CFX_XMLPARSER_H_
9 
10 #include <memory>
11 
12 #include "core/fxcrt/data_vector.h"
13 #include "core/fxcrt/retain_ptr.h"
14 #include "core/fxcrt/unowned_ptr.h"
15 #include "core/fxcrt/widestring.h"
16 #include "third_party/abseil-cpp/absl/types/optional.h"
17 
// Forward declarations. In this header these types are referenced only through
// raw pointers or smart-pointer wrappers (RetainPtr, UnownedPtr,
// std::unique_ptr).
18 class CFX_SeekableStreamProxy;
19 class CFX_XMLDocument;
20 class CFX_XMLNode;
21 class IFX_SeekableReadStream;
22 
// XML parser that is constructed from a seekable read stream and hands back a
// CFX_XMLDocument from Parse(). Only the declaration is visible in this
// header; the parsing rules themselves live in the implementation file.
// The class is marked final, so it cannot be derived from.
23 class CFX_XMLParser final {
24  public:
     // NOTE(review): judging by the name and parameters, this reports whether
     // |ch| may appear in an XML name, with |bFirstChar| selecting the rule for
     // the first character of a name -- confirm against the definition.
25   static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);
26 
     // Builds a parser over |pStream|. The constructor is explicit, so a stream
     // handle is never implicitly converted into a parser.
27   explicit CFX_XMLParser(const RetainPtr<IFX_SeekableReadStream>& pStream);
28   ~CFX_XMLParser();
29 
     // Returns a uniquely owned CFX_XMLDocument.
     // NOTE(review): how failure is reported (e.g. a null result) is not
     // visible in this header -- confirm in the implementation.
30   std::unique_ptr<CFX_XMLDocument> Parse();
31 
32  private:
     // Scoped enumeration of syntax states.
     // NOTE(review): presumably the states of the state machine run by
     // DoSyntaxParse(); the meaning of each state is defined by the
     // implementation, not by this header -- confirm there.
33   enum class FDE_XmlSyntaxState {
34     Text,
35     Node,
36     Target,
37     Tag,
38     AttriName,
39     AttriEqualSign,
40     AttriQuotation,
41     AttriValue,
42     CloseInstruction,
43     BreakElement,
44     CloseElement,
45     SkipDeclNode,
46     SkipComment,
47     SkipCommentOrDecl,
48     SkipCData,
49     TargetData
50   };
51 
     // NOTE(review): presumably the main parse loop that fills |doc| and
     // returns whether parsing succeeded -- confirm in the implementation.
52   bool DoSyntaxParse(CFX_XMLDocument* doc);
     // NOTE(review): presumably returns the text gathered in current_text_;
     // whether it also resets the buffer is not visible here -- confirm.
53   WideString GetTextData();
     // NOTE(review): presumably consumes one character of text content, likely
     // in cooperation with entity_start_ -- confirm.
54   void ProcessTextChar(wchar_t ch);
     // NOTE(review): presumably handles the data of a processing-instruction
     // target (cf. the Target / TargetData states) -- confirm.
55   void ProcessTargetData();
56 
     // Non-owning pointer (UnownedPtr) to a node.
     // NOTE(review): presumably the node currently being populated -- confirm.
57   UnownedPtr<CFX_XMLNode> current_node_;
     // Ref-counted handle to a CFX_SeekableStreamProxy. Note that this is a
     // different type from the constructor's IFX_SeekableReadStream argument.
     // NOTE(review): presumably the constructor wraps that argument -- confirm.
58   RetainPtr<CFX_SeekableStreamProxy> stream_;
     // Buffer of wide characters.
     // NOTE(review): presumably the text accumulated for the token currently
     // being parsed -- confirm.
59   DataVector<wchar_t> current_text_;
     // Initialised to 1024 by the in-class initializer.
     // NOTE(review): meaning and units (e.g. a read-chunk size) are not
     // visible in this header -- confirm.
60   size_t xml_plane_size_ = 1024;
     // Optional index, empty by default.
     // NOTE(review): presumably marks where an entity reference started within
     // current_text_ -- confirm.
61   absl::optional<size_t> entity_start_;
62 };
63 
64 #endif  // CORE_FXCRT_XML_CFX_XMLPARSER_H_
65