xref: /aosp_15_r20/external/pdfium/xfa/fxfa/cxfa_textparser.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/cxfa_textparser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 
12 #include "core/fxcrt/css/cfx_css.h"
13 #include "core/fxcrt/css/cfx_csscomputedstyle.h"
14 #include "core/fxcrt/css/cfx_cssdeclaration.h"
15 #include "core/fxcrt/css/cfx_cssstyleselector.h"
16 #include "core/fxcrt/css/cfx_cssstylesheet.h"
17 #include "core/fxcrt/fx_codepage.h"
18 #include "core/fxcrt/xml/cfx_xmlelement.h"
19 #include "core/fxcrt/xml/cfx_xmlnode.h"
20 #include "core/fxge/fx_font.h"
21 #include "third_party/base/check.h"
22 #include "third_party/base/notreached.h"
23 #include "xfa/fgas/font/cfgas_fontmgr.h"
24 #include "xfa/fgas/font/cfgas_gefont.h"
25 #include "xfa/fxfa/cxfa_ffapp.h"
26 #include "xfa/fxfa/cxfa_ffdoc.h"
27 #include "xfa/fxfa/cxfa_fontmgr.h"
28 #include "xfa/fxfa/cxfa_textprovider.h"
29 #include "xfa/fxfa/cxfa_texttabstopscontext.h"
30 #include "xfa/fxfa/parser/cxfa_font.h"
31 #include "xfa/fxfa/parser/cxfa_measurement.h"
32 #include "xfa/fxfa/parser/cxfa_para.h"
33 
34 namespace {
35 
36 enum class TabStopStatus {
37   Error,
38   EOS,
39   None,
40   Alignment,
41   StartLeader,
42   Leader,
43   Location,
44 };
45 
GetLowerCaseElementAttributeOrDefault(const CFX_XMLElement * pElement,const WideString & wsName,const WideString & wsDefaultValue)46 WideString GetLowerCaseElementAttributeOrDefault(
47     const CFX_XMLElement* pElement,
48     const WideString& wsName,
49     const WideString& wsDefaultValue) {
50   WideString ws = pElement->GetAttribute(wsName);
51   if (ws.IsEmpty())
52     ws = wsDefaultValue;
53   else
54     ws.MakeLower();
55   return ws;
56 }
57 
58 }  // namespace
59 
60 CXFA_TextParser::CXFA_TextParser() = default;
61 
62 CXFA_TextParser::~CXFA_TextParser() = default;
63 
Reset()64 void CXFA_TextParser::Reset() {
65   m_mapXMLNodeToParseContext.clear();
66   m_bParsed = false;
67 }
68 
InitCSSData(CXFA_TextProvider * pTextProvider)69 void CXFA_TextParser::InitCSSData(CXFA_TextProvider* pTextProvider) {
70   if (!pTextProvider)
71     return;
72 
73   if (!m_pSelector) {
74     m_pSelector = std::make_unique<CFX_CSSStyleSelector>();
75 
76     CXFA_Font* font = pTextProvider->GetFontIfExists();
77     m_pSelector->SetDefaultFontSize(font ? font->GetFontSize() : 10.0f);
78   }
79 
80   if (m_cssInitialized)
81     return;
82 
83   m_cssInitialized = true;
84   auto uaSheet = LoadDefaultSheetStyle();
85   m_pSelector->SetUAStyleSheet(std::move(uaSheet));
86   m_pSelector->UpdateStyleIndex();
87 }
88 
LoadDefaultSheetStyle()89 std::unique_ptr<CFX_CSSStyleSheet> CXFA_TextParser::LoadDefaultSheetStyle() {
90   static const char kStyle[] =
91       "html,body,ol,p,ul{display:block}"
92       "li{display:list-item}"
93       "ol,ul{padding-left:33px;margin:1.12em 0}"
94       "ol{list-style-type:decimal}"
95       "a{color:#0000ff;text-decoration:underline}"
96       "b{font-weight:bolder}"
97       "i{font-style:italic}"
98       "sup{vertical-align:+15em;font-size:.66em}"
99       "sub{vertical-align:-15em;font-size:.66em}";
100   WideString ws = WideString::FromASCII(kStyle);
101   auto sheet = std::make_unique<CFX_CSSStyleSheet>();
102   if (!sheet->LoadBuffer(ws.AsStringView()))
103     return nullptr;
104 
105   return sheet;
106 }
107 
CreateRootStyle(CXFA_TextProvider * pTextProvider)108 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateRootStyle(
109     CXFA_TextProvider* pTextProvider) {
110   CXFA_Para* para = pTextProvider->GetParaIfExists();
111   auto pStyle = m_pSelector->CreateComputedStyle(nullptr);
112   float fLineHeight = 0;
113   float fFontSize = 10;
114 
115   if (para) {
116     fLineHeight = para->GetLineHeight();
117     CFX_CSSLength indent;
118     indent.Set(CFX_CSSLengthUnit::Point, para->GetTextIndent());
119     pStyle->SetTextIndent(indent);
120     CFX_CSSTextAlign hAlign = CFX_CSSTextAlign::Left;
121     switch (para->GetHorizontalAlign()) {
122       case XFA_AttributeValue::Center:
123         hAlign = CFX_CSSTextAlign::Center;
124         break;
125       case XFA_AttributeValue::Right:
126         hAlign = CFX_CSSTextAlign::Right;
127         break;
128       case XFA_AttributeValue::Justify:
129         hAlign = CFX_CSSTextAlign::Justify;
130         break;
131       case XFA_AttributeValue::JustifyAll:
132         hAlign = CFX_CSSTextAlign::JustifyAll;
133         break;
134       case XFA_AttributeValue::Left:
135       case XFA_AttributeValue::Radix:
136         break;
137       default:
138         NOTREACHED_NORETURN();
139     }
140     pStyle->SetTextAlign(hAlign);
141     CFX_CSSRect rtMarginWidth;
142     rtMarginWidth.left.Set(CFX_CSSLengthUnit::Point, para->GetMarginLeft());
143     rtMarginWidth.top.Set(CFX_CSSLengthUnit::Point, para->GetSpaceAbove());
144     rtMarginWidth.right.Set(CFX_CSSLengthUnit::Point, para->GetMarginRight());
145     rtMarginWidth.bottom.Set(CFX_CSSLengthUnit::Point, para->GetSpaceBelow());
146     pStyle->SetMarginWidth(rtMarginWidth);
147   }
148 
149   CXFA_Font* font = pTextProvider->GetFontIfExists();
150   if (font) {
151     pStyle->SetColor(font->GetColor());
152     pStyle->SetFontStyle(font->IsItalic() ? CFX_CSSFontStyle::Italic
153                                           : CFX_CSSFontStyle::Normal);
154     pStyle->SetFontWeight(font->IsBold() ? FXFONT_FW_BOLD : FXFONT_FW_NORMAL);
155     pStyle->SetNumberVerticalAlign(-font->GetBaselineShift());
156     fFontSize = font->GetFontSize();
157     CFX_CSSLength letterSpacing;
158     letterSpacing.Set(CFX_CSSLengthUnit::Point, font->GetLetterSpacing());
159     pStyle->SetLetterSpacing(letterSpacing);
160     Mask<CFX_CSSTEXTDECORATION> dwDecoration;
161     if (font->GetLineThrough() > 0)
162       dwDecoration |= CFX_CSSTEXTDECORATION::kLineThrough;
163     if (font->GetUnderline() > 1)
164       dwDecoration |= CFX_CSSTEXTDECORATION::kDouble;
165     else if (font->GetUnderline() > 0)
166       dwDecoration |= CFX_CSSTEXTDECORATION::kUnderline;
167 
168     pStyle->SetTextDecoration(dwDecoration);
169   }
170   pStyle->SetLineHeight(fLineHeight);
171   pStyle->SetFontSize(fFontSize);
172   return pStyle;
173 }
174 
CreateStyle(const CFX_CSSComputedStyle * pParentStyle)175 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::CreateStyle(
176     const CFX_CSSComputedStyle* pParentStyle) {
177   auto pNewStyle = m_pSelector->CreateComputedStyle(pParentStyle);
178   DCHECK(pNewStyle);
179   if (!pParentStyle)
180     return pNewStyle;
181 
182   Mask<CFX_CSSTEXTDECORATION> dwDecoration = pParentStyle->GetTextDecoration();
183   float fBaseLine = 0;
184   if (pParentStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
185     fBaseLine = pParentStyle->GetNumberVerticalAlign();
186 
187   pNewStyle->SetTextDecoration(dwDecoration);
188   pNewStyle->SetNumberVerticalAlign(fBaseLine);
189 
190   const CFX_CSSRect* pRect = pParentStyle->GetMarginWidth();
191   if (pRect)
192     pNewStyle->SetMarginWidth(*pRect);
193   return pNewStyle;
194 }
195 
ComputeStyle(const CFX_XMLNode * pXMLNode,RetainPtr<const CFX_CSSComputedStyle> pParentStyle)196 RetainPtr<CFX_CSSComputedStyle> CXFA_TextParser::ComputeStyle(
197     const CFX_XMLNode* pXMLNode,
198     RetainPtr<const CFX_CSSComputedStyle> pParentStyle) {
199   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
200   if (it == m_mapXMLNodeToParseContext.end())
201     return nullptr;
202 
203   Context* pContext = it->second.get();
204   if (!pContext)
205     return nullptr;
206 
207   pContext->SetParentStyle(pParentStyle);
208 
209   auto tagProvider = ParseTagInfo(pXMLNode);
210   if (tagProvider->m_bContent)
211     return nullptr;
212 
213   auto pStyle = CreateStyle(pParentStyle);
214   m_pSelector->ComputeStyle(pContext->GetDecls(),
215                             tagProvider->GetAttribute(L"style"),
216                             tagProvider->GetAttribute(L"align"), pStyle.Get());
217   return pStyle;
218 }
219 
DoParse(const CFX_XMLNode * pXMLContainer,CXFA_TextProvider * pTextProvider)220 void CXFA_TextParser::DoParse(const CFX_XMLNode* pXMLContainer,
221                               CXFA_TextProvider* pTextProvider) {
222   if (!pXMLContainer || !pTextProvider || m_bParsed)
223     return;
224 
225   m_bParsed = true;
226   InitCSSData(pTextProvider);
227   auto pRootStyle = CreateRootStyle(pTextProvider);
228   ParseRichText(pXMLContainer, pRootStyle.Get());
229 }
230 
ParseRichText(const CFX_XMLNode * pXMLNode,const CFX_CSSComputedStyle * pParentStyle)231 void CXFA_TextParser::ParseRichText(const CFX_XMLNode* pXMLNode,
232                                     const CFX_CSSComputedStyle* pParentStyle) {
233   if (!pXMLNode)
234     return;
235 
236   auto tagProvider = ParseTagInfo(pXMLNode);
237   if (!tagProvider->m_bTagAvailable)
238     return;
239 
240   RetainPtr<CFX_CSSComputedStyle> pNewStyle;
241   if (!(tagProvider->GetTagName().EqualsASCII("body") &&
242         tagProvider->GetTagName().EqualsASCII("html"))) {
243     auto pTextContext = std::make_unique<Context>();
244     CFX_CSSDisplay eDisplay = CFX_CSSDisplay::Inline;
245     if (!tagProvider->m_bContent) {
246       auto declArray =
247           m_pSelector->MatchDeclarations(tagProvider->GetTagName());
248       pNewStyle = CreateStyle(pParentStyle);
249       m_pSelector->ComputeStyle(declArray, tagProvider->GetAttribute(L"style"),
250                                 tagProvider->GetAttribute(L"align"),
251                                 pNewStyle.Get());
252 
253       if (!declArray.empty())
254         pTextContext->SetDecls(std::move(declArray));
255 
256       eDisplay = pNewStyle->GetDisplay();
257     }
258     pTextContext->SetDisplay(eDisplay);
259     m_mapXMLNodeToParseContext[pXMLNode] = std::move(pTextContext);
260   }
261 
262   for (CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
263        pXMLChild = pXMLChild->GetNextSibling()) {
264     ParseRichText(pXMLChild, pNewStyle.Get());
265   }
266 }
267 
TagValidate(const WideString & wsName) const268 bool CXFA_TextParser::TagValidate(const WideString& wsName) const {
269   static const uint32_t s_XFATagName[] = {
270       0x61,        // a
271       0x62,        // b
272       0x69,        // i
273       0x70,        // p
274       0x0001f714,  // br
275       0x00022a55,  // li
276       0x000239bb,  // ol
277       0x00025881,  // ul
278       0x0bd37faa,  // sub
279       0x0bd37fb8,  // sup
280       0xa73e3af2,  // span
281       0xb182eaae,  // body
282       0xdb8ac455,  // html
283   };
284   return std::binary_search(std::begin(s_XFATagName), std::end(s_XFATagName),
285                             FX_HashCode_GetLoweredW(wsName.AsStringView()));
286 }
287 
288 // static
ParseTagInfo(const CFX_XMLNode * pXMLNode)289 std::unique_ptr<CXFA_TextParser::TagProvider> CXFA_TextParser::ParseTagInfo(
290     const CFX_XMLNode* pXMLNode) {
291   auto tagProvider = std::make_unique<TagProvider>();
292   const CFX_XMLElement* pXMLElement = ToXMLElement(pXMLNode);
293   if (pXMLElement) {
294     WideString wsName = pXMLElement->GetLocalTagName();
295     tagProvider->SetTagName(wsName);
296     tagProvider->m_bTagAvailable = TagValidate(wsName);
297     WideString wsValue = pXMLElement->GetAttribute(L"style");
298     if (!wsValue.IsEmpty())
299       tagProvider->SetAttribute(L"style", wsValue);
300 
301     return tagProvider;
302   }
303   if (pXMLNode->GetType() == CFX_XMLNode::Type::kText) {
304     tagProvider->m_bTagAvailable = true;
305     tagProvider->m_bContent = true;
306   }
307   return tagProvider;
308 }
309 
GetVAlign(CXFA_TextProvider * pTextProvider) const310 XFA_AttributeValue CXFA_TextParser::GetVAlign(
311     CXFA_TextProvider* pTextProvider) const {
312   CXFA_Para* para = pTextProvider->GetParaIfExists();
313   return para ? para->GetVerticalAlign() : XFA_AttributeValue::Top;
314 }
315 
GetTabInterval(const CFX_CSSComputedStyle * pStyle) const316 float CXFA_TextParser::GetTabInterval(
317     const CFX_CSSComputedStyle* pStyle) const {
318   WideString wsValue;
319   if (pStyle && pStyle->GetCustomStyle(L"tab-interval", &wsValue))
320     return CXFA_Measurement(wsValue.AsStringView()).ToUnit(XFA_Unit::Pt);
321   return 36;
322 }
323 
CountTabs(const CFX_CSSComputedStyle * pStyle) const324 int32_t CXFA_TextParser::CountTabs(const CFX_CSSComputedStyle* pStyle) const {
325   WideString wsValue;
326   if (pStyle && pStyle->GetCustomStyle(L"xfa-tab-count", &wsValue))
327     return wsValue.GetInteger();
328   return 0;
329 }
330 
IsSpaceRun(const CFX_CSSComputedStyle * pStyle) const331 bool CXFA_TextParser::IsSpaceRun(const CFX_CSSComputedStyle* pStyle) const {
332   WideString wsValue;
333   return pStyle && pStyle->GetCustomStyle(L"xfa-spacerun", &wsValue) &&
334          wsValue.EqualsASCIINoCase("yes");
335 }
336 
GetFont(CXFA_FFDoc * doc,CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const337 RetainPtr<CFGAS_GEFont> CXFA_TextParser::GetFont(
338     CXFA_FFDoc* doc,
339     CXFA_TextProvider* pTextProvider,
340     const CFX_CSSComputedStyle* pStyle) const {
341   WideString wsFamily = L"Courier";
342   uint32_t dwStyle = 0;
343   CXFA_Font* font = pTextProvider->GetFontIfExists();
344   if (font) {
345     wsFamily = font->GetTypeface();
346     if (font->IsBold())
347       dwStyle |= FXFONT_FORCE_BOLD;
348     if (font->IsItalic())
349       dwStyle |= FXFONT_FORCE_BOLD;
350   }
351 
352   if (pStyle) {
353     absl::optional<WideString> last_family = pStyle->GetLastFontFamily();
354     if (last_family.has_value())
355       wsFamily = last_family.value();
356 
357     dwStyle = 0;
358     if (pStyle->GetFontWeight() > FXFONT_FW_NORMAL)
359       dwStyle |= FXFONT_FORCE_BOLD;
360     if (pStyle->GetFontStyle() == CFX_CSSFontStyle::Italic)
361       dwStyle |= FXFONT_ITALIC;
362   }
363 
364   CXFA_FontMgr* pFontMgr = doc->GetApp()->GetXFAFontMgr();
365   return pFontMgr->GetFont(doc, std::move(wsFamily), dwStyle);
366 }
367 
GetFontSize(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const368 float CXFA_TextParser::GetFontSize(CXFA_TextProvider* pTextProvider,
369                                    const CFX_CSSComputedStyle* pStyle) const {
370   if (pStyle)
371     return pStyle->GetFontSize();
372 
373   CXFA_Font* font = pTextProvider->GetFontIfExists();
374   return font ? font->GetFontSize() : 10;
375 }
376 
GetHorScale(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle,const CFX_XMLNode * pXMLNode) const377 int32_t CXFA_TextParser::GetHorScale(CXFA_TextProvider* pTextProvider,
378                                      const CFX_CSSComputedStyle* pStyle,
379                                      const CFX_XMLNode* pXMLNode) const {
380   if (pStyle) {
381     WideString wsValue;
382     if (pStyle->GetCustomStyle(L"xfa-font-horizontal-scale", &wsValue))
383       return wsValue.GetInteger();
384 
385     while (pXMLNode) {
386       auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
387       if (it != m_mapXMLNodeToParseContext.end()) {
388         Context* pContext = it->second.get();
389         if (pContext && pContext->GetParentStyle() &&
390             pContext->GetParentStyle()->GetCustomStyle(
391                 L"xfa-font-horizontal-scale", &wsValue)) {
392           return wsValue.GetInteger();
393         }
394       }
395       pXMLNode = pXMLNode->GetParent();
396     }
397   }
398 
399   CXFA_Font* font = pTextProvider->GetFontIfExists();
400   return font ? static_cast<int32_t>(font->GetHorizontalScale()) : 100;
401 }
402 
GetVerScale(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const403 int32_t CXFA_TextParser::GetVerScale(CXFA_TextProvider* pTextProvider,
404                                      const CFX_CSSComputedStyle* pStyle) const {
405   if (pStyle) {
406     WideString wsValue;
407     if (pStyle->GetCustomStyle(L"xfa-font-vertical-scale", &wsValue))
408       return wsValue.GetInteger();
409   }
410 
411   CXFA_Font* font = pTextProvider->GetFontIfExists();
412   return font ? static_cast<int32_t>(font->GetVerticalScale()) : 100;
413 }
414 
GetUnderline(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const415 int32_t CXFA_TextParser::GetUnderline(
416     CXFA_TextProvider* pTextProvider,
417     const CFX_CSSComputedStyle* pStyle) const {
418   CXFA_Font* font = pTextProvider->GetFontIfExists();
419   if (!pStyle)
420     return font ? font->GetUnderline() : 0;
421 
422   const Mask<CFX_CSSTEXTDECORATION> dwDecoration = pStyle->GetTextDecoration();
423   if (dwDecoration & CFX_CSSTEXTDECORATION::kDouble)
424     return 2;
425   if (dwDecoration & CFX_CSSTEXTDECORATION::kUnderline)
426     return 1;
427   return 0;
428 }
429 
GetUnderlinePeriod(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const430 XFA_AttributeValue CXFA_TextParser::GetUnderlinePeriod(
431     CXFA_TextProvider* pTextProvider,
432     const CFX_CSSComputedStyle* pStyle) const {
433   WideString wsValue;
434   if (pStyle && pStyle->GetCustomStyle(L"underlinePeriod", &wsValue)) {
435     return wsValue.EqualsASCII("word") ? XFA_AttributeValue::Word
436                                        : XFA_AttributeValue::All;
437   }
438   CXFA_Font* font = pTextProvider->GetFontIfExists();
439   return font ? font->GetUnderlinePeriod() : XFA_AttributeValue::All;
440 }
441 
GetLinethrough(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const442 int32_t CXFA_TextParser::GetLinethrough(
443     CXFA_TextProvider* pTextProvider,
444     const CFX_CSSComputedStyle* pStyle) const {
445   if (pStyle) {
446     const Mask<CFX_CSSTEXTDECORATION> dwDecoration =
447         pStyle->GetTextDecoration();
448     return (dwDecoration & CFX_CSSTEXTDECORATION::kLineThrough) ? 1 : 0;
449   }
450   CXFA_Font* font = pTextProvider->GetFontIfExists();
451   return font ? font->GetLineThrough() : 0;
452 }
453 
GetColor(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const454 FX_ARGB CXFA_TextParser::GetColor(CXFA_TextProvider* pTextProvider,
455                                   const CFX_CSSComputedStyle* pStyle) const {
456   if (pStyle)
457     return pStyle->GetColor();
458 
459   CXFA_Font* font = pTextProvider->GetFontIfExists();
460   return font ? font->GetColor() : 0xFF000000;
461 }
462 
GetBaseline(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle) const463 float CXFA_TextParser::GetBaseline(CXFA_TextProvider* pTextProvider,
464                                    const CFX_CSSComputedStyle* pStyle) const {
465   if (pStyle) {
466     if (pStyle->GetVerticalAlign() == CFX_CSSVerticalAlign::Number)
467       return pStyle->GetNumberVerticalAlign();
468   } else {
469     CXFA_Font* font = pTextProvider->GetFontIfExists();
470     if (font)
471       return font->GetBaselineShift();
472   }
473   return 0;
474 }
475 
GetLineHeight(CXFA_TextProvider * pTextProvider,const CFX_CSSComputedStyle * pStyle,bool bFirst,float fVerScale) const476 float CXFA_TextParser::GetLineHeight(CXFA_TextProvider* pTextProvider,
477                                      const CFX_CSSComputedStyle* pStyle,
478                                      bool bFirst,
479                                      float fVerScale) const {
480   float fLineHeight = 0;
481   if (pStyle) {
482     fLineHeight = pStyle->GetLineHeight();
483   } else {
484     CXFA_Para* para = pTextProvider->GetParaIfExists();
485     if (para)
486       fLineHeight = para->GetLineHeight();
487   }
488 
489   if (bFirst) {
490     float fFontSize = GetFontSize(pTextProvider, pStyle);
491     if (fLineHeight < 0.1f)
492       fLineHeight = fFontSize;
493     else
494       fLineHeight = std::min(fLineHeight, fFontSize);
495   } else if (fLineHeight < 0.1f) {
496     fLineHeight = GetFontSize(pTextProvider, pStyle) * 1.2f;
497   }
498   fLineHeight *= fVerScale;
499   return fLineHeight;
500 }
501 
GetEmbeddedObj(const CXFA_TextProvider * pTextProvider,const CFX_XMLNode * pXMLNode)502 absl::optional<WideString> CXFA_TextParser::GetEmbeddedObj(
503     const CXFA_TextProvider* pTextProvider,
504     const CFX_XMLNode* pXMLNode) {
505   if (!pXMLNode)
506     return absl::nullopt;
507 
508   const CFX_XMLElement* pElement = ToXMLElement(pXMLNode);
509   if (!pElement)
510     return absl::nullopt;
511 
512   WideString wsAttr = pElement->GetAttribute(L"xfa:embed");
513   if (wsAttr.IsEmpty())
514     return absl::nullopt;
515 
516   if (wsAttr[0] == L'#')
517     wsAttr.Delete(0);
518 
519   WideString ws =
520       GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedType", L"som");
521   if (!ws.EqualsASCII("uri"))
522     return absl::nullopt;
523 
524   ws = GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedMode",
525                                              L"formatted");
526   if (!(ws.EqualsASCII("raw") || ws.EqualsASCII("formatted")))
527     return absl::nullopt;
528 
529   return pTextProvider->GetEmbeddedObj(wsAttr);
530 }
531 
GetParseContextFromMap(const CFX_XMLNode * pXMLNode)532 CXFA_TextParser::Context* CXFA_TextParser::GetParseContextFromMap(
533     const CFX_XMLNode* pXMLNode) {
534   auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
535   return it != m_mapXMLNodeToParseContext.end() ? it->second.get() : nullptr;
536 }
537 
GetTabstops(const CFX_CSSComputedStyle * pStyle,CXFA_TextTabstopsContext * pTabstopContext)538 bool CXFA_TextParser::GetTabstops(const CFX_CSSComputedStyle* pStyle,
539                                   CXFA_TextTabstopsContext* pTabstopContext) {
540   if (!pStyle || !pTabstopContext)
541     return false;
542 
543   WideString wsValue;
544   if (!pStyle->GetCustomStyle(L"xfa-tab-stops", &wsValue) &&
545       !pStyle->GetCustomStyle(L"tab-stops", &wsValue)) {
546     return false;
547   }
548 
549   pdfium::span<const wchar_t> spTabStops = wsValue.span();
550   size_t iCur = 0;
551   size_t iLast = 0;
552   WideString wsAlign;
553   TabStopStatus eStatus = TabStopStatus::None;
554   while (iCur < spTabStops.size()) {
555     wchar_t ch = spTabStops[iCur];
556     switch (eStatus) {
557       case TabStopStatus::None:
558         if (ch <= ' ') {
559           iCur++;
560         } else {
561           eStatus = TabStopStatus::Alignment;
562           iLast = iCur;
563         }
564         break;
565       case TabStopStatus::Alignment:
566         if (ch == ' ') {
567           wsAlign = WideStringView(spTabStops.subspan(iLast, iCur - iLast));
568           eStatus = TabStopStatus::StartLeader;
569           iCur++;
570           while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
571             iCur++;
572           iLast = iCur;
573         } else {
574           iCur++;
575         }
576         break;
577       case TabStopStatus::StartLeader:
578         if (ch != 'l') {
579           eStatus = TabStopStatus::Location;
580         } else {
581           int32_t iCount = 0;
582           while (iCur < spTabStops.size()) {
583             ch = spTabStops[iCur];
584             iCur++;
585             if (ch == '(') {
586               iCount++;
587             } else if (ch == ')') {
588               iCount--;
589               if (iCount == 0)
590                 break;
591             }
592           }
593           while (iCur < spTabStops.size() && spTabStops[iCur] <= ' ')
594             iCur++;
595 
596           iLast = iCur;
597           eStatus = TabStopStatus::Location;
598         }
599         break;
600       case TabStopStatus::Location:
601         if (ch == ' ') {
602           uint32_t dwHashCode = FX_HashCode_GetLoweredW(wsAlign.AsStringView());
603           CXFA_Measurement ms(
604               WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
605           float fPos = ms.ToUnit(XFA_Unit::Pt);
606           pTabstopContext->Append(dwHashCode, fPos);
607           wsAlign.clear();
608           eStatus = TabStopStatus::None;
609         }
610         iCur++;
611         break;
612       default:
613         break;
614     }
615   }
616 
617   if (!wsAlign.IsEmpty()) {
618     uint32_t dwHashCode = FX_HashCode_GetLoweredW(wsAlign.AsStringView());
619     CXFA_Measurement ms(
620         WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
621     float fPos = ms.ToUnit(XFA_Unit::Pt);
622     pTabstopContext->Append(dwHashCode, fPos);
623   }
624   return true;
625 }
626 
627 CXFA_TextParser::TagProvider::TagProvider() = default;
628 
629 CXFA_TextParser::TagProvider::~TagProvider() = default;
630 
631 CXFA_TextParser::Context::Context() = default;
632 
633 CXFA_TextParser::Context::~Context() = default;
634 
SetParentStyle(RetainPtr<const CFX_CSSComputedStyle> style)635 void CXFA_TextParser::Context::SetParentStyle(
636     RetainPtr<const CFX_CSSComputedStyle> style) {
637   m_pParentStyle = std::move(style);
638 }
639 
SetDecls(std::vector<const CFX_CSSDeclaration * > && decl)640 void CXFA_TextParser::Context::SetDecls(
641     std::vector<const CFX_CSSDeclaration*>&& decl) {
642   decls_ = std::move(decl);
643 }
644