xref: /aosp_15_r20/external/pdfium/fpdfsdk/fpdf_edittext.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <map>
6 #include <memory>
7 #include <sstream>
8 #include <utility>
9 #include <vector>
10 
11 #include "core/fpdfapi/font/cpdf_cidfont.h"
12 #include "core/fpdfapi/font/cpdf_font.h"
13 #include "core/fpdfapi/page/cpdf_docpagedata.h"
14 #include "core/fpdfapi/page/cpdf_textobject.h"
15 #include "core/fpdfapi/page/cpdf_textstate.h"
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_document.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/cpdf_string.h"
24 #include "core/fpdfapi/render/charposlist.h"
25 #include "core/fpdfapi/render/cpdf_pagerendercontext.h"
26 #include "core/fpdfapi/render/cpdf_rendercontext.h"
27 #include "core/fpdfapi/render/cpdf_renderstatus.h"
28 #include "core/fpdfapi/render/cpdf_textrenderer.h"
29 #include "core/fpdftext/cpdf_textpage.h"
30 #include "core/fxcrt/fx_extension.h"
31 #include "core/fxcrt/fx_string_wrappers.h"
32 #include "core/fxcrt/span_util.h"
33 #include "core/fxcrt/stl_util.h"
34 #include "core/fxcrt/utf16.h"
35 #include "core/fxge/cfx_defaultrenderdevice.h"
36 #include "core/fxge/cfx_fontmgr.h"
37 #include "core/fxge/dib/cfx_dibitmap.h"
38 #include "core/fxge/fx_font.h"
39 #include "core/fxge/text_char_pos.h"
40 #include "fpdfsdk/cpdfsdk_helpers.h"
41 #include "public/fpdf_edit.h"
42 #include "third_party/base/check.h"
43 #include "third_party/base/check_op.h"
44 #include "third_party/base/containers/contains.h"
45 #include "third_party/base/numerics/safe_conversions.h"
46 
47 // These checks are here because core/ and public/ cannot depend on each other.
48 static_assert(static_cast<int>(TextRenderingMode::MODE_UNKNOWN) ==
49                   FPDF_TEXTRENDERMODE_UNKNOWN,
50               "TextRenderingMode::MODE_UNKNOWN value mismatch");
51 static_assert(static_cast<int>(TextRenderingMode::MODE_FILL) ==
52                   FPDF_TEXTRENDERMODE_FILL,
53               "TextRenderingMode::MODE_FILL value mismatch");
54 static_assert(static_cast<int>(TextRenderingMode::MODE_STROKE) ==
55                   FPDF_TEXTRENDERMODE_STROKE,
56               "TextRenderingMode::MODE_STROKE value mismatch");
57 static_assert(static_cast<int>(TextRenderingMode::MODE_FILL_STROKE) ==
58                   FPDF_TEXTRENDERMODE_FILL_STROKE,
59               "TextRenderingMode::MODE_FILL_STROKE value mismatch");
60 static_assert(static_cast<int>(TextRenderingMode::MODE_INVISIBLE) ==
61                   FPDF_TEXTRENDERMODE_INVISIBLE,
62               "TextRenderingMode::MODE_INVISIBLE value mismatch");
63 static_assert(static_cast<int>(TextRenderingMode::MODE_FILL_CLIP) ==
64                   FPDF_TEXTRENDERMODE_FILL_CLIP,
65               "TextRenderingMode::MODE_FILL_CLIP value mismatch");
66 static_assert(static_cast<int>(TextRenderingMode::MODE_STROKE_CLIP) ==
67                   FPDF_TEXTRENDERMODE_STROKE_CLIP,
68               "TextRenderingMode::MODE_STROKE_CLIP value mismatch");
69 static_assert(static_cast<int>(TextRenderingMode::MODE_FILL_STROKE_CLIP) ==
70                   FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP,
71               "TextRenderingMode::MODE_FILL_STROKE_CLIP value mismatch");
72 static_assert(static_cast<int>(TextRenderingMode::MODE_CLIP) ==
73                   FPDF_TEXTRENDERMODE_CLIP,
74               "TextRenderingMode::MODE_CLIP value mismatch");
75 static_assert(static_cast<int>(TextRenderingMode::MODE_LAST) ==
76                   FPDF_TEXTRENDERMODE_LAST,
77               "TextRenderingMode::MODE_LAST value mismatch");
78 
79 namespace {
80 
BaseFontNameForType(CFX_Font * pFont,int font_type)81 ByteString BaseFontNameForType(CFX_Font* pFont, int font_type) {
82   ByteString name = font_type == FPDF_FONT_TYPE1 ? pFont->GetPsName()
83                                                  : pFont->GetBaseFontName();
84   if (!name.IsEmpty())
85     return name;
86 
87   return CFX_Font::kUntitledFontName;
88 }
89 
LoadFontDesc(CPDF_Document * pDoc,const ByteString & font_name,CFX_Font * pFont,pdfium::span<const uint8_t> span,int font_type)90 RetainPtr<CPDF_Dictionary> LoadFontDesc(CPDF_Document* pDoc,
91                                         const ByteString& font_name,
92                                         CFX_Font* pFont,
93                                         pdfium::span<const uint8_t> span,
94                                         int font_type) {
95   auto pFontDesc = pDoc->NewIndirect<CPDF_Dictionary>();
96   pFontDesc->SetNewFor<CPDF_Name>("Type", "FontDescriptor");
97   pFontDesc->SetNewFor<CPDF_Name>("FontName", font_name);
98   int flags = 0;
99   if (FXFT_Is_Face_fixedwidth(pFont->GetFaceRec()))
100     flags |= FXFONT_FIXED_PITCH;
101   if (font_name.Contains("Serif"))
102     flags |= FXFONT_SERIF;
103   if (FXFT_Is_Face_Italic(pFont->GetFaceRec()))
104     flags |= FXFONT_ITALIC;
105   if (FXFT_Is_Face_Bold(pFont->GetFaceRec()))
106     flags |= FXFONT_FORCE_BOLD;
107 
108   // TODO(npm): How do I know if a  font is symbolic, script, allcap, smallcap
109   flags |= FXFONT_NONSYMBOLIC;
110 
111   pFontDesc->SetNewFor<CPDF_Number>("Flags", flags);
112   FX_RECT bbox = pFont->GetBBox().value_or(FX_RECT());
113   pFontDesc->SetRectFor("FontBBox", CFX_FloatRect(bbox));
114 
115   // TODO(npm): calculate italic angle correctly
116   pFontDesc->SetNewFor<CPDF_Number>("ItalicAngle", pFont->IsItalic() ? -12 : 0);
117 
118   pFontDesc->SetNewFor<CPDF_Number>("Ascent", pFont->GetAscent());
119   pFontDesc->SetNewFor<CPDF_Number>("Descent", pFont->GetDescent());
120 
121   // TODO(npm): calculate the capheight, stemV correctly
122   pFontDesc->SetNewFor<CPDF_Number>("CapHeight", pFont->GetAscent());
123   pFontDesc->SetNewFor<CPDF_Number>("StemV", pFont->IsBold() ? 120 : 70);
124 
125   auto pStream = pDoc->NewIndirect<CPDF_Stream>();
126   pStream->SetData(span);
127   // TODO(npm): Lengths for Type1 fonts.
128   if (font_type == FPDF_FONT_TRUETYPE) {
129     pStream->GetMutableDict()->SetNewFor<CPDF_Number>(
130         "Length1", static_cast<int>(span.size()));
131   }
132   ByteString fontFile = font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2";
133   pFontDesc->SetNewFor<CPDF_Reference>(fontFile, pDoc, pStream->GetObjNum());
134   return pFontDesc;
135 }
136 
// Fixed preamble of every generated ToUnicode CMap, up to and including the
// codespace range. Charcodes are written as two bytes (see AddCharcode()), so
// the codespace range is <0000>..<FFFF>. CMapType must be defined through a
// literal name (/CMapType); without the slash it is an executable name rather
// than a key being defined.
const char ToUnicodeStart[] =
    "/CIDInit /ProcSet findresource begin\n"
    "12 dict begin\n"
    "begincmap\n"
    "/CIDSystemInfo\n"
    "<</Registry (Adobe)\n"
    "/Ordering (Identity)\n"
    "/Supplement 0\n"
    ">> def\n"
    "/CMapName /Adobe-Identity-H def\n"
    "/CMapType 2 def\n"
    "1 begincodespacerange\n"
    "<0000> <FFFF>\n"
    "endcodespacerange\n";
151 
// Fixed trailer of every generated ToUnicode CMap: closes the cmap, registers
// it as a CMap resource and pops the two dictionaries opened by the preamble.
constexpr char ToUnicodeEnd[] =
    "endcmap\n"
    "CMapName currentdict /CMap defineresource pop\n"
    "end\n"
    "end\n";
157 
AddCharcode(fxcrt::ostringstream * pBuffer,uint32_t number)158 void AddCharcode(fxcrt::ostringstream* pBuffer, uint32_t number) {
159   DCHECK(number <= 0xFFFF);
160   *pBuffer << "<";
161   char ans[4];
162   FXSYS_IntToFourHexChars(number, ans);
163   for (size_t i = 0; i < 4; ++i)
164     *pBuffer << ans[i];
165   *pBuffer << ">";
166 }
167 
168 // PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in
169 // UTF-16BE encoding." See https://en.wikipedia.org/wiki/UTF-16#Description
AddUnicode(fxcrt::ostringstream * pBuffer,uint32_t unicode)170 void AddUnicode(fxcrt::ostringstream* pBuffer, uint32_t unicode) {
171   if (pdfium::IsHighSurrogate(unicode) || pdfium::IsLowSurrogate(unicode)) {
172     unicode = 0;
173   }
174 
175   char ans[8];
176   *pBuffer << "<";
177   size_t numChars = FXSYS_ToUTF16BE(unicode, ans);
178   for (size_t i = 0; i < numChars; ++i)
179     *pBuffer << ans[i];
180   *pBuffer << ">";
181 }
182 
183 // Loads the charcode to unicode mapping into a stream
LoadUnicode(CPDF_Document * pDoc,const std::multimap<uint32_t,uint32_t> & to_unicode)184 RetainPtr<CPDF_Stream> LoadUnicode(
185     CPDF_Document* pDoc,
186     const std::multimap<uint32_t, uint32_t>& to_unicode) {
187   // A map charcode->unicode
188   std::map<uint32_t, uint32_t> char_to_uni;
189   // A map <char_start, char_end> to vector v of unicode characters of size (end
190   // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec
191   // 1.7 Section 5.9.2 says that only the last byte of the unicode may change.
192   std::map<std::pair<uint32_t, uint32_t>, std::vector<uint32_t>>
193       map_range_vector;
194   // A map <start, end> -> unicode
195   // This abbreviates: start->unicode, start+1->unicode+1, etc.
196   // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may
197   // change.
198   std::map<std::pair<uint32_t, uint32_t>, uint32_t> map_range;
199 
200   // Calculate the maps
201   for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) {
202     uint32_t firstCharcode = iter->first;
203     uint32_t firstUnicode = iter->second;
204     if (std::next(iter) == to_unicode.end() ||
205         firstCharcode + 1 != std::next(iter)->first) {
206       char_to_uni[firstCharcode] = firstUnicode;
207       continue;
208     }
209     ++iter;
210     uint32_t curCharcode = iter->first;
211     uint32_t curUnicode = iter->second;
212     if (curCharcode % 256 == 0) {
213       char_to_uni[firstCharcode] = firstUnicode;
214       char_to_uni[curCharcode] = curUnicode;
215       continue;
216     }
217     const size_t maxExtra = 255 - (curCharcode % 256);
218     auto next_it = std::next(iter);
219     if (firstUnicode + 1 != curUnicode) {
220       // Consecutive charcodes mapping to non-consecutive unicodes
221       std::vector<uint32_t> unicodes;
222       unicodes.push_back(firstUnicode);
223       unicodes.push_back(curUnicode);
224       for (size_t i = 0; i < maxExtra; ++i) {
225         if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first)
226           break;
227         ++iter;
228         ++curCharcode;
229         unicodes.push_back(iter->second);
230         next_it = std::next(iter);
231       }
232       DCHECK_EQ(iter->first - firstCharcode + 1, unicodes.size());
233       map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes;
234       continue;
235     }
236     // Consecutive charcodes mapping to consecutive unicodes
237     for (size_t i = 0; i < maxExtra; ++i) {
238       if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first ||
239           curUnicode + 1 != next_it->second) {
240         break;
241       }
242       ++iter;
243       ++curCharcode;
244       ++curUnicode;
245       next_it = std::next(iter);
246     }
247     map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode;
248   }
249   fxcrt::ostringstream buffer;
250   buffer << ToUnicodeStart;
251   // Add maps to buffer
252   buffer << static_cast<uint32_t>(char_to_uni.size()) << " beginbfchar\n";
253   for (const auto& iter : char_to_uni) {
254     AddCharcode(&buffer, iter.first);
255     buffer << " ";
256     AddUnicode(&buffer, iter.second);
257     buffer << "\n";
258   }
259   buffer << "endbfchar\n"
260          << static_cast<uint32_t>(map_range_vector.size() + map_range.size())
261          << " beginbfrange\n";
262   for (const auto& iter : map_range_vector) {
263     const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
264     AddCharcode(&buffer, charcodeRange.first);
265     buffer << " ";
266     AddCharcode(&buffer, charcodeRange.second);
267     buffer << " [";
268     const std::vector<uint32_t>& unicodes = iter.second;
269     for (size_t i = 0; i < unicodes.size(); ++i) {
270       uint32_t uni = unicodes[i];
271       AddUnicode(&buffer, uni);
272       if (i != unicodes.size() - 1)
273         buffer << " ";
274     }
275     buffer << "]\n";
276   }
277   for (const auto& iter : map_range) {
278     const std::pair<uint32_t, uint32_t>& charcodeRange = iter.first;
279     AddCharcode(&buffer, charcodeRange.first);
280     buffer << " ";
281     AddCharcode(&buffer, charcodeRange.second);
282     buffer << " ";
283     AddUnicode(&buffer, iter.second);
284     buffer << "\n";
285   }
286   buffer << "endbfrange\n";
287   buffer << ToUnicodeEnd;
288   // TODO(npm): Encrypt / Compress?
289   auto stream = pDoc->NewIndirect<CPDF_Stream>();
290   stream->SetDataFromStringstream(&buffer);
291   return stream;
292 }
293 
LoadSimpleFont(CPDF_Document * pDoc,std::unique_ptr<CFX_Font> pFont,pdfium::span<const uint8_t> span,int font_type)294 RetainPtr<CPDF_Font> LoadSimpleFont(CPDF_Document* pDoc,
295                                     std::unique_ptr<CFX_Font> pFont,
296                                     pdfium::span<const uint8_t> span,
297                                     int font_type) {
298   auto pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
299   pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
300   pFontDict->SetNewFor<CPDF_Name>(
301       "Subtype", font_type == FPDF_FONT_TYPE1 ? "Type1" : "TrueType");
302   ByteString name = BaseFontNameForType(pFont.get(), font_type);
303   pFontDict->SetNewFor<CPDF_Name>("BaseFont", name);
304 
305   uint32_t dwGlyphIndex;
306   uint32_t dwCurrentChar = static_cast<uint32_t>(
307       FT_Get_First_Char(pFont->GetFaceRec(), &dwGlyphIndex));
308   static constexpr uint32_t kMaxSimpleFontChar = 0xFF;
309   if (dwCurrentChar > kMaxSimpleFontChar || dwGlyphIndex == 0)
310     return nullptr;
311   pFontDict->SetNewFor<CPDF_Number>("FirstChar",
312                                     static_cast<int>(dwCurrentChar));
313   auto widthsArray = pDoc->NewIndirect<CPDF_Array>();
314   while (true) {
315     widthsArray->AppendNew<CPDF_Number>(pFont->GetGlyphWidth(dwGlyphIndex));
316     uint32_t nextChar = static_cast<uint32_t>(
317         FT_Get_Next_Char(pFont->GetFaceRec(), dwCurrentChar, &dwGlyphIndex));
318     // Simple fonts have 1-byte charcodes only.
319     if (nextChar > kMaxSimpleFontChar || dwGlyphIndex == 0)
320       break;
321     for (uint32_t i = dwCurrentChar + 1; i < nextChar; i++)
322       widthsArray->AppendNew<CPDF_Number>(0);
323     dwCurrentChar = nextChar;
324   }
325   pFontDict->SetNewFor<CPDF_Number>("LastChar",
326                                     static_cast<int>(dwCurrentChar));
327   pFontDict->SetNewFor<CPDF_Reference>("Widths", pDoc,
328                                        widthsArray->GetObjNum());
329   RetainPtr<CPDF_Dictionary> pFontDesc =
330       LoadFontDesc(pDoc, name, pFont.get(), span, font_type);
331 
332   pFontDict->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
333                                        pFontDesc->GetObjNum());
334   return CPDF_DocPageData::FromDocument(pDoc)->GetFont(std::move(pFontDict));
335 }
336 
LoadCompositeFont(CPDF_Document * pDoc,std::unique_ptr<CFX_Font> pFont,pdfium::span<const uint8_t> span,int font_type)337 RetainPtr<CPDF_Font> LoadCompositeFont(CPDF_Document* pDoc,
338                                        std::unique_ptr<CFX_Font> pFont,
339                                        pdfium::span<const uint8_t> span,
340                                        int font_type) {
341   auto pFontDict = pDoc->NewIndirect<CPDF_Dictionary>();
342   pFontDict->SetNewFor<CPDF_Name>("Type", "Font");
343   pFontDict->SetNewFor<CPDF_Name>("Subtype", "Type0");
344   // TODO(npm): Get the correct encoding, if it's not identity.
345   ByteString encoding = "Identity-H";
346   pFontDict->SetNewFor<CPDF_Name>("Encoding", encoding);
347   ByteString name = BaseFontNameForType(pFont.get(), font_type);
348   pFontDict->SetNewFor<CPDF_Name>(
349       "BaseFont", font_type == FPDF_FONT_TYPE1 ? name + "-" + encoding : name);
350 
351   auto pCIDFont = pDoc->NewIndirect<CPDF_Dictionary>();
352   pCIDFont->SetNewFor<CPDF_Name>("Type", "Font");
353   pCIDFont->SetNewFor<CPDF_Name>("Subtype", font_type == FPDF_FONT_TYPE1
354                                                 ? "CIDFontType0"
355                                                 : "CIDFontType2");
356   pCIDFont->SetNewFor<CPDF_Name>("BaseFont", name);
357 
358   // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the
359   // CIDSystemInfo
360   auto pCIDSystemInfo = pDoc->NewIndirect<CPDF_Dictionary>();
361   pCIDSystemInfo->SetNewFor<CPDF_String>("Registry", "Adobe", false);
362   pCIDSystemInfo->SetNewFor<CPDF_String>("Ordering", "Identity", false);
363   pCIDSystemInfo->SetNewFor<CPDF_Number>("Supplement", 0);
364   pCIDFont->SetNewFor<CPDF_Reference>("CIDSystemInfo", pDoc,
365                                       pCIDSystemInfo->GetObjNum());
366 
367   RetainPtr<CPDF_Dictionary> pFontDesc =
368       LoadFontDesc(pDoc, name, pFont.get(), span, font_type);
369   pCIDFont->SetNewFor<CPDF_Reference>("FontDescriptor", pDoc,
370                                       pFontDesc->GetObjNum());
371 
372   uint32_t dwGlyphIndex;
373   uint32_t dwCurrentChar = static_cast<uint32_t>(
374       FT_Get_First_Char(pFont->GetFaceRec(), &dwGlyphIndex));
375   // If it doesn't have a single char, just fail
376   if (dwGlyphIndex == 0 ||
377       dwCurrentChar > pdfium::kMaximumSupplementaryCodePoint) {
378     return nullptr;
379   }
380 
381   std::multimap<uint32_t, uint32_t> to_unicode;
382   std::map<uint32_t, uint32_t> widths;
383   while (true) {
384     if (dwCurrentChar > pdfium::kMaximumSupplementaryCodePoint) {
385       break;
386     }
387 
388     if (!pdfium::Contains(widths, dwGlyphIndex))
389       widths[dwGlyphIndex] = pFont->GetGlyphWidth(dwGlyphIndex);
390     to_unicode.emplace(dwGlyphIndex, dwCurrentChar);
391     dwCurrentChar = static_cast<uint32_t>(
392         FT_Get_Next_Char(pFont->GetFaceRec(), dwCurrentChar, &dwGlyphIndex));
393     if (dwGlyphIndex == 0)
394       break;
395   }
396   auto widthsArray = pDoc->NewIndirect<CPDF_Array>();
397   for (auto it = widths.begin(); it != widths.end(); ++it) {
398     int ch = it->first;
399     int w = it->second;
400     if (std::next(it) == widths.end()) {
401       // Only one char left, use format c [w]
402       auto oneW = pdfium::MakeRetain<CPDF_Array>();
403       oneW->AppendNew<CPDF_Number>(w);
404       widthsArray->AppendNew<CPDF_Number>(ch);
405       widthsArray->Append(oneW);
406       break;
407     }
408     ++it;
409     int next_ch = it->first;
410     int next_w = it->second;
411     if (next_ch == ch + 1 && next_w == w) {
412       // The array can have a group c_first c_last w: all CIDs in the range from
413       // c_first to c_last will have width w
414       widthsArray->AppendNew<CPDF_Number>(ch);
415       ch = next_ch;
416       while (true) {
417         auto next_it = std::next(it);
418         if (next_it == widths.end() || next_it->first != it->first + 1 ||
419             next_it->second != it->second) {
420           break;
421         }
422         ++it;
423         ch = it->first;
424       }
425       widthsArray->AppendNew<CPDF_Number>(ch);
426       widthsArray->AppendNew<CPDF_Number>(w);
427       continue;
428     }
429     // Otherwise we can have a group of the form c [w1 w2 ...]: c has width
430     // w1, c+1 has width w2, etc.
431     widthsArray->AppendNew<CPDF_Number>(ch);
432     auto curWidthArray = pdfium::MakeRetain<CPDF_Array>();
433     curWidthArray->AppendNew<CPDF_Number>(w);
434     curWidthArray->AppendNew<CPDF_Number>(next_w);
435     while (true) {
436       auto next_it = std::next(it);
437       if (next_it == widths.end() || next_it->first != it->first + 1)
438         break;
439       ++it;
440       curWidthArray->AppendNew<CPDF_Number>(static_cast<int>(it->second));
441     }
442     widthsArray->Append(curWidthArray);
443   }
444   pCIDFont->SetNewFor<CPDF_Reference>("W", pDoc, widthsArray->GetObjNum());
445 
446   // TODO(npm): Support vertical writing
447 
448   auto pDescendant = pFontDict->SetNewFor<CPDF_Array>("DescendantFonts");
449   pDescendant->AppendNew<CPDF_Reference>(pDoc, pCIDFont->GetObjNum());
450 
451   RetainPtr<CPDF_Stream> toUnicodeStream = LoadUnicode(pDoc, to_unicode);
452   pFontDict->SetNewFor<CPDF_Reference>("ToUnicode", pDoc,
453                                        toUnicodeStream->GetObjNum());
454   return CPDF_DocPageData::FromDocument(pDoc)->GetFont(pFontDict);
455 }
456 
CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object)457 CPDF_TextObject* CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) {
458   auto* obj = CPDFPageObjectFromFPDFPageObject(page_object);
459   return obj ? obj->AsText() : nullptr;
460 }
461 
FPDFGlyphPathFromCFXPath(const CFX_Path * path)462 FPDF_GLYPHPATH FPDFGlyphPathFromCFXPath(const CFX_Path* path) {
463   return reinterpret_cast<FPDF_GLYPHPATH>(path);
464 }
CFXPathFromFPDFGlyphPath(FPDF_GLYPHPATH path)465 const CFX_Path* CFXPathFromFPDFGlyphPath(FPDF_GLYPHPATH path) {
466   return reinterpret_cast<const CFX_Path*>(path);
467 }
468 
469 }  // namespace
470 
471 FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,FPDF_BYTESTRING font,float font_size)472 FPDFPageObj_NewTextObj(FPDF_DOCUMENT document,
473                        FPDF_BYTESTRING font,
474                        float font_size) {
475   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
476   if (!pDoc)
477     return nullptr;
478 
479   RetainPtr<CPDF_Font> pFont =
480       CPDF_Font::GetStockFont(pDoc, ByteStringView(font));
481   if (!pFont)
482     return nullptr;
483 
484   auto pTextObj = std::make_unique<CPDF_TextObject>();
485   pTextObj->m_TextState.SetFont(std::move(pFont));
486   pTextObj->m_TextState.SetFontSize(font_size);
487   pTextObj->DefaultStates();
488 
489   // Caller takes ownership.
490   return FPDFPageObjectFromCPDFPageObject(pTextObj.release());
491 }
492 
493 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFText_SetText(FPDF_PAGEOBJECT text_object,FPDF_WIDESTRING text)494 FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) {
495   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object);
496   if (!pTextObj)
497     return false;
498 
499   WideString encodedText = WideStringFromFPDFWideString(text);
500   ByteString byteText;
501   for (wchar_t wc : encodedText) {
502     pTextObj->GetFont()->AppendChar(
503         &byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc));
504   }
505   pTextObj->SetText(byteText);
506   return true;
507 }
508 
509 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFText_SetCharcodes(FPDF_PAGEOBJECT text_object,const uint32_t * charcodes,size_t count)510 FPDFText_SetCharcodes(FPDF_PAGEOBJECT text_object,
511                       const uint32_t* charcodes,
512                       size_t count) {
513   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object);
514   if (!pTextObj)
515     return false;
516 
517   if (!charcodes && count)
518     return false;
519 
520   ByteString byte_text;
521   if (charcodes) {
522     for (size_t i = 0; i < count; ++i) {
523       pTextObj->GetFont()->AppendChar(&byte_text, charcodes[i]);
524     }
525   }
526   pTextObj->SetText(byte_text);
527   return true;
528 }
529 
FPDFText_LoadFont(FPDF_DOCUMENT document,const uint8_t * data,uint32_t size,int font_type,FPDF_BOOL cid)530 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document,
531                                                       const uint8_t* data,
532                                                       uint32_t size,
533                                                       int font_type,
534                                                       FPDF_BOOL cid) {
535   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
536   if (!pDoc || !data || size == 0 ||
537       (font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) {
538     return nullptr;
539   }
540 
541   auto span = pdfium::make_span(data, size);
542   auto pFont = std::make_unique<CFX_Font>();
543 
544   // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we
545   // are allowing giving any font that can be loaded on freetype and setting it
546   // as any font type.
547   if (!pFont->LoadEmbedded(span, /*force_vertical=*/false, /*object_tag=*/0))
548     return nullptr;
549 
550   // Caller takes ownership.
551   return FPDFFontFromCPDFFont(
552       cid ? LoadCompositeFont(pDoc, std::move(pFont), span, font_type).Leak()
553           : LoadSimpleFont(pDoc, std::move(pFont), span, font_type).Leak());
554 }
555 
556 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV
FPDFText_LoadStandardFont(FPDF_DOCUMENT document,FPDF_BYTESTRING font)557 FPDFText_LoadStandardFont(FPDF_DOCUMENT document, FPDF_BYTESTRING font) {
558   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
559   if (!pDoc)
560     return nullptr;
561 
562   // Caller takes ownership.
563   return FPDFFontFromCPDFFont(
564       CPDF_Font::GetStockFont(pDoc, ByteStringView(font)).Leak());
565 }
566 
567 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text,float * size)568 FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text, float* size) {
569   if (!size)
570     return false;
571 
572   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text);
573   if (!pTextObj)
574     return false;
575 
576   *size = pTextObj->GetFontSize();
577   return true;
578 }
579 
580 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object,FPDF_TEXTPAGE text_page,FPDF_WCHAR * buffer,unsigned long length)581 FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object,
582                     FPDF_TEXTPAGE text_page,
583                     FPDF_WCHAR* buffer,
584                     unsigned long length) {
585   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object);
586   if (!pTextObj)
587     return 0;
588 
589   CPDF_TextPage* pTextPage = CPDFTextPageFromFPDFTextPage(text_page);
590   if (!pTextPage)
591     return 0;
592 
593   WideString text = pTextPage->GetTextByObject(pTextObj);
594   return Utf16EncodeMaybeCopyAndReturnLength(text, buffer, length);
595 }
596 
597 FPDF_EXPORT FPDF_BITMAP FPDF_CALLCONV
FPDFTextObj_GetRenderedBitmap(FPDF_DOCUMENT document,FPDF_PAGE page,FPDF_PAGEOBJECT text_object,float scale)598 FPDFTextObj_GetRenderedBitmap(FPDF_DOCUMENT document,
599                               FPDF_PAGE page,
600                               FPDF_PAGEOBJECT text_object,
601                               float scale) {
602   CPDF_Document* doc = CPDFDocumentFromFPDFDocument(document);
603   if (!doc)
604     return nullptr;
605 
606   CPDF_Page* optional_page = CPDFPageFromFPDFPage(page);
607   if (optional_page && optional_page->GetDocument() != doc)
608     return nullptr;
609 
610   CPDF_TextObject* text = CPDFTextObjectFromFPDFPageObject(text_object);
611   if (!text)
612     return nullptr;
613 
614   if (scale <= 0)
615     return nullptr;
616 
617   const CFX_Matrix scale_matrix(scale, 0, 0, scale, 0, 0);
618   const CFX_FloatRect& text_rect = text->GetRect();
619   const CFX_FloatRect scaled_text_rect = scale_matrix.TransformRect(text_rect);
620 
621   // `rect` has to use integer values. Round up as needed.
622   const FX_RECT rect = scaled_text_rect.GetOuterRect();
623   if (rect.IsEmpty())
624     return nullptr;
625 
626   auto result_bitmap = pdfium::MakeRetain<CFX_DIBitmap>();
627   if (!result_bitmap->Create(rect.Width(), rect.Height(), FXDIB_Format::kArgb))
628     return nullptr;
629 
630   auto render_context = std::make_unique<CPDF_PageRenderContext>();
631   CPDF_PageRenderContext* render_context_ptr = render_context.get();
632   CPDF_Page::RenderContextClearer clearer(optional_page);
633   if (optional_page)
634     optional_page->SetRenderContext(std::move(render_context));
635 
636   RetainPtr<CPDF_Dictionary> page_resources =
637       optional_page ? optional_page->GetMutablePageResources() : nullptr;
638 
639   auto device = std::make_unique<CFX_DefaultRenderDevice>();
640   CFX_DefaultRenderDevice* device_ptr = device.get();
641   render_context_ptr->m_pDevice = std::move(device);
642   render_context_ptr->m_pContext = std::make_unique<CPDF_RenderContext>(
643       doc, std::move(page_resources), /*pPageCache=*/nullptr);
644 
645   device_ptr->Attach(result_bitmap);
646 
647   CFX_Matrix device_matrix(rect.Width(), 0, 0, rect.Height(), 0, 0);
648   CPDF_RenderStatus status(render_context_ptr->m_pContext.get(), device_ptr);
649   status.SetDeviceMatrix(device_matrix);
650   status.Initialize(nullptr, nullptr);
651 
652   // Need to flip the rendering and also move it to fit within `result_bitmap`.
653   CFX_Matrix render_matrix(1, 0, 0, -1, -text_rect.left, text_rect.top);
654   render_matrix *= scale_matrix;
655   status.RenderSingleObject(text, render_matrix);
656 
657   // Caller takes ownership.
658   return FPDFBitmapFromCFXDIBitmap(result_bitmap.Leak());
659 }
660 
FPDFFont_Close(FPDF_FONT font)661 FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) {
662   // Take back ownership from caller and release.
663   RetainPtr<CPDF_Font>().Unleak(CPDFFontFromFPDFFont(font));
664 }
665 
666 FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV
FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,FPDF_FONT font,float font_size)667 FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document,
668                           FPDF_FONT font,
669                           float font_size) {
670   CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document);
671   CPDF_Font* pFont = CPDFFontFromFPDFFont(font);
672   if (!pDoc || !pFont)
673     return nullptr;
674 
675   auto pTextObj = std::make_unique<CPDF_TextObject>();
676   pTextObj->m_TextState.SetFont(CPDF_DocPageData::FromDocument(pDoc)->GetFont(
677       pFont->GetMutableFontDict()));
678   pTextObj->m_TextState.SetFontSize(font_size);
679   pTextObj->DefaultStates();
680   return FPDFPageObjectFromCPDFPageObject(pTextObj.release());
681 }
682 
683 FPDF_EXPORT FPDF_TEXT_RENDERMODE FPDF_CALLCONV
FPDFTextObj_GetTextRenderMode(FPDF_PAGEOBJECT text)684 FPDFTextObj_GetTextRenderMode(FPDF_PAGEOBJECT text) {
685   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text);
686   if (!pTextObj)
687     return FPDF_TEXTRENDERMODE_UNKNOWN;
688   return static_cast<FPDF_TEXT_RENDERMODE>(pTextObj->GetTextRenderMode());
689 }
690 
691 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDFTextObj_SetTextRenderMode(FPDF_PAGEOBJECT text,FPDF_TEXT_RENDERMODE render_mode)692 FPDFTextObj_SetTextRenderMode(FPDF_PAGEOBJECT text,
693                               FPDF_TEXT_RENDERMODE render_mode) {
694   if (render_mode <= FPDF_TEXTRENDERMODE_UNKNOWN ||
695       render_mode > FPDF_TEXTRENDERMODE_LAST) {
696     return false;
697   }
698 
699   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text);
700   if (!pTextObj)
701     return false;
702 
703   pTextObj->SetTextRenderMode(static_cast<TextRenderingMode>(render_mode));
704   return true;
705 }
706 
FPDFTextObj_GetFont(FPDF_PAGEOBJECT text)707 FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFTextObj_GetFont(FPDF_PAGEOBJECT text) {
708   CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text);
709   if (!pTextObj)
710     return nullptr;
711 
712   // Unretained reference in public API. NOLINTNEXTLINE
713   return FPDFFontFromCPDFFont(pTextObj->GetFont());
714 }
715 
716 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDFFont_GetFontName(FPDF_FONT font,char * buffer,unsigned long length)717 FPDFFont_GetFontName(FPDF_FONT font, char* buffer, unsigned long length) {
718   auto* pFont = CPDFFontFromFPDFFont(font);
719   if (!pFont)
720     return 0;
721 
722   CFX_Font* pCfxFont = pFont->GetFont();
723   ByteString name = pCfxFont->GetFamilyName();
724   const unsigned long dwStringLen =
725       pdfium::base::checked_cast<unsigned long>(name.GetLength() + 1);
726   if (buffer && length >= dwStringLen)
727     memcpy(buffer, name.c_str(), dwStringLen);
728 
729   return dwStringLen;
730 }
731 
FPDFFont_GetFontData(FPDF_FONT font,uint8_t * buffer,size_t buflen,size_t * out_buflen)732 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetFontData(FPDF_FONT font,
733                                                          uint8_t* buffer,
734                                                          size_t buflen,
735                                                          size_t* out_buflen) {
736   auto* cfont = CPDFFontFromFPDFFont(font);
737   if (!cfont || !out_buflen)
738     return false;
739 
740   pdfium::span<uint8_t> data = cfont->GetFont()->GetFontSpan();
741   if (buffer && buflen >= data.size())
742     fxcrt::spancpy(pdfium::make_span(buffer, buflen), data);
743   *out_buflen = data.size();
744   return true;
745 }
746 
FPDFFont_GetIsEmbedded(FPDF_FONT font)747 FPDF_EXPORT int FPDF_CALLCONV FPDFFont_GetIsEmbedded(FPDF_FONT font) {
748   auto* cfont = CPDFFontFromFPDFFont(font);
749   if (!cfont)
750     return -1;
751   return cfont->IsEmbedded() ? 1 : 0;
752 }
753 
FPDFFont_GetFlags(FPDF_FONT font)754 FPDF_EXPORT int FPDF_CALLCONV FPDFFont_GetFlags(FPDF_FONT font) {
755   auto* pFont = CPDFFontFromFPDFFont(font);
756   if (!pFont)
757     return -1;
758 
759   // Return only flags from ISO 32000-1:2008, table 123.
760   return pFont->GetFontFlags() & 0x7ffff;
761 }
762 
FPDFFont_GetWeight(FPDF_FONT font)763 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetWeight(FPDF_FONT font) {
764   auto* pFont = CPDFFontFromFPDFFont(font);
765   return pFont ? pFont->GetFontWeight() : -1;
766 }
767 
FPDFFont_GetItalicAngle(FPDF_FONT font,int * angle)768 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetItalicAngle(FPDF_FONT font,
769                                                             int* angle) {
770   auto* pFont = CPDFFontFromFPDFFont(font);
771   if (!pFont || !angle)
772     return false;
773 
774   *angle = pFont->GetItalicAngle();
775   return true;
776 }
777 
FPDFFont_GetAscent(FPDF_FONT font,float font_size,float * ascent)778 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetAscent(FPDF_FONT font,
779                                                        float font_size,
780                                                        float* ascent) {
781   auto* pFont = CPDFFontFromFPDFFont(font);
782   if (!pFont || !ascent)
783     return false;
784 
785   *ascent = pFont->GetTypeAscent() * font_size / 1000.f;
786   return true;
787 }
788 
FPDFFont_GetDescent(FPDF_FONT font,float font_size,float * descent)789 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetDescent(FPDF_FONT font,
790                                                         float font_size,
791                                                         float* descent) {
792   auto* pFont = CPDFFontFromFPDFFont(font);
793   if (!pFont || !descent)
794     return false;
795 
796   *descent = pFont->GetTypeDescent() * font_size / 1000.f;
797   return true;
798 }
799 
FPDFFont_GetGlyphWidth(FPDF_FONT font,uint32_t glyph,float font_size,float * width)800 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFFont_GetGlyphWidth(FPDF_FONT font,
801                                                            uint32_t glyph,
802                                                            float font_size,
803                                                            float* width) {
804   auto* pFont = CPDFFontFromFPDFFont(font);
805   if (!pFont || !width)
806     return false;
807 
808   uint32_t charcode = pFont->CharCodeFromUnicode(static_cast<wchar_t>(glyph));
809 
810   CPDF_CIDFont* pCIDFont = pFont->AsCIDFont();
811   if (pCIDFont && pCIDFont->IsVertWriting()) {
812     uint16_t cid = pCIDFont->CIDFromCharCode(charcode);
813     *width = pCIDFont->GetVertWidth(cid) * font_size / 1000.f;
814   } else {
815     *width = pFont->GetCharWidthF(charcode) * font_size / 1000.f;
816   }
817 
818   return true;
819 }
820 
821 FPDF_EXPORT FPDF_GLYPHPATH FPDF_CALLCONV
FPDFFont_GetGlyphPath(FPDF_FONT font,uint32_t glyph,float font_size)822 FPDFFont_GetGlyphPath(FPDF_FONT font, uint32_t glyph, float font_size) {
823   auto* pFont = CPDFFontFromFPDFFont(font);
824   if (!pFont)
825     return nullptr;
826 
827   if (!pdfium::base::IsValueInRangeForNumericType<wchar_t>(glyph))
828     return nullptr;
829 
830   uint32_t charcode = pFont->CharCodeFromUnicode(static_cast<wchar_t>(glyph));
831   std::vector<TextCharPos> pos =
832       GetCharPosList(pdfium::make_span(&charcode, 1),
833                      pdfium::span<const float>(), pFont, font_size);
834   if (pos.empty())
835     return nullptr;
836 
837   CFX_Font* pCfxFont;
838   if (pos[0].m_FallbackFontPosition == -1) {
839     pCfxFont = pFont->GetFont();
840     DCHECK(pCfxFont);  // Never null.
841   } else {
842     pCfxFont = pFont->GetFontFallback(pos[0].m_FallbackFontPosition);
843     if (!pCfxFont)
844       return nullptr;
845   }
846 
847   const CFX_Path* pPath =
848       pCfxFont->LoadGlyphPath(pos[0].m_GlyphIndex, pos[0].m_FontCharWidth);
849 
850   return FPDFGlyphPathFromCFXPath(pPath);
851 }
852 
853 FPDF_EXPORT int FPDF_CALLCONV
FPDFGlyphPath_CountGlyphSegments(FPDF_GLYPHPATH glyphpath)854 FPDFGlyphPath_CountGlyphSegments(FPDF_GLYPHPATH glyphpath) {
855   auto* pPath = CFXPathFromFPDFGlyphPath(glyphpath);
856   if (!pPath)
857     return -1;
858 
859   return fxcrt::CollectionSize<int>(pPath->GetPoints());
860 }
861 
862 FPDF_EXPORT FPDF_PATHSEGMENT FPDF_CALLCONV
FPDFGlyphPath_GetGlyphPathSegment(FPDF_GLYPHPATH glyphpath,int index)863 FPDFGlyphPath_GetGlyphPathSegment(FPDF_GLYPHPATH glyphpath, int index) {
864   auto* pPath = CFXPathFromFPDFGlyphPath(glyphpath);
865   if (!pPath)
866     return nullptr;
867 
868   pdfium::span<const CFX_Path::Point> points = pPath->GetPoints();
869   if (!fxcrt::IndexInBounds(points, index))
870     return nullptr;
871 
872   return FPDFPathSegmentFromFXPathPoint(&points[index]);
873 }
874