xref: /aosp_15_r20/external/pdfium/core/fxcrt/cfx_seekablestreamproxy.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/cfx_seekablestreamproxy.h"
8 
9 #include <stdint.h>
10 
11 #include <algorithm>
12 #include <limits>
13 #include <utility>
14 
15 #include "build/build_config.h"
16 #include "core/fxcrt/data_vector.h"
17 #include "core/fxcrt/fx_extension.h"
18 #include "core/fxcrt/fx_safe_types.h"
19 #include "third_party/base/check.h"
20 #include "third_party/base/check_op.h"
21 
22 namespace {
23 
24 // Returns {src bytes consumed, dst chars produced}.
25 // Invalid sequences are silently not output.
UTF8Decode(pdfium::span<const uint8_t> pSrc,pdfium::span<wchar_t> pDst)26 std::pair<size_t, size_t> UTF8Decode(pdfium::span<const uint8_t> pSrc,
27                                      pdfium::span<wchar_t> pDst) {
28   DCHECK(!pDst.empty());
29 
30   uint32_t dwCode = 0;
31   int32_t iPending = 0;
32   size_t iSrcNum = 0;
33   size_t iDstNum = 0;
34   for (size_t iIndex = 0; iIndex < pSrc.size() && iDstNum < pDst.size();
35        ++iIndex) {
36     ++iSrcNum;
37     uint8_t byte = pSrc[iIndex];
38     if (byte < 0x80) {
39       iPending = 0;
40       pDst[iDstNum++] = byte;
41     } else if (byte < 0xc0) {
42       if (iPending < 1)
43         continue;
44 
45       dwCode = dwCode << 6;
46       dwCode |= (byte & 0x3f);
47       --iPending;
48       if (iPending == 0)
49         pDst[iDstNum++] = dwCode;
50     } else if (byte < 0xe0) {
51       iPending = 1;
52       dwCode = (byte & 0x1f);
53     } else if (byte < 0xf0) {
54       iPending = 2;
55       dwCode = (byte & 0x0f);
56     } else if (byte < 0xf8) {
57       iPending = 3;
58       dwCode = (byte & 0x07);
59     } else if (byte < 0xfc) {
60       iPending = 4;
61       dwCode = (byte & 0x03);
62     } else if (byte < 0xfe) {
63       iPending = 5;
64       dwCode = (byte & 0x01);
65     }
66   }
67   return {iSrcNum, iDstNum};
68 }
69 
70 #if defined(WCHAR_T_IS_UTF32)
71 static_assert(sizeof(wchar_t) > 2, "wchar_t is too small");
72 
UTF16ToWChar(void * pBuffer,size_t iLength)73 void UTF16ToWChar(void* pBuffer, size_t iLength) {
74   DCHECK(pBuffer);
75   DCHECK_GT(iLength, 0u);
76 
77   uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
78   wchar_t* pDst = static_cast<wchar_t*>(pBuffer);
79 
80   // Perform self-intersecting copy in reverse order.
81   for (size_t i = iLength; i > 0; --i)
82     pDst[i - 1] = static_cast<wchar_t>(pSrc[i - 1]);
83 }
84 #endif  // defined(WCHAR_T_IS_UTF32)
85 
// Swaps the high and low bytes of each of the first `iLength` 16-bit units
// of `pStr`, in place.
void SwapByteOrder(uint16_t* pStr, size_t iLength) {
  for (size_t i = 0; i < iLength; ++i) {
    const uint16_t unit = pStr[i];
    pStr[i] = static_cast<uint16_t>((unit << 8) | (unit >> 8));
  }
}
92 
93 }  // namespace
94 
// Byte-order-mark signatures used to sniff the stream encoding.
//
// Each signature holds the leading stream bytes packed least-significant byte
// first (the first stream byte occupies bits 0-7). Each mask selects how many
// of those leading bytes take part in the comparison.
constexpr uint32_t BOM_UTF8_MASK = 0x00FFFFFF;   // Compare three bytes.
constexpr uint32_t BOM_UTF8 = 0x00BFBBEF;        // Stream bytes EF BB BF.
constexpr uint32_t BOM_UTF16_MASK = 0x0000FFFF;  // Compare two bytes.
constexpr uint32_t BOM_UTF16_BE = 0x0000FFFE;    // Stream bytes FE FF.
constexpr uint32_t BOM_UTF16_LE = 0x0000FEFF;    // Stream bytes FF FE.
100 
CFX_SeekableStreamProxy(const RetainPtr<IFX_SeekableReadStream> & stream)101 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
102     const RetainPtr<IFX_SeekableReadStream>& stream)
103     : m_pStream(stream) {
104   DCHECK(m_pStream);
105 
106   Seek(From::Begin, 0);
107 
108   uint32_t bom = 0;
109   ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
110 
111   bom &= BOM_UTF8_MASK;
112   if (bom == BOM_UTF8) {
113     m_wBOMLength = 3;
114     m_wCodePage = FX_CodePage::kUTF8;
115   } else {
116     bom &= BOM_UTF16_MASK;
117     if (bom == BOM_UTF16_BE) {
118       m_wBOMLength = 2;
119       m_wCodePage = FX_CodePage::kUTF16BE;
120     } else if (bom == BOM_UTF16_LE) {
121       m_wBOMLength = 2;
122       m_wCodePage = FX_CodePage::kUTF16LE;
123     } else {
124       m_wBOMLength = 0;
125       m_wCodePage = FX_GetACP();
126     }
127   }
128 
129   Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
130 }
131 
132 CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() = default;
133 
GetSize()134 FX_FILESIZE CFX_SeekableStreamProxy::GetSize() {
135   return m_pStream->GetSize();
136 }
137 
GetPosition()138 FX_FILESIZE CFX_SeekableStreamProxy::GetPosition() {
139   return m_iPosition;
140 }
141 
IsEOF()142 bool CFX_SeekableStreamProxy::IsEOF() {
143   return m_iPosition >= GetSize();
144 }
145 
Seek(From eSeek,FX_FILESIZE iOffset)146 void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
147   switch (eSeek) {
148     case From::Begin:
149       m_iPosition = iOffset;
150       break;
151     case From::Current: {
152       FX_SAFE_FILESIZE new_pos = m_iPosition;
153       new_pos += iOffset;
154       m_iPosition =
155           new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
156     } break;
157   }
158   m_iPosition = std::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetSize());
159 }
160 
SetCodePage(FX_CodePage wCodePage)161 void CFX_SeekableStreamProxy::SetCodePage(FX_CodePage wCodePage) {
162   if (m_wBOMLength > 0)
163     return;
164   m_wCodePage = wCodePage;
165 }
166 
ReadData(uint8_t * pBuffer,size_t iBufferSize)167 size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
168   DCHECK(pBuffer);
169   DCHECK(iBufferSize > 0);
170 
171   iBufferSize =
172       std::min(iBufferSize, static_cast<size_t>(GetSize() - m_iPosition));
173   if (iBufferSize <= 0)
174     return 0;
175 
176   if (!m_pStream->ReadBlockAtOffset({pBuffer, iBufferSize}, m_iPosition))
177     return 0;
178 
179   FX_SAFE_FILESIZE new_pos = m_iPosition;
180   new_pos += iBufferSize;
181   m_iPosition = new_pos.ValueOrDefault(m_iPosition);
182   return new_pos.IsValid() ? iBufferSize : 0;
183 }
184 
ReadBlock(wchar_t * pStr,size_t size)185 size_t CFX_SeekableStreamProxy::ReadBlock(wchar_t* pStr, size_t size) {
186   if (!pStr || size == 0)
187     return 0;
188 
189   if (m_wCodePage == FX_CodePage::kUTF16LE ||
190       m_wCodePage == FX_CodePage::kUTF16BE) {
191     size_t iBytes = size * 2;
192     size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
193     size = iLen / 2;
194     if (m_wCodePage == FX_CodePage::kUTF16BE)
195       SwapByteOrder(reinterpret_cast<uint16_t*>(pStr), size);
196 
197 #if defined(WCHAR_T_IS_UTF32)
198     if (size > 0)
199       UTF16ToWChar(pStr, size);
200 #endif
201     return size;
202   }
203 
204   FX_FILESIZE pos = GetPosition();
205   size_t iBytes = std::min(size, static_cast<size_t>(GetSize() - pos));
206   if (iBytes == 0)
207     return 0;
208 
209   DataVector<uint8_t> buf(iBytes);
210   size_t iLen = ReadData(buf.data(), iBytes);
211   if (m_wCodePage != FX_CodePage::kUTF8)
212     return 0;
213 
214   size_t iSrc;
215   std::tie(iSrc, size) = UTF8Decode({buf.data(), iLen}, {pStr, size});
216   Seek(From::Current, iSrc - iLen);
217   return size;
218 }
219