1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
8
9 #include <ostream>
10 #include <utility>
11
12 #include "core/fpdfapi/parser/cpdf_array.h"
13 #include "core/fpdfapi/parser/cpdf_boolean.h"
14 #include "core/fpdfapi/parser/cpdf_dictionary.h"
15 #include "core/fpdfapi/parser/cpdf_number.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fpdfapi/parser/cpdf_string.h"
20 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
21 #include "core/fxcrt/fx_extension.h"
22 #include "core/fxcrt/fx_stream.h"
23 #include "third_party/base/check.h"
24
// Character classification table, indexed by 8-bit character code. Each entry
// is one of:
//   'W' - whitespace: NUL, TAB, LF, FF, CR, SPACE, 0x80, 0xff
//   'N' - numeric: 0123456789+-.
//   'D' - delimiter: %()/<>[]{}
//   'R' - regular: every other code.
// Rows below hold eight entries each; the leading comment gives the code of
// the first entry in the row and, for the ASCII half, the characters covered.
const char kPDFCharTypes[256] = {
    // 0x00: NUL SOH STX ETX EOT ENQ ACK BEL
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x08: BS HT LF VT FF CR SO SI
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',
    // 0x10: DLE DC1 DC2 DC3 DC4 NAK SYN ETB
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x18: CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x20: SP ! " # $ % & '
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    // 0x28: ( ) * + , - . /
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',
    // 0x30: 0 1 2 3 4 5 6 7
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    // 0x38: 8 9 : ; < = > ?
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',
    // 0x40: @ A B C D E F G
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x48: H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x50: P Q R S T U V W
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x58: X Y Z [ \ ] ^ _
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x60: ` a b c d e f g
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x68: h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x70: p q r s t u v w
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x78: x y z { | } ~ DEL
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',
    // 0x80 (the first entry is treated as whitespace)
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x88
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x98
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xa0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xa8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xb0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xb8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xc0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xc8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xd0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xd8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xe0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xe8
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xf0
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xf8 (the last entry, 0xff, is treated as whitespace)
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
76
GetHeaderOffset(const RetainPtr<IFX_SeekableReadStream> & pFile)77 absl::optional<FX_FILESIZE> GetHeaderOffset(
78 const RetainPtr<IFX_SeekableReadStream>& pFile) {
79 static constexpr size_t kBufSize = 4;
80 uint8_t buf[kBufSize];
81 for (FX_FILESIZE offset = 0; offset <= 1024; ++offset) {
82 if (!pFile->ReadBlockAtOffset(buf, offset))
83 return absl::nullopt;
84
85 if (memcmp(buf, "%PDF", 4) == 0)
86 return offset;
87 }
88 return absl::nullopt;
89 }
90
PDF_NameDecode(ByteStringView orig)91 ByteString PDF_NameDecode(ByteStringView orig) {
92 size_t src_size = orig.GetLength();
93 size_t out_index = 0;
94 ByteString result;
95 {
96 // Span's lifetime must end before ReleaseBuffer() below.
97 pdfium::span<char> pDest = result.GetBuffer(src_size);
98 for (size_t i = 0; i < src_size; i++) {
99 if (orig[i] == '#' && i + 2 < src_size) {
100 pDest[out_index++] = FXSYS_HexCharToInt(orig[i + 1]) * 16 +
101 FXSYS_HexCharToInt(orig[i + 2]);
102 i += 2;
103 } else {
104 pDest[out_index++] = orig[i];
105 }
106 }
107 }
108 result.ReleaseBuffer(out_index);
109 return result;
110 }
111
// Encodes `orig` as a PDF name by escaping bytes that may not appear literally.
//
// A byte is escaped when it is >= 0x80, is whitespace according to
// PDFCharIsWhitespace(), is a delimiter according to PDFCharIsDelimiter(), or
// is '#' itself. An escaped byte is written as '#' followed by the two
// characters produced by FXSYS_IntToTwoHexChars(). All other bytes are copied
// unchanged.
//
// Returns `orig` itself when no byte needs escaping, otherwise a new string.
//
// Lengths and indices are kept in size_t so that the value returned by
// GetLength() is never narrowed to a signed int.
ByteString PDF_NameEncode(const ByteString& orig) {
  const auto needs_escape = [](uint8_t ch) {
    return ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
           PDFCharIsDelimiter(ch);
  };

  const uint8_t* src_buf = reinterpret_cast<const uint8_t*>(orig.c_str());
  const size_t src_len = orig.GetLength();

  // First pass: compute the encoded length (3 output bytes per escaped byte).
  size_t dest_len = 0;
  for (size_t i = 0; i < src_len; ++i)
    dest_len += needs_escape(src_buf[i]) ? 3 : 1;

  // Equal lengths mean nothing was escaped, so the input is already encoded.
  if (dest_len == src_len)
    return orig;

  ByteString res;
  size_t out_index = 0;
  {
    // Span's lifetime must end before ReleaseBuffer() below.
    pdfium::span<char> dest_buf = res.GetBuffer(dest_len);
    for (size_t i = 0; i < src_len; ++i) {
      const uint8_t ch = src_buf[i];
      if (needs_escape(ch)) {
        dest_buf[out_index++] = '#';
        FXSYS_IntToTwoHexChars(ch, &dest_buf[out_index]);
        out_index += 2;
      } else {
        dest_buf[out_index++] = ch;
      }
    }
  }
  res.ReleaseBuffer(out_index);
  return res;
}
149
ReadArrayElementsToVector(const CPDF_Array * pArray,size_t nCount)150 std::vector<float> ReadArrayElementsToVector(const CPDF_Array* pArray,
151 size_t nCount) {
152 DCHECK(pArray);
153 DCHECK(pArray->size() >= nCount);
154 std::vector<float> ret(nCount);
155 for (size_t i = 0; i < nCount; ++i)
156 ret[i] = pArray->GetFloatAt(i);
157 return ret;
158 }
159
ValidateDictType(const CPDF_Dictionary * dict,ByteStringView type)160 bool ValidateDictType(const CPDF_Dictionary* dict, ByteStringView type) {
161 DCHECK(!type.IsEmpty());
162 return dict && dict->GetNameFor("Type") == type;
163 }
164
ValidateDictAllResourcesOfType(const CPDF_Dictionary * dict,ByteStringView type)165 bool ValidateDictAllResourcesOfType(const CPDF_Dictionary* dict,
166 ByteStringView type) {
167 if (!dict)
168 return false;
169
170 CPDF_DictionaryLocker locker(dict);
171 for (const auto& it : locker) {
172 RetainPtr<const CPDF_Dictionary> entry =
173 ToDictionary(it.second->GetDirect());
174 if (!ValidateDictType(entry.Get(), type))
175 return false;
176 }
177 return true;
178 }
179
ValidateFontResourceDict(const CPDF_Dictionary * dict)180 bool ValidateFontResourceDict(const CPDF_Dictionary* dict) {
181 return ValidateDictAllResourcesOfType(dict, "Font");
182 }
183
ValidateDictOptionalType(const CPDF_Dictionary * dict,ByteStringView type)184 bool ValidateDictOptionalType(const CPDF_Dictionary* dict,
185 ByteStringView type) {
186 DCHECK(!type.IsEmpty());
187 return dict && (!dict->KeyExist("Type") || dict->GetNameFor("Type") == type);
188 }
189
operator <<(std::ostream & buf,const CPDF_Object * pObj)190 std::ostream& operator<<(std::ostream& buf, const CPDF_Object* pObj) {
191 if (!pObj) {
192 buf << " null";
193 return buf;
194 }
195 switch (pObj->GetType()) {
196 case CPDF_Object::kNullobj:
197 buf << " null";
198 break;
199 case CPDF_Object::kBoolean:
200 case CPDF_Object::kNumber:
201 buf << " " << pObj->GetString();
202 break;
203 case CPDF_Object::kString:
204 buf << pObj->AsString()->EncodeString();
205 break;
206 case CPDF_Object::kName: {
207 ByteString str = pObj->GetString();
208 buf << "/" << PDF_NameEncode(str);
209 break;
210 }
211 case CPDF_Object::kReference: {
212 buf << " " << pObj->AsReference()->GetRefObjNum() << " 0 R ";
213 break;
214 }
215 case CPDF_Object::kArray: {
216 const CPDF_Array* p = pObj->AsArray();
217 buf << "[";
218 for (size_t i = 0; i < p->size(); i++) {
219 RetainPtr<const CPDF_Object> pElement = p->GetObjectAt(i);
220 if (!pElement->IsInline()) {
221 buf << " " << pElement->GetObjNum() << " 0 R";
222 } else {
223 buf << pElement.Get();
224 }
225 }
226 buf << "]";
227 break;
228 }
229 case CPDF_Object::kDictionary: {
230 CPDF_DictionaryLocker locker(pObj->AsDictionary());
231 buf << "<<";
232 for (const auto& it : locker) {
233 const ByteString& key = it.first;
234 const RetainPtr<CPDF_Object>& pValue = it.second;
235 buf << "/" << PDF_NameEncode(key);
236 if (!pValue->IsInline()) {
237 buf << " " << pValue->GetObjNum() << " 0 R ";
238 } else {
239 buf << pValue;
240 }
241 }
242 buf << ">>";
243 break;
244 }
245 case CPDF_Object::kStream: {
246 RetainPtr<const CPDF_Stream> p(pObj->AsStream());
247 buf << p->GetDict().Get() << "stream\r\n";
248 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(p));
249 pAcc->LoadAllDataRaw();
250 pdfium::span<const uint8_t> span = pAcc->GetSpan();
251 buf.write(reinterpret_cast<const char*>(span.data()), span.size());
252 buf << "\r\nendstream";
253 break;
254 }
255 }
256 return buf;
257 }
258