1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "bcc_zip.h"
18
19 #include <fcntl.h>
20 #include <limits.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/mman.h>
25 #include <unistd.h>
26
27 // Specification of ZIP file format can be found here:
28 // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
29 // For a high level overview of the structure of a ZIP file see
30 // sections 4.3.1 - 4.3.6.
31
// Data structures appearing in ZIP files do not contain any
// padding and they might be misaligned. To allow us to safely
// operate on pointers to such structures and their members, without
// worrying about platform-specific alignment issues, we define
// unaligned_uint16_t and unaligned_uint32_t types with no alignment
// requirements.
// 16-bit ZIP field kept as raw bytes so that it has no alignment requirement.
typedef struct {
  uint8_t raw[2];
} unaligned_uint16_t;

// Decodes a 16-bit ZIP field. The ZIP format stores multi-byte fields low
// byte first (little-endian), so the value is assembled byte by byte instead
// of being copied in host byte order; this gives the same result on
// little-endian and big-endian hosts.
static uint16_t unaligned_uint16_read(unaligned_uint16_t value) {
  return (uint16_t)((uint16_t)value.raw[0] | ((uint16_t)value.raw[1] << 8));
}
47
// 32-bit ZIP field kept as raw bytes so that it has no alignment requirement.
typedef struct {
  uint8_t raw[4];
} unaligned_uint32_t;

// Decodes a 32-bit ZIP field. The ZIP format stores multi-byte fields low
// byte first (little-endian), so the value is assembled byte by byte instead
// of being copied in host byte order; this gives the same result on
// little-endian and big-endian hosts.
static uint32_t unaligned_uint32_read(unaligned_uint32_t value) {
  return (uint32_t)value.raw[0] | ((uint32_t)value.raw[1] << 8) |
         ((uint32_t)value.raw[2] << 16) | ((uint32_t)value.raw[3] << 24);
}
57
// Signature expected in the `magic` field of an end of central directory
// record.
#define END_OF_CD_RECORD_MAGIC 0x06054b50

// See section 4.3.16 of the spec.
// Fixed-size part of the end of central directory record as it appears in the
// file. All members are byte-array wrappers, so the struct can be overlaid on
// any offset of the archive data.
struct end_of_central_directory_record {
  // Magic value equal to END_OF_CD_RECORD_MAGIC
  unaligned_uint32_t magic;

  // Number of the file containing this structure or 0xFFFF if ZIP64 archive.
  // Zip archive might span multiple files (disks).
  unaligned_uint16_t this_disk;

  // Number of the file containing the beginning of the central directory or
  // 0xFFFF if ZIP64 archive.
  unaligned_uint16_t cd_disk;

  // Number of central directory records on this disk or 0xFFFF if ZIP64
  // archive.
  unaligned_uint16_t cd_records;

  // Number of central directory records on all disks or 0xFFFF if ZIP64
  // archive.
  unaligned_uint16_t cd_records_total;

  // Size of the central directory in bytes or 0xFFFFFFFF if ZIP64 archive.
  unaligned_uint32_t cd_size;

  // Offset of the central directory from the beginning of the archive or
  // 0xFFFFFFFF if ZIP64 archive.
  unaligned_uint32_t cd_offset;

  // Length of comment data following end of central directory record.
  unaligned_uint16_t comment_length;

  // Up to 64k of arbitrary bytes.
  // uint8_t comment[comment_length]
};
94
// Signature expected in the `magic` field of a central directory file header.
#define CD_FILE_HEADER_MAGIC 0x02014b50
// Bits of the `flags` field. Entries with either bit set are not supported by
// the lookup functions in this file.
#define FLAG_ENCRYPTED (1 << 0)
#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)

// See section 4.3.12 of the spec.
// Fixed-size part of a central directory file header as it appears in the
// file. The lookup code in this file advances past it by file_name_length,
// extra_field_length and file_comment_length bytes to reach the next record.
struct central_directory_file_header {
  // Magic value equal to CD_FILE_HEADER_MAGIC.
  unaligned_uint32_t magic;
  unaligned_uint16_t version;
  // Minimum zip version needed to extract the file.
  unaligned_uint16_t min_version;
  // Bit field; see FLAG_ENCRYPTED and FLAG_HAS_DATA_DESCRIPTOR.
  unaligned_uint16_t flags;
  unaligned_uint16_t compression;
  unaligned_uint16_t last_modified_time;
  unaligned_uint16_t last_modified_date;
  unaligned_uint32_t crc;
  unaligned_uint32_t compressed_size;
  unaligned_uint32_t uncompressed_size;
  // Length in bytes of the file name that immediately follows this header.
  unaligned_uint16_t file_name_length;
  unaligned_uint16_t extra_field_length;
  unaligned_uint16_t file_comment_length;
  // Number of the disk where the file starts or 0xFFFF if ZIP64 archive.
  unaligned_uint16_t disk;
  unaligned_uint16_t internal_attributes;
  unaligned_uint32_t external_attributes;
  // Offset from the start of the disk containing the local file header to the
  // start of the local file header.
  unaligned_uint32_t offset;
};
124
// Signature expected in the `magic` field of a local file header.
#define LOCAL_FILE_HEADER_MAGIC 0x04034b50

// See section 4.3.7 of the spec.
// Fixed-size part of a local file header as it appears in the file. The code
// in this file expects it to be followed by file_name_length bytes of name,
// extra_field_length bytes of extra data and then compressed_size bytes of
// entry data.
struct local_file_header {
  // Magic value equal to LOCAL_FILE_HEADER_MAGIC.
  unaligned_uint32_t magic;
  // Minimum zip version needed to extract the file.
  unaligned_uint16_t min_version;
  // Bit field; see FLAG_ENCRYPTED and FLAG_HAS_DATA_DESCRIPTOR.
  unaligned_uint16_t flags;
  unaligned_uint16_t compression;
  unaligned_uint16_t last_modified_time;
  unaligned_uint16_t last_modified_date;
  unaligned_uint32_t crc;
  unaligned_uint32_t compressed_size;
  unaligned_uint32_t uncompressed_size;
  unaligned_uint16_t file_name_length;
  unaligned_uint16_t extra_field_length;
};
143
// Handle for an opened archive: the mapped file contents plus the location of
// its central directory.
struct bcc_zip_archive {
  // Start of the memory-mapped archive file.
  void* data;
  // Size of the mapping in bytes.
  uint32_t size;
  // Offset of the central directory from the start of the archive, as read
  // from the end of central directory record.
  uint32_t cd_offset;
  // Number of central directory records, as read from the end of central
  // directory record.
  uint32_t cd_records;
};
150
check_access(struct bcc_zip_archive * archive,uint32_t offset,uint32_t size)151 static void* check_access(struct bcc_zip_archive* archive, uint32_t offset,
152 uint32_t size) {
153 if (offset + size > archive->size || offset > offset + size) {
154 return NULL;
155 }
156 return (char *) archive->data + offset;
157 }
158
159 // Returns 0 on success, -1 on error and -2 if the eocd indicates
160 // the archive uses features which are not supported.
try_parse_end_of_central_directory(struct bcc_zip_archive * archive,uint32_t offset)161 static int try_parse_end_of_central_directory(struct bcc_zip_archive* archive,
162 uint32_t offset) {
163 struct end_of_central_directory_record* eocd = check_access(
164 archive, offset, sizeof(struct end_of_central_directory_record));
165 if (eocd == NULL ||
166 unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) {
167 return -1;
168 }
169
170 uint16_t comment_length = unaligned_uint16_read(eocd->comment_length);
171 if (offset + sizeof(struct end_of_central_directory_record) +
172 comment_length !=
173 archive->size) {
174 return -1;
175 }
176
177 uint16_t cd_records = unaligned_uint16_read(eocd->cd_records);
178 if (unaligned_uint16_read(eocd->this_disk) != 0 ||
179 unaligned_uint16_read(eocd->cd_disk) != 0 ||
180 unaligned_uint16_read(eocd->cd_records_total) != cd_records) {
181 // This is a valid eocd, but we only support single-file non-ZIP64 archives.
182 return -2;
183 }
184
185 uint32_t cd_offset = unaligned_uint32_read(eocd->cd_offset);
186 uint32_t cd_size = unaligned_uint32_read(eocd->cd_size);
187 if (check_access(archive, cd_offset, cd_size) == NULL) {
188 return -1;
189 }
190
191 archive->cd_offset = cd_offset;
192 archive->cd_records = cd_records;
193 return 0;
194 }
195
find_central_directory(struct bcc_zip_archive * archive)196 static int find_central_directory(struct bcc_zip_archive* archive) {
197 if (archive->size <= sizeof(struct end_of_central_directory_record)) {
198 return -1;
199 }
200
201 int rc = -1;
202 // Because the end of central directory ends with a variable length array of
203 // up to 0xFFFF bytes we can't know exactly where it starts and need to
204 // search for it at the end of the file, scanning the (limit, offset] range.
205 int64_t offset =
206 (int64_t)archive->size - sizeof(struct end_of_central_directory_record);
207 int64_t limit = offset - (1 << 16);
208 for (; offset >= 0 && offset > limit && rc == -1; offset--) {
209 rc = try_parse_end_of_central_directory(archive, offset);
210 }
211
212 return rc;
213 }
214
bcc_zip_archive_open(const char * path)215 struct bcc_zip_archive* bcc_zip_archive_open(const char* path) {
216 int fd = open(path, O_RDONLY);
217 if (fd < 0) {
218 return NULL;
219 }
220
221 off_t size = lseek(fd, 0, SEEK_END);
222 if (size == (off_t)-1 || size > UINT32_MAX) {
223 close(fd);
224 return NULL;
225 }
226
227 void* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
228 close(fd);
229
230 if (data == MAP_FAILED) {
231 return NULL;
232 }
233
234 struct bcc_zip_archive* archive = malloc(sizeof(struct bcc_zip_archive));
235 if (archive == NULL) {
236 munmap(data, size);
237 return NULL;
238 };
239
240 archive->data = data;
241 archive->size = size;
242 if (find_central_directory(archive)) {
243 munmap(data, size);
244 free(archive);
245 archive = NULL;
246 }
247
248 return archive;
249 }
250
bcc_zip_archive_close(struct bcc_zip_archive * archive)251 void bcc_zip_archive_close(struct bcc_zip_archive* archive) {
252 munmap(archive->data, archive->size);
253 free(archive);
254 }
255
local_file_header_at_offset(struct bcc_zip_archive * archive,uint32_t offset)256 static struct local_file_header* local_file_header_at_offset(
257 struct bcc_zip_archive* archive, uint32_t offset) {
258 struct local_file_header* lfh =
259 check_access(archive, offset, sizeof(struct local_file_header));
260 if (lfh == NULL ||
261 unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) {
262 return NULL;
263 }
264 return lfh;
265 }
266
get_entry_at_offset(struct bcc_zip_archive * archive,uint32_t offset,struct bcc_zip_entry * out)267 static int get_entry_at_offset(struct bcc_zip_archive* archive, uint32_t offset,
268 struct bcc_zip_entry* out) {
269 struct local_file_header* lfh = local_file_header_at_offset(archive, offset);
270 offset += sizeof(struct local_file_header);
271 if (lfh == NULL) {
272 return -1;
273 };
274
275 uint16_t flags = unaligned_uint16_read(lfh->flags);
276 if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) {
277 return -1;
278 }
279
280 uint16_t name_length = unaligned_uint16_read(lfh->file_name_length);
281 const char* name = check_access(archive, offset, name_length);
282 offset += name_length;
283 if (name == NULL) {
284 return -1;
285 }
286
287 uint16_t extra_field_length = unaligned_uint16_read(lfh->extra_field_length);
288 if (check_access(archive, offset, extra_field_length) == NULL) {
289 return -1;
290 }
291 offset += extra_field_length;
292
293 uint32_t compressed_size = unaligned_uint32_read(lfh->compressed_size);
294 void* data = check_access(archive, offset, compressed_size);
295 if (data == NULL) {
296 return -1;
297 }
298
299 out->compression = unaligned_uint16_read(lfh->compression);
300 out->name_length = name_length;
301 out->name = name;
302 out->data = data;
303 out->data_length = compressed_size;
304 out->data_offset = offset;
305
306 return 0;
307 }
308
cd_file_header_at_offset(struct bcc_zip_archive * archive,uint32_t offset)309 static struct central_directory_file_header* cd_file_header_at_offset(
310 struct bcc_zip_archive* archive, uint32_t offset) {
311 struct central_directory_file_header* cdfh = check_access(
312 archive, offset, sizeof(struct central_directory_file_header));
313 if (cdfh == NULL ||
314 unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) {
315 return NULL;
316 }
317 return cdfh;
318 }
319
bcc_zip_archive_find_entry(struct bcc_zip_archive * archive,const char * file_name,struct bcc_zip_entry * out)320 int bcc_zip_archive_find_entry(struct bcc_zip_archive* archive,
321 const char* file_name,
322 struct bcc_zip_entry* out) {
323 size_t file_name_length = strlen(file_name);
324
325 uint32_t offset = archive->cd_offset;
326 for (uint32_t i = 0; i < archive->cd_records; ++i) {
327 struct central_directory_file_header* cdfh =
328 cd_file_header_at_offset(archive, offset);
329 offset += sizeof(struct central_directory_file_header);
330 if (cdfh == NULL) {
331 return -1;
332 }
333
334 uint16_t cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length);
335 const char* cdfh_name = check_access(archive, offset, cdfh_name_length);
336 if (cdfh_name == NULL) {
337 return -1;
338 }
339
340 uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags);
341 if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
342 (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
343 file_name_length == cdfh_name_length &&
344 memcmp(file_name, (char*) archive->data + offset, file_name_length) == 0) {
345 return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
346 out);
347 }
348
349 offset += cdfh_name_length;
350 offset += unaligned_uint16_read(cdfh->extra_field_length);
351 offset += unaligned_uint16_read(cdfh->file_comment_length);
352 }
353
354 return -1;
355 }
356
bcc_zip_archive_find_entry_at_offset(struct bcc_zip_archive * archive,uint32_t target,struct bcc_zip_entry * out)357 int bcc_zip_archive_find_entry_at_offset(struct bcc_zip_archive* archive,
358 uint32_t target,
359 struct bcc_zip_entry* out) {
360 uint32_t offset = archive->cd_offset;
361 for (uint32_t i = 0; i < archive->cd_records; ++i) {
362 struct central_directory_file_header* cdfh =
363 cd_file_header_at_offset(archive, offset);
364 offset += sizeof(struct central_directory_file_header);
365 if (cdfh == NULL) {
366 return -1;
367 }
368
369 uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags);
370 if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
371 (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0) {
372 if (get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
373 out)) {
374 return -1;
375 }
376
377 if ((char*) out->data <= (char*) archive->data + target &&
378 (char*) archive->data + target < (char*) out->data + out->data_length) {
379 return 0;
380 }
381 }
382
383 offset += unaligned_uint16_read(cdfh->file_name_length);
384 offset += unaligned_uint16_read(cdfh->extra_field_length);
385 offset += unaligned_uint16_read(cdfh->file_comment_length);
386 }
387
388 return -1;
389 }
390
bcc_zip_archive_open_and_find(const char * path,struct bcc_zip_entry * out)391 struct bcc_zip_archive* bcc_zip_archive_open_and_find(
392 const char* path, struct bcc_zip_entry* out) {
393 struct bcc_zip_archive* archive = NULL;
394 const char* separator = strstr(path, "!/");
395 if (separator == NULL || separator - path >= PATH_MAX) {
396 return NULL;
397 }
398
399 char archive_path[PATH_MAX];
400 strncpy(archive_path, path, separator - path);
401 archive_path[separator - path] = 0;
402 archive = bcc_zip_archive_open(archive_path);
403 if (archive == NULL) {
404 return NULL;
405 }
406
407 if (bcc_zip_archive_find_entry(archive, separator + 2, out)) {
408 bcc_zip_archive_close(archive);
409 return NULL;
410 }
411
412 return archive;
413 }
414