xref: /aosp_15_r20/external/bcc/src/cc/bcc_zip.c (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "bcc_zip.h"
18 
19 #include <fcntl.h>
20 #include <limits.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/mman.h>
25 #include <unistd.h>
26 
27 // Specification of ZIP file format can be found here:
28 // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
29 // For a high level overview of the structure of a ZIP file see
30 // sections 4.3.1 - 4.3.6.
31 
// Data structures appearing in ZIP files do not contain any
// padding and they might be misaligned. To allow us to safely
// operate on pointers to such structures and their members, without
// worrying about platform-specific alignment issues, we define
// unaligned_uint16_t and unaligned_uint32_t types with no alignment
// requirements.
// A 16-bit ZIP field kept as its two raw bytes, so that it can sit at any
// address inside the mapped archive.
typedef struct {
  uint8_t raw[2];
} unaligned_uint16_t;

// Decodes the 16-bit value held in |value|.
//
// The ZIP format stores multi-byte fields least significant byte first, so
// the bytes are combined explicitly rather than copied into a host integer.
// The result is therefore the same on little-endian and big-endian hosts.
static uint16_t unaligned_uint16_read(unaligned_uint16_t value) {
  return (uint16_t)(value.raw[0] | (value.raw[1] << 8));
}
47 
// A 32-bit ZIP field kept as its four raw bytes, so that it can sit at any
// address inside the mapped archive.
typedef struct {
  uint8_t raw[4];
} unaligned_uint32_t;

// Decodes the 32-bit value held in |value|.
//
// The ZIP format stores multi-byte fields least significant byte first, so
// the bytes are combined explicitly rather than copied into a host integer.
// The result is therefore the same on little-endian and big-endian hosts.
static uint32_t unaligned_uint32_read(unaligned_uint32_t value) {
  return (uint32_t)value.raw[0] |
         ((uint32_t)value.raw[1] << 8) |
         ((uint32_t)value.raw[2] << 16) |
         ((uint32_t)value.raw[3] << 24);
}
57 
58 #define END_OF_CD_RECORD_MAGIC 0x06054b50
59 
60 // See section 4.3.16 of the spec.
61 struct end_of_central_directory_record {
62   // Magic value equal to END_OF_CD_RECORD_MAGIC
63   unaligned_uint32_t magic;
64 
65   // Number of the file containing this structure or 0xFFFF if ZIP64 archive.
66   // Zip archive might span multiple files (disks).
67   unaligned_uint16_t this_disk;
68 
69   // Number of the file containing the beginning of the central directory or
70   // 0xFFFF if ZIP64 archive.
71   unaligned_uint16_t cd_disk;
72 
73   // Number of central directory records on this disk or 0xFFFF if ZIP64
74   // archive.
75   unaligned_uint16_t cd_records;
76 
77   // Number of central directory records on all disks or 0xFFFF if ZIP64
78   // archive.
79   unaligned_uint16_t cd_records_total;
80 
81   // Size of the central directory recrod or 0xFFFFFFFF if ZIP64 archive.
82   unaligned_uint32_t cd_size;
83 
84   // Offset of the central directory from the beginning of the archive or
85   // 0xFFFFFFFF if ZIP64 archive.
86   unaligned_uint32_t cd_offset;
87 
88   // Length of comment data following end of central driectory record.
89   unaligned_uint16_t comment_length;
90 
91   // Up to 64k of arbitrary bytes.
92   // uint8_t comment[comment_length]
93 };
94 
95 #define CD_FILE_HEADER_MAGIC 0x02014b50
96 #define FLAG_ENCRYPTED (1 << 0)
97 #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
98 
99 // See section 4.3.12 of the spec.
100 struct central_directory_file_header {
101   // Magic value equal to CD_FILE_HEADER_MAGIC.
102   unaligned_uint32_t magic;
103   unaligned_uint16_t version;
104   // Minimum zip version needed to extract the file.
105   unaligned_uint16_t min_version;
106   unaligned_uint16_t flags;
107   unaligned_uint16_t compression;
108   unaligned_uint16_t last_modified_time;
109   unaligned_uint16_t last_modified_date;
110   unaligned_uint32_t crc;
111   unaligned_uint32_t compressed_size;
112   unaligned_uint32_t uncompressed_size;
113   unaligned_uint16_t file_name_length;
114   unaligned_uint16_t extra_field_length;
115   unaligned_uint16_t file_comment_length;
116   // Number of the disk where the file starts or 0xFFFF if ZIP64 archive.
117   unaligned_uint16_t disk;
118   unaligned_uint16_t internal_attributes;
119   unaligned_uint32_t external_attributes;
120   // Offset from the start of the disk containing the local file header to the
121   // start of the local file header.
122   unaligned_uint32_t offset;
123 };
124 
125 #define LOCAL_FILE_HEADER_MAGIC 0x04034b50
126 
127 // See section 4.3.7 of the spec.
128 struct local_file_header {
129   // Magic value equal to LOCAL_FILE_HEADER_MAGIC.
130   unaligned_uint32_t magic;
131   // Minimum zip version needed to extract the file.
132   unaligned_uint16_t min_version;
133   unaligned_uint16_t flags;
134   unaligned_uint16_t compression;
135   unaligned_uint16_t last_modified_time;
136   unaligned_uint16_t last_modified_date;
137   unaligned_uint32_t crc;
138   unaligned_uint32_t compressed_size;
139   unaligned_uint32_t uncompressed_size;
140   unaligned_uint16_t file_name_length;
141   unaligned_uint16_t extra_field_length;
142 };
143 
// State of an opened archive.
struct bcc_zip_archive {
  // Start of the read-only memory mapping of the archive file.
  void* data;
  // Length of the mapping in bytes.
  uint32_t size;
  // Offset of the central directory within the mapping, as taken from the
  // end of central directory record.
  uint32_t cd_offset;
  // Number of central directory records, as taken from the end of central
  // directory record.
  uint32_t cd_records;
};
150 
check_access(struct bcc_zip_archive * archive,uint32_t offset,uint32_t size)151 static void* check_access(struct bcc_zip_archive* archive, uint32_t offset,
152                           uint32_t size) {
153   if (offset + size > archive->size || offset > offset + size) {
154     return NULL;
155   }
156   return (char *) archive->data + offset;
157 }
158 
159 // Returns 0 on success, -1 on error and -2 if the eocd indicates
160 // the archive uses features which are not supported.
try_parse_end_of_central_directory(struct bcc_zip_archive * archive,uint32_t offset)161 static int try_parse_end_of_central_directory(struct bcc_zip_archive* archive,
162                                               uint32_t offset) {
163   struct end_of_central_directory_record* eocd = check_access(
164       archive, offset, sizeof(struct end_of_central_directory_record));
165   if (eocd == NULL ||
166       unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) {
167     return -1;
168   }
169 
170   uint16_t comment_length = unaligned_uint16_read(eocd->comment_length);
171   if (offset + sizeof(struct end_of_central_directory_record) +
172           comment_length !=
173       archive->size) {
174     return -1;
175   }
176 
177   uint16_t cd_records = unaligned_uint16_read(eocd->cd_records);
178   if (unaligned_uint16_read(eocd->this_disk) != 0 ||
179       unaligned_uint16_read(eocd->cd_disk) != 0 ||
180       unaligned_uint16_read(eocd->cd_records_total) != cd_records) {
181     // This is a valid eocd, but we only support single-file non-ZIP64 archives.
182     return -2;
183   }
184 
185   uint32_t cd_offset = unaligned_uint32_read(eocd->cd_offset);
186   uint32_t cd_size = unaligned_uint32_read(eocd->cd_size);
187   if (check_access(archive, cd_offset, cd_size) == NULL) {
188     return -1;
189   }
190 
191   archive->cd_offset = cd_offset;
192   archive->cd_records = cd_records;
193   return 0;
194 }
195 
find_central_directory(struct bcc_zip_archive * archive)196 static int find_central_directory(struct bcc_zip_archive* archive) {
197   if (archive->size <= sizeof(struct end_of_central_directory_record)) {
198     return -1;
199   }
200 
201   int rc = -1;
202   // Because the end of central directory ends with a variable length array of
203   // up to 0xFFFF bytes we can't know exactly where it starts and need to
204   // search for it at the end of the file, scanning the (limit, offset] range.
205   int64_t offset =
206       (int64_t)archive->size - sizeof(struct end_of_central_directory_record);
207   int64_t limit = offset - (1 << 16);
208   for (; offset >= 0 && offset > limit && rc == -1; offset--) {
209     rc = try_parse_end_of_central_directory(archive, offset);
210   }
211 
212   return rc;
213 }
214 
bcc_zip_archive_open(const char * path)215 struct bcc_zip_archive* bcc_zip_archive_open(const char* path) {
216   int fd = open(path, O_RDONLY);
217   if (fd < 0) {
218     return NULL;
219   }
220 
221   off_t size = lseek(fd, 0, SEEK_END);
222   if (size == (off_t)-1 || size > UINT32_MAX) {
223     close(fd);
224     return NULL;
225   }
226 
227   void* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
228   close(fd);
229 
230   if (data == MAP_FAILED) {
231     return NULL;
232   }
233 
234   struct bcc_zip_archive* archive = malloc(sizeof(struct bcc_zip_archive));
235   if (archive == NULL) {
236     munmap(data, size);
237     return NULL;
238   };
239 
240   archive->data = data;
241   archive->size = size;
242   if (find_central_directory(archive)) {
243     munmap(data, size);
244     free(archive);
245     archive = NULL;
246   }
247 
248   return archive;
249 }
250 
bcc_zip_archive_close(struct bcc_zip_archive * archive)251 void bcc_zip_archive_close(struct bcc_zip_archive* archive) {
252   munmap(archive->data, archive->size);
253   free(archive);
254 }
255 
local_file_header_at_offset(struct bcc_zip_archive * archive,uint32_t offset)256 static struct local_file_header* local_file_header_at_offset(
257     struct bcc_zip_archive* archive, uint32_t offset) {
258   struct local_file_header* lfh =
259       check_access(archive, offset, sizeof(struct local_file_header));
260   if (lfh == NULL ||
261       unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) {
262     return NULL;
263   }
264   return lfh;
265 }
266 
get_entry_at_offset(struct bcc_zip_archive * archive,uint32_t offset,struct bcc_zip_entry * out)267 static int get_entry_at_offset(struct bcc_zip_archive* archive, uint32_t offset,
268                                struct bcc_zip_entry* out) {
269   struct local_file_header* lfh = local_file_header_at_offset(archive, offset);
270   offset += sizeof(struct local_file_header);
271   if (lfh == NULL) {
272     return -1;
273   };
274 
275   uint16_t flags = unaligned_uint16_read(lfh->flags);
276   if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) {
277     return -1;
278   }
279 
280   uint16_t name_length = unaligned_uint16_read(lfh->file_name_length);
281   const char* name = check_access(archive, offset, name_length);
282   offset += name_length;
283   if (name == NULL) {
284     return -1;
285   }
286 
287   uint16_t extra_field_length = unaligned_uint16_read(lfh->extra_field_length);
288   if (check_access(archive, offset, extra_field_length) == NULL) {
289     return -1;
290   }
291   offset += extra_field_length;
292 
293   uint32_t compressed_size = unaligned_uint32_read(lfh->compressed_size);
294   void* data = check_access(archive, offset, compressed_size);
295   if (data == NULL) {
296     return -1;
297   }
298 
299   out->compression = unaligned_uint16_read(lfh->compression);
300   out->name_length = name_length;
301   out->name = name;
302   out->data = data;
303   out->data_length = compressed_size;
304   out->data_offset = offset;
305 
306   return 0;
307 }
308 
cd_file_header_at_offset(struct bcc_zip_archive * archive,uint32_t offset)309 static struct central_directory_file_header* cd_file_header_at_offset(
310     struct bcc_zip_archive* archive, uint32_t offset) {
311   struct central_directory_file_header* cdfh = check_access(
312       archive, offset, sizeof(struct central_directory_file_header));
313   if (cdfh == NULL ||
314       unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) {
315     return NULL;
316   }
317   return cdfh;
318 }
319 
bcc_zip_archive_find_entry(struct bcc_zip_archive * archive,const char * file_name,struct bcc_zip_entry * out)320 int bcc_zip_archive_find_entry(struct bcc_zip_archive* archive,
321                                const char* file_name,
322                                struct bcc_zip_entry* out) {
323   size_t file_name_length = strlen(file_name);
324 
325   uint32_t offset = archive->cd_offset;
326   for (uint32_t i = 0; i < archive->cd_records; ++i) {
327     struct central_directory_file_header* cdfh =
328         cd_file_header_at_offset(archive, offset);
329     offset += sizeof(struct central_directory_file_header);
330     if (cdfh == NULL) {
331       return -1;
332     }
333 
334     uint16_t cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length);
335     const char* cdfh_name = check_access(archive, offset, cdfh_name_length);
336     if (cdfh_name == NULL) {
337       return -1;
338     }
339 
340     uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags);
341     if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
342         (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
343         file_name_length == cdfh_name_length &&
344         memcmp(file_name, (char*) archive->data + offset, file_name_length) == 0) {
345       return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
346                                  out);
347     }
348 
349     offset += cdfh_name_length;
350     offset += unaligned_uint16_read(cdfh->extra_field_length);
351     offset += unaligned_uint16_read(cdfh->file_comment_length);
352   }
353 
354   return -1;
355 }
356 
bcc_zip_archive_find_entry_at_offset(struct bcc_zip_archive * archive,uint32_t target,struct bcc_zip_entry * out)357 int bcc_zip_archive_find_entry_at_offset(struct bcc_zip_archive* archive,
358                                          uint32_t target,
359                                          struct bcc_zip_entry* out) {
360   uint32_t offset = archive->cd_offset;
361   for (uint32_t i = 0; i < archive->cd_records; ++i) {
362     struct central_directory_file_header* cdfh =
363         cd_file_header_at_offset(archive, offset);
364     offset += sizeof(struct central_directory_file_header);
365     if (cdfh == NULL) {
366       return -1;
367     }
368 
369     uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags);
370     if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
371         (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0) {
372       if (get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset),
373                               out)) {
374         return -1;
375       }
376 
377       if ((char*) out->data <= (char*) archive->data + target &&
378           (char*) archive->data + target < (char*) out->data + out->data_length) {
379         return 0;
380       }
381     }
382 
383     offset += unaligned_uint16_read(cdfh->file_name_length);
384     offset += unaligned_uint16_read(cdfh->extra_field_length);
385     offset += unaligned_uint16_read(cdfh->file_comment_length);
386   }
387 
388   return -1;
389 }
390 
bcc_zip_archive_open_and_find(const char * path,struct bcc_zip_entry * out)391 struct bcc_zip_archive* bcc_zip_archive_open_and_find(
392     const char* path, struct bcc_zip_entry* out) {
393   struct bcc_zip_archive* archive = NULL;
394   const char* separator = strstr(path, "!/");
395   if (separator == NULL || separator - path >= PATH_MAX) {
396     return NULL;
397   }
398 
399   char archive_path[PATH_MAX];
400   strncpy(archive_path, path, separator - path);
401   archive_path[separator - path] = 0;
402   archive = bcc_zip_archive_open(archive_path);
403   if (archive == NULL) {
404     return NULL;
405   }
406 
407   if (bcc_zip_archive_find_entry(archive, separator + 2, out)) {
408     bcc_zip_archive_close(archive);
409     return NULL;
410   }
411 
412   return archive;
413 }
414