1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/mime_util.h"
6
7 #include <algorithm>
8 #include <iterator>
9 #include <map>
10 #include <string>
11 #include <string_view>
12 #include <unordered_set>
13
14 #include "base/base64.h"
15 #include "base/check_op.h"
16 #include "base/containers/span.h"
17 #include "base/lazy_instance.h"
18 #include "base/rand_util.h"
19 #include "base/strings/string_number_conversions.h"
20 #include "base/strings/string_split.h"
21 #include "base/strings/string_util.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "build/build_config.h"
24 #include "net/base/platform_mime_util.h"
25 #include "net/http/http_util.h"
26
27 using std::string;
28
29 namespace net {
30
// Singleton utility class for mime types.
//
// Implements the lookups and matching that the free functions in the
// "Wrappers for the singleton" section of this file forward to.
// Platform-specific lookups are inherited from PlatformMimeUtil.
class MimeUtil : public PlatformMimeUtil {
 public:
  // Looks up the MIME type for |ext| in kPrimaryMappings, then the platform,
  // then kSecondaryMappings. On success stores it in |*mime_type| and returns
  // true. |ext| must not include a leading dot.
  bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                std::string* mime_type) const;

  // Takes the extension of |file_path| and resolves it with
  // GetMimeTypeFromExtension(). Returns false when the path has no extension.
  bool GetMimeTypeFromFile(const base::FilePath& file_path,
                           std::string* mime_type) const;

  // Like GetMimeTypeFromExtension(), but skips the platform lookup and only
  // uses the hard-coded mappings.
  bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
                                         std::string* mime_type) const;

  // Finds an extension for |mime_type| by consulting the platform and the
  // hard-coded mappings. Returns false if none is found.
  bool GetPreferredExtensionForMimeType(
      const std::string& mime_type,
      base::FilePath::StringType* extension) const;

  // Returns true if |mime_type| matches |mime_type_pattern|. The pattern may
  // contain a '*' wildcard, and any parameters it carries must also be
  // present in |mime_type|. An empty pattern never matches.
  bool MatchesMimeType(const std::string& mime_type_pattern,
                       const std::string& mime_type) const;

  // Splits "type/subtype" into its two parts. Returns false unless splitting
  // on '/' yields exactly two components that both pass HttpUtil::IsToken().
  // Either output pointer may be null.
  bool ParseMimeTypeWithoutParameter(std::string_view type_string,
                                     std::string* top_level_type,
                                     std::string* subtype) const;

  // Returns true if |type_string| is one of kLegalTopLevelTypes or starts with
  // "x-" followed by at least one more character (ASCII case-insensitive).
  bool IsValidTopLevelMimeType(const std::string& type_string) const;

 private:
  // Presumably what allows the LazyInstance machinery to call the private
  // constructor below.
  friend struct base::LazyInstanceTraitsBase<MimeUtil>;

  MimeUtil();

  // Shared implementation of GetMimeTypeFromExtension() and
  // GetWellKnownMimeTypeFromExtension(); |include_platform_types| selects
  // whether the platform is consulted between the two hard-coded tables.
  bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
                                      bool include_platform_types,
                                      std::string* mime_type) const;
};  // class MimeUtil
65
// The MimeUtil instance that the free functions in this file reach through
// g_mime_util.Get().
// This variable is Leaky because we need to access it from WorkerPool threads.
static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
    LAZY_INSTANCE_INITIALIZER;
69
// One row of the hard-coded MIME tables: a MIME type together with the file
// extensions associated with it.
struct MimeInfo {
  // MIME type string, e.g. "text/html".
  const char* const mime_type;

  // Comma-separated list of possible extensions for the type, without leading
  // dots (e.g. "html,htm"). The first extension is considered preferred.
  // FindMimeType() compares the raw text between commas without trimming, so
  // entries are written without spaces.
  const char* const extensions;
};
77
78 // How to use the MIME maps
79 // ------------------------
80 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
81 //
82 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
83 // kSecondaryMappings.
84 //
85 // kPrimaryMappings:
86 //
87 // Use this for mappings that are critical to the web platform. Mappings you
88 // add to this list take priority over the underlying platform when converting
89 // from file extension -> MIME type. Thus file extensions listed here will
90 // work consistently across platforms.
91 //
92 // kSecondaryMappings:
93 //
94 // Use this for mappings that must exist, but can be overridden by user
95 // preferences.
96 //
97 // The following applies to both lists:
98 //
99 // * The same extension can appear multiple times in the same list under
100 // different MIME types. Extensions that appear earlier take precedence over
101 // those that appear later.
102 //
103 // * A MIME type must not appear more than once in a single list. It is valid
104 // for the same MIME type to appear in kPrimaryMappings and
105 // kSecondaryMappings.
106 //
107 // The MIME maps are used for three types of lookups:
108 //
109 // 1) MIME type -> file extension. Implemented as
110 // GetPreferredExtensionForMimeType().
111 //
112 // Sources are consulted in the following order:
113 //
114 // a) As a special case application/octet-stream is mapped to nothing. Web
115 // sites are supposed to use this MIME type to indicate that the content
116 // is opaque and shouldn't be parsed as any specific type of content. It
117 // doesn't make sense to map this to anything.
118 //
119 // b) The underlying platform. If the operating system has a mapping from
120 // the MIME type to a file extension, then that takes priority. The
121 // platform is assumed to represent the user's preference.
122 //
123 // c) kPrimaryMappings. Order doesn't matter since there should only be at
124 // most one entry per MIME type.
125 //
126 // d) kSecondaryMappings. Again, order doesn't matter.
127 //
128 // 2) File extension -> MIME type. Implemented in GetMimeTypeFromExtension().
129 //
130 // Sources are considered in the following order:
131 //
132 // a) kPrimaryMappings. Order matters here since file extensions can appear
133 // multiple times on these lists. The first mapping in order of
134 // appearance in the list wins.
135 //
136 // b) Underlying platform.
137 //
138 // c) kSecondaryMappings. Again, the order matters.
139 //
140 // 3) File extension -> Well known MIME type. Implemented as
141 // GetWellKnownMimeTypeFromExtension().
142 //
//    This is similar to 2), except that step b) is skipped; i.e., only the
//    hardcoded mappings in kPrimaryMappings and kSecondaryMappings are
//    considered.
146
// See comments above for details on how this list is used.
//
// Extension lookups return the first entry that lists the extension, so the
// relative order of entries sharing an extension ("webm", "mp3", "png") is
// significant.
static const MimeInfo kPrimaryMappings[] = {
    // Must precede audio/webm .
    {"video/webm", "webm"},

    // Must precede audio/mp3
    {"audio/mpeg", "mp3"},

    {"application/wasm", "wasm"},
    {"application/x-chrome-extension", "crx"},
    {"application/xhtml+xml", "xhtml,xht,xhtm"},
    {"audio/flac", "flac"},
    {"audio/mp3", "mp3"},
    {"audio/ogg", "ogg,oga,opus"},
    {"audio/wav", "wav"},
    {"audio/webm", "webm"},
    {"audio/x-m4a", "m4a"},
    {"image/avif", "avif"},
    {"image/gif", "gif"},
    {"image/jpeg", "jpeg,jpg"},
    {"image/png", "png"},
    // Listed after image/png, so a "png" extension resolves to image/png.
    {"image/apng", "png,apng"},
    {"image/svg+xml", "svg,svgz"},
    {"image/webp", "webp"},
    {"multipart/related", "mht,mhtml"},
    {"text/css", "css"},
    {"text/html", "html,htm,shtml,shtm"},
    {"text/javascript", "js,mjs"},
    {"text/xml", "xml"},
    {"video/mp4", "mp4,m4v"},
    {"video/ogg", "ogv,ogm"},

    // This is a primary mapping (overrides the platform) rather than secondary
    // to work around an issue when Excel is installed on Windows. Excel
    // registers csv as application/vnd.ms-excel instead of text/csv from RFC
    // 4180. See https://crbug.com/139105.
    {"text/csv", "csv"},
};
185
// See comments above for details on how this list is used.
//
// As with kPrimaryMappings, the first entry that lists an extension wins for
// extension -> MIME type lookups, so order matters for shared extensions
// ("ico", "gz", "tgz").
static const MimeInfo kSecondaryMappings[] = {
    // Must precede image/vnd.microsoft.icon .
    {"image/x-icon", "ico"},

    {"application/epub+zip", "epub"},
    {"application/font-woff", "woff"},
    // Listed before application/x-gzip, so "gz"/"tgz" resolve to this type.
    {"application/gzip", "gz,tgz"},
    {"application/javascript", "js"},
    {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
    {"application/msword", "doc,dot"},
    // Only used for extension -> MIME type lookups: FindPreferredExtension()
    // refuses to map application/octet-stream back to an extension.
    {"application/octet-stream", "bin,exe,com"},
    {"application/pdf", "pdf"},
    {"application/pkcs7-mime", "p7m,p7c,p7z"},
    {"application/pkcs7-signature", "p7s"},
    {"application/postscript", "ps,eps,ai"},
    {"application/rdf+xml", "rdf"},
    {"application/rss+xml", "rss"},
    {"application/rtf", "rtf"},
    {"application/vnd.android.package-archive", "apk"},
    {"application/vnd.mozilla.xul+xml", "xul"},
    {"application/vnd.ms-excel", "xls"},
    {"application/vnd.ms-powerpoint", "ppt"},
    {"application/"
     "vnd.openxmlformats-officedocument.presentationml.presentation",
     "pptx"},
    {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
     "xlsx"},
    {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
     "docx"},
    {"application/x-gzip", "gz,tgz"},
    {"application/x-mpegurl", "m3u8"},
    {"application/x-shockwave-flash", "swf,swl"},
    {"application/x-tar", "tar"},
    {"application/x-x509-ca-cert", "cer,crt"},
    {"application/zip", "zip"},
    // This is the platform mapping on recent versions of Windows 10.
    {"audio/webm", "weba"},
    {"image/bmp", "bmp"},
    {"image/jpeg", "jfif,pjpeg,pjp"},
    {"image/tiff", "tiff,tif"},
    {"image/vnd.microsoft.icon", "ico"},
    {"image/x-png", "png"},
    {"image/x-xbitmap", "xbm"},
    {"message/rfc822", "eml"},
    {"text/calendar", "ics"},
    {"text/html", "ehtml"},
    {"text/plain", "txt,text"},
    {"text/x-sh", "sh"},
    {"text/xml", "xsl,xbl,xslt"},
    {"video/mpeg", "mpeg,mpg"},
};
238
239 // Finds mime type of |ext| from |mappings|.
240 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)241 static const char* FindMimeType(const MimeInfo (&mappings)[num_mappings],
242 const std::string& ext) {
243 for (const auto& mapping : mappings) {
244 const char* extensions = mapping.extensions;
245 for (;;) {
246 size_t end_pos = strcspn(extensions, ",");
247 // The length check is required to prevent the StringPiece below from
248 // including uninitialized memory if ext is longer than extensions.
249 if (end_pos == ext.size() &&
250 base::EqualsCaseInsensitiveASCII(
251 std::string_view(extensions, ext.size()), ext)) {
252 return mapping.mime_type;
253 }
254 extensions += end_pos;
255 if (!*extensions)
256 break;
257 extensions += 1; // skip over comma
258 }
259 }
260 return nullptr;
261 }
262
StringToFilePathStringType(std::string_view string_piece)263 static base::FilePath::StringType StringToFilePathStringType(
264 std::string_view string_piece) {
265 #if BUILDFLAG(IS_WIN)
266 return base::UTF8ToWide(string_piece);
267 #else
268 return std::string(string_piece);
269 #endif
270 }
271
272 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
273 // preferred extension in MimeInfo arrays.
274 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],const std::string & mime_type,base::FilePath::StringType * result)275 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
276 const std::string& mime_type,
277 base::FilePath::StringType* result) {
278 // There is no preferred extension for "application/octet-stream".
279 if (mime_type == "application/octet-stream")
280 return false;
281
282 for (const auto& mapping : mappings) {
283 if (mapping.mime_type == mime_type) {
284 const char* extensions = mapping.extensions;
285 const char* extension_end = strchr(extensions, ',');
286 size_t len =
287 extension_end ? extension_end - extensions : strlen(extensions);
288 *result = StringToFilePathStringType(std::string_view(extensions, len));
289 return true;
290 }
291 }
292 return false;
293 }
294
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const295 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
296 string* result) const {
297 return GetMimeTypeFromExtensionHelper(ext, true, result);
298 }
299
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const300 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
301 const base::FilePath::StringType& ext,
302 string* result) const {
303 return GetMimeTypeFromExtensionHelper(ext, false, result);
304 }
305
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension) const306 bool MimeUtil::GetPreferredExtensionForMimeType(
307 const std::string& mime_type,
308 base::FilePath::StringType* extension) const {
309 // Search the MIME type in the platform DB first, then in kPrimaryMappings and
310 // kSecondaryMappings.
311 return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
312 FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
313 FindPreferredExtension(kSecondaryMappings, mime_type, extension);
314 }
315
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const316 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
317 string* result) const {
318 base::FilePath::StringType file_name_str = file_path.Extension();
319 if (file_name_str.empty())
320 return false;
321 return GetMimeTypeFromExtension(file_name_str.substr(1), result);
322 }
323
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const324 bool MimeUtil::GetMimeTypeFromExtensionHelper(
325 const base::FilePath::StringType& ext,
326 bool include_platform_types,
327 string* result) const {
328 DCHECK(ext.empty() || ext[0] != '.')
329 << "extension passed in must not include leading dot";
330
331 // Avoids crash when unable to handle a long file path. See crbug.com/48733.
332 const unsigned kMaxFilePathSize = 65536;
333 if (ext.length() > kMaxFilePathSize)
334 return false;
335
336 // Reject a string which contains null character.
337 base::FilePath::StringType::size_type nul_pos =
338 ext.find(FILE_PATH_LITERAL('\0'));
339 if (nul_pos != base::FilePath::StringType::npos)
340 return false;
341
342 // We implement the same algorithm as Mozilla for mapping a file extension to
343 // a mime type. That is, we first check a hard-coded list (that cannot be
344 // overridden), and then if not found there, we defer to the system registry.
345 // Finally, we scan a secondary hard-coded list to catch types that we can
346 // deduce but that we also want to allow the OS to override.
347
348 base::FilePath path_ext(ext);
349 const string ext_narrow_str = path_ext.AsUTF8Unsafe();
350 const char* mime_type = FindMimeType(kPrimaryMappings, ext_narrow_str);
351 if (mime_type) {
352 *result = mime_type;
353 return true;
354 }
355
356 if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
357 return true;
358
359 mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
360 if (mime_type) {
361 *result = mime_type;
362 return true;
363 }
364
365 return false;
366 }
367
// Nothing to initialize. The constructor is declared private in the class, so
// instances are not created directly by client code.
MimeUtil::MimeUtil() = default;
369
370 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
371 // must be matched by a parameter in the |mime_type|. If there are no
372 // parameters in the pattern, the match is a success.
373 //
374 // According rfc2045 keys of parameters are case-insensitive, while values may
375 // or may not be case-sensitive, but they are usually case-sensitive. So, this
376 // function matches values in *case-sensitive* manner, however note that this
377 // may produce some false negatives.
MatchesMimeTypeParameters(const std::string & mime_type_pattern,const std::string & mime_type)378 bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
379 const std::string& mime_type) {
380 typedef std::map<std::string, std::string> StringPairMap;
381
382 const std::string::size_type semicolon = mime_type_pattern.find(';');
383 const std::string::size_type test_semicolon = mime_type.find(';');
384 if (semicolon != std::string::npos) {
385 if (test_semicolon == std::string::npos)
386 return false;
387
388 base::StringPairs pattern_parameters;
389 base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
390 '=', ';', &pattern_parameters);
391 base::StringPairs test_parameters;
392 base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
393 '=', ';', &test_parameters);
394
395 // Put the parameters to maps with the keys converted to lower case.
396 StringPairMap pattern_parameter_map;
397 for (const auto& pair : pattern_parameters) {
398 pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
399 }
400
401 StringPairMap test_parameter_map;
402 for (const auto& pair : test_parameters) {
403 test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
404 }
405
406 if (pattern_parameter_map.size() > test_parameter_map.size())
407 return false;
408
409 for (const auto& parameter_pair : pattern_parameter_map) {
410 const auto& test_parameter_pair_it =
411 test_parameter_map.find(parameter_pair.first);
412 if (test_parameter_pair_it == test_parameter_map.end())
413 return false;
414 if (parameter_pair.second != test_parameter_pair_it->second)
415 return false;
416 }
417 }
418
419 return true;
420 }
421
422 // This comparison handles absolute maching and also basic
423 // wildcards. The plugin mime types could be:
424 // application/x-foo
425 // application/*
426 // application/*+xml
427 // *
428 // Also tests mime parameters -- all parameters in the pattern must be present
429 // in the tested type for a match to succeed.
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type) const430 bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
431 const std::string& mime_type) const {
432 if (mime_type_pattern.empty())
433 return false;
434
435 std::string::size_type semicolon = mime_type_pattern.find(';');
436 const std::string base_pattern(mime_type_pattern.substr(0, semicolon));
437 semicolon = mime_type.find(';');
438 const std::string base_type(mime_type.substr(0, semicolon));
439
440 if (base_pattern == "*" || base_pattern == "*/*")
441 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
442
443 const std::string::size_type star = base_pattern.find('*');
444 if (star == std::string::npos) {
445 if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
446 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
447 else
448 return false;
449 }
450
451 // Test length to prevent overlap between |left| and |right|.
452 if (base_type.length() < base_pattern.length() - 1)
453 return false;
454
455 std::string_view base_pattern_piece(base_pattern);
456 std::string_view left(base_pattern_piece.substr(0, star));
457 std::string_view right(base_pattern_piece.substr(star + 1));
458
459 if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
460 return false;
461
462 if (!right.empty() &&
463 !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
464 return false;
465
466 return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
467 }
468
// Parses a Content-Type style string such as
// "text/html; charset=utf-8; boundary=\"a;b\"".
//
// On success returns true, stores the type/subtype token in |*mime_type| and
// the parameters, in order of appearance, in |*params| as (name, value) pairs.
// Either output pointer may be null. Returns false, without writing to either
// output, when |type_str| contains no '/' or its first '/' lies beyond the end
// of the type token.
//
// Parameter handling:
//  * A parameter with no '=' or with an empty unquoted value is dropped.
//  * Whitespace before a parameter name and before its value is skipped.
//  * Unquoted values run to the next ';' and lose trailing whitespace.
//  * Quoted values run to the closing '"' (or to the end of the string if it
//    is missing); a backslash makes the following character literal and
//    whitespace is kept. Anything between the closing quote and the next ';'
//    is ignored.
bool ParseMimeType(const std::string& type_str,
                   std::string* mime_type,
                   base::StringPairs* params) {
  // Trim leading and trailing whitespace from type.  We include '(' in
  // the trailing trim set to catch media-type comments, which are not at all
  // standard, but may occur in rare cases.
  size_t type_val = type_str.find_first_not_of(HTTP_LWS);
  // An all-whitespace (or empty) string yields npos; clamp it to the length.
  type_val = std::min(type_val, type_str.length());
  size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
  if (type_end == std::string::npos)
    type_end = type_str.length();

  // Reject a mime-type if it does not include a slash.
  size_t slash_pos = type_str.find_first_of('/');
  if (slash_pos == std::string::npos || slash_pos > type_end)
    return false;
  if (mime_type)
    *mime_type = type_str.substr(type_val, type_end - type_val);

  // Iterate over parameters. Can't split the string around semicolons
  // preemptively because quoted strings may include semicolons. Mostly matches
  // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
  // validate characters are HTTP token code points / HTTP quoted-string token
  // code points, and ignores spaces after "=" in parameters.
  if (params)
    params->clear();
  // Loop invariant: |offset| is either npos (which ends the loop, since npos
  // is never less than size()) or the index of a ';'.
  std::string::size_type offset = type_str.find_first_of(';', type_end);
  while (offset < type_str.size()) {
    DCHECK_EQ(';', type_str[offset]);
    // Trim off the semicolon.
    ++offset;

    // Trim off any following spaces.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);
    std::string::size_type param_name_start = offset;

    // Extend parameter name until run into a semicolon or equals sign.  Per
    // spec, trailing spaces are not removed.
    offset = type_str.find_first_of(";=", offset);

    // Nothing more to do if at end of string, or if there's no parameter
    // value, since names without values aren't allowed.
    if (offset == std::string::npos || type_str[offset] == ';')
      continue;

    auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
                                            type_str.begin() + offset);

    // Now parse the value.
    DCHECK_EQ('=', type_str[offset]);
    // Trim off the '='.
    offset++;

    // Remove leading spaces. This violates the spec, though it matches
    // pre-existing behavior.
    //
    // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
    // seems to align more with the spec - not the content-type spec, but the
    // GET spec's way of getting an encoding, and the spec for handling
    // boundary values as well.
    // See https://encoding.spec.whatwg.org/#names-and-labels.
    offset = type_str.find_first_not_of(HTTP_LWS, offset);

    std::string param_value;
    if (offset == std::string::npos || type_str[offset] == ';') {
      // Nothing to do here - an unquoted string of only whitespace should be
      // skipped.
      continue;
    } else if (type_str[offset] != '"') {
      // If the first character is not a quotation mark, copy data directly.
      std::string::size_type value_start = offset;
      offset = type_str.find_first_of(';', offset);
      std::string::size_type value_end = offset;

      // Remove terminal whitespace. If ran off the end of the string, have to
      // update |value_end| first.
      if (value_end == std::string::npos)
        value_end = type_str.size();
      while (value_end > value_start &&
             HttpUtil::IsLWS(type_str[value_end - 1])) {
        --value_end;
      }

      param_value = type_str.substr(value_start, value_end - value_start);
    } else {
      // Otherwise, append data, with special handling for backslashes, until
      // a close quote.  Do not trim whitespace for quoted-string.

      // Skip open quote.
      DCHECK_EQ('"', type_str[offset]);
      ++offset;

      while (offset < type_str.size() && type_str[offset] != '"') {
        // Skip over backslash and append the next character, when not at
        // the end of the string. Otherwise, copy the next character (Which may
        // be a backslash).
        if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
          ++offset;
        }
        param_value += type_str[offset];
        ++offset;
      }

      // Resume at the next ';', discarding the closing quote and anything
      // that follows it before that ';'.
      offset = type_str.find_first_of(';', offset);
    }
    if (params)
      params->emplace_back(param_name, param_value);
  }
  return true;
}
579
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype) const580 bool MimeUtil::ParseMimeTypeWithoutParameter(std::string_view type_string,
581 std::string* top_level_type,
582 std::string* subtype) const {
583 std::vector<std::string_view> components = base::SplitStringPiece(
584 type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
585 if (components.size() != 2)
586 return false;
587 components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
588 components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
589 if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
590 return false;
591
592 if (top_level_type)
593 top_level_type->assign(std::string(components[0]));
594
595 if (subtype)
596 subtype->assign(std::string(components[1]));
597
598 return true;
599 }
600
// Registered top-level media types.
// See https://www.iana.org/assignments/media-types/media-types.xhtml
//
// IsValidTopLevelMimeType() compares its lower-cased input against these
// entries, so they must be written in lower case.
static const char* const kLegalTopLevelTypes[] = {
    "application", "audio", "example",   "font", "image",
    "message",     "model", "multipart", "text", "video",
};
606
IsValidTopLevelMimeType(const std::string & type_string) const607 bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
608 std::string lower_type = base::ToLowerASCII(type_string);
609 for (const char* const legal_type : kLegalTopLevelTypes) {
610 if (lower_type.compare(legal_type) == 0)
611 return true;
612 }
613
614 return type_string.size() > 2 &&
615 base::StartsWith(type_string, "x-",
616 base::CompareCase::INSENSITIVE_ASCII);
617 }
618
619 //----------------------------------------------------------------------------
620 // Wrappers for the singleton
621 //----------------------------------------------------------------------------
622
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)623 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
624 std::string* mime_type) {
625 return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
626 }
627
GetMimeTypeFromFile(const base::FilePath & file_path,std::string * mime_type)628 bool GetMimeTypeFromFile(const base::FilePath& file_path,
629 std::string* mime_type) {
630 return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
631 }
632
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)633 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
634 std::string* mime_type) {
635 return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
636 }
637
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension)638 bool GetPreferredExtensionForMimeType(const std::string& mime_type,
639 base::FilePath::StringType* extension) {
640 return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
641 extension);
642 }
643
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type)644 bool MatchesMimeType(const std::string& mime_type_pattern,
645 const std::string& mime_type) {
646 return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
647 }
648
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype)649 bool ParseMimeTypeWithoutParameter(std::string_view type_string,
650 std::string* top_level_type,
651 std::string* subtype) {
652 return g_mime_util.Get().ParseMimeTypeWithoutParameter(
653 type_string, top_level_type, subtype);
654 }
655
IsValidTopLevelMimeType(const std::string & type_string)656 bool IsValidTopLevelMimeType(const std::string& type_string) {
657 return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
658 }
659
660 namespace {
661
// Concrete image types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() expands "image/*" (via kStandardTypes).
//
// From http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {"image/avif",
                                                  "image/bmp",
                                                  "image/cis-cod",
                                                  "image/gif",
                                                  "image/ief",
                                                  "image/jpeg",
                                                  "image/webp",
                                                  "image/pict",
                                                  "image/pipeg",
                                                  "image/png",
                                                  "image/svg+xml",
                                                  "image/tiff",
                                                  "image/vnd.microsoft.icon",
                                                  "image/x-cmu-raster",
                                                  "image/x-cmx",
                                                  "image/x-icon",
                                                  "image/x-portable-anymap",
                                                  "image/x-portable-bitmap",
                                                  "image/x-portable-graymap",
                                                  "image/x-portable-pixmap",
                                                  "image/x-rgb",
                                                  "image/x-xbitmap",
                                                  "image/x-xpixmap",
                                                  "image/x-xwindowdump"};
// Concrete audio types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() expands "audio/*" (via kStandardTypes).
static const char* const kStandardAudioTypes[] = {
  "audio/aac",
  "audio/aiff",
  "audio/amr",
  "audio/basic",
  "audio/flac",
  "audio/midi",
  "audio/mp3",
  "audio/mp4",
  "audio/mpeg",
  "audio/mpeg3",
  "audio/ogg",
  "audio/vorbis",
  "audio/wav",
  "audio/webm",
  "audio/x-m4a",
  "audio/x-ms-wma",
  "audio/vnd.rn-realaudio",
  "audio/vnd.wave"
};
// Concrete font types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() expands "font/*" (via kStandardTypes).
// https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection", "font/otf",  "font/sfnt",
    "font/ttf",        "font/woff", "font/woff2",
};
// Concrete video types whose platform-registered extensions are collected when
// GetExtensionsForMimeType() expands "video/*" (via kStandardTypes).
static const char* const kStandardVideoTypes[] = {
  "video/avi",
  "video/divx",
  "video/flc",
  "video/mp4",
  "video/mpeg",
  "video/ogg",
  "video/quicktime",
  "video/sd-video",
  "video/webm",
  "video/x-dv",
  "video/x-m4v",
  "video/x-mpeg",
  "video/x-ms-asf",
  "video/x-ms-wmv"
};
729
// Associates a wildcard prefix with the concrete MIME types it stands for.
struct StandardType {
  // Prefix including the trailing slash (e.g. "image/"), or nullptr for the
  // catch-all entry.
  const char* const leading_mime_type;
  // Concrete MIME types to query the platform with for this prefix.
  base::span<const char* const> standard_types;
};
// Lookup table used by GetExtensionsForMimeType() for "type/*" patterns.
//
// The final {nullptr, {}} entry is a catch-all: the search there ends on it
// when no prefix matches, and its empty list means no platform queries are
// made for that pattern. It must stay last.
static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
                                              {"audio/", kStandardAudioTypes},
                                              {"font/", kStandardFontTypes},
                                              {"video/", kStandardVideoTypes},
                                              {nullptr, {}}};
739
740 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
741 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
742 //
743 // The meaning of |mime_type| depends on the value of |prefix_match|:
744 //
745 // * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
746 // string such as "text/plain".
747 //
748 // * If |prefix_match = true| then |mime_type| is treated as the prefix for a
749 // (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)750 void GetExtensionsFromHardCodedMappings(
751 base::span<const MimeInfo> mappings,
752 const std::string& mime_type,
753 bool prefix_match,
754 std::unordered_set<base::FilePath::StringType>* extensions) {
755 for (const auto& mapping : mappings) {
756 std::string_view cur_mime_type(mapping.mime_type);
757
758 if (base::StartsWith(cur_mime_type, mime_type,
759 base::CompareCase::INSENSITIVE_ASCII) &&
760 (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
761 for (std::string_view this_extension : base::SplitStringPiece(
762 mapping.extensions, ",", base::TRIM_WHITESPACE,
763 base::SPLIT_WANT_ALL)) {
764 extensions->insert(StringToFilePathStringType(this_extension));
765 }
766 }
767 }
768 }
769
GetExtensionsHelper(base::span<const char * const> standard_types,const std::string & leading_mime_type,std::unordered_set<base::FilePath::StringType> * extensions)770 void GetExtensionsHelper(
771 base::span<const char* const> standard_types,
772 const std::string& leading_mime_type,
773 std::unordered_set<base::FilePath::StringType>* extensions) {
774 for (auto* standard_type : standard_types) {
775 g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
776 extensions);
777 }
778
779 // Also look up the extensions from hard-coded mappings in case that some
780 // supported extensions are not registered in the system registry, like ogg.
781 GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
782 extensions);
783
784 GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
785 true, extensions);
786 }
787
// Appends every element of |source| to the end of |target|, in |source|'s
// iteration order. Elements already in |target| are kept, and |source| is left
// unchanged.
//
// A single range insert copy-constructs each element directly in place; it
// avoids default-constructing placeholder elements first, so T does not need
// to be default-constructible.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  target->insert(target->end(), source->begin(), source->end());
}
799
// Characters to be used for mime multipart boundary.
//
// Only ASCII digits and letters are listed here, which is narrower than what
// the RFC allows:
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// The RFC 2046 spec says the alphanumeric characters plus the
// following characters are legal for boundaries:  '()+_,-./:=?
// However the following characters, though legal, cause some sites
// to fail: (),./:=+
constexpr std::string_view kMimeBoundaryCharacters(
    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Size of mime multipart boundary, in characters.
const size_t kMimeBoundarySize = 69;
812
813 } // namespace
814
GetExtensionsForMimeType(const std::string & unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)815 void GetExtensionsForMimeType(
816 const std::string& unsafe_mime_type,
817 std::vector<base::FilePath::StringType>* extensions) {
818 if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
819 return;
820
821 const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
822 std::unordered_set<base::FilePath::StringType> unique_extensions;
823
824 if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
825 std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
826
827 // Find the matching StandardType from within kStandardTypes, or fall
828 // through to the last (default) StandardType.
829 const StandardType* type = nullptr;
830 for (const StandardType& standard_type : kStandardTypes) {
831 type = &standard_type;
832 if (type->leading_mime_type &&
833 leading_mime_type == type->leading_mime_type) {
834 break;
835 }
836 }
837 DCHECK(type);
838 GetExtensionsHelper(type->standard_types,
839 leading_mime_type,
840 &unique_extensions);
841 } else {
842 g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
843 &unique_extensions);
844
845 // Also look up the extensions from hard-coded mappings in case that some
846 // supported extensions are not registered in the system registry, like ogg.
847 GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
848 &unique_extensions);
849
850 GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
851 &unique_extensions);
852 }
853
854 UnorderedSetToVector(&unique_extensions, extensions);
855 }
856
GenerateMimeMultipartBoundary()857 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
858 // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
859 // Because encapsulation boundaries must not appear in the body parts being
860 // encapsulated, a user agent must exercise care to choose a unique
861 // boundary. The boundary in the example above could have been the result of
862 // an algorithm designed to produce boundaries with a very low probability
863 // of already existing in the data to be encapsulated without having to
864 // prescan the data.
865 // [...]
866 // the boundary parameter [...] consists of 1 to 70 characters from a set of
867 // characters known to be very robust through email gateways, and NOT ending
868 // with white space.
869 // [...]
870 // boundary := 0*69<bchars> bcharsnospace
871 // bchars := bcharsnospace / " "
872 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
873 // "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
874
875 std::string result;
876 result.reserve(kMimeBoundarySize);
877 result.append("----MultipartBoundary--");
878 while (result.size() < (kMimeBoundarySize - 4)) {
879 char c = kMimeBoundaryCharacters[base::RandInt(
880 0, kMimeBoundaryCharacters.size() - 1)];
881 result.push_back(c);
882 }
883 result.append("----");
884
885 // Not a strict requirement - documentation only.
886 DCHECK_EQ(kMimeBoundarySize, result.size());
887
888 return result;
889 }
890
// Appends one multipart/form-data part to |post_data|:
//
//   --<mime_boundary>\r\n
//   Content-Disposition: form-data; name="<value_name>"\r\n
//   Content-Type: <content_type>\r\n      (only if |content_type| is not empty)
//   \r\n
//   <value>\r\n
//
// All inputs are written verbatim; no escaping or validation is performed.
// |post_data| must be non-null and must not alias any of the input strings.
void AddMultipartValueForUpload(const std::string& value_name,
                                const std::string& value,
                                const std::string& mime_boundary,
                                const std::string& content_type,
                                std::string* post_data) {
  DCHECK(post_data);
  // Each piece is appended directly so that no temporary concatenated strings
  // are built. This matters most for |value|, which may be a large payload.

  // First line is the boundary.
  post_data->append("--");
  post_data->append(mime_boundary);
  post_data->append("\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"");
  post_data->append(value_name);
  post_data->append("\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: ");
    post_data->append(content_type);
    post_data->append("\r\n");
  }
  // Leave an empty line and append the value.
  post_data->append("\r\n");
  post_data->append(value);
  post_data->append("\r\n");
}
909
// Appends one multipart/form-data part that carries a file name to
// |post_data|:
//
//   --<mime_boundary>\r\n
//   Content-Disposition: form-data; name="<value_name>"; filename="<file_name>"\r\n
//   Content-Type: <content_type>\r\n      (only if |content_type| is not empty)
//   \r\n
//   <value>\r\n
//
// All inputs are written verbatim; no escaping or validation is performed.
// |post_data| must be non-null and must not alias any of the input strings.
void AddMultipartValueForUploadWithFileName(const std::string& value_name,
                                            const std::string& file_name,
                                            const std::string& value,
                                            const std::string& mime_boundary,
                                            const std::string& content_type,
                                            std::string* post_data) {
  DCHECK(post_data);
  // Each piece is appended directly so that no temporary concatenated strings
  // are built. This matters most for |value|, which may be a large payload.

  // First line is the boundary.
  post_data->append("--");
  post_data->append(mime_boundary);
  post_data->append("\r\n");
  // Next line is the Content-disposition.
  post_data->append("Content-Disposition: form-data; name=\"");
  post_data->append(value_name);
  post_data->append("\"; filename=\"");
  post_data->append(file_name);
  post_data->append("\"\r\n");
  if (!content_type.empty()) {
    // If Content-type is specified, the next line is that.
    post_data->append("Content-Type: ");
    post_data->append(content_type);
    post_data->append("\r\n");
  }
  // Leave an empty line and append the value.
  post_data->append("\r\n");
  post_data->append(value);
  post_data->append("\r\n");
}
929
// Appends the closing delimiter "--<mime_boundary>--\r\n" to |post_data|.
// |post_data| must be non-null and must not alias |mime_boundary|.
void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
                                         std::string* post_data) {
  DCHECK(post_data);
  // Appended piecewise so that no temporary concatenated string is built.
  post_data->append("--");
  post_data->append(mime_boundary);
  post_data->append("--\r\n");
}
935
936 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
937 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(const std::string & type_string,bool accept_comma_separated)938 std::optional<std::string> ExtractMimeTypeFromMediaType(
939 const std::string& type_string,
940 bool accept_comma_separated) {
941 std::string::size_type end = type_string.find(';');
942 if (accept_comma_separated) {
943 end = std::min(end, type_string.find(','));
944 }
945 std::string top_level_type;
946 std::string subtype;
947 if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
948 &subtype)) {
949 return top_level_type + "/" + subtype;
950 }
951 return std::nullopt;
952 }
953
954 } // namespace net
955