xref: /aosp_15_r20/external/cronet/net/base/mime_util.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/base/mime_util.h"
6 
7 #include <algorithm>
8 #include <iterator>
9 #include <map>
10 #include <string>
11 #include <string_view>
12 #include <unordered_set>
13 
14 #include "base/base64.h"
15 #include "base/check_op.h"
16 #include "base/containers/span.h"
17 #include "base/lazy_instance.h"
18 #include "base/rand_util.h"
19 #include "base/strings/string_number_conversions.h"
20 #include "base/strings/string_split.h"
21 #include "base/strings/string_util.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "build/build_config.h"
24 #include "net/base/platform_mime_util.h"
25 #include "net/http/http_util.h"
26 
27 using std::string;
28 
29 namespace net {
30 
31 // Singleton utility class for mime types.
32 class MimeUtil : public PlatformMimeUtil {
33  public:
34   bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
35                                 std::string* mime_type) const;
36 
37   bool GetMimeTypeFromFile(const base::FilePath& file_path,
38                            std::string* mime_type) const;
39 
40   bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
41                                          std::string* mime_type) const;
42 
43   bool GetPreferredExtensionForMimeType(
44       const std::string& mime_type,
45       base::FilePath::StringType* extension) const;
46 
47   bool MatchesMimeType(const std::string& mime_type_pattern,
48                        const std::string& mime_type) const;
49 
50   bool ParseMimeTypeWithoutParameter(std::string_view type_string,
51                                      std::string* top_level_type,
52                                      std::string* subtype) const;
53 
54   bool IsValidTopLevelMimeType(const std::string& type_string) const;
55 
56  private:
57   friend struct base::LazyInstanceTraitsBase<MimeUtil>;
58 
59   MimeUtil();
60 
61   bool GetMimeTypeFromExtensionHelper(const base::FilePath::StringType& ext,
62                                       bool include_platform_types,
63                                       std::string* mime_type) const;
64 };  // class MimeUtil
65 
66 // This variable is Leaky because we need to access it from WorkerPool threads.
67 static base::LazyInstance<MimeUtil>::Leaky g_mime_util =
68     LAZY_INSTANCE_INITIALIZER;
69 
// One row of a hardcoded MIME table: a MIME type plus the file extensions
// associated with it.
struct MimeInfo {
  // The MIME type, e.g. "text/html".
  const char* const mime_type;

  // Extensions for |mime_type| as a single comma-separated string (no dots).
  // The first one listed is the preferred extension.
  const char* const extensions;
};
77 
78 // How to use the MIME maps
79 // ------------------------
80 // READ THIS BEFORE MODIFYING THE MIME MAPPINGS BELOW.
81 //
82 // There are two hardcoded mappings from MIME types: kPrimaryMappings and
83 // kSecondaryMappings.
84 //
85 // kPrimaryMappings:
86 //
87 //   Use this for mappings that are critical to the web platform.  Mappings you
88 //   add to this list take priority over the underlying platform when converting
89 //   from file extension -> MIME type.  Thus file extensions listed here will
90 //   work consistently across platforms.
91 //
92 // kSecondaryMappings:
93 //
94 //   Use this for mappings that must exist, but can be overridden by user
95 //   preferences.
96 //
97 // The following applies to both lists:
98 //
99 // * The same extension can appear multiple times in the same list under
100 //   different MIME types.  Extensions that appear earlier take precedence over
101 //   those that appear later.
102 //
103 // * A MIME type must not appear more than once in a single list.  It is valid
104 //   for the same MIME type to appear in kPrimaryMappings and
105 //   kSecondaryMappings.
106 //
107 // The MIME maps are used for three types of lookups:
108 //
109 // 1) MIME type -> file extension.  Implemented as
110 //    GetPreferredExtensionForMimeType().
111 //
112 //    Sources are consulted in the following order:
113 //
//    a) As a special case application/octet-stream is mapped to nothing by
//       the hardcoded lists.  Web sites are supposed to use this MIME type to
//       indicate that the content is opaque and shouldn't be parsed as any
//       specific type of content.  It doesn't make sense to map this to
//       anything.  Note that this check is made inside
//       FindPreferredExtension(), i.e. only after b) has been consulted.
118 //
119 //    b) The underlying platform.  If the operating system has a mapping from
120 //       the MIME type to a file extension, then that takes priority.  The
121 //       platform is assumed to represent the user's preference.
122 //
123 //    c) kPrimaryMappings.  Order doesn't matter since there should only be at
124 //       most one entry per MIME type.
125 //
126 //    d) kSecondaryMappings.  Again, order doesn't matter.
127 //
128 // 2) File extension -> MIME type.  Implemented in GetMimeTypeFromExtension().
129 //
130 //    Sources are considered in the following order:
131 //
132 //    a) kPrimaryMappings.  Order matters here since file extensions can appear
133 //       multiple times on these lists.  The first mapping in order of
134 //       appearance in the list wins.
135 //
136 //    b) Underlying platform.
137 //
138 //    c) kSecondaryMappings.  Again, the order matters.
139 //
140 // 3) File extension -> Well known MIME type.  Implemented as
141 //    GetWellKnownMimeTypeFromExtension().
142 //
143 //    This is similar to 2), with the exception that b) is skipped.  I.e.  Only
144 //    considers the hardcoded mappings in kPrimaryMappings and
145 //    kSecondaryMappings.
146 
147 // See comments above for details on how this list is used.
148 static const MimeInfo kPrimaryMappings[] = {
149     // Must precede audio/webm .
150     {"video/webm", "webm"},
151 
152     // Must precede audio/mp3
153     {"audio/mpeg", "mp3"},
154 
155     {"application/wasm", "wasm"},
156     {"application/x-chrome-extension", "crx"},
157     {"application/xhtml+xml", "xhtml,xht,xhtm"},
158     {"audio/flac", "flac"},
159     {"audio/mp3", "mp3"},
160     {"audio/ogg", "ogg,oga,opus"},
161     {"audio/wav", "wav"},
162     {"audio/webm", "webm"},
163     {"audio/x-m4a", "m4a"},
164     {"image/avif", "avif"},
165     {"image/gif", "gif"},
166     {"image/jpeg", "jpeg,jpg"},
167     {"image/png", "png"},
168     {"image/apng", "png,apng"},
169     {"image/svg+xml", "svg,svgz"},
170     {"image/webp", "webp"},
171     {"multipart/related", "mht,mhtml"},
172     {"text/css", "css"},
173     {"text/html", "html,htm,shtml,shtm"},
174     {"text/javascript", "js,mjs"},
175     {"text/xml", "xml"},
176     {"video/mp4", "mp4,m4v"},
177     {"video/ogg", "ogv,ogm"},
178 
179     // This is a primary mapping (overrides the platform) rather than secondary
180     // to work around an issue when Excel is installed on Windows. Excel
181     // registers csv as application/vnd.ms-excel instead of text/csv from RFC
182     // 4180. See https://crbug.com/139105.
183     {"text/csv", "csv"},
184 };
185 
186 // See comments above for details on how this list is used.
187 static const MimeInfo kSecondaryMappings[] = {
188     // Must precede image/vnd.microsoft.icon .
189     {"image/x-icon", "ico"},
190 
191     {"application/epub+zip", "epub"},
192     {"application/font-woff", "woff"},
193     {"application/gzip", "gz,tgz"},
194     {"application/javascript", "js"},
195     {"application/json", "json"},  // Per http://www.ietf.org/rfc/rfc4627.txt.
196     {"application/msword", "doc,dot"},
197     {"application/octet-stream", "bin,exe,com"},
198     {"application/pdf", "pdf"},
199     {"application/pkcs7-mime", "p7m,p7c,p7z"},
200     {"application/pkcs7-signature", "p7s"},
201     {"application/postscript", "ps,eps,ai"},
202     {"application/rdf+xml", "rdf"},
203     {"application/rss+xml", "rss"},
204     {"application/rtf", "rtf"},
205     {"application/vnd.android.package-archive", "apk"},
206     {"application/vnd.mozilla.xul+xml", "xul"},
207     {"application/vnd.ms-excel", "xls"},
208     {"application/vnd.ms-powerpoint", "ppt"},
209     {"application/"
210      "vnd.openxmlformats-officedocument.presentationml.presentation",
211      "pptx"},
212     {"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
213      "xlsx"},
214     {"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
215      "docx"},
216     {"application/x-gzip", "gz,tgz"},
217     {"application/x-mpegurl", "m3u8"},
218     {"application/x-shockwave-flash", "swf,swl"},
219     {"application/x-tar", "tar"},
220     {"application/x-x509-ca-cert", "cer,crt"},
221     {"application/zip", "zip"},
222     // This is the platform mapping on recent versions of Windows 10.
223     {"audio/webm", "weba"},
224     {"image/bmp", "bmp"},
225     {"image/jpeg", "jfif,pjpeg,pjp"},
226     {"image/tiff", "tiff,tif"},
227     {"image/vnd.microsoft.icon", "ico"},
228     {"image/x-png", "png"},
229     {"image/x-xbitmap", "xbm"},
230     {"message/rfc822", "eml"},
231     {"text/calendar", "ics"},
232     {"text/html", "ehtml"},
233     {"text/plain", "txt,text"},
234     {"text/x-sh", "sh"},
235     {"text/xml", "xsl,xbl,xslt"},
236     {"video/mpeg", "mpeg,mpg"},
237 };
238 
239 // Finds mime type of |ext| from |mappings|.
240 template <size_t num_mappings>
FindMimeType(const MimeInfo (& mappings)[num_mappings],const std::string & ext)241 static const char* FindMimeType(const MimeInfo (&mappings)[num_mappings],
242                                 const std::string& ext) {
243   for (const auto& mapping : mappings) {
244     const char* extensions = mapping.extensions;
245     for (;;) {
246       size_t end_pos = strcspn(extensions, ",");
247       // The length check is required to prevent the StringPiece below from
248       // including uninitialized memory if ext is longer than extensions.
249       if (end_pos == ext.size() &&
250           base::EqualsCaseInsensitiveASCII(
251               std::string_view(extensions, ext.size()), ext)) {
252         return mapping.mime_type;
253       }
254       extensions += end_pos;
255       if (!*extensions)
256         break;
257       extensions += 1;  // skip over comma
258     }
259   }
260   return nullptr;
261 }
262 
StringToFilePathStringType(std::string_view string_piece)263 static base::FilePath::StringType StringToFilePathStringType(
264     std::string_view string_piece) {
265 #if BUILDFLAG(IS_WIN)
266   return base::UTF8ToWide(string_piece);
267 #else
268   return std::string(string_piece);
269 #endif
270 }
271 
272 // Helper used in MimeUtil::GetPreferredExtensionForMimeType() to search
273 // preferred extension in MimeInfo arrays.
274 template <size_t num_mappings>
FindPreferredExtension(const MimeInfo (& mappings)[num_mappings],const std::string & mime_type,base::FilePath::StringType * result)275 static bool FindPreferredExtension(const MimeInfo (&mappings)[num_mappings],
276                                    const std::string& mime_type,
277                                    base::FilePath::StringType* result) {
278   // There is no preferred extension for "application/octet-stream".
279   if (mime_type == "application/octet-stream")
280     return false;
281 
282   for (const auto& mapping : mappings) {
283     if (mapping.mime_type == mime_type) {
284       const char* extensions = mapping.extensions;
285       const char* extension_end = strchr(extensions, ',');
286       size_t len =
287           extension_end ? extension_end - extensions : strlen(extensions);
288       *result = StringToFilePathStringType(std::string_view(extensions, len));
289       return true;
290     }
291   }
292   return false;
293 }
294 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const295 bool MimeUtil::GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
296                                         string* result) const {
297   return GetMimeTypeFromExtensionHelper(ext, true, result);
298 }
299 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,string * result) const300 bool MimeUtil::GetWellKnownMimeTypeFromExtension(
301     const base::FilePath::StringType& ext,
302     string* result) const {
303   return GetMimeTypeFromExtensionHelper(ext, false, result);
304 }
305 
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension) const306 bool MimeUtil::GetPreferredExtensionForMimeType(
307     const std::string& mime_type,
308     base::FilePath::StringType* extension) const {
309   // Search the MIME type in the platform DB first, then in kPrimaryMappings and
310   // kSecondaryMappings.
311   return GetPlatformPreferredExtensionForMimeType(mime_type, extension) ||
312          FindPreferredExtension(kPrimaryMappings, mime_type, extension) ||
313          FindPreferredExtension(kSecondaryMappings, mime_type, extension);
314 }
315 
GetMimeTypeFromFile(const base::FilePath & file_path,string * result) const316 bool MimeUtil::GetMimeTypeFromFile(const base::FilePath& file_path,
317                                    string* result) const {
318   base::FilePath::StringType file_name_str = file_path.Extension();
319   if (file_name_str.empty())
320     return false;
321   return GetMimeTypeFromExtension(file_name_str.substr(1), result);
322 }
323 
GetMimeTypeFromExtensionHelper(const base::FilePath::StringType & ext,bool include_platform_types,string * result) const324 bool MimeUtil::GetMimeTypeFromExtensionHelper(
325     const base::FilePath::StringType& ext,
326     bool include_platform_types,
327     string* result) const {
328   DCHECK(ext.empty() || ext[0] != '.')
329       << "extension passed in must not include leading dot";
330 
331   // Avoids crash when unable to handle a long file path. See crbug.com/48733.
332   const unsigned kMaxFilePathSize = 65536;
333   if (ext.length() > kMaxFilePathSize)
334     return false;
335 
336   // Reject a string which contains null character.
337   base::FilePath::StringType::size_type nul_pos =
338       ext.find(FILE_PATH_LITERAL('\0'));
339   if (nul_pos != base::FilePath::StringType::npos)
340     return false;
341 
342   // We implement the same algorithm as Mozilla for mapping a file extension to
343   // a mime type.  That is, we first check a hard-coded list (that cannot be
344   // overridden), and then if not found there, we defer to the system registry.
345   // Finally, we scan a secondary hard-coded list to catch types that we can
346   // deduce but that we also want to allow the OS to override.
347 
348   base::FilePath path_ext(ext);
349   const string ext_narrow_str = path_ext.AsUTF8Unsafe();
350   const char* mime_type = FindMimeType(kPrimaryMappings, ext_narrow_str);
351   if (mime_type) {
352     *result = mime_type;
353     return true;
354   }
355 
356   if (include_platform_types && GetPlatformMimeTypeFromExtension(ext, result))
357     return true;
358 
359   mime_type = FindMimeType(kSecondaryMappings, ext_narrow_str);
360   if (mime_type) {
361     *result = mime_type;
362     return true;
363   }
364 
365   return false;
366 }
367 
368 MimeUtil::MimeUtil() = default;
369 
370 // Tests for MIME parameter equality. Each parameter in the |mime_type_pattern|
371 // must be matched by a parameter in the |mime_type|. If there are no
372 // parameters in the pattern, the match is a success.
373 //
374 // According rfc2045 keys of parameters are case-insensitive, while values may
375 // or may not be case-sensitive, but they are usually case-sensitive. So, this
376 // function matches values in *case-sensitive* manner, however note that this
377 // may produce some false negatives.
MatchesMimeTypeParameters(const std::string & mime_type_pattern,const std::string & mime_type)378 bool MatchesMimeTypeParameters(const std::string& mime_type_pattern,
379                                const std::string& mime_type) {
380   typedef std::map<std::string, std::string> StringPairMap;
381 
382   const std::string::size_type semicolon = mime_type_pattern.find(';');
383   const std::string::size_type test_semicolon = mime_type.find(';');
384   if (semicolon != std::string::npos) {
385     if (test_semicolon == std::string::npos)
386       return false;
387 
388     base::StringPairs pattern_parameters;
389     base::SplitStringIntoKeyValuePairs(mime_type_pattern.substr(semicolon + 1),
390                                        '=', ';', &pattern_parameters);
391     base::StringPairs test_parameters;
392     base::SplitStringIntoKeyValuePairs(mime_type.substr(test_semicolon + 1),
393                                        '=', ';', &test_parameters);
394 
395     // Put the parameters to maps with the keys converted to lower case.
396     StringPairMap pattern_parameter_map;
397     for (const auto& pair : pattern_parameters) {
398       pattern_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
399     }
400 
401     StringPairMap test_parameter_map;
402     for (const auto& pair : test_parameters) {
403       test_parameter_map[base::ToLowerASCII(pair.first)] = pair.second;
404     }
405 
406     if (pattern_parameter_map.size() > test_parameter_map.size())
407       return false;
408 
409     for (const auto& parameter_pair : pattern_parameter_map) {
410       const auto& test_parameter_pair_it =
411           test_parameter_map.find(parameter_pair.first);
412       if (test_parameter_pair_it == test_parameter_map.end())
413         return false;
414       if (parameter_pair.second != test_parameter_pair_it->second)
415         return false;
416     }
417   }
418 
419   return true;
420 }
421 
422 // This comparison handles absolute maching and also basic
423 // wildcards.  The plugin mime types could be:
424 //      application/x-foo
425 //      application/*
426 //      application/*+xml
427 //      *
428 // Also tests mime parameters -- all parameters in the pattern must be present
429 // in the tested type for a match to succeed.
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type) const430 bool MimeUtil::MatchesMimeType(const std::string& mime_type_pattern,
431                                const std::string& mime_type) const {
432   if (mime_type_pattern.empty())
433     return false;
434 
435   std::string::size_type semicolon = mime_type_pattern.find(';');
436   const std::string base_pattern(mime_type_pattern.substr(0, semicolon));
437   semicolon = mime_type.find(';');
438   const std::string base_type(mime_type.substr(0, semicolon));
439 
440   if (base_pattern == "*" || base_pattern == "*/*")
441     return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
442 
443   const std::string::size_type star = base_pattern.find('*');
444   if (star == std::string::npos) {
445     if (base::EqualsCaseInsensitiveASCII(base_pattern, base_type))
446       return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
447     else
448       return false;
449   }
450 
451   // Test length to prevent overlap between |left| and |right|.
452   if (base_type.length() < base_pattern.length() - 1)
453     return false;
454 
455   std::string_view base_pattern_piece(base_pattern);
456   std::string_view left(base_pattern_piece.substr(0, star));
457   std::string_view right(base_pattern_piece.substr(star + 1));
458 
459   if (!base::StartsWith(base_type, left, base::CompareCase::INSENSITIVE_ASCII))
460     return false;
461 
462   if (!right.empty() &&
463       !base::EndsWith(base_type, right, base::CompareCase::INSENSITIVE_ASCII))
464     return false;
465 
466   return MatchesMimeTypeParameters(mime_type_pattern, mime_type);
467 }
468 
ParseMimeType(const std::string & type_str,std::string * mime_type,base::StringPairs * params)469 bool ParseMimeType(const std::string& type_str,
470                    std::string* mime_type,
471                    base::StringPairs* params) {
472   // Trim leading and trailing whitespace from type.  We include '(' in
473   // the trailing trim set to catch media-type comments, which are not at all
474   // standard, but may occur in rare cases.
475   size_t type_val = type_str.find_first_not_of(HTTP_LWS);
476   type_val = std::min(type_val, type_str.length());
477   size_t type_end = type_str.find_first_of(HTTP_LWS ";(", type_val);
478   if (type_end == std::string::npos)
479     type_end = type_str.length();
480 
481   // Reject a mime-type if it does not include a slash.
482   size_t slash_pos = type_str.find_first_of('/');
483   if (slash_pos == std::string::npos || slash_pos > type_end)
484     return false;
485   if (mime_type)
486     *mime_type = type_str.substr(type_val, type_end - type_val);
487 
488   // Iterate over parameters. Can't split the string around semicolons
489   // preemptively because quoted strings may include semicolons. Mostly matches
490   // logic in https://mimesniff.spec.whatwg.org/. Main differences: Does not
491   // validate characters are HTTP token code points / HTTP quoted-string token
492   // code points, and ignores spaces after "=" in parameters.
493   if (params)
494     params->clear();
495   std::string::size_type offset = type_str.find_first_of(';', type_end);
496   while (offset < type_str.size()) {
497     DCHECK_EQ(';', type_str[offset]);
498     // Trim off the semicolon.
499     ++offset;
500 
501     // Trim off any following spaces.
502     offset = type_str.find_first_not_of(HTTP_LWS, offset);
503     std::string::size_type param_name_start = offset;
504 
505     // Extend parameter name until run into a semicolon or equals sign.  Per
506     // spec, trailing spaces are not removed.
507     offset = type_str.find_first_of(";=", offset);
508 
509     // Nothing more to do if at end of string, or if there's no parameter
510     // value, since names without values aren't allowed.
511     if (offset == std::string::npos || type_str[offset] == ';')
512       continue;
513 
514     auto param_name = base::MakeStringPiece(type_str.begin() + param_name_start,
515                                             type_str.begin() + offset);
516 
517     // Now parse the value.
518     DCHECK_EQ('=', type_str[offset]);
519     // Trim off the '='.
520     offset++;
521 
522     // Remove leading spaces. This violates the spec, though it matches
523     // pre-existing behavior.
524     //
525     // TODO(mmenke): Consider doing this (only?) after parsing quotes, which
526     // seems to align more with the spec - not the content-type spec, but the
527     // GET spec's way of getting an encoding, and the spec for handling
528     // boundary values as well.
529     // See https://encoding.spec.whatwg.org/#names-and-labels.
530     offset = type_str.find_first_not_of(HTTP_LWS, offset);
531 
532     std::string param_value;
533     if (offset == std::string::npos || type_str[offset] == ';') {
534       // Nothing to do here - an unquoted string of only whitespace should be
535       // skipped.
536       continue;
537     } else if (type_str[offset] != '"') {
538       // If the first character is not a quotation mark, copy data directly.
539       std::string::size_type value_start = offset;
540       offset = type_str.find_first_of(';', offset);
541       std::string::size_type value_end = offset;
542 
543       // Remove terminal whitespace. If ran off the end of the string, have to
544       // update |value_end| first.
545       if (value_end == std::string::npos)
546         value_end = type_str.size();
547       while (value_end > value_start &&
548              HttpUtil::IsLWS(type_str[value_end - 1])) {
549         --value_end;
550       }
551 
552       param_value = type_str.substr(value_start, value_end - value_start);
553     } else {
554       // Otherwise, append data, with special handling for backslashes, until
555       // a close quote.  Do not trim whitespace for quoted-string.
556 
557       // Skip open quote.
558       DCHECK_EQ('"', type_str[offset]);
559       ++offset;
560 
561       while (offset < type_str.size() && type_str[offset] != '"') {
562         // Skip over backslash and append the next character, when not at
563         // the end of the string. Otherwise, copy the next character (Which may
564         // be a backslash).
565         if (type_str[offset] == '\\' && offset + 1 < type_str.size()) {
566           ++offset;
567         }
568         param_value += type_str[offset];
569         ++offset;
570       }
571 
572       offset = type_str.find_first_of(';', offset);
573     }
574     if (params)
575       params->emplace_back(param_name, param_value);
576   }
577   return true;
578 }
579 
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype) const580 bool MimeUtil::ParseMimeTypeWithoutParameter(std::string_view type_string,
581                                              std::string* top_level_type,
582                                              std::string* subtype) const {
583   std::vector<std::string_view> components = base::SplitStringPiece(
584       type_string, "/", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
585   if (components.size() != 2)
586     return false;
587   components[0] = TrimWhitespaceASCII(components[0], base::TRIM_LEADING);
588   components[1] = TrimWhitespaceASCII(components[1], base::TRIM_TRAILING);
589   if (!HttpUtil::IsToken(components[0]) || !HttpUtil::IsToken(components[1]))
590     return false;
591 
592   if (top_level_type)
593     top_level_type->assign(std::string(components[0]));
594 
595   if (subtype)
596     subtype->assign(std::string(components[1]));
597 
598   return true;
599 }
600 
// Top-level media types accepted by IsValidTopLevelMimeType(), all in lower
// case. See https://www.iana.org/assignments/media-types/media-types.xhtml
static const char* const kLegalTopLevelTypes[] = {
    "application",
    "audio",
    "example",
    "font",
    "image",
    "message",
    "model",
    "multipart",
    "text",
    "video",
};
606 
IsValidTopLevelMimeType(const std::string & type_string) const607 bool MimeUtil::IsValidTopLevelMimeType(const std::string& type_string) const {
608   std::string lower_type = base::ToLowerASCII(type_string);
609   for (const char* const legal_type : kLegalTopLevelTypes) {
610     if (lower_type.compare(legal_type) == 0)
611       return true;
612   }
613 
614   return type_string.size() > 2 &&
615          base::StartsWith(type_string, "x-",
616                           base::CompareCase::INSENSITIVE_ASCII);
617 }
618 
619 //----------------------------------------------------------------------------
620 // Wrappers for the singleton
621 //----------------------------------------------------------------------------
622 
GetMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)623 bool GetMimeTypeFromExtension(const base::FilePath::StringType& ext,
624                               std::string* mime_type) {
625   return g_mime_util.Get().GetMimeTypeFromExtension(ext, mime_type);
626 }
627 
GetMimeTypeFromFile(const base::FilePath & file_path,std::string * mime_type)628 bool GetMimeTypeFromFile(const base::FilePath& file_path,
629                          std::string* mime_type) {
630   return g_mime_util.Get().GetMimeTypeFromFile(file_path, mime_type);
631 }
632 
GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType & ext,std::string * mime_type)633 bool GetWellKnownMimeTypeFromExtension(const base::FilePath::StringType& ext,
634                                        std::string* mime_type) {
635   return g_mime_util.Get().GetWellKnownMimeTypeFromExtension(ext, mime_type);
636 }
637 
GetPreferredExtensionForMimeType(const std::string & mime_type,base::FilePath::StringType * extension)638 bool GetPreferredExtensionForMimeType(const std::string& mime_type,
639                                       base::FilePath::StringType* extension) {
640   return g_mime_util.Get().GetPreferredExtensionForMimeType(mime_type,
641                                                             extension);
642 }
643 
MatchesMimeType(const std::string & mime_type_pattern,const std::string & mime_type)644 bool MatchesMimeType(const std::string& mime_type_pattern,
645                      const std::string& mime_type) {
646   return g_mime_util.Get().MatchesMimeType(mime_type_pattern, mime_type);
647 }
648 
ParseMimeTypeWithoutParameter(std::string_view type_string,std::string * top_level_type,std::string * subtype)649 bool ParseMimeTypeWithoutParameter(std::string_view type_string,
650                                    std::string* top_level_type,
651                                    std::string* subtype) {
652   return g_mime_util.Get().ParseMimeTypeWithoutParameter(
653       type_string, top_level_type, subtype);
654 }
655 
IsValidTopLevelMimeType(const std::string & type_string)656 bool IsValidTopLevelMimeType(const std::string& type_string) {
657   return g_mime_util.Get().IsValidTopLevelMimeType(type_string);
658 }
659 
660 namespace {
661 
// Standard image MIME types. Taken from
// http://www.w3schools.com/media/media_mimeref.asp and
// http://plugindoc.mozdev.org/winmime.php
static const char* const kStandardImageTypes[] = {
    "image/avif",
    "image/bmp",
    "image/cis-cod",
    "image/gif",
    "image/ief",
    "image/jpeg",
    "image/webp",
    "image/pict",
    "image/pipeg",
    "image/png",
    "image/svg+xml",
    "image/tiff",
    "image/vnd.microsoft.icon",
    "image/x-cmu-raster",
    "image/x-cmx",
    "image/x-icon",
    "image/x-portable-anymap",
    "image/x-portable-bitmap",
    "image/x-portable-graymap",
    "image/x-portable-pixmap",
    "image/x-rgb",
    "image/x-xbitmap",
    "image/x-xpixmap",
    "image/x-xwindowdump",
};
// Standard audio MIME types.
static const char* const kStandardAudioTypes[] = {
    "audio/aac",
    "audio/aiff",
    "audio/amr",
    "audio/basic",
    "audio/flac",
    "audio/midi",
    "audio/mp3",
    "audio/mp4",
    "audio/mpeg",
    "audio/mpeg3",
    "audio/ogg",
    "audio/vorbis",
    "audio/wav",
    "audio/webm",
    "audio/x-m4a",
    "audio/x-ms-wma",
    "audio/vnd.rn-realaudio",
    "audio/vnd.wave",
};
// Standard font MIME types, see https://tools.ietf.org/html/rfc8081
static const char* const kStandardFontTypes[] = {
    "font/collection",
    "font/otf",
    "font/sfnt",
    "font/ttf",
    "font/woff",
    "font/woff2",
};
// Standard video MIME types.
static const char* const kStandardVideoTypes[] = {
    "video/avi",
    "video/divx",
    "video/flc",
    "video/mp4",
    "video/mpeg",
    "video/ogg",
    "video/quicktime",
    "video/sd-video",
    "video/webm",
    "video/x-dv",
    "video/x-m4v",
    "video/x-mpeg",
    "video/x-ms-asf",
    "video/x-ms-wmv",
};
729 
730 struct StandardType {
731   const char* const leading_mime_type;
732   base::span<const char* const> standard_types;
733 };
734 static const StandardType kStandardTypes[] = {{"image/", kStandardImageTypes},
735                                               {"audio/", kStandardAudioTypes},
736                                               {"font/", kStandardFontTypes},
737                                               {"video/", kStandardVideoTypes},
738                                               {nullptr, {}}};
739 
740 // GetExtensionsFromHardCodedMappings() adds file extensions (without a leading
741 // dot) to the set |extensions|, for all MIME types matching |mime_type|.
742 //
743 // The meaning of |mime_type| depends on the value of |prefix_match|:
744 //
745 //  * If |prefix_match = false| then |mime_type| is an exact (case-insensitive)
746 //    string such as "text/plain".
747 //
748 //  * If |prefix_match = true| then |mime_type| is treated as the prefix for a
749 //    (case-insensitive) string. For instance "Text/" would match "text/plain".
GetExtensionsFromHardCodedMappings(base::span<const MimeInfo> mappings,const std::string & mime_type,bool prefix_match,std::unordered_set<base::FilePath::StringType> * extensions)750 void GetExtensionsFromHardCodedMappings(
751     base::span<const MimeInfo> mappings,
752     const std::string& mime_type,
753     bool prefix_match,
754     std::unordered_set<base::FilePath::StringType>* extensions) {
755   for (const auto& mapping : mappings) {
756     std::string_view cur_mime_type(mapping.mime_type);
757 
758     if (base::StartsWith(cur_mime_type, mime_type,
759                          base::CompareCase::INSENSITIVE_ASCII) &&
760         (prefix_match || (cur_mime_type.length() == mime_type.length()))) {
761       for (std::string_view this_extension : base::SplitStringPiece(
762                mapping.extensions, ",", base::TRIM_WHITESPACE,
763                base::SPLIT_WANT_ALL)) {
764         extensions->insert(StringToFilePathStringType(this_extension));
765       }
766     }
767   }
768 }
769 
GetExtensionsHelper(base::span<const char * const> standard_types,const std::string & leading_mime_type,std::unordered_set<base::FilePath::StringType> * extensions)770 void GetExtensionsHelper(
771     base::span<const char* const> standard_types,
772     const std::string& leading_mime_type,
773     std::unordered_set<base::FilePath::StringType>* extensions) {
774   for (auto* standard_type : standard_types) {
775     g_mime_util.Get().GetPlatformExtensionsForMimeType(standard_type,
776                                                        extensions);
777   }
778 
779   // Also look up the extensions from hard-coded mappings in case that some
780   // supported extensions are not registered in the system registry, like ogg.
781   GetExtensionsFromHardCodedMappings(kPrimaryMappings, leading_mime_type, true,
782                                      extensions);
783 
784   GetExtensionsFromHardCodedMappings(kSecondaryMappings, leading_mime_type,
785                                      true, extensions);
786 }
787 
// Appends a copy of every element of |source| to the end of |target|; elements
// already in |target| are kept and stay in front. The appended elements follow
// |source|'s iteration order, which is unspecified for an unordered_set.
// |source| itself is left unchanged.
template <class T>
void UnorderedSetToVector(std::unordered_set<T>* source,
                          std::vector<T>* target) {
  // A single range insert copy-constructs each element directly in place,
  // instead of default-constructing placeholders and assigning over them (which
  // also needlessly required T to be default-constructible).
  target->insert(target->end(), source->begin(), source->end());
}
799 
// Alphabet from which the random part of a mime multipart boundary is drawn.
//
// TODO(rsleevi): crbug.com/575779: Follow the spec or fix the spec.
// Per the RFC 2046 spec, boundaries may legally contain the alphanumeric
// characters plus the following characters:  '()+_,-./:=?
// Some of those, though legal, cause some sites to fail: (),./:=+
// Only digits and ASCII letters are used here.
constexpr std::string_view kMimeBoundaryCharacters(
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Total length, in characters, of a generated mime multipart boundary.
constexpr size_t kMimeBoundarySize = 69;
812 
813 }  // namespace
814 
GetExtensionsForMimeType(const std::string & unsafe_mime_type,std::vector<base::FilePath::StringType> * extensions)815 void GetExtensionsForMimeType(
816     const std::string& unsafe_mime_type,
817     std::vector<base::FilePath::StringType>* extensions) {
818   if (unsafe_mime_type == "*/*" || unsafe_mime_type == "*")
819     return;
820 
821   const std::string mime_type = base::ToLowerASCII(unsafe_mime_type);
822   std::unordered_set<base::FilePath::StringType> unique_extensions;
823 
824   if (base::EndsWith(mime_type, "/*", base::CompareCase::INSENSITIVE_ASCII)) {
825     std::string leading_mime_type = mime_type.substr(0, mime_type.length() - 1);
826 
827     // Find the matching StandardType from within kStandardTypes, or fall
828     // through to the last (default) StandardType.
829     const StandardType* type = nullptr;
830     for (const StandardType& standard_type : kStandardTypes) {
831       type = &standard_type;
832       if (type->leading_mime_type &&
833           leading_mime_type == type->leading_mime_type) {
834         break;
835       }
836     }
837     DCHECK(type);
838     GetExtensionsHelper(type->standard_types,
839                         leading_mime_type,
840                         &unique_extensions);
841   } else {
842     g_mime_util.Get().GetPlatformExtensionsForMimeType(mime_type,
843                                                        &unique_extensions);
844 
845     // Also look up the extensions from hard-coded mappings in case that some
846     // supported extensions are not registered in the system registry, like ogg.
847     GetExtensionsFromHardCodedMappings(kPrimaryMappings, mime_type, false,
848                                        &unique_extensions);
849 
850     GetExtensionsFromHardCodedMappings(kSecondaryMappings, mime_type, false,
851                                        &unique_extensions);
852   }
853 
854   UnorderedSetToVector(&unique_extensions, extensions);
855 }
856 
GenerateMimeMultipartBoundary()857 NET_EXPORT std::string GenerateMimeMultipartBoundary() {
858   // Based on RFC 1341, section "7.2.1 Multipart: The common syntax":
859   //   Because encapsulation boundaries must not appear in the body parts being
860   //   encapsulated, a user agent must exercise care to choose a unique
861   //   boundary. The boundary in the example above could have been the result of
862   //   an algorithm designed to produce boundaries with a very low probability
863   //   of already existing in the data to be encapsulated without having to
864   //   prescan the data.
865   //   [...]
866   //   the boundary parameter [...] consists of 1 to 70 characters from a set of
867   //   characters known to be very robust through email gateways, and NOT ending
868   //   with white space.
869   //   [...]
870   //   boundary := 0*69<bchars> bcharsnospace
871   //   bchars := bcharsnospace / " "
872   //   bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /
873   //            "_" / "," / "-" / "." / "/" / ":" / "=" / "?"
874 
875   std::string result;
876   result.reserve(kMimeBoundarySize);
877   result.append("----MultipartBoundary--");
878   while (result.size() < (kMimeBoundarySize - 4)) {
879     char c = kMimeBoundaryCharacters[base::RandInt(
880         0, kMimeBoundaryCharacters.size() - 1)];
881     result.push_back(c);
882   }
883   result.append("----");
884 
885   // Not a strict requirement - documentation only.
886   DCHECK_EQ(kMimeBoundarySize, result.size());
887 
888   return result;
889 }
890 
AddMultipartValueForUpload(const std::string & value_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)891 void AddMultipartValueForUpload(const std::string& value_name,
892                                 const std::string& value,
893                                 const std::string& mime_boundary,
894                                 const std::string& content_type,
895                                 std::string* post_data) {
896   DCHECK(post_data);
897   // First line is the boundary.
898   post_data->append("--" + mime_boundary + "\r\n");
899   // Next line is the Content-disposition.
900   post_data->append("Content-Disposition: form-data; name=\"" +
901                     value_name + "\"\r\n");
902   if (!content_type.empty()) {
903     // If Content-type is specified, the next line is that.
904     post_data->append("Content-Type: " + content_type + "\r\n");
905   }
906   // Leave an empty line and append the value.
907   post_data->append("\r\n" + value + "\r\n");
908 }
909 
AddMultipartValueForUploadWithFileName(const std::string & value_name,const std::string & file_name,const std::string & value,const std::string & mime_boundary,const std::string & content_type,std::string * post_data)910 void AddMultipartValueForUploadWithFileName(const std::string& value_name,
911                                             const std::string& file_name,
912                                             const std::string& value,
913                                             const std::string& mime_boundary,
914                                             const std::string& content_type,
915                                             std::string* post_data) {
916   DCHECK(post_data);
917   // First line is the boundary.
918   post_data->append("--" + mime_boundary + "\r\n");
919   // Next line is the Content-disposition.
920   post_data->append("Content-Disposition: form-data; name=\"" + value_name +
921                     "\"; filename=\"" + file_name + "\"\r\n");
922   if (!content_type.empty()) {
923     // If Content-type is specified, the next line is that.
924     post_data->append("Content-Type: " + content_type + "\r\n");
925   }
926   // Leave an empty line and append the value.
927   post_data->append("\r\n" + value + "\r\n");
928 }
929 
AddMultipartFinalDelimiterForUpload(const std::string & mime_boundary,std::string * post_data)930 void AddMultipartFinalDelimiterForUpload(const std::string& mime_boundary,
931                                          std::string* post_data) {
932   DCHECK(post_data);
933   post_data->append("--" + mime_boundary + "--\r\n");
934 }
935 
936 // TODO(toyoshim): We may prefer to implement a strict RFC2616 media-type
937 // (https://tools.ietf.org/html/rfc2616#section-3.7) parser.
ExtractMimeTypeFromMediaType(const std::string & type_string,bool accept_comma_separated)938 std::optional<std::string> ExtractMimeTypeFromMediaType(
939     const std::string& type_string,
940     bool accept_comma_separated) {
941   std::string::size_type end = type_string.find(';');
942   if (accept_comma_separated) {
943     end = std::min(end, type_string.find(','));
944   }
945   std::string top_level_type;
946   std::string subtype;
947   if (ParseMimeTypeWithoutParameter(type_string.substr(0, end), &top_level_type,
948                                     &subtype)) {
949     return top_level_type + "/" + subtype;
950   }
951   return std::nullopt;
952 }
953 
954 }  // namespace net
955