1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/filename_util.h"
6
7 #include <set>
8
9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h"
11 #include "base/path_service.h"
12 #include "base/strings/escape.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/sys_string_conversions.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "build/build_config.h"
19 #include "net/base/filename_util_internal.h"
20 #include "net/base/net_string_util.h"
21 #include "net/base/url_util.h"
22 #include "net/http/http_content_disposition.h"
23 #include "url/gurl.h"
24
25 namespace net {
26
// Scheme-plus-empty-authority prefix that a path is appended to when building
// a file URL.
static constexpr char kFileURLPrefix[] = "file:///";
29
FilePathToFileURL(const base::FilePath & path)30 GURL FilePathToFileURL(const base::FilePath& path) {
31 // Produce a URL like "file:///C:/foo" for a regular file, or
32 // "file://///server/path" for UNC. The URL canonicalizer will fix up the
33 // latter case to be the canonical UNC form: "file://server/path"
34 std::string url_string(kFileURLPrefix);
35
36 // GURL() strips some whitespace and trailing control chars which are valid
37 // in file paths. It also interprets chars such as `%;#?` and maybe `\`, so we
38 // must percent encode these first. Reserve max possible length up front.
39 std::string utf8_path = path.AsUTF8Unsafe();
40 url_string.reserve(url_string.size() + (3 * utf8_path.size()));
41
42 for (auto c : utf8_path) {
43 if (c == '%' || c == ';' || c == '#' || c == '?' ||
44 #if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
45 c == '\\' ||
46 #endif
47 c <= ' ') {
48 url_string += '%';
49 base::AppendHexEncodedByte(static_cast<uint8_t>(c), url_string);
50 } else {
51 url_string += c;
52 }
53 }
54
55 return GURL(url_string);
56 }
57
FileURLToFilePath(const GURL & url,base::FilePath * file_path)58 bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) {
59 *file_path = base::FilePath();
60 base::FilePath::StringType& file_path_str =
61 const_cast<base::FilePath::StringType&>(file_path->value());
62 file_path_str.clear();
63
64 if (!url.is_valid())
65 return false;
66
67 // We may want to change this to a CHECK in the future.
68 if (!url.SchemeIsFile())
69 return false;
70
71 #if BUILDFLAG(IS_WIN)
72 std::string path;
73 std::string host = url.host();
74 if (host.empty()) {
75 // URL contains no host, the path is the filename. In this case, the path
76 // will probably be preceded with a slash, as in "/C:/foo.txt", so we
77 // trim out that here.
78 path = url.path();
79 size_t first_non_slash = path.find_first_not_of("/\\");
80 if (first_non_slash != std::string::npos && first_non_slash > 0)
81 path.erase(0, first_non_slash);
82 } else {
83 // URL contains a host: this means it's UNC. We keep the preceding slash
84 // on the path.
85 path = "\\\\";
86 path.append(host);
87 path.append(url.path());
88 }
89 std::replace(path.begin(), path.end(), '/', '\\');
90 #else // BUILDFLAG(IS_WIN)
91 // On POSIX, there's no obvious interpretation of file:// URLs with a host.
92 // Usually, remote mounts are still mounted onto the local filesystem.
93 // Therefore, we discard all URLs that are not obviously local to prevent
94 // spoofing attacks using file:// URLs. See crbug.com/881675.
95 if (!url.host().empty() && !net::IsLocalhost(url)) {
96 return false;
97 }
98 std::string path = url.path();
99 #endif // !BUILDFLAG(IS_WIN)
100
101 if (path.empty())
102 return false;
103
104 // "%2F" ('/') results in failure, because it represents a literal '/'
105 // character in a path segment (not a path separator). If this were decoded,
106 // it would be interpreted as a path separator on both POSIX and Windows (note
107 // that Firefox *does* decode this, but it was decided on
108 // https://crbug.com/585422 that this represents a potential security risk).
109 // It isn't correct to keep it as "%2F", so this just fails. This is fine,
110 // because '/' is not a valid filename character on either POSIX or Windows.
111 //
112 // A valid URL may include "%00" (NULL) in its path (see
113 // https://crbug.com/1400251), which is considered an illegal filename and
114 // results in failure.
115 std::set<unsigned char> illegal_encoded_bytes{'/', '\0'};
116
117 #if BUILDFLAG(IS_WIN)
118 // "%5C" ('\\') on Windows results in failure, for the same reason as '/'
119 // above. On POSIX, "%5C" simply decodes as '\\', a valid filename character.
120 illegal_encoded_bytes.insert('\\');
121 #endif
122
123 if (base::ContainsEncodedBytes(path, illegal_encoded_bytes))
124 return false;
125
126 // Unescape all percent-encoded sequences, including blocked-for-display
127 // characters, control characters and invalid UTF-8 byte sequences.
128 // Percent-encoded bytes are not meaningful in a file system.
129 path = base::UnescapeBinaryURLComponent(path);
130
131 #if BUILDFLAG(IS_WIN)
132 if (base::IsStringUTF8(path)) {
133 file_path_str.assign(base::UTF8ToWide(path));
134 // We used to try too hard and see if |path| made up entirely of
135 // the 1st 256 characters in the Unicode was a zero-extended UTF-16.
136 // If so, we converted it to 'Latin-1' and checked if the result was UTF-8.
137 // If the check passed, we converted the result to UTF-8.
138 // Otherwise, we treated the result as the native OS encoding.
139 // However, that led to http://crbug.com/4619 and http://crbug.com/14153
140 } else {
141 // Not UTF-8, assume encoding is native codepage and we're done. We know we
142 // are giving the conversion function a nonempty string, and it may fail if
143 // the given string is not in the current encoding and give us an empty
144 // string back. We detect this and report failure.
145 file_path_str = base::SysNativeMBToWide(path);
146 }
147 #else // BUILDFLAG(IS_WIN)
148 // Collapse multiple path slashes into a single path slash.
149 std::string new_path;
150 do {
151 new_path = path;
152 base::ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/");
153 path.swap(new_path);
154 } while (new_path != path);
155
156 file_path_str.assign(path);
157 #endif // !BUILDFLAG(IS_WIN)
158
159 return !file_path_str.empty();
160 }
161
GenerateSafeFileName(const std::string & mime_type,bool ignore_extension,base::FilePath * file_path)162 void GenerateSafeFileName(const std::string& mime_type,
163 bool ignore_extension,
164 base::FilePath* file_path) {
165 // Make sure we get the right file extension
166 EnsureSafeExtension(mime_type, ignore_extension, file_path);
167
168 #if BUILDFLAG(IS_WIN)
169 // Prepend "_" to the file name if it's a reserved name
170 base::FilePath::StringType leaf_name = file_path->BaseName().value();
171 DCHECK(!leaf_name.empty());
172 if (IsReservedNameOnWindows(leaf_name)) {
173 leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name;
174 *file_path = file_path->DirName();
175 if (file_path->value() == base::FilePath::kCurrentDirectory) {
176 *file_path = base::FilePath(leaf_name);
177 } else {
178 *file_path = file_path->Append(leaf_name);
179 }
180 }
181 #endif
182 }
183
IsReservedNameOnWindows(const base::FilePath::StringType & filename)184 bool IsReservedNameOnWindows(const base::FilePath::StringType& filename) {
185 // This list is taken from the MSDN article "Naming a file"
186 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
187 // I also added clock$ because GetSaveFileName seems to consider it as a
188 // reserved name too.
189 static const char* const known_devices[] = {
190 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4",
191 "com5", "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
192 "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"};
193 #if BUILDFLAG(IS_WIN)
194 std::string filename_lower = base::ToLowerASCII(base::WideToUTF8(filename));
195 #elif BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
196 std::string filename_lower = base::ToLowerASCII(filename);
197 #endif
198
199 for (const char* const device : known_devices) {
200 // Check for an exact match, or a "DEVICE." prefix.
201 size_t len = strlen(device);
202 if (filename_lower.starts_with(device) &&
203 (filename_lower.size() == len || filename_lower[len] == '.')) {
204 return true;
205 }
206 }
207
208 static const char* const magic_names[] = {
209 // These file names are used by the "Customize folder" feature of the
210 // shell.
211 "desktop.ini",
212 "thumbs.db",
213 };
214
215 for (const char* const magic_name : magic_names) {
216 if (filename_lower == magic_name)
217 return true;
218 }
219
220 return false;
221 }
222
223 } // namespace net
224