1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/check.h"
6 #include "url/third_party/mozilla/url_parse.h"
7 #include "url/url_file.h"
8 #include "url/url_parse_internal.h"
9
// Interesting IE file:isms...
//
//  INPUT                      OUTPUT
//  =========================  ==============================
//  file:/foo/bar              file:///foo/bar
//      The result here seems totally invalid!?!? This isn't UNC.
//
//  file:/
//  file:// or any other number of slashes
//      IE6 doesn't do anything at all if you click on this link. No error:
//      nothing. IE6's history system seems to always color this link, so I'm
//      guessing that it maps internally to the empty URL.
//
//  C:\                        file:///C:/
//      When on a file: URL source page, this link will work. When over HTTP,
//      the file: URL will appear in the status bar but the link will not work
//      (security restriction for all file URLs).
//
//  file:foo/                  file:foo/     (invalid?!?!?)
//  file:/foo/                 file:///foo/  (invalid?!?!?)
//  file://foo/                file://foo/   (UNC to server "foo")
//  file:///foo/               file:///foo/  (invalid, seems to be a file)
//  file:////foo/              file://foo/   (UNC to server "foo")
//      Any more than four slashes is also treated as UNC.
//
//  file:C:/                   file://C:/
//  file:/C:/                  file://C:/
//      The number of slashes after "file:" doesn't matter if the thing
//      following it looks like an absolute drive path. Also, slashes and
//      backslashes are equally valid here.
40
41 namespace url {
42
43 namespace {
44
45 // A subcomponent of DoParseFileURL, the input of this function should be a UNC
46 // path name, with the index of the first character after the slashes following
47 // the scheme given in |after_slashes|. This will initialize the host, path,
48 // query, and ref, and leave the other output components untouched
49 // (DoParseFileURL handles these for us).
50 template <typename CHAR>
DoParseUNC(const CHAR * spec,int after_slashes,int spec_len,Parsed * parsed)51 void DoParseUNC(const CHAR* spec,
52 int after_slashes,
53 int spec_len,
54 Parsed* parsed) {
55 int next_slash = FindNextSlash(spec, after_slashes, spec_len);
56
57 // Everything up until that first slash we found (or end of string) is the
58 // host name, which will end up being the UNC host. For example,
59 // "file://foo/bar.txt" will get a server name of "foo" and a path of "/bar".
60 // Later, on Windows, this should be treated as the filename "\\foo\bar.txt"
61 // in proper UNC notation.
62 if (after_slashes < next_slash)
63 parsed->host = MakeRange(after_slashes, next_slash);
64 else
65 parsed->host.reset();
66 if (next_slash < spec_len) {
67 ParsePathInternal(spec, MakeRange(next_slash, spec_len),
68 &parsed->path, &parsed->query, &parsed->ref);
69 } else {
70 parsed->path.reset();
71 }
72 }
73
74 // A subcomponent of DoParseFileURL, the input should be a local file, with the
75 // beginning of the path indicated by the index in |path_begin|. This will
76 // initialize the host, path, query, and ref, and leave the other output
77 // components untouched (DoParseFileURL handles these for us).
78 template<typename CHAR>
DoParseLocalFile(const CHAR * spec,int path_begin,int spec_len,Parsed * parsed)79 void DoParseLocalFile(const CHAR* spec,
80 int path_begin,
81 int spec_len,
82 Parsed* parsed) {
83 parsed->host.reset();
84 ParsePathInternal(spec, MakeRange(path_begin, spec_len),
85 &parsed->path, &parsed->query, &parsed->ref);
86 }
87
88 // Backend for the external functions that operates on either char type.
89 // Handles cases where there is a scheme, but also when handed the first
90 // character following the "file:" at the beginning of the spec. If so,
91 // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
92 template<typename CHAR>
DoParseFileURL(const CHAR * spec,int spec_len,Parsed * parsed)93 void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
94 DCHECK(spec_len >= 0);
95
96 // Get the parts we never use for file URLs out of the way.
97 parsed->username.reset();
98 parsed->password.reset();
99 parsed->port.reset();
100
101 // Many of the code paths don't set these, so it's convenient to just clear
102 // them. We'll write them in those cases we need them.
103 parsed->query.reset();
104 parsed->ref.reset();
105
106 // Strip leading & trailing spaces and control characters.
107 int begin = 0;
108 TrimURL(spec, &begin, &spec_len);
109
110 // Find the scheme, if any.
111 int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
112 int after_scheme;
113 int after_slashes;
114 #ifdef WIN32
115 // See how many slashes there are. We want to handle cases like UNC but also
116 // "/c:/foo". This is when there is no scheme, so we can allow pages to do
117 // links like "c:/foo/bar" or "//foo/bar". This is also called by the
118 // relative URL resolver when it determines there is an absolute URL, which
119 // may give us input like "/c:/foo".
120 after_slashes = begin + num_slashes;
121 if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
122 // Windows path, don't try to extract the scheme (for example, "c:\foo").
123 parsed->scheme.reset();
124 after_scheme = after_slashes;
125 } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
126 // Windows UNC path: don't try to extract the scheme, but keep the slashes.
127 parsed->scheme.reset();
128 after_scheme = begin;
129 } else
130 #endif
131 {
132 // ExtractScheme doesn't understand the possibility of filenames with
133 // colons in them, in which case it returns the entire spec up to the
134 // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
135 // the foo.c: scheme.
136 if (!num_slashes &&
137 ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
138 // Offset the results since we gave ExtractScheme a substring.
139 parsed->scheme.begin += begin;
140 after_scheme = parsed->scheme.end() + 1;
141 } else {
142 // No scheme found, remember that.
143 parsed->scheme.reset();
144 after_scheme = begin;
145 }
146 }
147
148 // Handle empty specs ones that contain only whitespace or control chars,
149 // or that are just the scheme (for example "file:").
150 if (after_scheme == spec_len) {
151 parsed->host.reset();
152 parsed->path.reset();
153 return;
154 }
155
156 num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
157 after_slashes = after_scheme + num_slashes;
158 #ifdef WIN32
159 // Check whether the input is a drive again. We checked above for windows
160 // drive specs, but that's only at the very beginning to see if we have a
161 // scheme at all. This test will be duplicated in that case, but will
162 // additionally handle all cases with a real scheme such as "file:///C:/".
163 if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
164 num_slashes != 3) {
165 // Anything not beginning with a drive spec ("c:\") on Windows is treated
166 // as UNC, with the exception of three slashes which always means a file.
167 // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
168 DoParseUNC(spec, after_slashes, spec_len, parsed);
169 return;
170 }
171 #else
172 // file: URL with exactly 2 slashes is considered to have a host component.
173 if (num_slashes == 2) {
174 DoParseUNC(spec, after_slashes, spec_len, parsed);
175 return;
176 }
177 #endif // WIN32
178
179 // Easy and common case, the full path immediately follows the scheme
180 // (modulo slashes), as in "file://c:/foo". Just treat everything from
181 // there to the end as the path. Empty hosts have 0 length instead of -1.
182 // We include the last slash as part of the path if there is one.
183 DoParseLocalFile(spec,
184 num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
185 spec_len, parsed);
186 }
187
188 } // namespace
189
ParseFileURL(const char * url,int url_len,Parsed * parsed)190 void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
191 DoParseFileURL(url, url_len, parsed);
192 }
193
ParseFileURL(const char16_t * url,int url_len,Parsed * parsed)194 void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
195 DoParseFileURL(url, url_len, parsed);
196 }
197
198 } // namespace url
199