1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
6*6777b538SAndroid Build Coastguard Worker #include "url/third_party/mozilla/url_parse.h"
7*6777b538SAndroid Build Coastguard Worker #include "url/url_file.h"
8*6777b538SAndroid Build Coastguard Worker #include "url/url_parse_internal.h"
9*6777b538SAndroid Build Coastguard Worker
10*6777b538SAndroid Build Coastguard Worker // Interesting IE file:isms...
11*6777b538SAndroid Build Coastguard Worker //
12*6777b538SAndroid Build Coastguard Worker // INPUT OUTPUT
13*6777b538SAndroid Build Coastguard Worker // ========================= ==============================
14*6777b538SAndroid Build Coastguard Worker // file:/foo/bar file:///foo/bar
15*6777b538SAndroid Build Coastguard Worker // The result here seems totally invalid!?!? This isn't UNC.
16*6777b538SAndroid Build Coastguard Worker //
17*6777b538SAndroid Build Coastguard Worker // file:/
18*6777b538SAndroid Build Coastguard Worker // file:// or any other number of slashes
19*6777b538SAndroid Build Coastguard Worker // IE6 doesn't do anything at all if you click on this link. No error:
20*6777b538SAndroid Build Coastguard Worker // nothing. IE6's history system seems to always color this link, so I'm
21*6777b538SAndroid Build Coastguard Worker // guessing that it maps internally to the empty URL.
22*6777b538SAndroid Build Coastguard Worker //
23*6777b538SAndroid Build Coastguard Worker // C:\ file:///C:/
24*6777b538SAndroid Build Coastguard Worker // When on a file: URL source page, this link will work. When over HTTP,
25*6777b538SAndroid Build Coastguard Worker // the file: URL will appear in the status bar but the link will not work
26*6777b538SAndroid Build Coastguard Worker // (security restriction for all file URLs).
27*6777b538SAndroid Build Coastguard Worker //
28*6777b538SAndroid Build Coastguard Worker // file:foo/ file:foo/ (invalid?!?!?)
29*6777b538SAndroid Build Coastguard Worker // file:/foo/ file:///foo/ (invalid?!?!?)
30*6777b538SAndroid Build Coastguard Worker // file://foo/ file://foo/ (UNC to server "foo")
31*6777b538SAndroid Build Coastguard Worker // file:///foo/ file:///foo/ (invalid, seems to be a file)
32*6777b538SAndroid Build Coastguard Worker // file:////foo/ file://foo/ (UNC to server "foo")
33*6777b538SAndroid Build Coastguard Worker // Any more than four slashes is also treated as UNC.
34*6777b538SAndroid Build Coastguard Worker //
35*6777b538SAndroid Build Coastguard Worker // file:C:/ file://C:/
36*6777b538SAndroid Build Coastguard Worker // file:/C:/ file://C:/
//     The number of slashes after "file:" doesn't matter if the thing
//     following it looks like an absolute drive path. Also, slashes and
//     backslashes are equally valid here.
40*6777b538SAndroid Build Coastguard Worker
41*6777b538SAndroid Build Coastguard Worker namespace url {
42*6777b538SAndroid Build Coastguard Worker
43*6777b538SAndroid Build Coastguard Worker namespace {
44*6777b538SAndroid Build Coastguard Worker
45*6777b538SAndroid Build Coastguard Worker // A subcomponent of DoParseFileURL, the input of this function should be a UNC
46*6777b538SAndroid Build Coastguard Worker // path name, with the index of the first character after the slashes following
47*6777b538SAndroid Build Coastguard Worker // the scheme given in |after_slashes|. This will initialize the host, path,
48*6777b538SAndroid Build Coastguard Worker // query, and ref, and leave the other output components untouched
49*6777b538SAndroid Build Coastguard Worker // (DoParseFileURL handles these for us).
50*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
DoParseUNC(const CHAR * spec,int after_slashes,int spec_len,Parsed * parsed)51*6777b538SAndroid Build Coastguard Worker void DoParseUNC(const CHAR* spec,
52*6777b538SAndroid Build Coastguard Worker int after_slashes,
53*6777b538SAndroid Build Coastguard Worker int spec_len,
54*6777b538SAndroid Build Coastguard Worker Parsed* parsed) {
55*6777b538SAndroid Build Coastguard Worker int next_slash = FindNextSlash(spec, after_slashes, spec_len);
56*6777b538SAndroid Build Coastguard Worker
57*6777b538SAndroid Build Coastguard Worker // Everything up until that first slash we found (or end of string) is the
58*6777b538SAndroid Build Coastguard Worker // host name, which will end up being the UNC host. For example,
59*6777b538SAndroid Build Coastguard Worker // "file://foo/bar.txt" will get a server name of "foo" and a path of "/bar".
60*6777b538SAndroid Build Coastguard Worker // Later, on Windows, this should be treated as the filename "\\foo\bar.txt"
61*6777b538SAndroid Build Coastguard Worker // in proper UNC notation.
62*6777b538SAndroid Build Coastguard Worker if (after_slashes < next_slash)
63*6777b538SAndroid Build Coastguard Worker parsed->host = MakeRange(after_slashes, next_slash);
64*6777b538SAndroid Build Coastguard Worker else
65*6777b538SAndroid Build Coastguard Worker parsed->host.reset();
66*6777b538SAndroid Build Coastguard Worker if (next_slash < spec_len) {
67*6777b538SAndroid Build Coastguard Worker ParsePathInternal(spec, MakeRange(next_slash, spec_len),
68*6777b538SAndroid Build Coastguard Worker &parsed->path, &parsed->query, &parsed->ref);
69*6777b538SAndroid Build Coastguard Worker } else {
70*6777b538SAndroid Build Coastguard Worker parsed->path.reset();
71*6777b538SAndroid Build Coastguard Worker }
72*6777b538SAndroid Build Coastguard Worker }
73*6777b538SAndroid Build Coastguard Worker
74*6777b538SAndroid Build Coastguard Worker // A subcomponent of DoParseFileURL, the input should be a local file, with the
75*6777b538SAndroid Build Coastguard Worker // beginning of the path indicated by the index in |path_begin|. This will
76*6777b538SAndroid Build Coastguard Worker // initialize the host, path, query, and ref, and leave the other output
77*6777b538SAndroid Build Coastguard Worker // components untouched (DoParseFileURL handles these for us).
78*6777b538SAndroid Build Coastguard Worker template<typename CHAR>
DoParseLocalFile(const CHAR * spec,int path_begin,int spec_len,Parsed * parsed)79*6777b538SAndroid Build Coastguard Worker void DoParseLocalFile(const CHAR* spec,
80*6777b538SAndroid Build Coastguard Worker int path_begin,
81*6777b538SAndroid Build Coastguard Worker int spec_len,
82*6777b538SAndroid Build Coastguard Worker Parsed* parsed) {
83*6777b538SAndroid Build Coastguard Worker parsed->host.reset();
84*6777b538SAndroid Build Coastguard Worker ParsePathInternal(spec, MakeRange(path_begin, spec_len),
85*6777b538SAndroid Build Coastguard Worker &parsed->path, &parsed->query, &parsed->ref);
86*6777b538SAndroid Build Coastguard Worker }
87*6777b538SAndroid Build Coastguard Worker
88*6777b538SAndroid Build Coastguard Worker // Backend for the external functions that operates on either char type.
89*6777b538SAndroid Build Coastguard Worker // Handles cases where there is a scheme, but also when handed the first
90*6777b538SAndroid Build Coastguard Worker // character following the "file:" at the beginning of the spec. If so,
91*6777b538SAndroid Build Coastguard Worker // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
92*6777b538SAndroid Build Coastguard Worker template<typename CHAR>
DoParseFileURL(const CHAR * spec,int spec_len,Parsed * parsed)93*6777b538SAndroid Build Coastguard Worker void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
94*6777b538SAndroid Build Coastguard Worker DCHECK(spec_len >= 0);
95*6777b538SAndroid Build Coastguard Worker
96*6777b538SAndroid Build Coastguard Worker // Get the parts we never use for file URLs out of the way.
97*6777b538SAndroid Build Coastguard Worker parsed->username.reset();
98*6777b538SAndroid Build Coastguard Worker parsed->password.reset();
99*6777b538SAndroid Build Coastguard Worker parsed->port.reset();
100*6777b538SAndroid Build Coastguard Worker
101*6777b538SAndroid Build Coastguard Worker // Many of the code paths don't set these, so it's convenient to just clear
102*6777b538SAndroid Build Coastguard Worker // them. We'll write them in those cases we need them.
103*6777b538SAndroid Build Coastguard Worker parsed->query.reset();
104*6777b538SAndroid Build Coastguard Worker parsed->ref.reset();
105*6777b538SAndroid Build Coastguard Worker
106*6777b538SAndroid Build Coastguard Worker // Strip leading & trailing spaces and control characters.
107*6777b538SAndroid Build Coastguard Worker int begin = 0;
108*6777b538SAndroid Build Coastguard Worker TrimURL(spec, &begin, &spec_len);
109*6777b538SAndroid Build Coastguard Worker
110*6777b538SAndroid Build Coastguard Worker // Find the scheme, if any.
111*6777b538SAndroid Build Coastguard Worker int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
112*6777b538SAndroid Build Coastguard Worker int after_scheme;
113*6777b538SAndroid Build Coastguard Worker int after_slashes;
114*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
115*6777b538SAndroid Build Coastguard Worker // See how many slashes there are. We want to handle cases like UNC but also
116*6777b538SAndroid Build Coastguard Worker // "/c:/foo". This is when there is no scheme, so we can allow pages to do
117*6777b538SAndroid Build Coastguard Worker // links like "c:/foo/bar" or "//foo/bar". This is also called by the
118*6777b538SAndroid Build Coastguard Worker // relative URL resolver when it determines there is an absolute URL, which
119*6777b538SAndroid Build Coastguard Worker // may give us input like "/c:/foo".
120*6777b538SAndroid Build Coastguard Worker after_slashes = begin + num_slashes;
121*6777b538SAndroid Build Coastguard Worker if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
122*6777b538SAndroid Build Coastguard Worker // Windows path, don't try to extract the scheme (for example, "c:\foo").
123*6777b538SAndroid Build Coastguard Worker parsed->scheme.reset();
124*6777b538SAndroid Build Coastguard Worker after_scheme = after_slashes;
125*6777b538SAndroid Build Coastguard Worker } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
126*6777b538SAndroid Build Coastguard Worker // Windows UNC path: don't try to extract the scheme, but keep the slashes.
127*6777b538SAndroid Build Coastguard Worker parsed->scheme.reset();
128*6777b538SAndroid Build Coastguard Worker after_scheme = begin;
129*6777b538SAndroid Build Coastguard Worker } else
130*6777b538SAndroid Build Coastguard Worker #endif
131*6777b538SAndroid Build Coastguard Worker {
132*6777b538SAndroid Build Coastguard Worker // ExtractScheme doesn't understand the possibility of filenames with
133*6777b538SAndroid Build Coastguard Worker // colons in them, in which case it returns the entire spec up to the
134*6777b538SAndroid Build Coastguard Worker // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
135*6777b538SAndroid Build Coastguard Worker // the foo.c: scheme.
136*6777b538SAndroid Build Coastguard Worker if (!num_slashes &&
137*6777b538SAndroid Build Coastguard Worker ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
138*6777b538SAndroid Build Coastguard Worker // Offset the results since we gave ExtractScheme a substring.
139*6777b538SAndroid Build Coastguard Worker parsed->scheme.begin += begin;
140*6777b538SAndroid Build Coastguard Worker after_scheme = parsed->scheme.end() + 1;
141*6777b538SAndroid Build Coastguard Worker } else {
142*6777b538SAndroid Build Coastguard Worker // No scheme found, remember that.
143*6777b538SAndroid Build Coastguard Worker parsed->scheme.reset();
144*6777b538SAndroid Build Coastguard Worker after_scheme = begin;
145*6777b538SAndroid Build Coastguard Worker }
146*6777b538SAndroid Build Coastguard Worker }
147*6777b538SAndroid Build Coastguard Worker
148*6777b538SAndroid Build Coastguard Worker // Handle empty specs ones that contain only whitespace or control chars,
149*6777b538SAndroid Build Coastguard Worker // or that are just the scheme (for example "file:").
150*6777b538SAndroid Build Coastguard Worker if (after_scheme == spec_len) {
151*6777b538SAndroid Build Coastguard Worker parsed->host.reset();
152*6777b538SAndroid Build Coastguard Worker parsed->path.reset();
153*6777b538SAndroid Build Coastguard Worker return;
154*6777b538SAndroid Build Coastguard Worker }
155*6777b538SAndroid Build Coastguard Worker
156*6777b538SAndroid Build Coastguard Worker num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
157*6777b538SAndroid Build Coastguard Worker after_slashes = after_scheme + num_slashes;
158*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
159*6777b538SAndroid Build Coastguard Worker // Check whether the input is a drive again. We checked above for windows
160*6777b538SAndroid Build Coastguard Worker // drive specs, but that's only at the very beginning to see if we have a
161*6777b538SAndroid Build Coastguard Worker // scheme at all. This test will be duplicated in that case, but will
162*6777b538SAndroid Build Coastguard Worker // additionally handle all cases with a real scheme such as "file:///C:/".
163*6777b538SAndroid Build Coastguard Worker if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
164*6777b538SAndroid Build Coastguard Worker num_slashes != 3) {
165*6777b538SAndroid Build Coastguard Worker // Anything not beginning with a drive spec ("c:\") on Windows is treated
166*6777b538SAndroid Build Coastguard Worker // as UNC, with the exception of three slashes which always means a file.
167*6777b538SAndroid Build Coastguard Worker // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
168*6777b538SAndroid Build Coastguard Worker DoParseUNC(spec, after_slashes, spec_len, parsed);
169*6777b538SAndroid Build Coastguard Worker return;
170*6777b538SAndroid Build Coastguard Worker }
171*6777b538SAndroid Build Coastguard Worker #else
172*6777b538SAndroid Build Coastguard Worker // file: URL with exactly 2 slashes is considered to have a host component.
173*6777b538SAndroid Build Coastguard Worker if (num_slashes == 2) {
174*6777b538SAndroid Build Coastguard Worker DoParseUNC(spec, after_slashes, spec_len, parsed);
175*6777b538SAndroid Build Coastguard Worker return;
176*6777b538SAndroid Build Coastguard Worker }
177*6777b538SAndroid Build Coastguard Worker #endif // WIN32
178*6777b538SAndroid Build Coastguard Worker
179*6777b538SAndroid Build Coastguard Worker // Easy and common case, the full path immediately follows the scheme
180*6777b538SAndroid Build Coastguard Worker // (modulo slashes), as in "file://c:/foo". Just treat everything from
181*6777b538SAndroid Build Coastguard Worker // there to the end as the path. Empty hosts have 0 length instead of -1.
182*6777b538SAndroid Build Coastguard Worker // We include the last slash as part of the path if there is one.
183*6777b538SAndroid Build Coastguard Worker DoParseLocalFile(spec,
184*6777b538SAndroid Build Coastguard Worker num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
185*6777b538SAndroid Build Coastguard Worker spec_len, parsed);
186*6777b538SAndroid Build Coastguard Worker }
187*6777b538SAndroid Build Coastguard Worker
188*6777b538SAndroid Build Coastguard Worker } // namespace
189*6777b538SAndroid Build Coastguard Worker
ParseFileURL(const char * url,int url_len,Parsed * parsed)190*6777b538SAndroid Build Coastguard Worker void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
191*6777b538SAndroid Build Coastguard Worker DoParseFileURL(url, url_len, parsed);
192*6777b538SAndroid Build Coastguard Worker }
193*6777b538SAndroid Build Coastguard Worker
ParseFileURL(const char16_t * url,int url_len,Parsed * parsed)194*6777b538SAndroid Build Coastguard Worker void ParseFileURL(const char16_t* url, int url_len, Parsed* parsed) {
195*6777b538SAndroid Build Coastguard Worker DoParseFileURL(url, url_len, parsed);
196*6777b538SAndroid Build Coastguard Worker }
197*6777b538SAndroid Build Coastguard Worker
198*6777b538SAndroid Build Coastguard Worker } // namespace url
199