1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/nix/mime_util_xdg.h"
6
7 #include <arpa/inet.h>
8 #include <memory>
9 #include <utility>
10
11 #include "base/check.h"
12 #include "base/containers/stack.h"
13 #include "base/environment.h"
14 #include "base/files/file_path.h"
15 #include "base/files/file_util.h"
16 #include "base/logging.h"
17 #include "base/nix/xdg_util.h"
18 #include "base/no_destructor.h"
19 #include "base/ranges/algorithm.h"
20 #include "base/strings/string_util.h"
21 #include "base/strings/utf_string_conversion_utils.h"
22 #include "build/build_config.h"
23
24 #if !BUILDFLAG(IS_CHROMEOS)
25 #include "base/synchronization/lock.h"
26 #endif
27
28 namespace base::nix {
29 namespace {
30
// Upper bound on the size of a /usr/share/mime/mime.cache file that will be
// read. The default file is about 100KB; anything up to 10MB is accepted.
constexpr size_t kMaxMimeTypesFileSize = size_t{10} * 1024 * 1024;
// Upper bound on the number of nodes accepted in the reverse suffix tree.
// The default file has ~3K nodes; up to 30K are accepted.
constexpr size_t kMaxNodes = 30'000;
// Longest file extension that is kept.
constexpr size_t kMaxExtSize = 100;
// Size of the header at the start of a mime.cache file.
constexpr size_t kHeaderSize = 40;
// U+10ffff, the largest valid unicode code point.
constexpr uint32_t kMaxUnicode = 0x10FFFF;
// Glob weight to assume by default; weights range up to 100.
constexpr uint8_t kDefaultGlobWeight = 50;
45
46 // Path and last modified of mime.cache file.
47 struct FileInfo {
48 FilePath path;
49 Time last_modified;
50 };
51
52 // Load all mime cache files on the system.
LoadAllMimeCacheFiles(MimeTypeMap & map,std::vector<FileInfo> & files)53 void LoadAllMimeCacheFiles(MimeTypeMap& map, std::vector<FileInfo>& files) {
54 std::unique_ptr<Environment> env(Environment::Create());
55 File::Info info;
56 for (const auto& path : GetXDGDataSearchLocations(env.get())) {
57 FilePath mime_cache = path.Append("mime/mime.cache");
58 if (GetFileInfo(mime_cache, &info) && ParseMimeTypes(mime_cache, map)) {
59 files.emplace_back(mime_cache, info.last_modified);
60 }
61 }
62 }
63
64 // Read 4 bytes from string `buf` at `offset` as network order uint32_t.
65 // Returns false if `offset > buf.size() - 4` or `offset` is not aligned to a
66 // 4-byte word boundary, or `*result` is not between `min_result` and
67 // `max_result`. `field_name` is used in error message.
ReadInt(const std::string & buf,uint32_t offset,const std::string & field_name,uint32_t min_result,size_t max_result,uint32_t * result)68 bool ReadInt(const std::string& buf,
69 uint32_t offset,
70 const std::string& field_name,
71 uint32_t min_result,
72 size_t max_result,
73 uint32_t* result) {
74 if (offset > buf.size() - 4 || (offset & 0x3)) {
75 LOG(ERROR) << "Invalid offset=" << offset << " for " << field_name
76 << ", string size=" << buf.size();
77 return false;
78 }
79 *result = ntohl(*reinterpret_cast<const uint32_t*>(buf.c_str() + offset));
80 if (*result < min_result || *result > max_result) {
81 LOG(ERROR) << "Invalid " << field_name << "=" << *result
82 << " not between min_result=" << min_result
83 << " and max_result=" << max_result;
84 return false;
85 }
86 return true;
87 }
88
89 } // namespace
90
ParseMimeTypes(const FilePath & file_path,MimeTypeMap & out_mime_types)91 bool ParseMimeTypes(const FilePath& file_path, MimeTypeMap& out_mime_types) {
92 // File format from
93 // https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.21.html#idm46070612075440
94 // Header:
95 // 2 CARD16 MAJOR_VERSION 1
96 // 2 CARD16 MINOR_VERSION 2
97 // 4 CARD32 ALIAS_LIST_OFFSET
98 // 4 CARD32 PARENT_LIST_OFFSET
99 // 4 CARD32 LITERAL_LIST_OFFSET
100 // 4 CARD32 REVERSE_SUFFIX_TREE_OFFSET
101 // ...
102 // ReverseSuffixTree:
103 // 4 CARD32 N_ROOTS
104 // 4 CARD32 FIRST_ROOT_OFFSET
105 // ReverseSuffixTreeNode:
106 // 4 CARD32 CHARACTER
107 // 4 CARD32 N_CHILDREN
108 // 4 CARD32 FIRST_CHILD_OFFSET
109 // ReverseSuffixTreeLeafNode:
110 // 4 CARD32 0
111 // 4 CARD32 MIME_TYPE_OFFSET
112 // 4 CARD32 WEIGHT in lower 8 bits
113 // FLAGS in rest:
114 // 0x100 = case-sensitive
115
116 std::string buf;
117 if (!ReadFileToStringWithMaxSize(file_path, &buf, kMaxMimeTypesFileSize)) {
118 LOG(ERROR) << "Failed reading in mime.cache file: " << file_path;
119 return false;
120 }
121
122 if (buf.size() < kHeaderSize) {
123 LOG(ERROR) << "Invalid mime.cache file size=" << buf.size();
124 return false;
125 }
126
127 // Validate file[ALIAS_LIST_OFFSET - 1] is null to ensure that any
128 // null-terminated strings dereferenced at addresses below ALIAS_LIST_OFFSET
129 // will not overflow.
130 uint32_t alias_list_offset = 0;
131 if (!ReadInt(buf, 4, "ALIAS_LIST_OFFSET", kHeaderSize, buf.size(),
132 &alias_list_offset)) {
133 return false;
134 }
135 if (buf[alias_list_offset - 1] != 0) {
136 LOG(ERROR) << "Invalid mime.cache file does not contain null prior to "
137 "ALIAS_LIST_OFFSET="
138 << alias_list_offset;
139 return false;
140 }
141
142 // Parse ReverseSuffixTree. Read all nodes and place them on `stack`,
143 // allowing max of kMaxNodes and max extension of kMaxExtSize.
144 uint32_t tree_offset = 0;
145 if (!ReadInt(buf, 16, "REVERSE_SUFFIX_TREE_OFFSET", kHeaderSize, buf.size(),
146 &tree_offset)) {
147 return false;
148 }
149
150 struct Node {
151 std::string ext;
152 uint32_t n_children;
153 uint32_t first_child_offset;
154 };
155
156 // Read root node and put it on the stack.
157 Node root;
158 if (!ReadInt(buf, tree_offset, "N_ROOTS", 0, kMaxUnicode, &root.n_children)) {
159 return false;
160 }
161 if (!ReadInt(buf, tree_offset + 4, "FIRST_ROOT_OFFSET", tree_offset,
162 buf.size(), &root.first_child_offset)) {
163 return false;
164 }
165 stack<Node> stack;
166 stack.push(std::move(root));
167
168 uint32_t num_nodes = 0;
169 while (!stack.empty()) {
170 // Pop top node from the stack and process children.
171 Node n = std::move(stack.top());
172 stack.pop();
173 uint32_t p = n.first_child_offset;
174 for (uint32_t i = 0; i < n.n_children; i++) {
175 uint32_t c = 0;
176 if (!ReadInt(buf, p, "CHARACTER", 0, kMaxUnicode, &c)) {
177 return false;
178 }
179 p += 4;
180
181 // Leaf node, add mime type if it is highest weight.
182 if (c == 0) {
183 uint32_t mime_type_offset = 0;
184 if (!ReadInt(buf, p, "mime type offset", kHeaderSize,
185 alias_list_offset - 1, &mime_type_offset)) {
186 return false;
187 }
188 p += 4;
189 uint8_t weight = kDefaultGlobWeight;
190 if ((p + 3) < buf.size()) {
191 weight = static_cast<uint8_t>(buf[p + 3]);
192 }
193 p += 4;
194 if (n.ext.size() > 0 && n.ext[0] == '.') {
195 std::string ext = n.ext.substr(1);
196 auto it = out_mime_types.find(ext);
197 if (it == out_mime_types.end() || weight > it->second.weight) {
198 out_mime_types[ext] = {std::string(buf.c_str() + mime_type_offset),
199 weight};
200 }
201 }
202 continue;
203 }
204
205 // Regular node, parse and add it to the stack.
206 Node node;
207 WriteUnicodeCharacter(static_cast<int>(c), &node.ext);
208 node.ext += n.ext;
209 if (!ReadInt(buf, p, "N_CHILDREN", 0, kMaxUnicode, &node.n_children)) {
210 return false;
211 }
212 p += 4;
213 if (!ReadInt(buf, p, "FIRST_CHILD_OFFSET", tree_offset, buf.size(),
214 &node.first_child_offset)) {
215 return false;
216 }
217 p += 4;
218
219 // Check limits.
220 if (++num_nodes > kMaxNodes) {
221 LOG(ERROR) << "Exceeded maxium number of nodes=" << kMaxNodes;
222 return false;
223 }
224 if (node.ext.size() > kMaxExtSize) {
225 LOG(WARNING) << "Ignoring large extension exceeds size=" << kMaxExtSize
226 << " ext=" << node.ext;
227 continue;
228 }
229
230 stack.push(std::move(node));
231 }
232 }
233
234 return true;
235 }
236
GetFileMimeType(const FilePath & filepath)237 std::string GetFileMimeType(const FilePath& filepath) {
238 std::string ext = filepath.Extension();
239 if (ext.empty()) {
240 return std::string();
241 }
242
243 static NoDestructor<std::vector<FileInfo>> xdg_mime_files;
244
245 static NoDestructor<MimeTypeMap> mime_type_map([] {
246 MimeTypeMap map;
247 LoadAllMimeCacheFiles(map, *xdg_mime_files);
248 return map;
249 }());
250
251 // Files never change on ChromeOS, but for linux, match xdgmime behavior and
252 // check every 5s and reload if any files have changed.
253 #if !BUILDFLAG(IS_CHROMEOS)
254 static Time last_check;
255 // Lock is required since this may be called on any thread.
256 static NoDestructor<Lock> lock;
257 {
258 AutoLock scoped_lock(*lock);
259
260 Time now = Time::Now();
261 if (last_check + Seconds(5) < now) {
262 if (ranges::any_of(*xdg_mime_files, [](const FileInfo& file_info) {
263 File::Info info;
264 return !GetFileInfo(file_info.path, &info) ||
265 info.last_modified != file_info.last_modified;
266 })) {
267 mime_type_map->clear();
268 xdg_mime_files->clear();
269 LoadAllMimeCacheFiles(*mime_type_map, *xdg_mime_files);
270 }
271 last_check = now;
272 }
273 }
274 #endif
275
276 auto it = mime_type_map->find(ext.substr(1));
277 return it != mime_type_map->end() ? it->second.mime_type : std::string();
278 }
279
280 } // namespace base::nix
281