// Copyright 2012 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/nix/mime_util_xdg.h" #include #include #include #include "base/check.h" #include "base/containers/stack.h" #include "base/environment.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/logging.h" #include "base/nix/xdg_util.h" #include "base/no_destructor.h" #include "base/ranges/algorithm.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversion_utils.h" #include "build/build_config.h" #if !BUILDFLAG(IS_CHROMEOS) #include "base/synchronization/lock.h" #endif namespace base::nix { namespace { // Ridiculously large size for a /usr/share/mime/mime.cache file. // Default file is about 100KB, allow up to 10MB. constexpr size_t kMaxMimeTypesFileSize = 10 * 1024 * 1024; // Maximum number of nodes to allow in reverse suffix tree. // Default file has ~3K nodes, allow up to 30K. constexpr size_t kMaxNodes = 30000; // Maximum file extension size. constexpr size_t kMaxExtSize = 100; // Header size in mime.cache file. constexpr size_t kHeaderSize = 40; // Largest valid unicode code point is U+10ffff. constexpr uint32_t kMaxUnicode = 0x10ffff; // Default mime glob weight is 50, max is 100. constexpr uint8_t kDefaultGlobWeight = 50; // Path and last modified of mime.cache file. struct FileInfo { FilePath path; Time last_modified; }; // Load all mime cache files on the system. void LoadAllMimeCacheFiles(MimeTypeMap& map, std::vector& files) { std::unique_ptr env(Environment::Create()); File::Info info; for (const auto& path : GetXDGDataSearchLocations(env.get())) { FilePath mime_cache = path.Append("mime/mime.cache"); if (GetFileInfo(mime_cache, &info) && ParseMimeTypes(mime_cache, map)) { files.emplace_back(mime_cache, info.last_modified); } } } // Read 4 bytes from string `buf` at `offset` as network order uint32_t. // Returns false if `offset > buf.size() - 4` or `offset` is not aligned to a // 4-byte word boundary, or `*result` is not between `min_result` and // `max_result`. `field_name` is used in error message. bool ReadInt(const std::string& buf, uint32_t offset, const std::string& field_name, uint32_t min_result, size_t max_result, uint32_t* result) { if (offset > buf.size() - 4 || (offset & 0x3)) { LOG(ERROR) << "Invalid offset=" << offset << " for " << field_name << ", string size=" << buf.size(); return false; } *result = ntohl(*reinterpret_cast(buf.c_str() + offset)); if (*result < min_result || *result > max_result) { LOG(ERROR) << "Invalid " << field_name << "=" << *result << " not between min_result=" << min_result << " and max_result=" << max_result; return false; } return true; } } // namespace bool ParseMimeTypes(const FilePath& file_path, MimeTypeMap& out_mime_types) { // File format from // https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.21.html#idm46070612075440 // Header: // 2 CARD16 MAJOR_VERSION 1 // 2 CARD16 MINOR_VERSION 2 // 4 CARD32 ALIAS_LIST_OFFSET // 4 CARD32 PARENT_LIST_OFFSET // 4 CARD32 LITERAL_LIST_OFFSET // 4 CARD32 REVERSE_SUFFIX_TREE_OFFSET // ... // ReverseSuffixTree: // 4 CARD32 N_ROOTS // 4 CARD32 FIRST_ROOT_OFFSET // ReverseSuffixTreeNode: // 4 CARD32 CHARACTER // 4 CARD32 N_CHILDREN // 4 CARD32 FIRST_CHILD_OFFSET // ReverseSuffixTreeLeafNode: // 4 CARD32 0 // 4 CARD32 MIME_TYPE_OFFSET // 4 CARD32 WEIGHT in lower 8 bits // FLAGS in rest: // 0x100 = case-sensitive std::string buf; if (!ReadFileToStringWithMaxSize(file_path, &buf, kMaxMimeTypesFileSize)) { LOG(ERROR) << "Failed reading in mime.cache file: " << file_path; return false; } if (buf.size() < kHeaderSize) { LOG(ERROR) << "Invalid mime.cache file size=" << buf.size(); return false; } // Validate file[ALIAS_LIST_OFFSET - 1] is null to ensure that any // null-terminated strings dereferenced at addresses below ALIAS_LIST_OFFSET // will not overflow. uint32_t alias_list_offset = 0; if (!ReadInt(buf, 4, "ALIAS_LIST_OFFSET", kHeaderSize, buf.size(), &alias_list_offset)) { return false; } if (buf[alias_list_offset - 1] != 0) { LOG(ERROR) << "Invalid mime.cache file does not contain null prior to " "ALIAS_LIST_OFFSET=" << alias_list_offset; return false; } // Parse ReverseSuffixTree. Read all nodes and place them on `stack`, // allowing max of kMaxNodes and max extension of kMaxExtSize. uint32_t tree_offset = 0; if (!ReadInt(buf, 16, "REVERSE_SUFFIX_TREE_OFFSET", kHeaderSize, buf.size(), &tree_offset)) { return false; } struct Node { std::string ext; uint32_t n_children; uint32_t first_child_offset; }; // Read root node and put it on the stack. Node root; if (!ReadInt(buf, tree_offset, "N_ROOTS", 0, kMaxUnicode, &root.n_children)) { return false; } if (!ReadInt(buf, tree_offset + 4, "FIRST_ROOT_OFFSET", tree_offset, buf.size(), &root.first_child_offset)) { return false; } stack stack; stack.push(std::move(root)); uint32_t num_nodes = 0; while (!stack.empty()) { // Pop top node from the stack and process children. Node n = std::move(stack.top()); stack.pop(); uint32_t p = n.first_child_offset; for (uint32_t i = 0; i < n.n_children; i++) { uint32_t c = 0; if (!ReadInt(buf, p, "CHARACTER", 0, kMaxUnicode, &c)) { return false; } p += 4; // Leaf node, add mime type if it is highest weight. if (c == 0) { uint32_t mime_type_offset = 0; if (!ReadInt(buf, p, "mime type offset", kHeaderSize, alias_list_offset - 1, &mime_type_offset)) { return false; } p += 4; uint8_t weight = kDefaultGlobWeight; if ((p + 3) < buf.size()) { weight = static_cast(buf[p + 3]); } p += 4; if (n.ext.size() > 0 && n.ext[0] == '.') { std::string ext = n.ext.substr(1); auto it = out_mime_types.find(ext); if (it == out_mime_types.end() || weight > it->second.weight) { out_mime_types[ext] = {std::string(buf.c_str() + mime_type_offset), weight}; } } continue; } // Regular node, parse and add it to the stack. Node node; WriteUnicodeCharacter(static_cast(c), &node.ext); node.ext += n.ext; if (!ReadInt(buf, p, "N_CHILDREN", 0, kMaxUnicode, &node.n_children)) { return false; } p += 4; if (!ReadInt(buf, p, "FIRST_CHILD_OFFSET", tree_offset, buf.size(), &node.first_child_offset)) { return false; } p += 4; // Check limits. if (++num_nodes > kMaxNodes) { LOG(ERROR) << "Exceeded maxium number of nodes=" << kMaxNodes; return false; } if (node.ext.size() > kMaxExtSize) { LOG(WARNING) << "Ignoring large extension exceeds size=" << kMaxExtSize << " ext=" << node.ext; continue; } stack.push(std::move(node)); } } return true; } std::string GetFileMimeType(const FilePath& filepath) { std::string ext = filepath.Extension(); if (ext.empty()) { return std::string(); } static NoDestructor> xdg_mime_files; static NoDestructor mime_type_map([] { MimeTypeMap map; LoadAllMimeCacheFiles(map, *xdg_mime_files); return map; }()); // Files never change on ChromeOS, but for linux, match xdgmime behavior and // check every 5s and reload if any files have changed. #if !BUILDFLAG(IS_CHROMEOS) static Time last_check; // Lock is required since this may be called on any thread. static NoDestructor lock; { AutoLock scoped_lock(*lock); Time now = Time::Now(); if (last_check + Seconds(5) < now) { if (ranges::any_of(*xdg_mime_files, [](const FileInfo& file_info) { File::Info info; return !GetFileInfo(file_info.path, &info) || info.last_modified != file_info.last_modified; })) { mime_type_map->clear(); xdg_mime_files->clear(); LoadAllMimeCacheFiles(*mime_type_map, *xdg_mime_files); } last_check = now; } } #endif auto it = mime_type_map->find(ext.substr(1)); return it != mime_type_map->end() ? it->second.mime_type : std::string(); } } // namespace base::nix