1*67e74705SXin Li //===--- FileMatchTrie.cpp - ----------------------------------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This file contains the implementation of a FileMatchTrie.
11*67e74705SXin Li //
12*67e74705SXin Li //===----------------------------------------------------------------------===//
13*67e74705SXin Li
14*67e74705SXin Li #include "clang/Tooling/FileMatchTrie.h"
15*67e74705SXin Li #include "llvm/ADT/StringMap.h"
16*67e74705SXin Li #include "llvm/Support/FileSystem.h"
17*67e74705SXin Li #include "llvm/Support/Path.h"
18*67e74705SXin Li #include "llvm/Support/raw_ostream.h"
19*67e74705SXin Li #include <sstream>
20*67e74705SXin Li using namespace clang;
21*67e74705SXin Li using namespace tooling;
22*67e74705SXin Li
23*67e74705SXin Li namespace {
24*67e74705SXin Li /// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent().
25*67e74705SXin Li struct DefaultPathComparator : public PathComparator {
equivalent__anon09fa5cfe0111::DefaultPathComparator26*67e74705SXin Li bool equivalent(StringRef FileA, StringRef FileB) const override {
27*67e74705SXin Li return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
28*67e74705SXin Li }
29*67e74705SXin Li };
30*67e74705SXin Li }
31*67e74705SXin Li
32*67e74705SXin Li namespace clang {
33*67e74705SXin Li namespace tooling {
34*67e74705SXin Li /// \brief A node of the \c FileMatchTrie.
35*67e74705SXin Li ///
36*67e74705SXin Li /// Each node has storage for up to one path and a map mapping a path segment to
37*67e74705SXin Li /// child nodes. The trie starts with an empty root node.
38*67e74705SXin Li class FileMatchTrieNode {
39*67e74705SXin Li public:
40*67e74705SXin Li /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes
41*67e74705SXin Li /// the number of \c NewPath's trailing characters already consumed during
42*67e74705SXin Li /// recursion.
43*67e74705SXin Li ///
44*67e74705SXin Li /// An insert of a path
45*67e74705SXin Li /// 'p'starts at the root node and does the following:
46*67e74705SXin Li /// - If the node is empty, insert 'p' into its storage and abort.
47*67e74705SXin Li /// - If the node has a path 'p2' but no children, take the last path segment
48*67e74705SXin Li /// 's' of 'p2', put a new child into the map at 's' an insert the rest of
49*67e74705SXin Li /// 'p2' there.
50*67e74705SXin Li /// - Insert a new child for the last segment of 'p' and insert the rest of
51*67e74705SXin Li /// 'p' there.
52*67e74705SXin Li ///
53*67e74705SXin Li /// An insert operation is linear in the number of a path's segments.
insert(StringRef NewPath,unsigned ConsumedLength=0)54*67e74705SXin Li void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
55*67e74705SXin Li // We cannot put relative paths into the FileMatchTrie as then a path can be
56*67e74705SXin Li // a postfix of another path, violating a core assumption of the trie.
57*67e74705SXin Li if (llvm::sys::path::is_relative(NewPath))
58*67e74705SXin Li return;
59*67e74705SXin Li if (Path.empty()) {
60*67e74705SXin Li // This is an empty leaf. Store NewPath and return.
61*67e74705SXin Li Path = NewPath;
62*67e74705SXin Li return;
63*67e74705SXin Li }
64*67e74705SXin Li if (Children.empty()) {
65*67e74705SXin Li // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
66*67e74705SXin Li if (NewPath == Path)
67*67e74705SXin Li return;
68*67e74705SXin Li // Make this a node and create a child-leaf with 'Path'.
69*67e74705SXin Li StringRef Element(llvm::sys::path::filename(
70*67e74705SXin Li StringRef(Path).drop_back(ConsumedLength)));
71*67e74705SXin Li Children[Element].Path = Path;
72*67e74705SXin Li }
73*67e74705SXin Li StringRef Element(llvm::sys::path::filename(
74*67e74705SXin Li StringRef(NewPath).drop_back(ConsumedLength)));
75*67e74705SXin Li Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
76*67e74705SXin Li }
77*67e74705SXin Li
78*67e74705SXin Li /// \brief Tries to find the node under this \c FileMatchTrieNode that best
79*67e74705SXin Li /// matches 'FileName'.
80*67e74705SXin Li ///
81*67e74705SXin Li /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
82*67e74705SXin Li /// \c true and an empty string is returned. If no path fits 'FileName', an
83*67e74705SXin Li /// empty string is returned. \c ConsumedLength denotes the number of
84*67e74705SXin Li /// \c Filename's trailing characters already consumed during recursion.
85*67e74705SXin Li ///
86*67e74705SXin Li /// To find the best matching node for a given path 'p', the
87*67e74705SXin Li /// \c findEquivalent() function is called recursively for each path segment
88*67e74705SXin Li /// (back to fron) of 'p' until a node 'n' is reached that does not ..
89*67e74705SXin Li /// - .. have children. In this case it is checked
90*67e74705SXin Li /// whether the stored path is equivalent to 'p'. If yes, the best match is
91*67e74705SXin Li /// found. Otherwise continue with the parent node as if this node did not
92*67e74705SXin Li /// exist.
93*67e74705SXin Li /// - .. a child matching the next path segment. In this case, all children of
94*67e74705SXin Li /// 'n' are an equally good match for 'p'. All children are of 'n' are found
95*67e74705SXin Li /// recursively and their equivalence to 'p' is determined. If none are
96*67e74705SXin Li /// equivalent, continue with the parent node as if 'n' didn't exist. If one
97*67e74705SXin Li /// is equivalent, the best match is found. Otherwise, report and ambigiuity
98*67e74705SXin Li /// error.
findEquivalent(const PathComparator & Comparator,StringRef FileName,bool & IsAmbiguous,unsigned ConsumedLength=0) const99*67e74705SXin Li StringRef findEquivalent(const PathComparator& Comparator,
100*67e74705SXin Li StringRef FileName,
101*67e74705SXin Li bool &IsAmbiguous,
102*67e74705SXin Li unsigned ConsumedLength = 0) const {
103*67e74705SXin Li if (Children.empty()) {
104*67e74705SXin Li if (Comparator.equivalent(StringRef(Path), FileName))
105*67e74705SXin Li return StringRef(Path);
106*67e74705SXin Li return StringRef();
107*67e74705SXin Li }
108*67e74705SXin Li StringRef Element(llvm::sys::path::filename(FileName.drop_back(
109*67e74705SXin Li ConsumedLength)));
110*67e74705SXin Li llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
111*67e74705SXin Li Children.find(Element);
112*67e74705SXin Li if (MatchingChild != Children.end()) {
113*67e74705SXin Li StringRef Result = MatchingChild->getValue().findEquivalent(
114*67e74705SXin Li Comparator, FileName, IsAmbiguous,
115*67e74705SXin Li ConsumedLength + Element.size() + 1);
116*67e74705SXin Li if (!Result.empty() || IsAmbiguous)
117*67e74705SXin Li return Result;
118*67e74705SXin Li }
119*67e74705SXin Li std::vector<StringRef> AllChildren;
120*67e74705SXin Li getAll(AllChildren, MatchingChild);
121*67e74705SXin Li StringRef Result;
122*67e74705SXin Li for (unsigned i = 0; i < AllChildren.size(); i++) {
123*67e74705SXin Li if (Comparator.equivalent(AllChildren[i], FileName)) {
124*67e74705SXin Li if (Result.empty()) {
125*67e74705SXin Li Result = AllChildren[i];
126*67e74705SXin Li } else {
127*67e74705SXin Li IsAmbiguous = true;
128*67e74705SXin Li return StringRef();
129*67e74705SXin Li }
130*67e74705SXin Li }
131*67e74705SXin Li }
132*67e74705SXin Li return Result;
133*67e74705SXin Li }
134*67e74705SXin Li
135*67e74705SXin Li private:
136*67e74705SXin Li /// \brief Gets all paths under this FileMatchTrieNode.
getAll(std::vector<StringRef> & Results,llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const137*67e74705SXin Li void getAll(std::vector<StringRef> &Results,
138*67e74705SXin Li llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
139*67e74705SXin Li if (Path.empty())
140*67e74705SXin Li return;
141*67e74705SXin Li if (Children.empty()) {
142*67e74705SXin Li Results.push_back(StringRef(Path));
143*67e74705SXin Li return;
144*67e74705SXin Li }
145*67e74705SXin Li for (llvm::StringMap<FileMatchTrieNode>::const_iterator
146*67e74705SXin Li It = Children.begin(), E = Children.end();
147*67e74705SXin Li It != E; ++It) {
148*67e74705SXin Li if (It == Except)
149*67e74705SXin Li continue;
150*67e74705SXin Li It->getValue().getAll(Results, Children.end());
151*67e74705SXin Li }
152*67e74705SXin Li }
153*67e74705SXin Li
154*67e74705SXin Li // The stored absolute path in this node. Only valid for leaf nodes, i.e.
155*67e74705SXin Li // nodes where Children.empty().
156*67e74705SXin Li std::string Path;
157*67e74705SXin Li
158*67e74705SXin Li // The children of this node stored in a map based on the next path segment.
159*67e74705SXin Li llvm::StringMap<FileMatchTrieNode> Children;
160*67e74705SXin Li };
161*67e74705SXin Li } // end namespace tooling
162*67e74705SXin Li } // end namespace clang
163*67e74705SXin Li
FileMatchTrie()164*67e74705SXin Li FileMatchTrie::FileMatchTrie()
165*67e74705SXin Li : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
166*67e74705SXin Li
FileMatchTrie(PathComparator * Comparator)167*67e74705SXin Li FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
168*67e74705SXin Li : Root(new FileMatchTrieNode), Comparator(Comparator) {}
169*67e74705SXin Li
~FileMatchTrie()170*67e74705SXin Li FileMatchTrie::~FileMatchTrie() {
171*67e74705SXin Li delete Root;
172*67e74705SXin Li }
173*67e74705SXin Li
insert(StringRef NewPath)174*67e74705SXin Li void FileMatchTrie::insert(StringRef NewPath) {
175*67e74705SXin Li Root->insert(NewPath);
176*67e74705SXin Li }
177*67e74705SXin Li
findEquivalent(StringRef FileName,raw_ostream & Error) const178*67e74705SXin Li StringRef FileMatchTrie::findEquivalent(StringRef FileName,
179*67e74705SXin Li raw_ostream &Error) const {
180*67e74705SXin Li if (llvm::sys::path::is_relative(FileName)) {
181*67e74705SXin Li Error << "Cannot resolve relative paths";
182*67e74705SXin Li return StringRef();
183*67e74705SXin Li }
184*67e74705SXin Li bool IsAmbiguous = false;
185*67e74705SXin Li StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
186*67e74705SXin Li if (IsAmbiguous)
187*67e74705SXin Li Error << "Path is ambiguous";
188*67e74705SXin Li return Result;
189*67e74705SXin Li }
190