GlobPattern.cpp source code [llvm_projects/llvm/lib/Support/GlobPattern.cpp]

1	//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements a glob pattern matcher.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Support/GlobPattern.h"
14	#include "llvm/ADT/StringRef.h"
15	#include "llvm/Support/Errc.h"
16
17	using namespace llvm;
18
19	// Expands character ranges and returns a bitmap.
20	// For example, "a-cf-hz" is expanded to "abcfghz".
21	static Expected<BitVector> expand(StringRef S, StringRef Original) {
22	BitVector BV(`256`, false);
23
24	// Expand X-Y.
25	for (;;) {
26	if (S.size() < `3`)
27	break;
28
29	uint8_t Start = S [`0`];
30	uint8_t End = S [`2`];
31
32	// If it doesn't start with something like X-Y,
33	// consume the first character and proceed.
34	if (S [`1`] != `'-'`) {
35	BV [Start] = true;
36	S = S.substr(Start: `1`);
37	continue;
38	}
39
40	// It must be in the form of X-Y.
41	// Validate it and then interpret the range.
42	if (Start > End)
43	return make_error<StringError>(Args: "invalid glob pattern: " + Original,
44	Args: errc::invalid_argument);
45
46	for (int C = Start; C <= End; ++C)
47	BV [(uint8_t)C] = true;
48	S = S.substr(Start: `3`);
49	}
50
51	for (char C : S)
52	BV [(uint8_t)C] = true;
53	return BV;
54	}
55
56	// Identify brace expansions in S and return the list of patterns they expand
57	// into.
58	static Expected<SmallVector<std::string, `1`>>
59	parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
60	SmallVector<std::string> SubPatterns = {S.str()};
61	if (!MaxSubPatterns \|\| !S.contains(C: `'{'`))
62	return std::move(SubPatterns);
63
64	struct BraceExpansion {
65	size_t Start;
66	size_t Length;
67	SmallVector<StringRef, `2`> Terms;
68	};
69	SmallVector<BraceExpansion, `0`> BraceExpansions;
70
71	BraceExpansion CurrentBE = nullptr*;
72	size_t TermBegin;
73	for (size_t I = `0`, E = S.size(); I != E; ++I) {
74	if (S [I] == `'['`) {
75	I = S.find(C: `']'`, From: I + `2`);
76	if (I == std::string::npos)
77	return make_error<StringError>(Args: "invalid glob pattern, unmatched '['",
78	Args: errc::invalid_argument);
79	} else if (S [I] == `'{'`) {
80	if (CurrentBE)
81	return make_error<StringError>(
82	Args: "nested brace expansions are not supported",
83	Args: errc::invalid_argument);
84	CurrentBE = &BraceExpansions.emplace_back();
85	CurrentBE->Start = I;
86	TermBegin = I + `1`;
87	} else if (S [I] == `','`) {
88	if (!CurrentBE)
89	continue;
90	CurrentBE->Terms.push_back(Elt: S.substr(Start: TermBegin, N: I - TermBegin));
91	TermBegin = I + `1`;
92	} else if (S [I] == `'}'`) {
93	if (!CurrentBE)
94	continue;
95	if (CurrentBE->Terms.empty())
96	return make_error<StringError>(
97	Args: "empty or singleton brace expansions are not supported",
98	Args: errc::invalid_argument);
99	CurrentBE->Terms.push_back(Elt: S.substr(Start: TermBegin, N: I - TermBegin));
100	CurrentBE->Length = I - CurrentBE->Start + `1`;
101	CurrentBE = nullptr;
102	} else if (S [I] == `'\\'`) {
103	if (++I == E)
104	return make_error<StringError>(Args: "invalid glob pattern, stray '\\'",
105	Args: errc::invalid_argument);
106	}
107	}
108	if (CurrentBE)
109	return make_error<StringError>(Args: "incomplete brace expansion",
110	Args: errc::invalid_argument);
111
112	size_t NumSubPatterns = `1`;
113	for (auto &BE : BraceExpansions) {
114	if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
115	NumSubPatterns = std::numeric_limits<size_t>::max();
116	break;
117	}
118	NumSubPatterns *= BE.Terms.size();
119	}
120	if (NumSubPatterns > *MaxSubPatterns)
121	return make_error<StringError>(Args: "too many brace expansions",
122	Args: errc::invalid_argument);
123	// Replace brace expansions in reverse order so that we don't invalidate
124	// earlier start indices
125	for (auto &BE : reverse(C&: BraceExpansions)) {
126	SmallVector<std::string> OrigSubPatterns;
127	std::swap(LHS&: SubPatterns, RHS&: OrigSubPatterns);
128	for (StringRef Term : BE.Terms)
129	for (StringRef Orig : OrigSubPatterns)
130	SubPatterns.emplace_back(Args&: Orig).replace(pos: BE.Start, n: BE.Length, svt: Term);
131	}
132	return std::move(SubPatterns);
133	}
134
135	Expected<GlobPattern>
136	GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
137	GlobPattern Pat;
138
139	// Store the prefix that does not contain any metacharacter.
140	size_t PrefixSize = S.find_first_of(Chars: "?*[{\\");
141	Pat.Prefix = S.substr(Start: `0`, N: PrefixSize);
142	if (PrefixSize == std::string::npos)
143	return Pat;
144	S = S.substr(Start: PrefixSize);
145
146	SmallVector<std::string, `1`> SubPats;
147	if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(Value&: SubPats))
148	return std::move(Err);
149	for (StringRef SubPat : SubPats) {
150	auto SubGlobOrErr = SubGlobPattern::create(Pat: SubPat);
151	if (!SubGlobOrErr)
152	return SubGlobOrErr.takeError();
153	Pat.SubGlobs.push_back(Elt: *SubGlobOrErr);
154	}
155
156	return Pat;
157	}
158
159	Expected<GlobPattern::SubGlobPattern>
160	GlobPattern::SubGlobPattern::create(StringRef S) {
161	SubGlobPattern Pat;
162
163	// Parse brackets.
164	Pat.Pat.assign(in_start: S.begin(), in_end: S.end());
165	for (size_t I = `0`, E = S.size(); I != E; ++I) {
166	if (S [I] == `'['`) {
167	// ']' is allowed as the first character of a character class. '[]' is
168	// invalid. So, just skip the first character.
169	++I;
170	size_t J = S.find(C: `']'`, From: I + `1`);
171	if (J == StringRef::npos)
172	return make_error<StringError>(Args: "invalid glob pattern, unmatched '['",
173	Args: errc::invalid_argument);
174	StringRef Chars = S.substr(Start: I, N: J - I);
175	bool Invert = S [I] == `'^'` \|\| S [I] == `'!'`;
176	Expected<BitVector> BV =
177	Invert ? expand(S: Chars.substr(Start: `1`), Original: S) : expand(S: Chars, Original: S);
178	if (!BV)
179	return BV.takeError();
180	if (Invert)
181	BV ->flip();
182	Pat.Brackets.push_back(Elt: Bracket{.NextOffset: J + `1`, .Bytes: std::move(*BV)});
183	I = J;
184	} else if (S [I] == `'\\'`) {
185	if (++I == E)
186	return make_error<StringError>(Args: "invalid glob pattern, stray '\\'",
187	Args: errc::invalid_argument);
188	}
189	}
190	return Pat;
191	}
192
193	bool GlobPattern::match(StringRef S) const {
194	if (!S.consume_front(Prefix))
195	return false;
196	if (SubGlobs.empty() && S.empty())
197	return true;
198	for (auto &Glob : SubGlobs)
199	if (Glob.match(S))
200	return true;
201	return false;
202	}
203
204	// Factor the pattern into segments split by ''. The segment is matched*
205	// sequentianlly by finding the first occurrence past the end of the previous
206	// match.
207	bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
208	const char P = Pat.data(), SegmentBegin = nullptr, *S = Str.data(),
209	*SavedS = S;
210	const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
211	size_t B = `0`, SavedB = `0`;
212	while (S != End) {
213	if (P == PEnd)
214	;
215	else if (P == `''`) {
216	// The non- substring on the left of '' matches the tail of S. Save the
217	// positions to be used by backtracking if we see a mismatch later.
218	SegmentBegin = ++P;
219	SavedS = S;
220	SavedB = B;
221	continue;
222	} else if (*P == `'['`) {
223	if (Brackets [B].Bytes [uint8_t(*S)]) {
224	P = Pat.data() + Brackets [B++].NextOffset;
225	++S;
226	continue;
227	}
228	} else if (*P == `'\\'`) {
229	if (++P == S) {
230	++P;
231	++S;
232	continue;
233	}
234	} else if (P == S \|\| *P == `'?'`) {
235	++P;
236	++S;
237	continue;
238	}
239	if (!SegmentBegin)
240	return false;
241	// We have seen a ''. Backtrack to the saved positions. Shift the S*
242	// position to probe the next starting position in the segment.
243	P = SegmentBegin;
244	S = ++SavedS;
245	B = SavedB;
246	}
247	// All bytes in Str have been matched. Return true if the rest part of Pat is
248	// empty or contains only ''.*
249	return getPat().find_first_not_of(C: `'*'`, From: P - Pat.data()) == std::string::npos;
250	}
251

Browse the source code of llvm_projects/llvm/lib/Support/GlobPattern.cpp