JSON.cpp source code [llvm_projects/llvm/lib/Support/JSON.cpp]

1	//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===---------------------------------------------------------------------===//
8
9	#include "llvm/Support/JSON.h"
10	#include "llvm/ADT/STLExtras.h"
11	#include "llvm/ADT/StringExtras.h"
12	#include "llvm/Support/ConvertUTF.h"
13	#include "llvm/Support/Error.h"
14	#include "llvm/Support/Format.h"
15	#include "llvm/Support/NativeFormatting.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <cctype>
18	#include <cerrno>
19	#include <optional>
20
21	namespace llvm {
22	namespace json {
23
24	Value &Object::operator[](const ObjectKey &K) {
25	return try_emplace(K, Args: nullptr).first ->getSecond();
26	}
27	Value &Object::operator[](ObjectKey &&K) {
28	return try_emplace(K: std::move(K), Args: nullptr).first ->getSecond();
29	}
30	Value *Object::get(StringRef K) {
31	auto I = find(K);
32	if (I == end())
33	return nullptr;
34	return &I ->second;
35	}
36	const Value Object::get(StringRef K) const* {
37	auto I = find(K);
38	if (I == end())
39	return nullptr;
40	return &I ->second;
41	}
42	std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
43	if (auto *V = get(K))
44	return V->getAsNull();
45	return std::nullopt;
46	}
47	std::optional<bool> Object::getBoolean(StringRef K) const {
48	if (auto *V = get(K))
49	return V->getAsBoolean();
50	return std::nullopt;
51	}
52	std::optional<double> Object::getNumber(StringRef K) const {
53	if (auto *V = get(K))
54	return V->getAsNumber();
55	return std::nullopt;
56	}
57	std::optional<int64_t> Object::getInteger(StringRef K) const {
58	if (auto *V = get(K))
59	return V->getAsInteger();
60	return std::nullopt;
61	}
62	std::optional<llvm::StringRef> Object::getString(StringRef K) const {
63	if (auto *V = get(K))
64	return V->getAsString();
65	return std::nullopt;
66	}
67	const json::Object Object::getObject(StringRef K) const* {
68	if (auto *V = get(K))
69	return V->getAsObject();
70	return nullptr;
71	}
72	json::Object *Object::getObject(StringRef K) {
73	if (auto *V = get(K))
74	return V->getAsObject();
75	return nullptr;
76	}
77	const json::Array Object::getArray(StringRef K) const* {
78	if (auto *V = get(K))
79	return V->getAsArray();
80	return nullptr;
81	}
82	json::Array *Object::getArray(StringRef K) {
83	if (auto *V = get(K))
84	return V->getAsArray();
85	return nullptr;
86	}
87	bool operator==(const Object &LHS, const Object &RHS) {
88	if (LHS.size() != RHS.size())
89	return false;
90	for (const auto &L : LHS) {
91	auto R = RHS.find(K: L.first);
92	if (R == RHS.end() \|\| L.second != R ->second)
93	return false;
94	}
95	return true;
96	}
97
98	Array::Array(std::initializer_list<Value> Elements) {
99	V.reserve(n: Elements.size());
100	for (const Value &V : Elements) {
101	emplace_back(A: nullptr);
102	back().moveFrom(M: std::move(V));
103	}
104	}
105
106	Value::Value(std::initializer_list<Value> Elements)
107	: Value (json::Array (Elements)) {}
108
109	void Value::copyFrom(const Value &M) {
110	Type = M.Type;
111	switch (Type) {
112	case T_Null:
113	case T_Boolean:
114	case T_Double:
115	case T_Integer:
116	case T_UINT64:
117	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
118	break;
119	case T_StringRef:
120	create<StringRef>(V&: M.as<StringRef>());
121	break;
122	case T_String:
123	create<std::string>(V&: M.as<std::string>());
124	break;
125	case T_Object:
126	create<json::Object>(V&: M.as<json::Object>());
127	break;
128	case T_Array:
129	create<json::Array>(V&: M.as<json::Array>());
130	break;
131	}
132	}
133
134	void Value::moveFrom(const Value &&M) {
135	Type = M.Type;
136	switch (Type) {
137	case T_Null:
138	case T_Boolean:
139	case T_Double:
140	case T_Integer:
141	case T_UINT64:
142	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
143	break;
144	case T_StringRef:
145	create<StringRef>(V&: M.as<StringRef>());
146	break;
147	case T_String:
148	create<std::string>(V: std::move(M.as<std::string>()));
149	M.Type = T_Null;
150	break;
151	case T_Object:
152	create<json::Object>(V: std::move(M.as<json::Object>()));
153	M.Type = T_Null;
154	break;
155	case T_Array:
156	create<json::Array>(V: std::move(M.as<json::Array>()));
157	M.Type = T_Null;
158	break;
159	}
160	}
161
162	void Value::destroy() {
163	switch (Type) {
164	case T_Null:
165	case T_Boolean:
166	case T_Double:
167	case T_Integer:
168	case T_UINT64:
169	break;
170	case T_StringRef:
171	as<StringRef>().~StringRef();
172	break;
173	case T_String:
174	as<std::string>().~basic_string();
175	break;
176	case T_Object:
177	as<json::Object>().~Object();
178	break;
179	case T_Array:
180	as<json::Array>().~Array();
181	break;
182	}
183	}
184
185	void Value::print(llvm::raw_ostream &OS) const { OS << *this; }
186
187	bool operator==(const Value &L, const Value &R) {
188	if (L.kind() != R.kind())
189	return false;
190	switch (L.kind()) {
191	case Value::Null:
192	return L.getAsNull() == R.getAsNull();
193	case Value::Boolean:
194	return L.getAsBoolean() == R.getAsBoolean();
195	case Value::Number:
196	// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
197	// The same integer must convert to the same double, per the standard.
198	// However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
199	// So we avoid floating point promotion for exact comparisons.
200	if (L.Type == Value::T_Integer \|\| R.Type == Value::T_Integer)
201	return L.getAsInteger() == R.getAsInteger();
202	return L.getAsNumber() == R.getAsNumber();
203	case Value::String:
204	return L.getAsString() == R.getAsString();
205	case Value::Array:
206	return L.getAsArray() == R.getAsArray();
207	case Value::Object:
208	return L.getAsObject() == R.getAsObject();
209	}
210	llvm_unreachable("Unknown value kind");
211	}
212
213	void Path::report(llvm::StringLiteral Msg) {
214	// Walk up to the root context, and count the number of segments.
215	unsigned Count = `0`;
216	const Path *P;
217	for (P = this; P->Parent != nullptr; P = P->Parent)
218	++Count;
219	Path::Root *R = P->Seg.root();
220	// Fill in the error message and copy the path (in reverse order).
221	R->ErrorMessage = Msg;
222	R->ErrorPath.resize(new_size: Count);
223	auto It = R->ErrorPath.begin();
224	for (P = this; P->Parent != nullptr; P = P->Parent)
225	*It ++ = P->Seg;
226	}
227
228	Error Path::Root::getError() const {
229	std::string S;
230	raw_string_ostream OS(S);
231	OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
232	if (ErrorPath.empty()) {
233	if (!Name.empty())
234	OS << " when parsing " << Name;
235	} else {
236	OS << " at " << (Name.empty() ? "(root)" : Name);
237	for (const Path::Segment &S : llvm::reverse(C: ErrorPath)) {
238	if (S.isField())
239	OS << `'.'` << S.field();
240	else
241	OS << `'['` << S.index() << `']'`;
242	}
243	}
244	return createStringError(EC: llvm::inconvertibleErrorCode(), S);
245	}
246
247	std::vector<const Object::value_type > sortedElements(const* Object &O) {
248	std::vector<const Object::value_type *> Elements;
249	for (const auto &E : O)
250	Elements.push_back(x: &E);
251	llvm::sort(C&: Elements,
252	Comp: [](const Object::value_type L, const* Object::value_type *R) {
253	return L->first < R->first;
254	});
255	return Elements;
256	}
257
258	// Prints a one-line version of a value that isn't our main focus.
259	// We interleave writes to OS and JOS, exploiting the lack of extra buffering.
260	// This is OK as we own the implementation.
261	static void abbreviate(const Value &V, OStream &JOS) {
262	switch (V.kind()) {
263	case Value::Array:
264	JOS.rawValue(Contents: V.getAsArray()->empty() ? "[]" : "[ ... ]");
265	break;
266	case Value::Object:
267	JOS.rawValue(Contents: V.getAsObject()->empty() ? "{}" : "{ ... }");
268	break;
269	case Value::String: {
270	llvm::StringRef S = *V.getAsString();
271	if (S.size() < `40`) {
272	JOS.value(V);
273	} else {
274	std::string Truncated = fixUTF8(S: S.take_front(N: `37`));
275	Truncated.append(s: "...");
276	JOS.value(V: Truncated);
277	}
278	break;
279	}
280	default:
281	JOS.value(V);
282	}
283	}
284
285	// Prints a semi-expanded version of a value that is our main focus.
286	// Array/Object entries are printed, but not recursively as they may be huge.
287	static void abbreviateChildren(const Value &V, OStream &JOS) {
288	switch (V.kind()) {
289	case Value::Array:
290	JOS.array(Contents: [&] {
291	for (const auto &I : *V.getAsArray())
292	abbreviate(V: I, JOS);
293	});
294	break;
295	case Value::Object:
296	JOS.object(Contents: [&] {
297	for (const auto KV : sortedElements(O: V.getAsObject())) {
298	JOS.attributeBegin(Key: KV->first);
299	abbreviate(V: KV->second, JOS);
300	JOS.attributeEnd();
301	}
302	});
303	break;
304	default:
305	JOS.value(V);
306	}
307	}
308
309	void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
310	OStream JOS(OS, /IndentSize=/`2`);
311	// PrintValue recurses down the path, printing the ancestors of our target.
312	// Siblings of nodes along the path are printed with abbreviate(), and the
313	// target itself is printed with the somewhat richer abbreviateChildren().
314	// 'Recurse' is the lambda itself, to allow recursive calls.
315	auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
316	// Print the target node itself, with the error as a comment.
317	// Also used if we can't follow our path, e.g. it names a field that
318	// should* exist but doesn't.*
319	auto HighlightCurrent = [&] {
320	std::string Comment = "error: ";
321	Comment.append(s: ErrorMessage.data(), n: ErrorMessage.size());
322	JOS.comment(Comment);
323	abbreviateChildren(V, JOS);
324	};
325	if (Path.empty()) // We reached our target.
326	return HighlightCurrent();
327	const Segment &S = Path.back(); // Path is in reverse order.
328	if (S.isField()) {
329	// Current node is an object, path names a field.
330	llvm::StringRef FieldName = S.field();
331	const Object *O = V.getAsObject();
332	if (!O \|\| !O->get(K: FieldName))
333	return HighlightCurrent();
334	JOS.object(Contents: [&] {
335	for (const auto KV : sortedElements(O: O)) {
336	JOS.attributeBegin(Key: KV->first);
337	if (FieldName == StringRef(KV->first))
338	Recurse(KV->second, Path.drop_back(), Recurse);
339	else
340	abbreviate(V: KV->second, JOS);
341	JOS.attributeEnd();
342	}
343	});
344	} else {
345	// Current node is an array, path names an element.
346	const Array *A = V.getAsArray();
347	if (!A \|\| S.index() >= A->size())
348	return HighlightCurrent();
349	JOS.array(Contents: [&] {
350	unsigned Current = `0`;
351	for (const auto &V : *A) {
352	if (Current++ == S.index())
353	Recurse(V, Path.drop_back(), Recurse);
354	else
355	abbreviate(V, JOS);
356	}
357	});
358	}
359	};
360	PrintValue (R, ErrorPath, PrintValue);
361	}
362
363	namespace {
364	// Simple recursive-descent JSON parser.
365	class Parser {
366	public:
367	Parser(StringRef JSON)
368	: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
369
370	bool checkUTF8() {
371	size_t ErrOffset;
372	if (isUTF8(S: StringRef (Start, End - Start), ErrOffset: &ErrOffset))
373	return true;
374	P = Start + ErrOffset; // For line/column calculation.
375	return parseError(Msg: "Invalid UTF-8 sequence");
376	}
377
378	bool parseValue(Value &Out);
379
380	bool assertEnd() {
381	eatWhitespace();
382	if (P == End)
383	return true;
384	return parseError(Msg: "Text after end of document");
385	}
386
387	Error takeError() {
388	assert(Err);
389	return std::move(*Err);
390	}
391
392	private:
393	void eatWhitespace() {
394	while (P != End && (P == `' '` \|\| P == `'\r'` \|\| P == `'\n'` \|\| P == `'\t'`))
395	++P;
396	}
397
398	// On invalid syntax, parseX() functions return false and set Err.
399	bool parseNumber(char First, Value &Out);
400	bool parseString(std::string &Out);
401	bool parseUnicode(std::string &Out);
402	bool parseError(const char Msg); // always returns false*
403
404	char next() { return P == End ? `0` : *P++; }
405	char peek() { return P == End ? `0` : *P; }
406	static bool isNumber(char C) {
407	return C == `'0'` \|\| C == `'1'` \|\| C == `'2'` \|\| C == `'3'` \|\| C == `'4'` \|\|
408	C == `'5'` \|\| C == `'6'` \|\| C == `'7'` \|\| C == `'8'` \|\| C == `'9'` \|\|
409	C == `'e'` \|\| C == `'E'` \|\| C == `'+'` \|\| C == `'-'` \|\| C == `'.'`;
410	}
411
412	std::optional<Error> Err;
413	const char Start, P, *End;
414	};
415	} // namespace
416
417	bool Parser::parseValue(Value &Out) {
418	eatWhitespace();
419	if (P == End)
420	return parseError(Msg: "Unexpected EOF");
421	switch (char C = next()) {
422	// Bare null/true/false are easy - first char identifies them.
423	case `'n'`:
424	Out = nullptr;
425	return (next() == `'u'` && next() == `'l'` && next() == `'l'`) \|\|
426	parseError(Msg: "Invalid JSON value (null?)");
427	case `'t'`:
428	Out = true;
429	return (next() == `'r'` && next() == `'u'` && next() == `'e'`) \|\|
430	parseError(Msg: "Invalid JSON value (true?)");
431	case `'f'`:
432	Out = false;
433	return (next() == `'a'` && next() == `'l'` && next() == `'s'` && next() == `'e'`) \|\|
434	parseError(Msg: "Invalid JSON value (false?)");
435	case `'"'`: {
436	std::string S;
437	if (parseString(Out&: S)) {
438	Out = std::move(S);
439	return true;
440	}
441	return false;
442	}
443	case `'['`: {
444	Out = Array {};
445	Array &A = *Out.getAsArray();
446	eatWhitespace();
447	if (peek() == `']'`) {
448	++P;
449	return true;
450	}
451	for (;;) {
452	A.emplace_back(A: nullptr);
453	if (!parseValue(Out&: A.back()))
454	return false;
455	eatWhitespace();
456	switch (next()) {
457	case `','`:
458	eatWhitespace();
459	continue;
460	case `']'`:
461	return true;
462	default:
463	return parseError(Msg: "Expected , or ] after array element");
464	}
465	}
466	}
467	case `'{'`: {
468	Out = Object {};
469	Object &O = *Out.getAsObject();
470	eatWhitespace();
471	if (peek() == `'}'`) {
472	++P;
473	return true;
474	}
475	for (;;) {
476	if (next() != `'"'`)
477	return parseError(Msg: "Expected object key");
478	std::string K;
479	if (!parseString(Out&: K))
480	return false;
481	eatWhitespace();
482	if (next() != `':'`)
483	return parseError(Msg: "Expected : after object key");
484	eatWhitespace();
485	if (!parseValue(Out&: O [std::move(K)]))
486	return false;
487	eatWhitespace();
488	switch (next()) {
489	case `','`:
490	eatWhitespace();
491	continue;
492	case `'}'`:
493	return true;
494	default:
495	return parseError(Msg: "Expected , or } after object property");
496	}
497	}
498	}
499	default:
500	if (isNumber(C))
501	return parseNumber(First: C, Out);
502	return parseError(Msg: "Invalid JSON value");
503	}
504	}
505
506	bool Parser::parseNumber(char First, Value &Out) {
507	// Read the number into a string. (Must be null-terminated for strto).*
508	SmallString<`24`> S;
509	S.push_back(Elt: First);
510	while (isNumber(C: peek()))
511	S.push_back(Elt: next());
512	char *End;
513	// Try first to parse as integer, and if so preserve full 64 bits.
514	// We check for errno for out of bounds errors and for End == S.end()
515	// to make sure that the numeric string is not malformed.
516	errno = `0`;
517	int64_t I = std::strtoll(nptr: S.c_str(), endptr: &End, base: `10`);
518	if (End == S.end() && errno != ERANGE) {
519	Out = int64_t(I);
520	return true;
521	}
522	// strtroull has a special handling for negative numbers, but in this
523	// case we don't want to do that because negative numbers were already
524	// handled in the previous block.
525	if (First != `'-'`) {
526	errno = `0`;
527	uint64_t UI = std::strtoull(nptr: S.c_str(), endptr: &End, base: `10`);
528	if (End == S.end() && errno != ERANGE) {
529	Out = UI;
530	return true;
531	}
532	}
533	// If it's not an integer
534	Out = std::strtod(nptr: S.c_str(), endptr: &End);
535	return End == S.end() \|\| parseError(Msg: "Invalid JSON value (number?)");
536	}
537
538	bool Parser::parseString(std::string &Out) {
539	// leading quote was already consumed.
540	for (char C = next(); C != `'"'`; C = next()) {
541	if (LLVM_UNLIKELY(P == End))
542	return parseError(Msg: "Unterminated string");
543	if (LLVM_UNLIKELY((C & `0x1f`) == C))
544	return parseError(Msg: "Control character in string");
545	if (LLVM_LIKELY(C != `'\\'`)) {
546	Out.push_back(c: C);
547	continue;
548	}
549	// Handle escape sequence.
550	switch (C = next()) {
551	case `'"'`:
552	case `'\\'`:
553	case `'/'`:
554	Out.push_back(c: C);
555	break;
556	case `'b'`:
557	Out.push_back(c: `'\b'`);
558	break;
559	case `'f'`:
560	Out.push_back(c: `'\f'`);
561	break;
562	case `'n'`:
563	Out.push_back(c: `'\n'`);
564	break;
565	case `'r'`:
566	Out.push_back(c: `'\r'`);
567	break;
568	case `'t'`:
569	Out.push_back(c: `'\t'`);
570	break;
571	case `'u'`:
572	if (!parseUnicode(Out))
573	return false;
574	break;
575	default:
576	return parseError(Msg: "Invalid escape sequence");
577	}
578	}
579	return true;
580	}
581
582	static void encodeUtf8(uint32_t Rune, std::string &Out) {
583	if (Rune < `0x80`) {
584	Out.push_back(c: Rune & `0x7F`);
585	} else if (Rune < `0x800`) {
586	uint8_t FirstByte = `0xC0` \| ((Rune & `0x7C0`) >> `6`);
587	uint8_t SecondByte = `0x80` \| (Rune & `0x3F`);
588	Out.push_back(c: FirstByte);
589	Out.push_back(c: SecondByte);
590	} else if (Rune < `0x10000`) {
591	uint8_t FirstByte = `0xE0` \| ((Rune & `0xF000`) >> `12`);
592	uint8_t SecondByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
593	uint8_t ThirdByte = `0x80` \| (Rune & `0x3F`);
594	Out.push_back(c: FirstByte);
595	Out.push_back(c: SecondByte);
596	Out.push_back(c: ThirdByte);
597	} else if (Rune < `0x110000`) {
598	uint8_t FirstByte = `0xF0` \| ((Rune & `0x1F0000`) >> `18`);
599	uint8_t SecondByte = `0x80` \| ((Rune & `0x3F000`) >> `12`);
600	uint8_t ThirdByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
601	uint8_t FourthByte = `0x80` \| (Rune & `0x3F`);
602	Out.push_back(c: FirstByte);
603	Out.push_back(c: SecondByte);
604	Out.push_back(c: ThirdByte);
605	Out.push_back(c: FourthByte);
606	} else {
607	llvm_unreachable("Invalid codepoint");
608	}
609	}
610
611	// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
612	// May parse several sequential escapes to ensure proper surrogate handling.
613	// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
614	// These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
615	bool Parser::parseUnicode(std::string &Out) {
616	// Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
617	auto Invalid = [&] { Out.append(/ UTF-8 / l: {`'\xef'`, `'\xbf'`, `'\xbd'`}); };
618	// Decodes 4 hex digits from the stream into Out, returns false on error.
619	auto Parse4Hex = [this](uint16_t &Out) -> bool {
620	Out = `0`;
621	char Bytes[] = {next(), next(), next(), next()};
622	for (unsigned char C : Bytes) {
623	if (!std::isxdigit(C))
624	return parseError(Msg: "Invalid \\u escape sequence");
625	Out <<= `4`;
626	Out \|= (C > `'9'`) ? (C & ~`0x20`) - `'A'` + `10` : (C - `'0'`);
627	}
628	return true;
629	};
630	uint16_t First; // UTF-16 code unit from the first \u escape.
631	if (!Parse4Hex (First))
632	return false;
633
634	// We loop to allow proper surrogate-pair error handling.
635	while (true) {
636	// Case 1: the UTF-16 code unit is already a codepoint in the BMP.
637	if (LLVM_LIKELY(First < `0xD800` \|\| First >= `0xE000`)) {
638	encodeUtf8(Rune: First, Out);
639	return true;
640	}
641
642	// Case 2: it's an (unpaired) trailing surrogate.
643	if (LLVM_UNLIKELY(First >= `0xDC00`)) {
644	Invalid ();
645	return true;
646	}
647
648	// Case 3: it's a leading surrogate. We expect a trailing one next.
649	// Case 3a: there's no trailing \u escape. Don't advance in the stream.
650	if (LLVM_UNLIKELY(P + `2` > End \|\| P != `'\\'` \|\| (P + `1`) != `'u'`)) {
651	Invalid (); // Leading surrogate was unpaired.
652	return true;
653	}
654	P += `2`;
655	uint16_t Second;
656	if (!Parse4Hex (Second))
657	return false;
658	// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
659	if (LLVM_UNLIKELY(Second < `0xDC00` \|\| Second >= `0xE000`)) {
660	Invalid (); // Leading surrogate was unpaired.
661	First = Second; // Second escape still needs to be processed.
662	continue;
663	}
664	// Case 3c: a valid surrogate pair encoding an astral codepoint.
665	encodeUtf8(Rune: `0x10000` \| ((First - `0xD800`) << `10`) \| (Second - `0xDC00`), Out);
666	return true;
667	}
668	}
669
670	bool Parser::parseError(const char *Msg) {
671	int Line = `1`;
672	const char *StartOfLine = Start;
673	for (const char *X = Start; X < P; ++X) {
674	if (*X == `0x0A`) {
675	++Line;
676	StartOfLine = X + `1`;
677	}
678	}
679	Err.emplace(
680	args: std::make_unique<ParseError>(args&: Msg, args&: Line, args: P - StartOfLine, args: P - Start));
681	return false;
682	}
683
684	Expected<Value> parse(StringRef JSON) {
685	Parser P(JSON);
686	Value E = nullptr;
687	if (P.checkUTF8())
688	if (P.parseValue(Out&: E))
689	if (P.assertEnd())
690	return std::move(E);
691	return P.takeError();
692	}
693	char ParseError::ID = `0`;
694
695	bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
696	// Fast-path for ASCII, which is valid UTF-8.
697	if (LLVM_LIKELY(isASCII(S)))
698	return true;
699
700	const UTF8 Data = reinterpret_cast<const* UTF8 >(S.data()), Rest = Data;
701	if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
702	return true;
703
704	if (ErrOffset)
705	*ErrOffset = Rest - Data;
706	return false;
707	}
708
709	std::string fixUTF8(llvm::StringRef S) {
710	// This isn't particularly efficient, but is only for error-recovery.
711	std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
712	const UTF8 In8 = reinterpret_cast<const* UTF8 *>(S.data());
713	UTF32 *Out32 = Codepoints.data();
714	ConvertUTF8toUTF32(sourceStart: &In8, sourceEnd: In8 + S.size(), targetStart: &Out32, targetEnd: Out32 + Codepoints.size(),
715	flags: lenientConversion);
716	Codepoints.resize(new_size: Out32 - Codepoints.data());
717	std::string Res(`4` * Codepoints.size(), `0`); // 4 bytes per codepoint suffice
718	const UTF32 *In32 = Codepoints.data();
719	UTF8 Out8 = reinterpret_cast<UTF8 >(&Res [`0`]);
720	ConvertUTF32toUTF8(sourceStart: &In32, sourceEnd: In32 + Codepoints.size(), targetStart: &Out8, targetEnd: Out8 + Res.size(),
721	flags: strictConversion);
722	Res.resize(n: reinterpret_cast<char *>(Out8) - Res.data());
723	return Res;
724	}
725
726	static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
727	OS << `'\"'`;
728	for (unsigned char C : S) {
729	if (C == `0x22` \|\| C == `0x5C`)
730	OS << `'\\'`;
731	if (C >= `0x20`) {
732	OS << C;
733	continue;
734	}
735	OS << `'\\'`;
736	switch (C) {
737	// A few characters are common enough to make short escapes worthwhile.
738	case `'\t'`:
739	OS << `'t'`;
740	break;
741	case `'\n'`:
742	OS << `'n'`;
743	break;
744	case `'\r'`:
745	OS << `'r'`;
746	break;
747	default:
748	OS << `'u'`;
749	llvm::write_hex(S&: OS, N: C, Style: llvm::HexPrintStyle::Lower, Width: `4`);
750	break;
751	}
752	}
753	OS << `'\"'`;
754	}
755
756	void llvm::json::OStream::value(const Value &V) {
757	switch (V.kind()) {
758	case Value::Null:
759	valueBegin();
760	OS << "null";
761	return;
762	case Value::Boolean:
763	valueBegin();
764	OS << (*V.getAsBoolean() ? "true" : "false");
765	return;
766	case Value::Number:
767	valueBegin();
768	if (V.Type == Value::T_Integer)
769	OS << *V.getAsInteger();
770	else if (V.Type == Value::T_UINT64)
771	OS << *V.getAsUINT64();
772	else
773	OS << format(Fmt: "%.g", Vals: std::numeric_limits<double*>::max_digits10,
774	Vals: *V.getAsNumber());
775	return;
776	case Value::String:
777	valueBegin();
778	quote(OS, S: *V.getAsString());
779	return;
780	case Value::Array:
781	return array(Contents: [&] {
782	for (const Value &E : *V.getAsArray())
783	value(V: E);
784	});
785	case Value::Object:
786	return object(Contents: [&] {
787	for (const Object::value_type E : sortedElements(O: V.getAsObject()))
788	attribute(Key: E->first, Contents: E->second);
789	});
790	}
791	}
792
793	void llvm::json::OStream::valueBegin() {
794	assert(Stack.back().Ctx != Object && "Only attributes allowed here");
795	if (Stack.back().HasValue) {
796	assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
797	OS << `','`;
798	}
799	if (Stack.back().Ctx == Array)
800	newline();
801	flushComment();
802	Stack.back().HasValue = true;
803	}
804
805	void OStream::comment(llvm::StringRef Comment) {
806	assert(PendingComment.empty() && "Only one comment per value!");
807	PendingComment = Comment;
808	}
809
810	void OStream::flushComment() {
811	if (PendingComment.empty())
812	return;
813	OS << (IndentSize ? "/* " : "/*");
814	// Be sure not to accidentally emit "/". Transform to "* /".*
815	while (!PendingComment.empty()) {
816	auto Pos = PendingComment.find(Str: "*/");
817	if (Pos == StringRef::npos) {
818	OS << PendingComment;
819	PendingComment = "";
820	} else {
821	OS << PendingComment.take_front(N: Pos) << "* /";
822	PendingComment = PendingComment.drop_front(N: Pos + `2`);
823	}
824	}
825	OS << (IndentSize ? " /" : "/");
826	// Comments are on their own line unless attached to an attribute value.
827	if (Stack.size() > `1` && Stack.back().Ctx == Singleton) {
828	if (IndentSize)
829	OS << `' '`;
830	} else {
831	newline();
832	}
833	}
834
835	void llvm::json::OStream::newline() {
836	if (IndentSize) {
837	OS.write(C: `'\n'`);
838	OS.indent(NumSpaces: Indent);
839	}
840	}
841
842	void llvm::json::OStream::arrayBegin() {
843	valueBegin();
844	Stack.emplace_back();
845	Stack.back().Ctx = Array;
846	Indent += IndentSize;
847	OS << `'['`;
848	}
849
850	void llvm::json::OStream::arrayEnd() {
851	assert(Stack.back().Ctx == Array);
852	Indent -= IndentSize;
853	if (Stack.back().HasValue)
854	newline();
855	OS << `']'`;
856	assert(PendingComment.empty());
857	Stack.pop_back();
858	assert(!Stack.empty());
859	}
860
861	void llvm::json::OStream::objectBegin() {
862	valueBegin();
863	Stack.emplace_back();
864	Stack.back().Ctx = Object;
865	Indent += IndentSize;
866	OS << `'{'`;
867	}
868
869	void llvm::json::OStream::objectEnd() {
870	assert(Stack.back().Ctx == Object);
871	Indent -= IndentSize;
872	if (Stack.back().HasValue)
873	newline();
874	OS << `'}'`;
875	assert(PendingComment.empty());
876	Stack.pop_back();
877	assert(!Stack.empty());
878	}
879
880	void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
881	assert(Stack.back().Ctx == Object);
882	if (Stack.back().HasValue)
883	OS << `','`;
884	newline();
885	flushComment();
886	Stack.back().HasValue = true;
887	Stack.emplace_back();
888	Stack.back().Ctx = Singleton;
889	if (LLVM_LIKELY(isUTF8(Key))) {
890	quote(OS, S: Key);
891	} else {
892	assert(false && "Invalid UTF-8 in attribute key");
893	quote(OS, S: fixUTF8(S: Key));
894	}
895	OS.write(C: `':'`);
896	if (IndentSize)
897	OS.write(C: `' '`);
898	}
899
900	void llvm::json::OStream::attributeEnd() {
901	assert(Stack.back().Ctx == Singleton);
902	assert(Stack.back().HasValue && "Attribute must have a value");
903	assert(PendingComment.empty());
904	Stack.pop_back();
905	assert(Stack.back().Ctx == Object);
906	}
907
908	raw_ostream &llvm::json::OStream::rawValueBegin() {
909	valueBegin();
910	Stack.emplace_back();
911	Stack.back().Ctx = RawValue;
912	return OS;
913	}
914
915	void llvm::json::OStream::rawValueEnd() {
916	assert(Stack.back().Ctx == RawValue);
917	Stack.pop_back();
918	}
919
920	} // namespace json
921	} // namespace llvm
922
923	void llvm::format_provider<llvm::json::Value>::format(
924	const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
925	unsigned IndentAmount = `0`;
926	if (!Options.empty() && Options.getAsInteger(/Radix=/`10`, Result&: IndentAmount))
927	llvm_unreachable("json::Value format options should be an integer");
928	json::OStream (OS, IndentAmount).value(V: E);
929	}
930
931

Browse the source code of llvm_projects/llvm/lib/Support/JSON.cpp