JSON.cpp source code [llvm_projects/llvm/lib/Support/JSON.cpp]

//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===---------------------------------------------------------------------===//
8
9	#include "llvm/Support/JSON.h"
10	#include "llvm/ADT/STLExtras.h"
11	#include "llvm/ADT/StringExtras.h"
12	#include "llvm/Support/ConvertUTF.h"
13	#include "llvm/Support/Error.h"
14	#include "llvm/Support/Format.h"
15	#include "llvm/Support/NativeFormatting.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <cctype>
18	#include <cerrno>
19	#include <optional>
20
21	namespace llvm {
22	namespace json {
23
24	Value &Object::operator[](const ObjectKey &K) {
25	return try_emplace(K, Args: nullptr).first ->getSecond();
26	}
27	Value &Object::operator[](ObjectKey &&K) {
28	return try_emplace(K: std::move(K), Args: nullptr).first ->getSecond();
29	}
30	Value *Object::get(StringRef K) {
31	auto I = find(K);
32	if (I == end())
33	return nullptr;
34	return &I ->second;
35	}
36	const Value Object::get(StringRef K) const* {
37	auto I = find(K);
38	if (I == end())
39	return nullptr;
40	return &I ->second;
41	}
42	std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
43	if (auto *V = get(K))
44	return V->getAsNull();
45	return std::nullopt;
46	}
47	std::optional<bool> Object::getBoolean(StringRef K) const {
48	if (auto *V = get(K))
49	return V->getAsBoolean();
50	return std::nullopt;
51	}
52	std::optional<double> Object::getNumber(StringRef K) const {
53	if (auto *V = get(K))
54	return V->getAsNumber();
55	return std::nullopt;
56	}
57	std::optional<int64_t> Object::getInteger(StringRef K) const {
58	if (auto *V = get(K))
59	return V->getAsInteger();
60	return std::nullopt;
61	}
62	std::optional<llvm::StringRef> Object::getString(StringRef K) const {
63	if (auto *V = get(K))
64	return V->getAsString();
65	return std::nullopt;
66	}
67	const json::Object Object::getObject(StringRef K) const* {
68	if (auto *V = get(K))
69	return V->getAsObject();
70	return nullptr;
71	}
72	json::Object *Object::getObject(StringRef K) {
73	if (auto *V = get(K))
74	return V->getAsObject();
75	return nullptr;
76	}
77	const json::Array Object::getArray(StringRef K) const* {
78	if (auto *V = get(K))
79	return V->getAsArray();
80	return nullptr;
81	}
82	json::Array *Object::getArray(StringRef K) {
83	if (auto *V = get(K))
84	return V->getAsArray();
85	return nullptr;
86	}
87	bool operator==(const Object &LHS, const Object &RHS) { return LHS.M == RHS.M; }
88
89	Array::Array(std::initializer_list<Value> Elements) {
90	V.reserve(n: Elements.size());
91	for (const Value &V : Elements) {
92	emplace_back(A: nullptr);
93	back().moveFrom(M: std::move(V));
94	}
95	}
96
97	Value::Value(std::initializer_list<Value> Elements)
98	: Value (json::Array(Elements)) {}
99
100	void Value::copyFrom(const Value &M) {
101	Type = M.Type;
102	switch (Type) {
103	case T_Null:
104	case T_Boolean:
105	case T_Double:
106	case T_Integer:
107	case T_UINT64:
108	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
109	break;
110	case T_StringRef:
111	create<StringRef>(V&: M.as<StringRef>());
112	break;
113	case T_String:
114	create<std::string>(V&: M.as<std::string>());
115	break;
116	case T_Object:
117	create<json::Object>(V&: M.as<json::Object>());
118	break;
119	case T_Array:
120	create<json::Array>(V&: M.as<json::Array>());
121	break;
122	}
123	}
124
125	void Value::moveFrom(const Value &&M) {
126	Type = M.Type;
127	switch (Type) {
128	case T_Null:
129	case T_Boolean:
130	case T_Double:
131	case T_Integer:
132	case T_UINT64:
133	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
134	break;
135	case T_StringRef:
136	create<StringRef>(V&: M.as<StringRef>());
137	break;
138	case T_String:
139	create<std::string>(V: std::move(M.as<std::string>()));
140	break;
141	case T_Object:
142	create<json::Object>(V: std::move(M.as<json::Object>()));
143	break;
144	case T_Array:
145	create<json::Array>(V: std::move(M.as<json::Array>()));
146	break;
147	}
148	const_cast<Value &>(M).destroy();
149	M.Type = T_Null;
150	}
151
152	void Value::destroy() {
153	switch (Type) {
154	case T_Null:
155	case T_Boolean:
156	case T_Double:
157	case T_Integer:
158	case T_UINT64:
159	break;
160	case T_StringRef:
161	as<StringRef>().~StringRef();
162	break;
163	case T_String:
164	as<std::string>().~basic_string();
165	break;
166	case T_Object:
167	as<json::Object>().~Object();
168	break;
169	case T_Array:
170	as<json::Array>().~Array();
171	break;
172	}
173	}
174
175	void Value::print(llvm::raw_ostream &OS) const { OS << *this; }
176
177	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
178	LLVM_DUMP_METHOD void Value::dump() const {
179	print(llvm::dbgs());
180	llvm::dbgs() << `'\n'`;
181	}
182	#endif
183
184	bool operator==(const Value &L, const Value &R) {
185	if (L.kind() != R.kind())
186	return false;
187	switch (L.kind()) {
188	case Value::Null:
189	return L.getAsNull() == R.getAsNull();
190	case Value::Boolean:
191	return L.getAsBoolean() == R.getAsBoolean();
192	case Value::Number:
193	// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
194	// The same integer must convert to the same double, per the standard.
195	// However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
196	// So we avoid floating point promotion for exact comparisons.
197	if (L.Type == Value::T_Integer \|\| R.Type == Value::T_Integer)
198	return L.getAsInteger() == R.getAsInteger();
199	return L.getAsNumber() == R.getAsNumber();
200	case Value::String:
201	return L.getAsString() == R.getAsString();
202	case Value::Array:
203	return L.getAsArray() == R.getAsArray();
204	case Value::Object:
205	return L.getAsObject() == R.getAsObject();
206	}
207	llvm_unreachable("Unknown value kind");
208	}
209
210	void Path::report(llvm::StringLiteral Msg) {
211	// Walk up to the root context, and count the number of segments.
212	unsigned Count = `0`;
213	const Path *P;
214	for (P = this; P->Parent != nullptr; P = P->Parent)
215	++Count;
216	Path::Root *R = P->Seg.root();
217	// Fill in the error message and copy the path (in reverse order).
218	R->ErrorMessage = Msg;
219	R->ErrorPath.resize(new_size: Count);
220	auto It = R->ErrorPath.begin();
221	for (P = this; P->Parent != nullptr; P = P->Parent)
222	*It ++ = P->Seg;
223	}
224
225	Error Path::Root::getError() const {
226	std::string S;
227	raw_string_ostream OS(S);
228	OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
229	if (ErrorPath.empty()) {
230	if (!Name.empty())
231	OS << " when parsing " << Name;
232	} else {
233	OS << " at " << (Name.empty() ? "(root)" : Name);
234	for (const Path::Segment &S : llvm::reverse(C: ErrorPath)) {
235	if (S.isField())
236	OS << `'.'` << S.field();
237	else
238	OS << `'['` << S.index() << `']'`;
239	}
240	}
241	return createStringError(EC: llvm::inconvertibleErrorCode(), S);
242	}
243
244	std::vector<const Object::value_type > sortedElements(const* Object &O) {
245	std::vector<const Object::value_type *> Elements;
246	for (const auto &E : O)
247	Elements.push_back(x: &E);
248	llvm::sort(C&: Elements,
249	Comp: [](const Object::value_type L, const* Object::value_type *R) {
250	return L->first < R->first;
251	});
252	return Elements;
253	}
254
255	// Prints a one-line version of a value that isn't our main focus.
256	// We interleave writes to OS and JOS, exploiting the lack of extra buffering.
257	// This is OK as we own the implementation.
258	static void abbreviate(const Value &V, OStream &JOS) {
259	switch (V.kind()) {
260	case Value::Array:
261	JOS.rawValue(Contents: V.getAsArray()->empty() ? "[]" : "[ ... ]");
262	break;
263	case Value::Object:
264	JOS.rawValue(Contents: V.getAsObject()->empty() ? "{}" : "{ ... }");
265	break;
266	case Value::String: {
267	llvm::StringRef S = *V.getAsString();
268	if (S.size() < `40`) {
269	JOS.value(V);
270	} else {
271	std::string Truncated = fixUTF8(S: S.take_front(N: `37`));
272	Truncated.append(s: "...");
273	JOS.value(V: Truncated);
274	}
275	break;
276	}
277	default:
278	JOS.value(V);
279	}
280	}
281
282	// Prints a semi-expanded version of a value that is our main focus.
283	// Array/Object entries are printed, but not recursively as they may be huge.
284	static void abbreviateChildren(const Value &V, OStream &JOS) {
285	switch (V.kind()) {
286	case Value::Array:
287	JOS.array(Contents: [&] {
288	for (const auto &I : *V.getAsArray())
289	abbreviate(V: I, JOS);
290	});
291	break;
292	case Value::Object:
293	JOS.object(Contents: [&] {
294	for (const auto KV : sortedElements(O: V.getAsObject())) {
295	JOS.attributeBegin(Key: KV->first);
296	abbreviate(V: KV->second, JOS);
297	JOS.attributeEnd();
298	}
299	});
300	break;
301	default:
302	JOS.value(V);
303	}
304	}
305
306	void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
307	OStream JOS(OS, /IndentSize=/`2`);
308	// PrintValue recurses down the path, printing the ancestors of our target.
309	// Siblings of nodes along the path are printed with abbreviate(), and the
310	// target itself is printed with the somewhat richer abbreviateChildren().
311	// 'Recurse' is the lambda itself, to allow recursive calls.
312	auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
313	// Print the target node itself, with the error as a comment.
314	// Also used if we can't follow our path, e.g. it names a field that
315	// should* exist but doesn't.*
316	auto HighlightCurrent = [&] {
317	std::string Comment = "error: ";
318	Comment.append(s: ErrorMessage.data(), n: ErrorMessage.size());
319	JOS.comment(Comment);
320	abbreviateChildren(V, JOS);
321	};
322	if (Path.empty()) // We reached our target.
323	return HighlightCurrent();
324	const Segment &S = Path.back(); // Path is in reverse order.
325	if (S.isField()) {
326	// Current node is an object, path names a field.
327	llvm::StringRef FieldName = S.field();
328	const Object *O = V.getAsObject();
329	if (!O \|\| !O->get(K: FieldName))
330	return HighlightCurrent();
331	JOS.object(Contents: [&] {
332	for (const auto KV : sortedElements(O: O)) {
333	JOS.attributeBegin(Key: KV->first);
334	if (FieldName == StringRef(KV->first))
335	Recurse(KV->second, Path.drop_back(), Recurse);
336	else
337	abbreviate(V: KV->second, JOS);
338	JOS.attributeEnd();
339	}
340	});
341	} else {
342	// Current node is an array, path names an element.
343	const Array *A = V.getAsArray();
344	if (!A \|\| S.index() >= A->size())
345	return HighlightCurrent();
346	JOS.array(Contents: [&] {
347	unsigned Current = `0`;
348	for (const auto &V : *A) {
349	if (Current++ == S.index())
350	Recurse(V, Path.drop_back(), Recurse);
351	else
352	abbreviate(V, JOS);
353	}
354	});
355	}
356	};
357	PrintValue (R, ErrorPath, PrintValue);
358	}
359
360	namespace {
361	// Simple recursive-descent JSON parser.
362	class Parser {
363	public:
364	Parser(StringRef JSON)
365	: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
366
367	bool checkUTF8() {
368	size_t ErrOffset;
369	if (isUTF8(S: StringRef (Start, End - Start), ErrOffset: &ErrOffset))
370	return true;
371	P = Start + ErrOffset; // For line/column calculation.
372	return parseError(Msg: "Invalid UTF-8 sequence");
373	}
374
375	bool parseValue(Value &Out);
376
377	bool assertEnd() {
378	eatWhitespace();
379	if (P == End)
380	return true;
381	return parseError(Msg: "Text after end of document");
382	}
383
384	Error takeError() {
385	assert(Err);
386	return std::move(*Err);
387	}
388
389	private:
390	void eatWhitespace() {
391	while (P != End && (P == `' '` \|\| P == `'\r'` \|\| P == `'\n'` \|\| P == `'\t'`))
392	++P;
393	}
394
395	// On invalid syntax, parseX() functions return false and set Err.
396	bool parseNumber(char First, Value &Out);
397	bool parseString(std::string &Out);
398	bool parseUnicode(std::string &Out);
399	bool parseError(const char Msg); // always returns false*
400
401	char next() { return P == End ? `0` : *P++; }
402	char peek() { return P == End ? `0` : *P; }
403	static bool isNumber(char C) {
404	return C == `'0'` \|\| C == `'1'` \|\| C == `'2'` \|\| C == `'3'` \|\| C == `'4'` \|\|
405	C == `'5'` \|\| C == `'6'` \|\| C == `'7'` \|\| C == `'8'` \|\| C == `'9'` \|\|
406	C == `'e'` \|\| C == `'E'` \|\| C == `'+'` \|\| C == `'-'` \|\| C == `'.'`;
407	}
408
409	std::optional<Error> Err;
410	const char Start, P, *End;
411	};
412	} // namespace
413
414	bool Parser::parseValue(Value &Out) {
415	eatWhitespace();
416	if (P == End)
417	return parseError(Msg: "Unexpected EOF");
418	switch (char C = next()) {
419	// Bare null/true/false are easy - first char identifies them.
420	case `'n'`:
421	Out = nullptr;
422	return (next() == `'u'` && next() == `'l'` && next() == `'l'`) \|\|
423	parseError(Msg: "Invalid JSON value (null?)");
424	case `'t'`:
425	Out = true;
426	return (next() == `'r'` && next() == `'u'` && next() == `'e'`) \|\|
427	parseError(Msg: "Invalid JSON value (true?)");
428	case `'f'`:
429	Out = false;
430	return (next() == `'a'` && next() == `'l'` && next() == `'s'` && next() == `'e'`) \|\|
431	parseError(Msg: "Invalid JSON value (false?)");
432	case `'"'`: {
433	std::string S;
434	if (parseString(Out&: S)) {
435	Out = std::move(S);
436	return true;
437	}
438	return false;
439	}
440	case `'['`: {
441	Out = Array {};
442	Array &A = *Out.getAsArray();
443	eatWhitespace();
444	if (peek() == `']'`) {
445	++P;
446	return true;
447	}
448	for (;;) {
449	A.emplace_back(A: nullptr);
450	if (!parseValue(Out&: A.back()))
451	return false;
452	eatWhitespace();
453	switch (next()) {
454	case `','`:
455	eatWhitespace();
456	continue;
457	case `']'`:
458	return true;
459	default:
460	return parseError(Msg: "Expected , or ] after array element");
461	}
462	}
463	}
464	case `'{'`: {
465	Out = Object {};
466	Object &O = *Out.getAsObject();
467	eatWhitespace();
468	if (peek() == `'}'`) {
469	++P;
470	return true;
471	}
472	for (;;) {
473	if (next() != `'"'`)
474	return parseError(Msg: "Expected object key");
475	std::string K;
476	if (!parseString(Out&: K))
477	return false;
478	eatWhitespace();
479	if (next() != `':'`)
480	return parseError(Msg: "Expected : after object key");
481	eatWhitespace();
482	if (!parseValue(Out&: O [std::move(K)]))
483	return false;
484	eatWhitespace();
485	switch (next()) {
486	case `','`:
487	eatWhitespace();
488	continue;
489	case `'}'`:
490	return true;
491	default:
492	return parseError(Msg: "Expected , or } after object property");
493	}
494	}
495	}
496	default:
497	if (isNumber(C))
498	return parseNumber(First: C, Out);
499	return parseError(Msg: "Invalid JSON value");
500	}
501	}
502
503	bool Parser::parseNumber(char First, Value &Out) {
504	// Read the number into a string. (Must be null-terminated for strto).*
505	SmallString<`24`> S;
506	S.push_back(Elt: First);
507	while (isNumber(C: peek()))
508	S.push_back(Elt: next());
509	char *End;
510	// Try first to parse as integer, and if so preserve full 64 bits.
511	// We check for errno for out of bounds errors and for End == S.end()
512	// to make sure that the numeric string is not malformed.
513	errno = `0`;
514	int64_t I = std::strtoll(nptr: S.c_str(), endptr: &End, base: `10`);
515	if (End == S.end() && errno != ERANGE) {
516	Out = I;
517	return true;
518	}
519	// strtroull has a special handling for negative numbers, but in this
520	// case we don't want to do that because negative numbers were already
521	// handled in the previous block.
522	if (First != `'-'`) {
523	errno = `0`;
524	uint64_t UI = std::strtoull(nptr: S.c_str(), endptr: &End, base: `10`);
525	if (End == S.end() && errno != ERANGE) {
526	Out = UI;
527	return true;
528	}
529	}
530	// If it's not an integer
531	Out = std::strtod(nptr: S.c_str(), endptr: &End);
532	return End == S.end() \|\| parseError(Msg: "Invalid JSON value (number?)");
533	}
534
535	bool Parser::parseString(std::string &Out) {
536	// leading quote was already consumed.
537	for (char C = next(); C != `'"'`; C = next()) {
538	if (LLVM_UNLIKELY(P == End))
539	return parseError(Msg: "Unterminated string");
540	if (LLVM_UNLIKELY((C & `0x1f`) == C))
541	return parseError(Msg: "Control character in string");
542	if (LLVM_LIKELY(C != `'\\'`)) {
543	Out.push_back(c: C);
544	continue;
545	}
546	// Handle escape sequence.
547	switch (C = next()) {
548	case `'"'`:
549	case `'\\'`:
550	case `'/'`:
551	Out.push_back(c: C);
552	break;
553	case `'b'`:
554	Out.push_back(c: `'\b'`);
555	break;
556	case `'f'`:
557	Out.push_back(c: `'\f'`);
558	break;
559	case `'n'`:
560	Out.push_back(c: `'\n'`);
561	break;
562	case `'r'`:
563	Out.push_back(c: `'\r'`);
564	break;
565	case `'t'`:
566	Out.push_back(c: `'\t'`);
567	break;
568	case `'u'`:
569	if (!parseUnicode(Out))
570	return false;
571	break;
572	default:
573	return parseError(Msg: "Invalid escape sequence");
574	}
575	}
576	return true;
577	}
578
579	static void encodeUtf8(uint32_t Rune, std::string &Out) {
580	if (Rune < `0x80`) {
581	Out.push_back(c: Rune & `0x7F`);
582	} else if (Rune < `0x800`) {
583	uint8_t FirstByte = `0xC0` \| ((Rune & `0x7C0`) >> `6`);
584	uint8_t SecondByte = `0x80` \| (Rune & `0x3F`);
585	Out.push_back(c: FirstByte);
586	Out.push_back(c: SecondByte);
587	} else if (Rune < `0x10000`) {
588	uint8_t FirstByte = `0xE0` \| ((Rune & `0xF000`) >> `12`);
589	uint8_t SecondByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
590	uint8_t ThirdByte = `0x80` \| (Rune & `0x3F`);
591	Out.push_back(c: FirstByte);
592	Out.push_back(c: SecondByte);
593	Out.push_back(c: ThirdByte);
594	} else if (Rune < `0x110000`) {
595	uint8_t FirstByte = `0xF0` \| ((Rune & `0x1F0000`) >> `18`);
596	uint8_t SecondByte = `0x80` \| ((Rune & `0x3F000`) >> `12`);
597	uint8_t ThirdByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
598	uint8_t FourthByte = `0x80` \| (Rune & `0x3F`);
599	Out.push_back(c: FirstByte);
600	Out.push_back(c: SecondByte);
601	Out.push_back(c: ThirdByte);
602	Out.push_back(c: FourthByte);
603	} else {
604	llvm_unreachable("Invalid codepoint");
605	}
606	}
607
608	// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
609	// May parse several sequential escapes to ensure proper surrogate handling.
610	// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
611	// These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
612	bool Parser::parseUnicode(std::string &Out) {
613	// Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
614	auto Invalid = [&] { Out.append(/ UTF-8 / l: {`'\xef'`, `'\xbf'`, `'\xbd'`}); };
615	// Decodes 4 hex digits from the stream into Out, returns false on error.
616	auto Parse4Hex = [this](uint16_t &Out) -> bool {
617	Out = `0`;
618	char Bytes[] = {next(), next(), next(), next()};
619	for (unsigned char C : Bytes) {
620	if (!std::isxdigit(C))
621	return parseError(Msg: "Invalid \\u escape sequence");
622	Out <<= `4`;
623	Out \|= (C > `'9'`) ? (C & ~`0x20`) - `'A'` + `10` : (C - `'0'`);
624	}
625	return true;
626	};
627	uint16_t First; // UTF-16 code unit from the first \u escape.
628	if (!Parse4Hex (First))
629	return false;
630
631	// We loop to allow proper surrogate-pair error handling.
632	while (true) {
633	// Case 1: the UTF-16 code unit is already a codepoint in the BMP.
634	if (LLVM_LIKELY(First < `0xD800` \|\| First >= `0xE000`)) {
635	encodeUtf8(Rune: First, Out);
636	return true;
637	}
638
639	// Case 2: it's an (unpaired) trailing surrogate.
640	if (LLVM_UNLIKELY(First >= `0xDC00`)) {
641	Invalid ();
642	return true;
643	}
644
645	// Case 3: it's a leading surrogate. We expect a trailing one next.
646	// Case 3a: there's no trailing \u escape. Don't advance in the stream.
647	if (LLVM_UNLIKELY(P + `2` > End \|\| P != `'\\'` \|\| (P + `1`) != `'u'`)) {
648	Invalid (); // Leading surrogate was unpaired.
649	return true;
650	}
651	P += `2`;
652	uint16_t Second;
653	if (!Parse4Hex (Second))
654	return false;
655	// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
656	if (LLVM_UNLIKELY(Second < `0xDC00` \|\| Second >= `0xE000`)) {
657	Invalid (); // Leading surrogate was unpaired.
658	First = Second; // Second escape still needs to be processed.
659	continue;
660	}
661	// Case 3c: a valid surrogate pair encoding an astral codepoint.
662	encodeUtf8(Rune: `0x10000` \| ((First - `0xD800`) << `10`) \| (Second - `0xDC00`), Out);
663	return true;
664	}
665	}
666
667	bool Parser::parseError(const char *Msg) {
668	int Line = `1`;
669	const char *StartOfLine = Start;
670	for (const char *X = Start; X < P; ++X) {
671	if (*X == `0x0A`) {
672	++Line;
673	StartOfLine = X + `1`;
674	}
675	}
676	Err.emplace(
677	args: std::make_unique<ParseError>(args&: Msg, args&: Line, args: P - StartOfLine, args: P - Start));
678	return false;
679	}
680
681	Expected<Value> parse(StringRef JSON) {
682	Parser P(JSON);
683	Value E = nullptr;
684	if (P.checkUTF8())
685	if (P.parseValue(Out&: E))
686	if (P.assertEnd())
687	return std::move(E);
688	return P.takeError();
689	}
690
691	char ParseError::ID = `0`;
692
693	// Defined out-of-line to place vtable in this compilation unit.
694	void ParseError::log(llvm::raw_ostream &OS) const {
695	OS << llvm::formatv(Fmt: "[{0}:{1}, byte={2}]: {3}", Vals: Line, Vals: Column, Vals: Offset, Vals: Msg);
696	}
697
698	bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
699	// Fast-path for ASCII, which is valid UTF-8.
700	if (LLVM_LIKELY(isASCII(S)))
701	return true;
702
703	const UTF8 Data = reinterpret_cast<const* UTF8 >(S.data()), Rest = Data;
704	if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
705	return true;
706
707	if (ErrOffset)
708	*ErrOffset = Rest - Data;
709	return false;
710	}
711
712	std::string fixUTF8(llvm::StringRef S) {
713	// This isn't particularly efficient, but is only for error-recovery.
714	std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
715	const UTF8 In8 = reinterpret_cast<const* UTF8 *>(S.data());
716	UTF32 *Out32 = Codepoints.data();
717	ConvertUTF8toUTF32(sourceStart: &In8, sourceEnd: In8 + S.size(), targetStart: &Out32, targetEnd: Out32 + Codepoints.size(),
718	flags: lenientConversion);
719	Codepoints.resize(new_size: Out32 - Codepoints.data());
720	std::string Res(`4` * Codepoints.size(), `0`); // 4 bytes per codepoint suffice
721	const UTF32 *In32 = Codepoints.data();
722	UTF8 Out8 = reinterpret_cast<UTF8 >(&Res [`0`]);
723	ConvertUTF32toUTF8(sourceStart: &In32, sourceEnd: In32 + Codepoints.size(), targetStart: &Out8, targetEnd: Out8 + Res.size(),
724	flags: strictConversion);
725	Res.resize(n: reinterpret_cast<char *>(Out8) - Res.data());
726	return Res;
727	}
728
729	static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
730	OS << `'\"'`;
731	for (unsigned char C : S) {
732	if (C == `0x22` \|\| C == `0x5C`)
733	OS << `'\\'`;
734	if (C >= `0x20`) {
735	OS << C;
736	continue;
737	}
738	OS << `'\\'`;
739	switch (C) {
740	// A few characters are common enough to make short escapes worthwhile.
741	case `'\t'`:
742	OS << `'t'`;
743	break;
744	case `'\n'`:
745	OS << `'n'`;
746	break;
747	case `'\r'`:
748	OS << `'r'`;
749	break;
750	default:
751	OS << `'u'`;
752	llvm::write_hex(S&: OS, N: C, Style: llvm::HexPrintStyle::Lower, Width: `4`);
753	break;
754	}
755	}
756	OS << `'\"'`;
757	}
758
759	void llvm::json::OStream::value(const Value &V) {
760	switch (V.kind()) {
761	case Value::Null:
762	valueBegin();
763	OS << "null";
764	return;
765	case Value::Boolean:
766	valueBegin();
767	OS << (*V.getAsBoolean() ? "true" : "false");
768	return;
769	case Value::Number:
770	valueBegin();
771	if (V.Type == Value::T_Integer)
772	OS << *V.getAsInteger();
773	else if (V.Type == Value::T_UINT64)
774	OS << *V.getAsUINT64();
775	else
776	OS << format(Fmt: "%.g", Vals: std::numeric_limits<double*>::max_digits10,
777	Vals: *V.getAsNumber());
778	return;
779	case Value::String:
780	valueBegin();
781	quote(OS, S: *V.getAsString());
782	return;
783	case Value::Array:
784	return array(Contents: [&] {
785	for (const Value &E : *V.getAsArray())
786	value(V: E);
787	});
788	case Value::Object:
789	return object(Contents: [&] {
790	for (const Object::value_type E : sortedElements(O: V.getAsObject()))
791	attribute(Key: E->first, Contents: E->second);
792	});
793	}
794	}
795
796	void llvm::json::OStream::valueBegin() {
797	assert(Stack.back().Ctx != Object && "Only attributes allowed here");
798	if (Stack.back().HasValue) {
799	assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
800	OS << `','`;
801	}
802	if (Stack.back().Ctx == Array)
803	newline();
804	flushComment();
805	Stack.back().HasValue = true;
806	}
807
808	void OStream::comment(llvm::StringRef Comment) {
809	assert(PendingComment.empty() && "Only one comment per value!");
810	PendingComment = Comment;
811	}
812
813	void OStream::flushComment() {
814	if (PendingComment.empty())
815	return;
816	OS << (IndentSize ? "/* " : "/*");
817	// Be sure not to accidentally emit "/". Transform to "* /".*
818	while (!PendingComment.empty()) {
819	auto Pos = PendingComment.find(Str: "*/");
820	if (Pos == StringRef::npos) {
821	OS << PendingComment;
822	PendingComment = "";
823	} else {
824	OS << PendingComment.take_front(N: Pos) << "* /";
825	PendingComment = PendingComment.drop_front(N: Pos + `2`);
826	}
827	}
828	OS << (IndentSize ? " /" : "/");
829	// Comments are on their own line unless attached to an attribute value.
830	if (Stack.size() > `1` && Stack.back().Ctx == Singleton) {
831	if (IndentSize)
832	OS << `' '`;
833	} else {
834	newline();
835	}
836	}
837
838	void llvm::json::OStream::newline() {
839	if (IndentSize) {
840	OS.write(C: `'\n'`);
841	OS.indent(NumSpaces: Indent);
842	}
843	}
844
845	void llvm::json::OStream::arrayBegin() {
846	valueBegin();
847	Stack.emplace_back();
848	Stack.back().Ctx = Array;
849	Indent += IndentSize;
850	OS << `'['`;
851	}
852
853	void llvm::json::OStream::arrayEnd() {
854	assert(Stack.back().Ctx == Array);
855	Indent -= IndentSize;
856	if (Stack.back().HasValue)
857	newline();
858	OS << `']'`;
859	assert(PendingComment.empty());
860	Stack.pop_back();
861	assert(!Stack.empty());
862	}
863
864	void llvm::json::OStream::objectBegin() {
865	valueBegin();
866	Stack.emplace_back();
867	Stack.back().Ctx = Object;
868	Indent += IndentSize;
869	OS << `'{'`;
870	}
871
872	void llvm::json::OStream::objectEnd() {
873	assert(Stack.back().Ctx == Object);
874	Indent -= IndentSize;
875	if (Stack.back().HasValue)
876	newline();
877	OS << `'}'`;
878	assert(PendingComment.empty());
879	Stack.pop_back();
880	assert(!Stack.empty());
881	}
882
883	void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
884	assert(Stack.back().Ctx == Object);
885	if (Stack.back().HasValue)
886	OS << `','`;
887	newline();
888	flushComment();
889	Stack.back().HasValue = true;
890	Stack.emplace_back();
891	Stack.back().Ctx = Singleton;
892	if (LLVM_LIKELY(isUTF8(Key))) {
893	quote(OS, S: Key);
894	} else {
895	assert(false && "Invalid UTF-8 in attribute key");
896	quote(OS, S: fixUTF8(S: Key));
897	}
898	OS.write(C: `':'`);
899	if (IndentSize)
900	OS.write(C: `' '`);
901	}
902
903	void llvm::json::OStream::attributeEnd() {
904	assert(Stack.back().Ctx == Singleton);
905	assert(Stack.back().HasValue && "Attribute must have a value");
906	assert(PendingComment.empty());
907	Stack.pop_back();
908	assert(Stack.back().Ctx == Object);
909	}
910
911	raw_ostream &llvm::json::OStream::rawValueBegin() {
912	valueBegin();
913	Stack.emplace_back();
914	Stack.back().Ctx = RawValue;
915	return OS;
916	}
917
918	void llvm::json::OStream::rawValueEnd() {
919	assert(Stack.back().Ctx == RawValue);
920	Stack.pop_back();
921	}
922
923	} // namespace json
924	} // namespace llvm
925
926	void llvm::format_provider<llvm::json::Value>::format(
927	const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
928	unsigned IndentAmount = `0`;
929	if (!Options.empty() && Options.getAsInteger(/Radix=/`10`, Result&: IndentAmount))
930	llvm_unreachable("json::Value format options should be an integer");
931	json::OStream(OS, IndentAmount).value(V: E);
932	}
933
934

Browse the source code of llvm_projects/llvm/lib/Support/JSON.cpp