JSON.cpp source code [llvm_projects/llvm/lib/Support/JSON.cpp]

1	//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===---------------------------------------------------------------------===//
8
9	#include "llvm/Support/JSON.h"
10	#include "llvm/ADT/STLExtras.h"
11	#include "llvm/ADT/StringExtras.h"
12	#include "llvm/Support/ConvertUTF.h"
13	#include "llvm/Support/Error.h"
14	#include "llvm/Support/Format.h"
15	#include "llvm/Support/NativeFormatting.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <cctype>
18	#include <cerrno>
19	#include <optional>
20
21	namespace llvm {
22	namespace json {
23
24	Value &Object::operator[](const ObjectKey &K) {
25	return try_emplace(K, Args: nullptr).first ->getSecond();
26	}
27	Value &Object::operator[](ObjectKey &&K) {
28	return try_emplace(K: std::move(K), Args: nullptr).first ->getSecond();
29	}
30	Value *Object::get(StringRef K) {
31	auto I = find(K);
32	if (I == end())
33	return nullptr;
34	return &I ->second;
35	}
36	const Value Object::get(StringRef K) const* {
37	auto I = find(K);
38	if (I == end())
39	return nullptr;
40	return &I ->second;
41	}
42	std::optional<std::nullptr_t> Object::getNull(StringRef K) const {
43	if (auto *V = get(K))
44	return V->getAsNull();
45	return std::nullopt;
46	}
47	std::optional<bool> Object::getBoolean(StringRef K) const {
48	if (auto *V = get(K))
49	return V->getAsBoolean();
50	return std::nullopt;
51	}
52	std::optional<double> Object::getNumber(StringRef K) const {
53	if (auto *V = get(K))
54	return V->getAsNumber();
55	return std::nullopt;
56	}
57	std::optional<int64_t> Object::getInteger(StringRef K) const {
58	if (auto *V = get(K))
59	return V->getAsInteger();
60	return std::nullopt;
61	}
62	std::optional<llvm::StringRef> Object::getString(StringRef K) const {
63	if (auto *V = get(K))
64	return V->getAsString();
65	return std::nullopt;
66	}
67	const json::Object Object::getObject(StringRef K) const* {
68	if (auto *V = get(K))
69	return V->getAsObject();
70	return nullptr;
71	}
72	json::Object *Object::getObject(StringRef K) {
73	if (auto *V = get(K))
74	return V->getAsObject();
75	return nullptr;
76	}
77	const json::Array Object::getArray(StringRef K) const* {
78	if (auto *V = get(K))
79	return V->getAsArray();
80	return nullptr;
81	}
82	json::Array *Object::getArray(StringRef K) {
83	if (auto *V = get(K))
84	return V->getAsArray();
85	return nullptr;
86	}
87	bool operator==(const Object &LHS, const Object &RHS) {
88	if (LHS.size() != RHS.size())
89	return false;
90	for (const auto &L : LHS) {
91	auto R = RHS.find(K: L.first);
92	if (R == RHS.end() \|\| L.second != R ->second)
93	return false;
94	}
95	return true;
96	}
97
98	Array::Array(std::initializer_list<Value> Elements) {
99	V.reserve(n: Elements.size());
100	for (const Value &V : Elements) {
101	emplace_back(A: nullptr);
102	back().moveFrom(M: std::move(V));
103	}
104	}
105
106	Value::Value(std::initializer_list<Value> Elements)
107	: Value (json::Array (Elements)) {}
108
109	void Value::copyFrom(const Value &M) {
110	Type = M.Type;
111	switch (Type) {
112	case T_Null:
113	case T_Boolean:
114	case T_Double:
115	case T_Integer:
116	case T_UINT64:
117	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
118	break;
119	case T_StringRef:
120	create<StringRef>(V&: M.as<StringRef>());
121	break;
122	case T_String:
123	create<std::string>(V&: M.as<std::string>());
124	break;
125	case T_Object:
126	create<json::Object>(V&: M.as<json::Object>());
127	break;
128	case T_Array:
129	create<json::Array>(V&: M.as<json::Array>());
130	break;
131	}
132	}
133
134	void Value::moveFrom(const Value &&M) {
135	Type = M.Type;
136	switch (Type) {
137	case T_Null:
138	case T_Boolean:
139	case T_Double:
140	case T_Integer:
141	case T_UINT64:
142	memcpy(dest: &Union, src: &M.Union, n: sizeof(Union));
143	break;
144	case T_StringRef:
145	create<StringRef>(V&: M.as<StringRef>());
146	break;
147	case T_String:
148	create<std::string>(V: std::move(M.as<std::string>()));
149	M.Type = T_Null;
150	break;
151	case T_Object:
152	create<json::Object>(V: std::move(M.as<json::Object>()));
153	M.Type = T_Null;
154	break;
155	case T_Array:
156	create<json::Array>(V: std::move(M.as<json::Array>()));
157	M.Type = T_Null;
158	break;
159	}
160	}
161
162	void Value::destroy() {
163	switch (Type) {
164	case T_Null:
165	case T_Boolean:
166	case T_Double:
167	case T_Integer:
168	case T_UINT64:
169	break;
170	case T_StringRef:
171	as<StringRef>().~StringRef();
172	break;
173	case T_String:
174	as<std::string>().~basic_string();
175	break;
176	case T_Object:
177	as<json::Object>().~Object();
178	break;
179	case T_Array:
180	as<json::Array>().~Array();
181	break;
182	}
183	}
184
185	bool operator==(const Value &L, const Value &R) {
186	if (L.kind() != R.kind())
187	return false;
188	switch (L.kind()) {
189	case Value::Null:
190	return L.getAsNull() == R.getAsNull();
191	case Value::Boolean:
192	return L.getAsBoolean() == R.getAsBoolean();
193	case Value::Number:
194	// Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
195	// The same integer must convert to the same double, per the standard.
196	// However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
197	// So we avoid floating point promotion for exact comparisons.
198	if (L.Type == Value::T_Integer \|\| R.Type == Value::T_Integer)
199	return L.getAsInteger() == R.getAsInteger();
200	return L.getAsNumber() == R.getAsNumber();
201	case Value::String:
202	return L.getAsString() == R.getAsString();
203	case Value::Array:
204	return L.getAsArray() == R.getAsArray();
205	case Value::Object:
206	return L.getAsObject() == R.getAsObject();
207	}
208	llvm_unreachable("Unknown value kind");
209	}
210
211	void Path::report(llvm::StringLiteral Msg) {
212	// Walk up to the root context, and count the number of segments.
213	unsigned Count = `0`;
214	const Path *P;
215	for (P = this; P->Parent != nullptr; P = P->Parent)
216	++Count;
217	Path::Root *R = P->Seg.root();
218	// Fill in the error message and copy the path (in reverse order).
219	R->ErrorMessage = Msg;
220	R->ErrorPath.resize(new_size: Count);
221	auto It = R->ErrorPath.begin();
222	for (P = this; P->Parent != nullptr; P = P->Parent)
223	*It ++ = P->Seg;
224	}
225
226	Error Path::Root::getError() const {
227	std::string S;
228	raw_string_ostream OS(S);
229	OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
230	if (ErrorPath.empty()) {
231	if (!Name.empty())
232	OS << " when parsing " << Name;
233	} else {
234	OS << " at " << (Name.empty() ? "(root)" : Name);
235	for (const Path::Segment &S : llvm::reverse(C: ErrorPath)) {
236	if (S.isField())
237	OS << `'.'` << S.field();
238	else
239	OS << `'['` << S.index() << `']'`;
240	}
241	}
242	return createStringError(EC: llvm::inconvertibleErrorCode(), S);
243	}
244
245	std::vector<const Object::value_type > sortedElements(const* Object &O) {
246	std::vector<const Object::value_type *> Elements;
247	for (const auto &E : O)
248	Elements.push_back(x: &E);
249	llvm::sort(C&: Elements,
250	Comp: [](const Object::value_type L, const* Object::value_type *R) {
251	return L->first < R->first;
252	});
253	return Elements;
254	}
255
256	// Prints a one-line version of a value that isn't our main focus.
257	// We interleave writes to OS and JOS, exploiting the lack of extra buffering.
258	// This is OK as we own the implementation.
259	static void abbreviate(const Value &V, OStream &JOS) {
260	switch (V.kind()) {
261	case Value::Array:
262	JOS.rawValue(Contents: V.getAsArray()->empty() ? "[]" : "[ ... ]");
263	break;
264	case Value::Object:
265	JOS.rawValue(Contents: V.getAsObject()->empty() ? "{}" : "{ ... }");
266	break;
267	case Value::String: {
268	llvm::StringRef S = *V.getAsString();
269	if (S.size() < `40`) {
270	JOS.value(V);
271	} else {
272	std::string Truncated = fixUTF8(S: S.take_front(N: `37`));
273	Truncated.append(s: "...");
274	JOS.value(V: Truncated);
275	}
276	break;
277	}
278	default:
279	JOS.value(V);
280	}
281	}
282
283	// Prints a semi-expanded version of a value that is our main focus.
284	// Array/Object entries are printed, but not recursively as they may be huge.
285	static void abbreviateChildren(const Value &V, OStream &JOS) {
286	switch (V.kind()) {
287	case Value::Array:
288	JOS.array(Contents: [&] {
289	for (const auto &I : *V.getAsArray())
290	abbreviate(V: I, JOS);
291	});
292	break;
293	case Value::Object:
294	JOS.object(Contents: [&] {
295	for (const auto KV : sortedElements(O: V.getAsObject())) {
296	JOS.attributeBegin(Key: KV->first);
297	abbreviate(V: KV->second, JOS);
298	JOS.attributeEnd();
299	}
300	});
301	break;
302	default:
303	JOS.value(V);
304	}
305	}
306
307	void Path::Root::printErrorContext(const Value &R, raw_ostream &OS) const {
308	OStream JOS(OS, /IndentSize=/`2`);
309	// PrintValue recurses down the path, printing the ancestors of our target.
310	// Siblings of nodes along the path are printed with abbreviate(), and the
311	// target itself is printed with the somewhat richer abbreviateChildren().
312	// 'Recurse' is the lambda itself, to allow recursive calls.
313	auto PrintValue = [&](const Value &V, ArrayRef<Segment> Path, auto &Recurse) {
314	// Print the target node itself, with the error as a comment.
315	// Also used if we can't follow our path, e.g. it names a field that
316	// should* exist but doesn't.*
317	auto HighlightCurrent = [&] {
318	std::string Comment = "error: ";
319	Comment.append(s: ErrorMessage.data(), n: ErrorMessage.size());
320	JOS.comment(Comment);
321	abbreviateChildren(V, JOS);
322	};
323	if (Path.empty()) // We reached our target.
324	return HighlightCurrent();
325	const Segment &S = Path.back(); // Path is in reverse order.
326	if (S.isField()) {
327	// Current node is an object, path names a field.
328	llvm::StringRef FieldName = S.field();
329	const Object *O = V.getAsObject();
330	if (!O \|\| !O->get(K: FieldName))
331	return HighlightCurrent();
332	JOS.object(Contents: [&] {
333	for (const auto KV : sortedElements(O: O)) {
334	JOS.attributeBegin(Key: KV->first);
335	if (FieldName == StringRef(KV->first))
336	Recurse(KV->second, Path.drop_back(), Recurse);
337	else
338	abbreviate(V: KV->second, JOS);
339	JOS.attributeEnd();
340	}
341	});
342	} else {
343	// Current node is an array, path names an element.
344	const Array *A = V.getAsArray();
345	if (!A \|\| S.index() >= A->size())
346	return HighlightCurrent();
347	JOS.array(Contents: [&] {
348	unsigned Current = `0`;
349	for (const auto &V : *A) {
350	if (Current++ == S.index())
351	Recurse(V, Path.drop_back(), Recurse);
352	else
353	abbreviate(V, JOS);
354	}
355	});
356	}
357	};
358	PrintValue (R, ErrorPath, PrintValue);
359	}
360
361	namespace {
362	// Simple recursive-descent JSON parser.
363	class Parser {
364	public:
365	Parser(StringRef JSON)
366	: Start(JSON.begin()), P(JSON.begin()), End(JSON.end()) {}
367
368	bool checkUTF8() {
369	size_t ErrOffset;
370	if (isUTF8(S: StringRef (Start, End - Start), ErrOffset: &ErrOffset))
371	return true;
372	P = Start + ErrOffset; // For line/column calculation.
373	return parseError(Msg: "Invalid UTF-8 sequence");
374	}
375
376	bool parseValue(Value &Out);
377
378	bool assertEnd() {
379	eatWhitespace();
380	if (P == End)
381	return true;
382	return parseError(Msg: "Text after end of document");
383	}
384
385	Error takeError() {
386	assert(Err);
387	return std::move(*Err);
388	}
389
390	private:
391	void eatWhitespace() {
392	while (P != End && (P == `' '` \|\| P == `'\r'` \|\| P == `'\n'` \|\| P == `'\t'`))
393	++P;
394	}
395
396	// On invalid syntax, parseX() functions return false and set Err.
397	bool parseNumber(char First, Value &Out);
398	bool parseString(std::string &Out);
399	bool parseUnicode(std::string &Out);
400	bool parseError(const char Msg); // always returns false*
401
402	char next() { return P == End ? `0` : *P++; }
403	char peek() { return P == End ? `0` : *P; }
404	static bool isNumber(char C) {
405	return C == `'0'` \|\| C == `'1'` \|\| C == `'2'` \|\| C == `'3'` \|\| C == `'4'` \|\|
406	C == `'5'` \|\| C == `'6'` \|\| C == `'7'` \|\| C == `'8'` \|\| C == `'9'` \|\|
407	C == `'e'` \|\| C == `'E'` \|\| C == `'+'` \|\| C == `'-'` \|\| C == `'.'`;
408	}
409
410	std::optional<Error> Err;
411	const char Start, P, *End;
412	};
413	} // namespace
414
415	bool Parser::parseValue(Value &Out) {
416	eatWhitespace();
417	if (P == End)
418	return parseError(Msg: "Unexpected EOF");
419	switch (char C = next()) {
420	// Bare null/true/false are easy - first char identifies them.
421	case `'n'`:
422	Out = nullptr;
423	return (next() == `'u'` && next() == `'l'` && next() == `'l'`) \|\|
424	parseError(Msg: "Invalid JSON value (null?)");
425	case `'t'`:
426	Out = true;
427	return (next() == `'r'` && next() == `'u'` && next() == `'e'`) \|\|
428	parseError(Msg: "Invalid JSON value (true?)");
429	case `'f'`:
430	Out = false;
431	return (next() == `'a'` && next() == `'l'` && next() == `'s'` && next() == `'e'`) \|\|
432	parseError(Msg: "Invalid JSON value (false?)");
433	case `'"'`: {
434	std::string S;
435	if (parseString(Out&: S)) {
436	Out = std::move(S);
437	return true;
438	}
439	return false;
440	}
441	case `'['`: {
442	Out = Array {};
443	Array &A = *Out.getAsArray();
444	eatWhitespace();
445	if (peek() == `']'`) {
446	++P;
447	return true;
448	}
449	for (;;) {
450	A.emplace_back(A: nullptr);
451	if (!parseValue(Out&: A.back()))
452	return false;
453	eatWhitespace();
454	switch (next()) {
455	case `','`:
456	eatWhitespace();
457	continue;
458	case `']'`:
459	return true;
460	default:
461	return parseError(Msg: "Expected , or ] after array element");
462	}
463	}
464	}
465	case `'{'`: {
466	Out = Object {};
467	Object &O = *Out.getAsObject();
468	eatWhitespace();
469	if (peek() == `'}'`) {
470	++P;
471	return true;
472	}
473	for (;;) {
474	if (next() != `'"'`)
475	return parseError(Msg: "Expected object key");
476	std::string K;
477	if (!parseString(Out&: K))
478	return false;
479	eatWhitespace();
480	if (next() != `':'`)
481	return parseError(Msg: "Expected : after object key");
482	eatWhitespace();
483	if (!parseValue(Out&: O [std::move(K)]))
484	return false;
485	eatWhitespace();
486	switch (next()) {
487	case `','`:
488	eatWhitespace();
489	continue;
490	case `'}'`:
491	return true;
492	default:
493	return parseError(Msg: "Expected , or } after object property");
494	}
495	}
496	}
497	default:
498	if (isNumber(C))
499	return parseNumber(First: C, Out);
500	return parseError(Msg: "Invalid JSON value");
501	}
502	}
503
504	bool Parser::parseNumber(char First, Value &Out) {
505	// Read the number into a string. (Must be null-terminated for strto).*
506	SmallString<`24`> S;
507	S.push_back(Elt: First);
508	while (isNumber(C: peek()))
509	S.push_back(Elt: next());
510	char *End;
511	// Try first to parse as integer, and if so preserve full 64 bits.
512	// We check for errno for out of bounds errors and for End == S.end()
513	// to make sure that the numeric string is not malformed.
514	errno = `0`;
515	int64_t I = std::strtoll(nptr: S.c_str(), endptr: &End, base: `10`);
516	if (End == S.end() && errno != ERANGE) {
517	Out = int64_t(I);
518	return true;
519	}
520	// strtroull has a special handling for negative numbers, but in this
521	// case we don't want to do that because negative numbers were already
522	// handled in the previous block.
523	if (First != `'-'`) {
524	errno = `0`;
525	uint64_t UI = std::strtoull(nptr: S.c_str(), endptr: &End, base: `10`);
526	if (End == S.end() && errno != ERANGE) {
527	Out = UI;
528	return true;
529	}
530	}
531	// If it's not an integer
532	Out = std::strtod(nptr: S.c_str(), endptr: &End);
533	return End == S.end() \|\| parseError(Msg: "Invalid JSON value (number?)");
534	}
535
536	bool Parser::parseString(std::string &Out) {
537	// leading quote was already consumed.
538	for (char C = next(); C != `'"'`; C = next()) {
539	if (LLVM_UNLIKELY(P == End))
540	return parseError(Msg: "Unterminated string");
541	if (LLVM_UNLIKELY((C & `0x1f`) == C))
542	return parseError(Msg: "Control character in string");
543	if (LLVM_LIKELY(C != `'\\'`)) {
544	Out.push_back(c: C);
545	continue;
546	}
547	// Handle escape sequence.
548	switch (C = next()) {
549	case `'"'`:
550	case `'\\'`:
551	case `'/'`:
552	Out.push_back(c: C);
553	break;
554	case `'b'`:
555	Out.push_back(c: `'\b'`);
556	break;
557	case `'f'`:
558	Out.push_back(c: `'\f'`);
559	break;
560	case `'n'`:
561	Out.push_back(c: `'\n'`);
562	break;
563	case `'r'`:
564	Out.push_back(c: `'\r'`);
565	break;
566	case `'t'`:
567	Out.push_back(c: `'\t'`);
568	break;
569	case `'u'`:
570	if (!parseUnicode(Out))
571	return false;
572	break;
573	default:
574	return parseError(Msg: "Invalid escape sequence");
575	}
576	}
577	return true;
578	}
579
580	static void encodeUtf8(uint32_t Rune, std::string &Out) {
581	if (Rune < `0x80`) {
582	Out.push_back(c: Rune & `0x7F`);
583	} else if (Rune < `0x800`) {
584	uint8_t FirstByte = `0xC0` \| ((Rune & `0x7C0`) >> `6`);
585	uint8_t SecondByte = `0x80` \| (Rune & `0x3F`);
586	Out.push_back(c: FirstByte);
587	Out.push_back(c: SecondByte);
588	} else if (Rune < `0x10000`) {
589	uint8_t FirstByte = `0xE0` \| ((Rune & `0xF000`) >> `12`);
590	uint8_t SecondByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
591	uint8_t ThirdByte = `0x80` \| (Rune & `0x3F`);
592	Out.push_back(c: FirstByte);
593	Out.push_back(c: SecondByte);
594	Out.push_back(c: ThirdByte);
595	} else if (Rune < `0x110000`) {
596	uint8_t FirstByte = `0xF0` \| ((Rune & `0x1F0000`) >> `18`);
597	uint8_t SecondByte = `0x80` \| ((Rune & `0x3F000`) >> `12`);
598	uint8_t ThirdByte = `0x80` \| ((Rune & `0xFC0`) >> `6`);
599	uint8_t FourthByte = `0x80` \| (Rune & `0x3F`);
600	Out.push_back(c: FirstByte);
601	Out.push_back(c: SecondByte);
602	Out.push_back(c: ThirdByte);
603	Out.push_back(c: FourthByte);
604	} else {
605	llvm_unreachable("Invalid codepoint");
606	}
607	}
608
609	// Parse a UTF-16 \uNNNN escape sequence. "\u" has already been consumed.
610	// May parse several sequential escapes to ensure proper surrogate handling.
611	// We do not use ConvertUTF.h, it can't accept and replace unpaired surrogates.
612	// These are invalid Unicode but valid JSON (RFC 8259, section 8.2).
613	bool Parser::parseUnicode(std::string &Out) {
614	// Invalid UTF is not a JSON error (RFC 8529§8.2). It gets replaced by U+FFFD.
615	auto Invalid = [&] { Out.append(/ UTF-8 / l: {`'\xef'`, `'\xbf'`, `'\xbd'`}); };
616	// Decodes 4 hex digits from the stream into Out, returns false on error.
617	auto Parse4Hex = [this](uint16_t &Out) -> bool {
618	Out = `0`;
619	char Bytes[] = {next(), next(), next(), next()};
620	for (unsigned char C : Bytes) {
621	if (!std::isxdigit(C))
622	return parseError(Msg: "Invalid \\u escape sequence");
623	Out <<= `4`;
624	Out \|= (C > `'9'`) ? (C & ~`0x20`) - `'A'` + `10` : (C - `'0'`);
625	}
626	return true;
627	};
628	uint16_t First; // UTF-16 code unit from the first \u escape.
629	if (!Parse4Hex (First))
630	return false;
631
632	// We loop to allow proper surrogate-pair error handling.
633	while (true) {
634	// Case 1: the UTF-16 code unit is already a codepoint in the BMP.
635	if (LLVM_LIKELY(First < `0xD800` \|\| First >= `0xE000`)) {
636	encodeUtf8(Rune: First, Out);
637	return true;
638	}
639
640	// Case 2: it's an (unpaired) trailing surrogate.
641	if (LLVM_UNLIKELY(First >= `0xDC00`)) {
642	Invalid ();
643	return true;
644	}
645
646	// Case 3: it's a leading surrogate. We expect a trailing one next.
647	// Case 3a: there's no trailing \u escape. Don't advance in the stream.
648	if (LLVM_UNLIKELY(P + `2` > End \|\| P != `'\\'` \|\| (P + `1`) != `'u'`)) {
649	Invalid (); // Leading surrogate was unpaired.
650	return true;
651	}
652	P += `2`;
653	uint16_t Second;
654	if (!Parse4Hex (Second))
655	return false;
656	// Case 3b: there was another \u escape, but it wasn't a trailing surrogate.
657	if (LLVM_UNLIKELY(Second < `0xDC00` \|\| Second >= `0xE000`)) {
658	Invalid (); // Leading surrogate was unpaired.
659	First = Second; // Second escape still needs to be processed.
660	continue;
661	}
662	// Case 3c: a valid surrogate pair encoding an astral codepoint.
663	encodeUtf8(Rune: `0x10000` \| ((First - `0xD800`) << `10`) \| (Second - `0xDC00`), Out);
664	return true;
665	}
666	}
667
668	bool Parser::parseError(const char *Msg) {
669	int Line = `1`;
670	const char *StartOfLine = Start;
671	for (const char *X = Start; X < P; ++X) {
672	if (*X == `0x0A`) {
673	++Line;
674	StartOfLine = X + `1`;
675	}
676	}
677	Err.emplace(
678	args: std::make_unique<ParseError>(args&: Msg, args&: Line, args: P - StartOfLine, args: P - Start));
679	return false;
680	}
681
682	Expected<Value> parse(StringRef JSON) {
683	Parser P(JSON);
684	Value E = nullptr;
685	if (P.checkUTF8())
686	if (P.parseValue(Out&: E))
687	if (P.assertEnd())
688	return std::move(E);
689	return P.takeError();
690	}
691	char ParseError::ID = `0`;
692
693	bool isUTF8(llvm::StringRef S, size_t *ErrOffset) {
694	// Fast-path for ASCII, which is valid UTF-8.
695	if (LLVM_LIKELY(isASCII(S)))
696	return true;
697
698	const UTF8 Data = reinterpret_cast<const* UTF8 >(S.data()), Rest = Data;
699	if (LLVM_LIKELY(isLegalUTF8String(&Rest, Data + S.size())))
700	return true;
701
702	if (ErrOffset)
703	*ErrOffset = Rest - Data;
704	return false;
705	}
706
707	std::string fixUTF8(llvm::StringRef S) {
708	// This isn't particularly efficient, but is only for error-recovery.
709	std::vector<UTF32> Codepoints(S.size()); // 1 codepoint per byte suffices.
710	const UTF8 In8 = reinterpret_cast<const* UTF8 *>(S.data());
711	UTF32 *Out32 = Codepoints.data();
712	ConvertUTF8toUTF32(sourceStart: &In8, sourceEnd: In8 + S.size(), targetStart: &Out32, targetEnd: Out32 + Codepoints.size(),
713	flags: lenientConversion);
714	Codepoints.resize(new_size: Out32 - Codepoints.data());
715	std::string Res(`4` * Codepoints.size(), `0`); // 4 bytes per codepoint suffice
716	const UTF32 *In32 = Codepoints.data();
717	UTF8 Out8 = reinterpret_cast<UTF8 >(&Res [`0`]);
718	ConvertUTF32toUTF8(sourceStart: &In32, sourceEnd: In32 + Codepoints.size(), targetStart: &Out8, targetEnd: Out8 + Res.size(),
719	flags: strictConversion);
720	Res.resize(n: reinterpret_cast<char *>(Out8) - Res.data());
721	return Res;
722	}
723
724	static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
725	OS << `'\"'`;
726	for (unsigned char C : S) {
727	if (C == `0x22` \|\| C == `0x5C`)
728	OS << `'\\'`;
729	if (C >= `0x20`) {
730	OS << C;
731	continue;
732	}
733	OS << `'\\'`;
734	switch (C) {
735	// A few characters are common enough to make short escapes worthwhile.
736	case `'\t'`:
737	OS << `'t'`;
738	break;
739	case `'\n'`:
740	OS << `'n'`;
741	break;
742	case `'\r'`:
743	OS << `'r'`;
744	break;
745	default:
746	OS << `'u'`;
747	llvm::write_hex(S&: OS, N: C, Style: llvm::HexPrintStyle::Lower, Width: `4`);
748	break;
749	}
750	}
751	OS << `'\"'`;
752	}
753
754	void llvm::json::OStream::value(const Value &V) {
755	switch (V.kind()) {
756	case Value::Null:
757	valueBegin();
758	OS << "null";
759	return;
760	case Value::Boolean:
761	valueBegin();
762	OS << (*V.getAsBoolean() ? "true" : "false");
763	return;
764	case Value::Number:
765	valueBegin();
766	if (V.Type == Value::T_Integer)
767	OS << *V.getAsInteger();
768	else if (V.Type == Value::T_UINT64)
769	OS << *V.getAsUINT64();
770	else
771	OS << format(Fmt: "%.g", Vals: std::numeric_limits<double*>::max_digits10,
772	Vals: *V.getAsNumber());
773	return;
774	case Value::String:
775	valueBegin();
776	quote(OS, S: *V.getAsString());
777	return;
778	case Value::Array:
779	return array(Contents: [&] {
780	for (const Value &E : *V.getAsArray())
781	value(V: E);
782	});
783	case Value::Object:
784	return object(Contents: [&] {
785	for (const Object::value_type E : sortedElements(O: V.getAsObject()))
786	attribute(Key: E->first, Contents: E->second);
787	});
788	}
789	}
790
791	void llvm::json::OStream::valueBegin() {
792	assert(Stack.back().Ctx != Object && "Only attributes allowed here");
793	if (Stack.back().HasValue) {
794	assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
795	OS << `','`;
796	}
797	if (Stack.back().Ctx == Array)
798	newline();
799	flushComment();
800	Stack.back().HasValue = true;
801	}
802
803	void OStream::comment(llvm::StringRef Comment) {
804	assert(PendingComment.empty() && "Only one comment per value!");
805	PendingComment = Comment;
806	}
807
808	void OStream::flushComment() {
809	if (PendingComment.empty())
810	return;
811	OS << (IndentSize ? "/* " : "/*");
812	// Be sure not to accidentally emit "/". Transform to "* /".*
813	while (!PendingComment.empty()) {
814	auto Pos = PendingComment.find(Str: "*/");
815	if (Pos == StringRef::npos) {
816	OS << PendingComment;
817	PendingComment = "";
818	} else {
819	OS << PendingComment.take_front(N: Pos) << "* /";
820	PendingComment = PendingComment.drop_front(N: Pos + `2`);
821	}
822	}
823	OS << (IndentSize ? " /" : "/");
824	// Comments are on their own line unless attached to an attribute value.
825	if (Stack.size() > `1` && Stack.back().Ctx == Singleton) {
826	if (IndentSize)
827	OS << `' '`;
828	} else {
829	newline();
830	}
831	}
832
833	void llvm::json::OStream::newline() {
834	if (IndentSize) {
835	OS.write(C: `'\n'`);
836	OS.indent(NumSpaces: Indent);
837	}
838	}
839
840	void llvm::json::OStream::arrayBegin() {
841	valueBegin();
842	Stack.emplace_back();
843	Stack.back().Ctx = Array;
844	Indent += IndentSize;
845	OS << `'['`;
846	}
847
848	void llvm::json::OStream::arrayEnd() {
849	assert(Stack.back().Ctx == Array);
850	Indent -= IndentSize;
851	if (Stack.back().HasValue)
852	newline();
853	OS << `']'`;
854	assert(PendingComment.empty());
855	Stack.pop_back();
856	assert(!Stack.empty());
857	}
858
859	void llvm::json::OStream::objectBegin() {
860	valueBegin();
861	Stack.emplace_back();
862	Stack.back().Ctx = Object;
863	Indent += IndentSize;
864	OS << `'{'`;
865	}
866
867	void llvm::json::OStream::objectEnd() {
868	assert(Stack.back().Ctx == Object);
869	Indent -= IndentSize;
870	if (Stack.back().HasValue)
871	newline();
872	OS << `'}'`;
873	assert(PendingComment.empty());
874	Stack.pop_back();
875	assert(!Stack.empty());
876	}
877
878	void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
879	assert(Stack.back().Ctx == Object);
880	if (Stack.back().HasValue)
881	OS << `','`;
882	newline();
883	flushComment();
884	Stack.back().HasValue = true;
885	Stack.emplace_back();
886	Stack.back().Ctx = Singleton;
887	if (LLVM_LIKELY(isUTF8(Key))) {
888	quote(OS, S: Key);
889	} else {
890	assert(false && "Invalid UTF-8 in attribute key");
891	quote(OS, S: fixUTF8(S: Key));
892	}
893	OS.write(C: `':'`);
894	if (IndentSize)
895	OS.write(C: `' '`);
896	}
897
898	void llvm::json::OStream::attributeEnd() {
899	assert(Stack.back().Ctx == Singleton);
900	assert(Stack.back().HasValue && "Attribute must have a value");
901	assert(PendingComment.empty());
902	Stack.pop_back();
903	assert(Stack.back().Ctx == Object);
904	}
905
906	raw_ostream &llvm::json::OStream::rawValueBegin() {
907	valueBegin();
908	Stack.emplace_back();
909	Stack.back().Ctx = RawValue;
910	return OS;
911	}
912
913	void llvm::json::OStream::rawValueEnd() {
914	assert(Stack.back().Ctx == RawValue);
915	Stack.pop_back();
916	}
917
918	} // namespace json
919	} // namespace llvm
920
921	void llvm::format_provider<llvm::json::Value>::format(
922	const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
923	unsigned IndentAmount = `0`;
924	if (!Options.empty() && Options.getAsInteger(/Radix=/`10`, Result&: IndentAmount))
925	llvm_unreachable("json::Value format options should be an integer");
926	json::OStream (OS, IndentAmount).value(V: E);
927	}
928
929

Browse the source code of llvm_projects/llvm/lib/Support/JSON.cpp