DJB.cpp source code [llvm_projects/llvm/lib/Support/DJB.cpp]

1	//===-- Support/DJB.cpp ---DJB Hash ------------------------------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains support for the DJ Bernstein hash function.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/Support/DJB.h"
14	#include "llvm/ADT/ArrayRef.h"
15	#include "llvm/Support/Compiler.h"
16	#include "llvm/Support/ConvertUTF.h"
17	#include "llvm/Support/Unicode.h"
18
19	using namespace llvm;
20
21	static UTF32 chopOneUTF32(StringRef &Buffer) {
22	UTF32 C;
23	const UTF8 *const Begin8Const =
24	reinterpret_cast<const UTF8 *>(Buffer.begin());
25	const UTF8 *Begin8 = Begin8Const;
26	UTF32 *Begin32 = &C;
27
28	// In lenient mode we will always end up with a "reasonable" value in C for
29	// non-empty input.
30	assert(!Buffer.empty());
31	ConvertUTF8toUTF32(sourceStart: &Begin8, sourceEnd: reinterpret_cast<const UTF8 *>(Buffer.end()),
32	targetStart: &Begin32, targetEnd: &C + `1`, flags: lenientConversion);
33	Buffer = Buffer.drop_front(N: Begin8 - Begin8Const);
34	return C;
35	}
36
37	static StringRef toUTF8(UTF32 C, MutableArrayRef<UTF8> Storage) {
38	const UTF32 *Begin32 = &C;
39	UTF8 *Begin8 = Storage.begin();
40
41	// The case-folded output should always be a valid unicode character, so use
42	// strict mode here.
43	ConversionResult CR = ConvertUTF32toUTF8(sourceStart: &Begin32, sourceEnd: &C + `1`, targetStart: &Begin8,
44	targetEnd: Storage.end(), flags: strictConversion);
45	assert(CR == conversionOK && "Case folding produced invalid char?");
46	(void)CR;
47	return StringRef (reinterpret_cast<char *>(Storage.begin()),
48	Begin8 - Storage.begin());
49	}
50
51	static UTF32 foldCharDwarf(UTF32 C) {
52	// DWARF v5 addition to the unicode folding rules.
53	// Fold "Latin Small Letter Dotless I" and "Latin Capital Letter I With Dot
54	// Above" into "i".
55	if (C == `0x130` \|\| C == `0x131`)
56	return `'i'`;
57	return sys::unicode::foldCharSimple(C);
58	}
59
60	static std::optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer,
61	uint32_t H) {
62	bool AllASCII = true;
63	for (unsigned char C : Buffer) {
64	H = H * `33` + (`'A'` <= C && C <= `'Z'` ? C - `'A'` + `'a'` : C);
65	AllASCII &= C <= `0x7f`;
66	}
67	if (AllASCII)
68	return H;
69	return std::nullopt;
70	}
71
72	uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
73	if (std::optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H))
74	return *Result;
75
76	std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
77	while (!Buffer.empty()) {
78	UTF32 C = foldCharDwarf(C: chopOneUTF32(Buffer));
79	StringRef Folded = toUTF8(C, Storage);
80	H = djbHash(Buffer: Folded, H);
81	}
82	return H;
83	}
84

Browse the source code of llvm_projects/llvm/lib/Support/DJB.cpp