1 | //===- Base64.cpp ---------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #define INVALID_BASE64_BYTE 64 |
10 | #include "llvm/Support/Base64.h" |
11 | |
12 | static char decodeBase64Byte(uint8_t Ch) { |
13 | constexpr char Inv = INVALID_BASE64_BYTE; |
14 | static const char DecodeTable[] = { |
15 | Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........ |
16 | Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........ |
17 | Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........ |
18 | Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........ |
19 | Inv, Inv, Inv, Inv, Inv, Inv, Inv, Inv, // ........ |
20 | Inv, Inv, Inv, 62, Inv, Inv, Inv, 63, // ...+.../ |
21 | 52, 53, 54, 55, 56, 57, 58, 59, // 01234567 |
22 | 60, 61, Inv, Inv, Inv, 0, Inv, Inv, // 89...=.. |
23 | Inv, 0, 1, 2, 3, 4, 5, 6, // .ABCDEFG |
24 | 7, 8, 9, 10, 11, 12, 13, 14, // HIJKLMNO |
25 | 15, 16, 17, 18, 19, 20, 21, 22, // PQRSTUVW |
26 | 23, 24, 25, Inv, Inv, Inv, Inv, Inv, // XYZ..... |
27 | Inv, 26, 27, 28, 29, 30, 31, 32, // .abcdefg |
28 | 33, 34, 35, 36, 37, 38, 39, 40, // hijklmno |
29 | 41, 42, 43, 44, 45, 46, 47, 48, // pqrstuvw |
30 | 49, 50, 51 // xyz..... |
31 | }; |
32 | if (Ch >= sizeof(DecodeTable)) |
33 | return Inv; |
34 | return DecodeTable[Ch]; |
35 | } |
36 | |
37 | llvm::Error llvm::decodeBase64(llvm::StringRef Input, |
38 | std::vector<char> &Output) { |
39 | constexpr char Base64InvalidByte = INVALID_BASE64_BYTE; |
40 | // Invalid table value with short name to fit in the table init below. The |
41 | // invalid value is 64 since valid base64 values are 0 - 63. |
42 | Output.clear(); |
43 | const uint64_t InputLength = Input.size(); |
44 | if (InputLength == 0) |
45 | return Error::success(); |
46 | // Make sure we have a valid input string length which must be a multiple |
47 | // of 4. |
48 | if ((InputLength % 4) != 0) |
49 | return createStringError(EC: std::errc::illegal_byte_sequence, |
50 | Fmt: "Base64 encoded strings must be a multiple of 4 " |
51 | "bytes in length" ); |
52 | const uint64_t FirstValidEqualIdx = InputLength - 2; |
53 | char Hex64Bytes[4]; |
54 | for (uint64_t Idx = 0; Idx < InputLength; Idx += 4) { |
55 | for (uint64_t ByteOffset = 0; ByteOffset < 4; ++ByteOffset) { |
56 | const uint64_t ByteIdx = Idx + ByteOffset; |
57 | const char Byte = Input[ByteIdx]; |
58 | const char DecodedByte = decodeBase64Byte(Ch: Byte); |
59 | bool Illegal = DecodedByte == Base64InvalidByte; |
60 | if (!Illegal && Byte == '=') { |
61 | if (ByteIdx < FirstValidEqualIdx) { |
62 | // We have an '=' in the middle of the string which is invalid, only |
63 | // the last two characters can be '=' characters. |
64 | Illegal = true; |
65 | } else if (ByteIdx == FirstValidEqualIdx && Input[ByteIdx + 1] != '=') { |
66 | // We have an equal second to last from the end and the last character |
67 | // is not also an equal, so the '=' character is invalid |
68 | Illegal = true; |
69 | } |
70 | } |
71 | if (Illegal) |
72 | return createStringError( |
73 | EC: std::errc::illegal_byte_sequence, |
74 | Fmt: "Invalid Base64 character %#2.2x at index %" PRIu64, Vals: Byte, Vals: ByteIdx); |
75 | Hex64Bytes[ByteOffset] = DecodedByte; |
76 | } |
77 | // Now we have 6 bits of 3 bytes in value in each of the Hex64Bytes bytes. |
78 | // Extract the right bytes into the Output buffer. |
79 | Output.push_back(x: (Hex64Bytes[0] << 2) + ((Hex64Bytes[1] >> 4) & 0x03)); |
80 | Output.push_back(x: (Hex64Bytes[1] << 4) + ((Hex64Bytes[2] >> 2) & 0x0f)); |
81 | Output.push_back(x: (Hex64Bytes[2] << 6) + (Hex64Bytes[3] & 0x3f)); |
82 | } |
83 | // If we had valid trailing '=' characters strip the right number of bytes |
84 | // from the end of the output buffer. We already know that the Input length |
85 | // it a multiple of 4 and is not zero, so direct character access is safe. |
86 | if (Input.back() == '=') { |
87 | Output.pop_back(); |
88 | if (Input[InputLength - 2] == '=') |
89 | Output.pop_back(); |
90 | } |
91 | return Error::success(); |
92 | } |
93 | |