1 | //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/BinaryFormat/Magic.h" |
10 | #include "llvm/ADT/StringRef.h" |
11 | #include "llvm/ADT/Twine.h" |
12 | #include "llvm/BinaryFormat/COFF.h" |
13 | #include "llvm/BinaryFormat/MachO.h" |
14 | #include "llvm/Support/Endian.h" |
15 | #include "llvm/Support/MemoryBuffer.h" |
16 | |
17 | #if !defined(_MSC_VER) && !defined(__MINGW32__) |
18 | #include <unistd.h> |
19 | #else |
20 | #include <io.h> |
21 | #endif |
22 | |
23 | using namespace llvm; |
24 | using namespace llvm::support::endian; |
25 | using namespace llvm::sys::fs; |
26 | |
27 | template <size_t N> |
28 | static bool startswith(StringRef Magic, const char (&S)[N]) { |
29 | return Magic.starts_with(Prefix: StringRef(S, N - 1)); |
30 | } |
31 | |
32 | /// Identify the magic in magic. |
33 | file_magic llvm::identify_magic(StringRef Magic) { |
34 | if (Magic.size() < 4) |
35 | return file_magic::unknown; |
36 | switch ((unsigned char)Magic[0]) { |
37 | case 0x00: { |
38 | // COFF bigobj, CL.exe's LTO object file, or short import library file |
39 | if (startswith(Magic, S: "\0\0\xFF\xFF" )) { |
40 | size_t MinSize = |
41 | offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); |
42 | if (Magic.size() < MinSize) |
43 | return file_magic::coff_import_library; |
44 | |
45 | const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); |
46 | if (memcmp(s1: Start, s2: COFF::BigObjMagic, n: sizeof(COFF::BigObjMagic)) == 0) |
47 | return file_magic::coff_object; |
48 | if (memcmp(s1: Start, s2: COFF::ClGlObjMagic, n: sizeof(COFF::BigObjMagic)) == 0) |
49 | return file_magic::coff_cl_gl_object; |
50 | return file_magic::coff_import_library; |
51 | } |
52 | // Windows resource file |
53 | if (Magic.size() >= sizeof(COFF::WinResMagic) && |
54 | memcmp(s1: Magic.data(), s2: COFF::WinResMagic, n: sizeof(COFF::WinResMagic)) == 0) |
55 | return file_magic::windows_resource; |
56 | // 0x0000 = COFF unknown machine type |
57 | if (Magic[1] == 0) |
58 | return file_magic::coff_object; |
59 | if (startswith(Magic, S: "\0asm" )) |
60 | return file_magic::wasm_object; |
61 | break; |
62 | } |
63 | |
64 | case 0x01: |
65 | // XCOFF format |
66 | if (startswith(Magic, S: "\x01\xDF" )) |
67 | return file_magic::xcoff_object_32; |
68 | if (startswith(Magic, S: "\x01\xF7" )) |
69 | return file_magic::xcoff_object_64; |
70 | break; |
71 | |
72 | case 0x03: |
73 | if (startswith(Magic, S: "\x03\xF0\x00" )) |
74 | return file_magic::goff_object; |
75 | // SPIR-V format in little-endian mode. |
76 | if (startswith(Magic, S: "\x03\x02\x23\x07" )) |
77 | return file_magic::spirv_object; |
78 | break; |
79 | |
80 | case 0x07: // SPIR-V format in big-endian mode. |
81 | if (startswith(Magic, S: "\x07\x23\x02\x03" )) |
82 | return file_magic::spirv_object; |
83 | break; |
84 | |
85 | case 0x10: |
86 | if (startswith(Magic, S: "\x10\xFF\x10\xAD" )) |
87 | return file_magic::offload_binary; |
88 | break; |
89 | |
90 | case 0xDE: // 0x0B17C0DE = BC wraper |
91 | if (startswith(Magic, S: "\xDE\xC0\x17\x0B" )) |
92 | return file_magic::bitcode; |
93 | break; |
94 | case 'B': |
95 | if (startswith(Magic, S: "BC\xC0\xDE" )) |
96 | return file_magic::bitcode; |
97 | break; |
98 | case 'C': |
99 | if (startswith(Magic, S: "CCOB" )) |
100 | return file_magic::offload_bundle_compressed; |
101 | if (startswith(Magic, S: "CPCH" )) |
102 | return file_magic::clang_ast; |
103 | break; |
104 | case '!': |
105 | if (startswith(Magic, S: "!<arch>\n" ) || startswith(Magic, S: "!<thin>\n" )) |
106 | return file_magic::archive; |
107 | break; |
108 | case '<': |
109 | if (startswith(Magic, S: "<bigaf>\n" )) |
110 | return file_magic::archive; |
111 | break; |
112 | case '\177': |
113 | if (startswith(Magic, S: "\177ELF" ) && Magic.size() >= 18) { |
114 | bool Data2MSB = Magic[5] == 2; |
115 | unsigned high = Data2MSB ? 16 : 17; |
116 | unsigned low = Data2MSB ? 17 : 16; |
117 | if (Magic[high] == 0) { |
118 | switch (Magic[low]) { |
119 | default: |
120 | return file_magic::elf; |
121 | case 1: |
122 | return file_magic::elf_relocatable; |
123 | case 2: |
124 | return file_magic::elf_executable; |
125 | case 3: |
126 | return file_magic::elf_shared_object; |
127 | case 4: |
128 | return file_magic::elf_core; |
129 | } |
130 | } |
131 | // It's still some type of ELF file. |
132 | return file_magic::elf; |
133 | } |
134 | break; |
135 | |
136 | case 0xCA: |
137 | if (startswith(Magic, S: "\xCA\xFE\xBA\xBE" ) || |
138 | startswith(Magic, S: "\xCA\xFE\xBA\xBF" )) { |
139 | // This is complicated by an overlap with Java class files. |
140 | // See the Mach-O section in /usr/share/file/magic for details. |
141 | if (Magic.size() >= 8 && Magic[7] < 43) |
142 | return file_magic::macho_universal_binary; |
143 | } |
144 | break; |
145 | |
146 | // The two magic numbers for mach-o are: |
147 | // 0xfeedface - 32-bit mach-o |
148 | // 0xfeedfacf - 64-bit mach-o |
149 | case 0xFE: |
150 | case 0xCE: |
151 | case 0xCF: { |
152 | uint16_t type = 0; |
153 | if (startswith(Magic, S: "\xFE\xED\xFA\xCE" ) || |
154 | startswith(Magic, S: "\xFE\xED\xFA\xCF" )) { |
155 | /* Native endian */ |
156 | size_t MinSize; |
157 | if (Magic[3] == char(0xCE)) |
158 | MinSize = sizeof(MachO::mach_header); |
159 | else |
160 | MinSize = sizeof(MachO::mach_header_64); |
161 | if (Magic.size() >= MinSize) |
162 | type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; |
163 | } else if (startswith(Magic, S: "\xCE\xFA\xED\xFE" ) || |
164 | startswith(Magic, S: "\xCF\xFA\xED\xFE" )) { |
165 | /* Reverse endian */ |
166 | size_t MinSize; |
167 | if (Magic[0] == char(0xCE)) |
168 | MinSize = sizeof(MachO::mach_header); |
169 | else |
170 | MinSize = sizeof(MachO::mach_header_64); |
171 | if (Magic.size() >= MinSize) |
172 | type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12]; |
173 | } |
174 | switch (type) { |
175 | default: |
176 | break; |
177 | case 1: |
178 | return file_magic::macho_object; |
179 | case 2: |
180 | return file_magic::macho_executable; |
181 | case 3: |
182 | return file_magic::macho_fixed_virtual_memory_shared_lib; |
183 | case 4: |
184 | return file_magic::macho_core; |
185 | case 5: |
186 | return file_magic::macho_preload_executable; |
187 | case 6: |
188 | return file_magic::macho_dynamically_linked_shared_lib; |
189 | case 7: |
190 | return file_magic::macho_dynamic_linker; |
191 | case 8: |
192 | return file_magic::macho_bundle; |
193 | case 9: |
194 | return file_magic::macho_dynamically_linked_shared_lib_stub; |
195 | case 10: |
196 | return file_magic::macho_dsym_companion; |
197 | case 11: |
198 | return file_magic::macho_kext_bundle; |
199 | case 12: |
200 | return file_magic::macho_file_set; |
201 | } |
202 | break; |
203 | } |
204 | case 0xF0: // PowerPC Windows |
205 | case 0x83: // Alpha 32-bit |
206 | case 0x84: // Alpha 64-bit |
207 | case 0x66: // MPS R4000 Windows |
208 | case 0x50: // mc68K |
209 | if (startswith(Magic, S: "\x50\xed\x55\xba" )) |
210 | return file_magic::cuda_fatbinary; |
211 | [[fallthrough]]; |
212 | |
213 | case 0x4c: // 80386 Windows |
214 | case 0xc4: // ARMNT Windows |
215 | if (Magic[1] == 0x01) |
216 | return file_magic::coff_object; |
217 | [[fallthrough]]; |
218 | |
219 | case 0x90: // PA-RISC Windows |
220 | case 0x68: // mc68K Windows |
221 | if (Magic[1] == 0x02) |
222 | return file_magic::coff_object; |
223 | break; |
224 | |
225 | case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a |
226 | // Minidump file. |
227 | if (startswith(Magic, S: "MZ" ) && Magic.size() >= 0x3c + 4) { |
228 | uint32_t off = read32le(P: Magic.data() + 0x3c); |
229 | // PE/COFF file, either EXE or DLL. |
230 | if (Magic.substr(Start: off).starts_with( |
231 | Prefix: StringRef(COFF::PEMagic, sizeof(COFF::PEMagic)))) |
232 | return file_magic::pecoff_executable; |
233 | } |
234 | if (Magic.starts_with(Prefix: "Microsoft C/C++ MSF 7.00\r\n" )) |
235 | return file_magic::pdb; |
236 | if (startswith(Magic, S: "MDMP" )) |
237 | return file_magic::minidump; |
238 | break; |
239 | |
240 | case 0x64: // x86-64 or ARM64 Windows. |
241 | if (Magic[1] == char(0x86) || Magic[1] == char(0xaa)) |
242 | return file_magic::coff_object; |
243 | break; |
244 | |
245 | case 0x2d: // YAML '-' MachO TBD. |
246 | if (startswith(Magic, S: "--- !tapi" ) || startswith(Magic, S: "---\narchs:" )) |
247 | return file_magic::tapi_file; |
248 | break; |
249 | case 0x7b: // JSON '{' MachO TBD. |
250 | return file_magic::tapi_file; |
251 | break; |
252 | |
253 | case 'D': // DirectX container file - DXBC |
254 | if (startswith(Magic, S: "DXBC" )) |
255 | return file_magic::dxcontainer_object; |
256 | break; |
257 | |
258 | case 0x41: // ARM64EC windows |
259 | if (Magic[1] == char(0xA6)) |
260 | return file_magic::coff_object; |
261 | break; |
262 | |
263 | case 0x4e: // ARM64X windows |
264 | if (Magic[1] == char(0xA6)) |
265 | return file_magic::coff_object; |
266 | break; |
267 | |
268 | case '_': { |
269 | const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__" ; |
270 | if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, S: OBMagic)) |
271 | return file_magic::offload_bundle; |
272 | break; |
273 | } |
274 | |
275 | default: |
276 | break; |
277 | } |
278 | return file_magic::unknown; |
279 | } |
280 | |
281 | std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) { |
282 | auto FileOrError = MemoryBuffer::getFile(Filename: Path, /*IsText=*/false, |
283 | /*RequiresNullTerminator=*/false); |
284 | if (!FileOrError) |
285 | return FileOrError.getError(); |
286 | |
287 | std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError); |
288 | Result = identify_magic(Magic: FileBuffer->getBuffer()); |
289 | |
290 | return std::error_code(); |
291 | } |
292 | |