| 1 | //===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the operating system Host detection. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/TargetParser/Host.h" |
| 14 | #include "llvm/ADT/Bitfields.h" |
| 15 | #include "llvm/ADT/STLFunctionalExtras.h" |
| 16 | #include "llvm/ADT/SmallVector.h" |
| 17 | #include "llvm/ADT/StringExtras.h" |
| 18 | #include "llvm/ADT/StringMap.h" |
| 19 | #include "llvm/ADT/StringRef.h" |
| 20 | #include "llvm/ADT/StringSwitch.h" |
| 21 | #include "llvm/Config/llvm-config.h" |
| 22 | #include "llvm/Support/MemoryBuffer.h" |
| 23 | #include "llvm/Support/raw_ostream.h" |
| 24 | #include "llvm/TargetParser/RISCVTargetParser.h" |
| 25 | #include "llvm/TargetParser/Triple.h" |
| 26 | #include "llvm/TargetParser/X86TargetParser.h" |
| 27 | #include <string.h> |
| 28 | |
| 29 | // Include the platform-specific parts of this class. |
| 30 | #ifdef LLVM_ON_UNIX |
| 31 | #include "Unix/Host.inc" |
| 32 | #include <sched.h> |
| 33 | #endif |
| 34 | #ifdef _WIN32 |
| 35 | #include "Windows/Host.inc" |
| 36 | #endif |
| 37 | #ifdef _MSC_VER |
| 38 | #include <intrin.h> |
| 39 | #endif |
| 40 | #ifdef __MVS__ |
| 41 | #include "llvm/Support/BCD.h" |
| 42 | #endif |
| 43 | #if defined(__APPLE__) |
| 44 | #include <mach/host_info.h> |
| 45 | #include <mach/mach.h> |
| 46 | #include <mach/mach_host.h> |
| 47 | #include <mach/machine.h> |
| 48 | #include <sys/param.h> |
| 49 | #include <sys/sysctl.h> |
| 50 | #endif |
| 51 | #ifdef _AIX |
| 52 | #include <sys/systemcfg.h> |
| 53 | #endif |
| 54 | #if defined(__sun__) && defined(__svr4__) |
| 55 | #include <kstat.h> |
| 56 | #endif |
| 57 | #if defined(__GNUC__) || defined(__clang__) |
| 58 | #if (defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER) |
| 59 | #include <cpuid.h> |
| 60 | #endif |
| 61 | #endif |
| 62 | |
| 63 | #define DEBUG_TYPE "host-detection" |
| 64 | |
| 65 | //===----------------------------------------------------------------------===// |
| 66 | // |
| 67 | // Implementations of the CPU detection routines |
| 68 | // |
| 69 | //===----------------------------------------------------------------------===// |
| 70 | |
| 71 | using namespace llvm; |
| 72 | |
| 73 | [[maybe_unused]] static std::unique_ptr<llvm::MemoryBuffer> |
| 74 | getProcCpuinfoContent() { |
| 75 | const char *CPUInfoFile = "/proc/cpuinfo" ; |
| 76 | if (const char *CpuinfoIntercept = std::getenv(name: "LLVM_CPUINFO" )) |
| 77 | CPUInfoFile = CpuinfoIntercept; |
| 78 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = |
| 79 | llvm::MemoryBuffer::getFileAsStream(Filename: CPUInfoFile); |
| 80 | |
| 81 | if (std::error_code EC = Text.getError()) { |
| 82 | llvm::errs() << "Can't read " << CPUInfoFile << ": " << EC.message() |
| 83 | << "\n" ; |
| 84 | return nullptr; |
| 85 | } |
| 86 | return std::move(*Text); |
| 87 | } |
| 88 | |
| 89 | StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { |
| 90 | // Access to the Processor Version Register (PVR) on PowerPC is privileged, |
| 91 | // and so we must use an operating-system interface to determine the current |
| 92 | // processor type. On Linux, this is exposed through the /proc/cpuinfo file. |
| 93 | const char *generic = "generic" ; |
| 94 | |
| 95 | // The cpu line is second (after the 'processor: 0' line), so if this |
| 96 | // buffer is too small then something has changed (or is wrong). |
| 97 | StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); |
| 98 | StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); |
| 99 | |
| 100 | StringRef::const_iterator CIP = CPUInfoStart; |
| 101 | |
| 102 | StringRef::const_iterator CPUStart = nullptr; |
| 103 | size_t CPULen = 0; |
| 104 | |
| 105 | // We need to find the first line which starts with cpu, spaces, and a colon. |
| 106 | // After the colon, there may be some additional spaces and then the cpu type. |
| 107 | while (CIP < CPUInfoEnd && CPUStart == nullptr) { |
| 108 | if (CIP < CPUInfoEnd && *CIP == '\n') |
| 109 | ++CIP; |
| 110 | |
| 111 | if (CIP < CPUInfoEnd && *CIP == 'c') { |
| 112 | ++CIP; |
| 113 | if (CIP < CPUInfoEnd && *CIP == 'p') { |
| 114 | ++CIP; |
| 115 | if (CIP < CPUInfoEnd && *CIP == 'u') { |
| 116 | ++CIP; |
| 117 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) |
| 118 | ++CIP; |
| 119 | |
| 120 | if (CIP < CPUInfoEnd && *CIP == ':') { |
| 121 | ++CIP; |
| 122 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) |
| 123 | ++CIP; |
| 124 | |
| 125 | if (CIP < CPUInfoEnd) { |
| 126 | CPUStart = CIP; |
| 127 | while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && |
| 128 | *CIP != ',' && *CIP != '\n')) |
| 129 | ++CIP; |
| 130 | CPULen = CIP - CPUStart; |
| 131 | } |
| 132 | } |
| 133 | } |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | if (CPUStart == nullptr) |
| 138 | while (CIP < CPUInfoEnd && *CIP != '\n') |
| 139 | ++CIP; |
| 140 | } |
| 141 | |
| 142 | if (CPUStart == nullptr) |
| 143 | return generic; |
| 144 | |
| 145 | return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) |
| 146 | .Case(S: "604e" , Value: "604e" ) |
| 147 | .Case(S: "604" , Value: "604" ) |
| 148 | .Case(S: "7400" , Value: "7400" ) |
| 149 | .Case(S: "7410" , Value: "7400" ) |
| 150 | .Case(S: "7447" , Value: "7400" ) |
| 151 | .Case(S: "7455" , Value: "7450" ) |
| 152 | .Case(S: "G4" , Value: "g4" ) |
| 153 | .Case(S: "POWER4" , Value: "970" ) |
| 154 | .Case(S: "PPC970FX" , Value: "970" ) |
| 155 | .Case(S: "PPC970MP" , Value: "970" ) |
| 156 | .Case(S: "G5" , Value: "g5" ) |
| 157 | .Case(S: "POWER5" , Value: "g5" ) |
| 158 | .Case(S: "A2" , Value: "a2" ) |
| 159 | .Case(S: "POWER6" , Value: "pwr6" ) |
| 160 | .Case(S: "POWER7" , Value: "pwr7" ) |
| 161 | .Case(S: "POWER8" , Value: "pwr8" ) |
| 162 | .Case(S: "POWER8E" , Value: "pwr8" ) |
| 163 | .Case(S: "POWER8NVL" , Value: "pwr8" ) |
| 164 | .Case(S: "POWER9" , Value: "pwr9" ) |
| 165 | .Case(S: "POWER10" , Value: "pwr10" ) |
| 166 | .Case(S: "POWER11" , Value: "pwr11" ) |
| 167 | // FIXME: If we get a simulator or machine with the capabilities of |
| 168 | // mcpu=future, we should revisit this and add the name reported by the |
| 169 | // simulator/machine. |
| 170 | .Default(Value: generic); |
| 171 | } |
| 172 | |
| 173 | StringRef |
| 174 | getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware, |
| 175 | StringRef Part, ArrayRef<StringRef> Parts, |
| 176 | function_ref<unsigned()> GetVariant) { |
| 177 | |
| 178 | auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) { |
| 179 | if (Parts.size() == 2) |
| 180 | return (Parts[0] == Big && Parts[1] == Little) || |
| 181 | (Parts[1] == Big && Parts[0] == Little); |
| 182 | return false; |
| 183 | }; |
| 184 | |
| 185 | if (Implementer == "0x41" ) { // ARM Ltd. |
| 186 | // MSM8992/8994 may give cpu part for the core that the kernel is running on, |
| 187 | // which is undeterministic and wrong. Always return cortex-a53 for these SoC. |
| 188 | if (Hardware.ends_with(Suffix: "MSM8994" ) || Hardware.ends_with(Suffix: "MSM8996" )) |
| 189 | return "cortex-a53" ; |
| 190 | |
| 191 | // Detect big.LITTLE systems. |
| 192 | if (MatchBigLittle(Parts, "0xd85" , "0xd87" )) |
| 193 | return "cortex-x925" ; |
| 194 | |
| 195 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
| 196 | // values correspond to the "Part number" in the CP15/c0 register. The |
| 197 | // contents are specified in the various processor manuals. |
| 198 | // This corresponds to the Main ID Register in Technical Reference Manuals. |
| 199 | // and is used in programs like sys-utils |
| 200 | return StringSwitch<const char *>(Part) |
| 201 | .Case(S: "0x926" , Value: "arm926ej-s" ) |
| 202 | .Case(S: "0xb02" , Value: "mpcore" ) |
| 203 | .Case(S: "0xb36" , Value: "arm1136j-s" ) |
| 204 | .Case(S: "0xb56" , Value: "arm1156t2-s" ) |
| 205 | .Case(S: "0xb76" , Value: "arm1176jz-s" ) |
| 206 | .Case(S: "0xd8a" , Value: "c1-nano" ) |
| 207 | .Case(S: "0xd90" , Value: "c1-premium" ) |
| 208 | .Case(S: "0xd8b" , Value: "c1-pro" ) |
| 209 | .Case(S: "0xd8c" , Value: "c1-ultra" ) |
| 210 | .Case(S: "0xc05" , Value: "cortex-a5" ) |
| 211 | .Case(S: "0xc07" , Value: "cortex-a7" ) |
| 212 | .Case(S: "0xc08" , Value: "cortex-a8" ) |
| 213 | .Case(S: "0xc09" , Value: "cortex-a9" ) |
| 214 | .Case(S: "0xc0f" , Value: "cortex-a15" ) |
| 215 | .Case(S: "0xc0e" , Value: "cortex-a17" ) |
| 216 | .Case(S: "0xc20" , Value: "cortex-m0" ) |
| 217 | .Case(S: "0xc23" , Value: "cortex-m3" ) |
| 218 | .Case(S: "0xc24" , Value: "cortex-m4" ) |
| 219 | .Case(S: "0xc27" , Value: "cortex-m7" ) |
| 220 | .Case(S: "0xd20" , Value: "cortex-m23" ) |
| 221 | .Case(S: "0xd21" , Value: "cortex-m33" ) |
| 222 | .Case(S: "0xd24" , Value: "cortex-m52" ) |
| 223 | .Case(S: "0xd22" , Value: "cortex-m55" ) |
| 224 | .Case(S: "0xd23" , Value: "cortex-m85" ) |
| 225 | .Case(S: "0xc18" , Value: "cortex-r8" ) |
| 226 | .Case(S: "0xd13" , Value: "cortex-r52" ) |
| 227 | .Case(S: "0xd16" , Value: "cortex-r52plus" ) |
| 228 | .Case(S: "0xd15" , Value: "cortex-r82" ) |
| 229 | .Case(S: "0xd14" , Value: "cortex-r82ae" ) |
| 230 | .Case(S: "0xd02" , Value: "cortex-a34" ) |
| 231 | .Case(S: "0xd04" , Value: "cortex-a35" ) |
| 232 | .Case(S: "0xd8f" , Value: "cortex-a320" ) |
| 233 | .Case(S: "0xd03" , Value: "cortex-a53" ) |
| 234 | .Case(S: "0xd05" , Value: "cortex-a55" ) |
| 235 | .Case(S: "0xd46" , Value: "cortex-a510" ) |
| 236 | .Case(S: "0xd80" , Value: "cortex-a520" ) |
| 237 | .Case(S: "0xd88" , Value: "cortex-a520ae" ) |
| 238 | .Case(S: "0xd07" , Value: "cortex-a57" ) |
| 239 | .Case(S: "0xd06" , Value: "cortex-a65" ) |
| 240 | .Case(S: "0xd43" , Value: "cortex-a65ae" ) |
| 241 | .Case(S: "0xd08" , Value: "cortex-a72" ) |
| 242 | .Case(S: "0xd09" , Value: "cortex-a73" ) |
| 243 | .Case(S: "0xd0a" , Value: "cortex-a75" ) |
| 244 | .Case(S: "0xd0b" , Value: "cortex-a76" ) |
| 245 | .Case(S: "0xd0e" , Value: "cortex-a76ae" ) |
| 246 | .Case(S: "0xd0d" , Value: "cortex-a77" ) |
| 247 | .Case(S: "0xd41" , Value: "cortex-a78" ) |
| 248 | .Case(S: "0xd42" , Value: "cortex-a78ae" ) |
| 249 | .Case(S: "0xd4b" , Value: "cortex-a78c" ) |
| 250 | .Case(S: "0xd47" , Value: "cortex-a710" ) |
| 251 | .Case(S: "0xd4d" , Value: "cortex-a715" ) |
| 252 | .Case(S: "0xd81" , Value: "cortex-a720" ) |
| 253 | .Case(S: "0xd89" , Value: "cortex-a720ae" ) |
| 254 | .Case(S: "0xd87" , Value: "cortex-a725" ) |
| 255 | .Case(S: "0xd44" , Value: "cortex-x1" ) |
| 256 | .Case(S: "0xd4c" , Value: "cortex-x1c" ) |
| 257 | .Case(S: "0xd48" , Value: "cortex-x2" ) |
| 258 | .Case(S: "0xd4e" , Value: "cortex-x3" ) |
| 259 | .Case(S: "0xd82" , Value: "cortex-x4" ) |
| 260 | .Case(S: "0xd85" , Value: "cortex-x925" ) |
| 261 | .Case(S: "0xd4a" , Value: "neoverse-e1" ) |
| 262 | .Case(S: "0xd0c" , Value: "neoverse-n1" ) |
| 263 | .Case(S: "0xd49" , Value: "neoverse-n2" ) |
| 264 | .Case(S: "0xd8e" , Value: "neoverse-n3" ) |
| 265 | .Case(S: "0xd40" , Value: "neoverse-v1" ) |
| 266 | .Case(S: "0xd4f" , Value: "neoverse-v2" ) |
| 267 | .Case(S: "0xd84" , Value: "neoverse-v3" ) |
| 268 | .Case(S: "0xd83" , Value: "neoverse-v3ae" ) |
| 269 | .Default(Value: "generic" ); |
| 270 | } |
| 271 | |
| 272 | if (Implementer == "0x42" || Implementer == "0x43" ) { // Broadcom | Cavium. |
| 273 | return StringSwitch<const char *>(Part) |
| 274 | .Case(S: "0x516" , Value: "thunderx2t99" ) |
| 275 | .Case(S: "0x0516" , Value: "thunderx2t99" ) |
| 276 | .Case(S: "0xaf" , Value: "thunderx2t99" ) |
| 277 | .Case(S: "0x0af" , Value: "thunderx2t99" ) |
| 278 | .Case(S: "0xa1" , Value: "thunderxt88" ) |
| 279 | .Case(S: "0x0a1" , Value: "thunderxt88" ) |
| 280 | .Default(Value: "generic" ); |
| 281 | } |
| 282 | |
| 283 | if (Implementer == "0x46" ) { // Fujitsu Ltd. |
| 284 | return StringSwitch<const char *>(Part) |
| 285 | .Case(S: "0x001" , Value: "a64fx" ) |
| 286 | .Case(S: "0x003" , Value: "fujitsu-monaka" ) |
| 287 | .Default(Value: "generic" ); |
| 288 | } |
| 289 | |
| 290 | if (Implementer == "0x4e" ) { // NVIDIA Corporation |
| 291 | return StringSwitch<const char *>(Part) |
| 292 | .Case(S: "0x004" , Value: "carmel" ) |
| 293 | .Case(S: "0x10" , Value: "olympus" ) |
| 294 | .Case(S: "0x010" , Value: "olympus" ) |
| 295 | .Default(Value: "generic" ); |
| 296 | } |
| 297 | |
| 298 | if (Implementer == "0x48" ) // HiSilicon Technologies, Inc. |
| 299 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
| 300 | // values correspond to the "Part number" in the CP15/c0 register. The |
| 301 | // contents are specified in the various processor manuals. |
| 302 | return StringSwitch<const char *>(Part) |
| 303 | .Case(S: "0xd01" , Value: "tsv110" ) |
| 304 | .Default(Value: "generic" ); |
| 305 | |
| 306 | if (Implementer == "0x51" ) // Qualcomm Technologies, Inc. |
| 307 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
| 308 | // values correspond to the "Part number" in the CP15/c0 register. The |
| 309 | // contents are specified in the various processor manuals. |
| 310 | return StringSwitch<const char *>(Part) |
| 311 | .Case(S: "0x06f" , Value: "krait" ) // APQ8064 |
| 312 | .Case(S: "0x201" , Value: "kryo" ) |
| 313 | .Case(S: "0x205" , Value: "kryo" ) |
| 314 | .Case(S: "0x211" , Value: "kryo" ) |
| 315 | .Case(S: "0x800" , Value: "cortex-a73" ) // Kryo 2xx Gold |
| 316 | .Case(S: "0x801" , Value: "cortex-a73" ) // Kryo 2xx Silver |
| 317 | .Case(S: "0x802" , Value: "cortex-a75" ) // Kryo 3xx Gold |
| 318 | .Case(S: "0x803" , Value: "cortex-a75" ) // Kryo 3xx Silver |
| 319 | .Case(S: "0x804" , Value: "cortex-a76" ) // Kryo 4xx Gold |
| 320 | .Case(S: "0x805" , Value: "cortex-a76" ) // Kryo 4xx/5xx Silver |
| 321 | .Case(S: "0xc00" , Value: "falkor" ) |
| 322 | .Case(S: "0xc01" , Value: "saphira" ) |
| 323 | .Case(S: "0x001" , Value: "oryon-1" ) |
| 324 | .Default(Value: "generic" ); |
| 325 | if (Implementer == "0x53" ) { // Samsung Electronics Co., Ltd. |
| 326 | // The Exynos chips have a convoluted ID scheme that doesn't seem to follow |
| 327 | // any predictive pattern across variants and parts. |
| 328 | |
| 329 | // Look for the CPU variant line, whose value is a 1 digit hexadecimal |
| 330 | // number, corresponding to the Variant bits in the CP15/C0 register. |
| 331 | unsigned Variant = GetVariant(); |
| 332 | |
| 333 | // Convert the CPU part line, whose value is a 3 digit hexadecimal number, |
| 334 | // corresponding to the PartNum bits in the CP15/C0 register. |
| 335 | unsigned PartAsInt; |
| 336 | Part.getAsInteger(Radix: 0, Result&: PartAsInt); |
| 337 | |
| 338 | unsigned Exynos = (Variant << 12) | PartAsInt; |
| 339 | switch (Exynos) { |
| 340 | default: |
| 341 | // Default by falling through to Exynos M3. |
| 342 | [[fallthrough]]; |
| 343 | case 0x1002: |
| 344 | return "exynos-m3" ; |
| 345 | case 0x1003: |
| 346 | return "exynos-m4" ; |
| 347 | } |
| 348 | } |
| 349 | |
| 350 | if (Implementer == "0x61" ) { // Apple |
| 351 | return StringSwitch<const char *>(Part) |
| 352 | .Case(S: "0x020" , Value: "apple-m1" ) |
| 353 | .Case(S: "0x021" , Value: "apple-m1" ) |
| 354 | .Case(S: "0x022" , Value: "apple-m1" ) |
| 355 | .Case(S: "0x023" , Value: "apple-m1" ) |
| 356 | .Case(S: "0x024" , Value: "apple-m1" ) |
| 357 | .Case(S: "0x025" , Value: "apple-m1" ) |
| 358 | .Case(S: "0x028" , Value: "apple-m1" ) |
| 359 | .Case(S: "0x029" , Value: "apple-m1" ) |
| 360 | .Case(S: "0x030" , Value: "apple-m2" ) |
| 361 | .Case(S: "0x031" , Value: "apple-m2" ) |
| 362 | .Case(S: "0x032" , Value: "apple-m2" ) |
| 363 | .Case(S: "0x033" , Value: "apple-m2" ) |
| 364 | .Case(S: "0x034" , Value: "apple-m2" ) |
| 365 | .Case(S: "0x035" , Value: "apple-m2" ) |
| 366 | .Case(S: "0x038" , Value: "apple-m2" ) |
| 367 | .Case(S: "0x039" , Value: "apple-m2" ) |
| 368 | .Case(S: "0x049" , Value: "apple-m3" ) |
| 369 | .Case(S: "0x048" , Value: "apple-m3" ) |
| 370 | .Default(Value: "generic" ); |
| 371 | } |
| 372 | |
| 373 | if (Implementer == "0x63" ) { // Arm China. |
| 374 | return StringSwitch<const char *>(Part) |
| 375 | .Case(S: "0x132" , Value: "star-mc1" ) |
| 376 | .Case(S: "0xd25" , Value: "star-mc3" ) |
| 377 | .Default(Value: "generic" ); |
| 378 | } |
| 379 | |
| 380 | if (Implementer == "0x6d" ) { // Microsoft Corporation. |
| 381 | // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2. |
| 382 | return StringSwitch<const char *>(Part) |
| 383 | .Case(S: "0xd49" , Value: "neoverse-n2" ) |
| 384 | .Default(Value: "generic" ); |
| 385 | } |
| 386 | |
| 387 | if (Implementer == "0xc0" ) { // Ampere Computing |
| 388 | return StringSwitch<const char *>(Part) |
| 389 | .Case(S: "0xac3" , Value: "ampere1" ) |
| 390 | .Case(S: "0xac4" , Value: "ampere1a" ) |
| 391 | .Case(S: "0xac5" , Value: "ampere1b" ) |
| 392 | .Case(S: "0xac7" , Value: "ampere1c" ) |
| 393 | .Default(Value: "generic" ); |
| 394 | } |
| 395 | |
| 396 | return "generic" ; |
| 397 | } |
| 398 | |
| 399 | StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { |
| 400 | // The cpuid register on arm is not accessible from user space. On Linux, |
| 401 | // it is exposed through the /proc/cpuinfo file. |
| 402 | |
| 403 | // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line |
| 404 | // in all cases. |
| 405 | SmallVector<StringRef, 32> Lines; |
| 406 | ProcCpuinfoContent.split(A&: Lines, Separator: '\n'); |
| 407 | |
| 408 | // Look for the CPU implementer and hardware lines, and store the CPU part |
| 409 | // numbers found. |
| 410 | StringRef Implementer; |
| 411 | StringRef Hardware; |
| 412 | SmallVector<StringRef, 32> Parts; |
| 413 | for (StringRef Line : Lines) { |
| 414 | if (Line.consume_front(Prefix: "CPU implementer" )) |
| 415 | Implementer = Line.ltrim(Chars: "\t :" ); |
| 416 | else if (Line.consume_front(Prefix: "Hardware" )) |
| 417 | Hardware = Line.ltrim(Chars: "\t :" ); |
| 418 | else if (Line.consume_front(Prefix: "CPU part" )) |
| 419 | Parts.emplace_back(Args: Line.ltrim(Chars: "\t :" )); |
| 420 | } |
| 421 | |
| 422 | // Last `Part' seen, in case we don't analyse all `Parts' parsed. |
| 423 | StringRef Part = Parts.empty() ? StringRef() : Parts.back(); |
| 424 | |
| 425 | // Remove duplicate `Parts'. |
| 426 | llvm::sort(C&: Parts); |
| 427 | Parts.erase(CS: llvm::unique(R&: Parts), CE: Parts.end()); |
| 428 | |
| 429 | auto GetVariant = [&]() { |
| 430 | unsigned Variant = 0; |
| 431 | for (auto I : Lines) |
| 432 | if (I.consume_front(Prefix: "CPU variant" )) |
| 433 | I.ltrim(Chars: "\t :" ).getAsInteger(Radix: 0, Result&: Variant); |
| 434 | return Variant; |
| 435 | }; |
| 436 | |
| 437 | return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts, |
| 438 | GetVariant); |
| 439 | } |
| 440 | |
| 441 | StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo, |
| 442 | ArrayRef<uint64_t> UniqueCpuInfos) { |
| 443 | // On Windows, the registry provides cached copied of the MIDR_EL1 register. |
| 444 | using PartNum = Bitfield::Element<uint16_t, 4, 12>; |
| 445 | using Implementer = Bitfield::Element<uint16_t, 24, 8>; |
| 446 | using Variant = Bitfield::Element<uint16_t, 20, 4>; |
| 447 | |
| 448 | SmallVector<std::string> PartsHolder; |
| 449 | PartsHolder.reserve(N: UniqueCpuInfos.size()); |
| 450 | for (auto Info : UniqueCpuInfos) |
| 451 | PartsHolder.push_back(Elt: "0x" + utohexstr(X: Bitfield::get<PartNum>(Packed: Info), |
| 452 | /*LowerCase*/ true, |
| 453 | /*Width*/ 3)); |
| 454 | |
| 455 | SmallVector<StringRef> Parts; |
| 456 | Parts.reserve(N: PartsHolder.size()); |
| 457 | for (const auto &Part : PartsHolder) |
| 458 | Parts.push_back(Elt: Part); |
| 459 | |
| 460 | return getHostCPUNameForARMFromComponents( |
| 461 | Implementer: "0x" + utohexstr(X: Bitfield::get<Implementer>(Packed: PrimaryCpuInfo), |
| 462 | /*LowerCase*/ true, |
| 463 | /*Width*/ 2), |
| 464 | /*Hardware*/ "" , |
| 465 | Part: "0x" + utohexstr(X: Bitfield::get<PartNum>(Packed: PrimaryCpuInfo), |
| 466 | /*LowerCase*/ true, |
| 467 | /*Width*/ 3), |
| 468 | Parts, GetVariant: [=]() { return Bitfield::get<Variant>(Packed: PrimaryCpuInfo); }); |
| 469 | } |
| 470 | |
| 471 | namespace { |
| 472 | StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { |
| 473 | switch (Id) { |
| 474 | case 2064: // z900 not supported by LLVM |
| 475 | case 2066: |
| 476 | case 2084: // z990 not supported by LLVM |
| 477 | case 2086: |
| 478 | case 2094: // z9-109 not supported by LLVM |
| 479 | case 2096: |
| 480 | return "generic" ; |
| 481 | case 2097: |
| 482 | case 2098: |
| 483 | return "z10" ; |
| 484 | case 2817: |
| 485 | case 2818: |
| 486 | return "z196" ; |
| 487 | case 2827: |
| 488 | case 2828: |
| 489 | return "zEC12" ; |
| 490 | case 2964: |
| 491 | case 2965: |
| 492 | return HaveVectorSupport? "z13" : "zEC12" ; |
| 493 | case 3906: |
| 494 | case 3907: |
| 495 | return HaveVectorSupport? "z14" : "zEC12" ; |
| 496 | case 8561: |
| 497 | case 8562: |
| 498 | return HaveVectorSupport? "z15" : "zEC12" ; |
| 499 | case 3931: |
| 500 | case 3932: |
| 501 | return HaveVectorSupport? "z16" : "zEC12" ; |
| 502 | case 9175: |
| 503 | case 9176: |
| 504 | default: |
| 505 | return HaveVectorSupport? "z17" : "zEC12" ; |
| 506 | } |
| 507 | } |
| 508 | } // end anonymous namespace |
| 509 | |
| 510 | StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { |
| 511 | // STIDP is a privileged operation, so use /proc/cpuinfo instead. |
| 512 | |
| 513 | // The "processor 0:" line comes after a fair amount of other information, |
| 514 | // including a cache breakdown, but this should be plenty. |
| 515 | SmallVector<StringRef, 32> Lines; |
| 516 | ProcCpuinfoContent.split(A&: Lines, Separator: '\n'); |
| 517 | |
| 518 | // Look for the CPU features. |
| 519 | SmallVector<StringRef, 32> CPUFeatures; |
| 520 | for (StringRef Line : Lines) |
| 521 | if (Line.starts_with(Prefix: "features" )) { |
| 522 | size_t Pos = Line.find(C: ':'); |
| 523 | if (Pos != StringRef::npos) { |
| 524 | Line.drop_front(N: Pos + 1).split(A&: CPUFeatures, Separator: ' '); |
| 525 | break; |
| 526 | } |
| 527 | } |
| 528 | |
| 529 | // We need to check for the presence of vector support independently of |
| 530 | // the machine type, since we may only use the vector register set when |
| 531 | // supported by the kernel (and hypervisor). |
| 532 | bool HaveVectorSupport = llvm::is_contained(Range&: CPUFeatures, Element: "vx" ); |
| 533 | |
| 534 | // Now check the processor machine type. |
| 535 | for (StringRef Line : Lines) { |
| 536 | if (Line.starts_with(Prefix: "processor " )) { |
| 537 | size_t Pos = Line.find(Str: "machine = " ); |
| 538 | if (Pos != StringRef::npos) { |
| 539 | Pos += sizeof("machine = " ) - 1; |
| 540 | unsigned int Id; |
| 541 | if (!Line.drop_front(N: Pos).getAsInteger(Radix: 10, Result&: Id)) |
| 542 | return getCPUNameFromS390Model(Id, HaveVectorSupport); |
| 543 | } |
| 544 | break; |
| 545 | } |
| 546 | } |
| 547 | |
| 548 | return "generic" ; |
| 549 | } |
| 550 | |
| 551 | StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) { |
| 552 | // There are 24 lines in /proc/cpuinfo |
| 553 | SmallVector<StringRef> Lines; |
| 554 | ProcCpuinfoContent.split(A&: Lines, Separator: '\n'); |
| 555 | |
| 556 | // Look for uarch line to determine cpu name |
| 557 | StringRef UArch; |
| 558 | for (StringRef Line : Lines) { |
| 559 | if (Line.starts_with(Prefix: "uarch" )) { |
| 560 | UArch = Line.substr(Start: 5).ltrim(Chars: "\t :" ); |
| 561 | break; |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | return StringSwitch<const char *>(UArch) |
| 566 | .Case(S: "eswin,eic770x" , Value: "sifive-p550" ) |
| 567 | .Case(S: "sifive,u74-mc" , Value: "sifive-u74" ) |
| 568 | .Case(S: "sifive,bullet0" , Value: "sifive-u74" ) |
| 569 | .Default(Value: "" ); |
| 570 | } |
| 571 | |
| 572 | StringRef sys::detail::getHostCPUNameForBPF() { |
| 573 | #if !defined(__linux__) || !defined(__x86_64__) |
| 574 | return "generic" ; |
| 575 | #else |
| 576 | uint8_t v3_insns[40] __attribute__ ((aligned (8))) = |
| 577 | /* BPF_MOV64_IMM(BPF_REG_0, 0) */ |
| 578 | { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 579 | /* BPF_MOV64_IMM(BPF_REG_2, 1) */ |
| 580 | 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
| 581 | /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ |
| 582 | 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 583 | /* BPF_MOV64_IMM(BPF_REG_0, 1) */ |
| 584 | 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
| 585 | /* BPF_EXIT_INSN() */ |
| 586 | 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; |
| 587 | |
| 588 | uint8_t v2_insns[40] __attribute__ ((aligned (8))) = |
| 589 | /* BPF_MOV64_IMM(BPF_REG_0, 0) */ |
| 590 | { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 591 | /* BPF_MOV64_IMM(BPF_REG_2, 1) */ |
| 592 | 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
| 593 | /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ |
| 594 | 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, |
| 595 | /* BPF_MOV64_IMM(BPF_REG_0, 1) */ |
| 596 | 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
| 597 | /* BPF_EXIT_INSN() */ |
| 598 | 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; |
| 599 | |
| 600 | struct bpf_prog_load_attr { |
| 601 | uint32_t prog_type; |
| 602 | uint32_t insn_cnt; |
| 603 | uint64_t insns; |
| 604 | uint64_t license; |
| 605 | uint32_t log_level; |
| 606 | uint32_t log_size; |
| 607 | uint64_t log_buf; |
| 608 | uint32_t kern_version; |
| 609 | uint32_t prog_flags; |
| 610 | } attr = {}; |
| 611 | attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ |
| 612 | attr.insn_cnt = 5; |
| 613 | attr.insns = (uint64_t)v3_insns; |
| 614 | attr.license = (uint64_t)"DUMMY" ; |
| 615 | |
| 616 | int fd = syscall(sysno: 321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, |
| 617 | sizeof(attr)); |
| 618 | if (fd >= 0) { |
| 619 | close(fd: fd); |
| 620 | return "v3" ; |
| 621 | } |
| 622 | |
| 623 | /* Clear the whole attr in case its content changed by syscall. */ |
| 624 | memset(s: &attr, c: 0, n: sizeof(attr)); |
| 625 | attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ |
| 626 | attr.insn_cnt = 5; |
| 627 | attr.insns = (uint64_t)v2_insns; |
| 628 | attr.license = (uint64_t)"DUMMY" ; |
| 629 | fd = syscall(sysno: 321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); |
| 630 | if (fd >= 0) { |
| 631 | close(fd: fd); |
| 632 | return "v2" ; |
| 633 | } |
| 634 | return "v1" ; |
| 635 | #endif |
| 636 | } |
| 637 | |
| 638 | #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ |
| 639 | defined(_M_X64)) && \ |
| 640 | !defined(_M_ARM64EC) |
| 641 | |
/// Run the cpuid instruction for leaf \p value and store the resulting
/// registers through \p rEAX, \p rEBX, \p rECX and \p rEDX.
///
/// \returns false when the registers were filled in, true when cpuid could not
/// be run on this host (or, with the GCC/Clang helper, the helper rejected the
/// leaf).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#elif defined(__i386__) || defined(__x86_64__)
  // __get_cpuid yields zero on failure; this function reports failure as true.
  const int Succeeded = __get_cpuid(value, rEAX, rEBX, rECX, rEDX);
  return Succeeded == 0;
#else
  return true;
#endif
}
| 661 | |
| 662 | namespace llvm { |
| 663 | namespace sys { |
| 664 | namespace detail { |
| 665 | namespace x86 { |
| 666 | |
| 667 | VendorSignatures getVendorSignature(unsigned *MaxLeaf) { |
| 668 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; |
| 669 | if (MaxLeaf == nullptr) |
| 670 | MaxLeaf = &EAX; |
| 671 | else |
| 672 | *MaxLeaf = 0; |
| 673 | |
| 674 | if (getX86CpuIDAndInfo(value: 0, rEAX: MaxLeaf, rEBX: &EBX, rECX: &ECX, rEDX: &EDX) || *MaxLeaf < 1) |
| 675 | return VendorSignatures::UNKNOWN; |
| 676 | |
| 677 | // "Genu ineI ntel" |
| 678 | if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) |
| 679 | return VendorSignatures::GENUINE_INTEL; |
| 680 | |
| 681 | // "Auth enti cAMD" |
| 682 | if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) |
| 683 | return VendorSignatures::AUTHENTIC_AMD; |
| 684 | |
| 685 | return VendorSignatures::UNKNOWN; |
| 686 | } |
| 687 | |
| 688 | } // namespace x86 |
| 689 | } // namespace detail |
| 690 | } // namespace sys |
| 691 | } // namespace llvm |
| 692 | |
| 693 | using namespace llvm::sys::detail::x86; |
| 694 | |
/// Run the cpuid instruction for leaf \p value and sub-leaf \p subleaf and
/// store the resulting registers through \p rEAX, \p rEBX, \p rECX and
/// \p rEDX.
///
/// \returns false when the registers were filled in, true when cpuid could not
/// be run on this host (or, with the GCC/Clang helper, the helper rejected the
/// leaf).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
  // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
  // are such that __cpuidex is defined within cpuid.h for both, we can remove
  // the __get_cpuid_count function and share the MSVC implementation between
  // all three.
#if defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#elif defined(__i386__) || defined(__x86_64__)
  // __get_cpuid_count yields zero on failure; this function reports failure as
  // true.
  const int Succeeded =
      __get_cpuid_count(value, subleaf, rEAX, rEBX, rECX, rEDX);
  return Succeeded == 0;
#else
  return true;
#endif
}
| 719 | |
/// Read extended control register 0 (XCR0), which is used to detect features
/// such as AVX.
///
/// \param rEAX receives the low 32 bits of the register.
/// \param rEDX receives the high 32 bits of the register.
/// \returns false when the register was read, true when this build has no way
/// of reading it.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
  // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
  // are such that _xgetbv is supported by both, we can unify the implementation
  // with MSVC and remove all inline assembly.
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
| 740 | |
/// Decode the processor family and model from a cpuid EAX value.
///
/// The base family is bits 8-11 and the base model is bits 4-7. When the base
/// family is 0xF the extended family (bits 20-27) is added to it, and when the
/// base family is 6 or 0xF the extended model (bits 16-19) supplies the upper
/// four bits of the model.
///
/// \param EAX    the value to decode.
/// \param Family receives the decoded family.
/// \param Model  receives the decoded model.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel = BaseModel;

  // Examine extended family ID if family ID is F.
  if (BaseFamily == 0xf)
    DecodedFamily += ExtFamily;

  // Examine extended model ID if family ID is 6 or F.
  if (BaseFamily == 6 || BaseFamily == 0xf)
    DecodedModel += ExtModel << 4;

  *Family = DecodedFamily;
  *Model = DecodedModel;
}
| 753 | |
// Evaluates to true when bit F is set in the packed array of 32-bit words
// named `Features` that is in scope at the point of use. The argument and the
// whole expansion are parenthesised so that compound arguments and surrounding
// operators bind as expected, and the mask is unsigned so bit 31 is well
// behaved.
#define testFeature(F) ((Features[(F) / 32] & (1U << ((F) % 32))) != 0)
| 755 | |
| 756 | static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, |
| 757 | unsigned Model, |
| 758 | const unsigned *Features, |
| 759 | unsigned *Type, |
| 760 | unsigned *Subtype) { |
| 761 | StringRef CPU; |
| 762 | |
| 763 | switch (Family) { |
| 764 | case 0x3: |
| 765 | CPU = "i386" ; |
| 766 | break; |
| 767 | case 0x4: |
| 768 | CPU = "i486" ; |
| 769 | break; |
| 770 | case 0x5: |
| 771 | if (testFeature(X86::FEATURE_MMX)) { |
| 772 | CPU = "pentium-mmx" ; |
| 773 | break; |
| 774 | } |
| 775 | CPU = "pentium" ; |
| 776 | break; |
| 777 | case 0x6: |
| 778 | switch (Model) { |
| 779 | case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile |
| 780 | // processor, Intel Core 2 Quad processor, Intel Core 2 Quad |
| 781 | // mobile processor, Intel Core 2 Extreme processor, Intel |
| 782 | // Pentium Dual-Core processor, Intel Xeon processor, model |
| 783 | // 0Fh. All processors are manufactured using the 65 nm process. |
| 784 | case 0x16: // Intel Celeron processor model 16h. All processors are |
| 785 | // manufactured using the 65 nm process |
| 786 | CPU = "core2" ; |
| 787 | *Type = X86::INTEL_CORE2; |
| 788 | break; |
| 789 | case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model |
| 790 | // 17h. All processors are manufactured using the 45 nm process. |
| 791 | // |
| 792 | // 45nm: Penryn , Wolfdale, Yorkfield (XE) |
| 793 | case 0x1d: // Intel Xeon processor MP. All processors are manufactured using |
| 794 | // the 45 nm process. |
| 795 | CPU = "penryn" ; |
| 796 | *Type = X86::INTEL_CORE2; |
| 797 | break; |
| 798 | case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All |
| 799 | // processors are manufactured using the 45 nm process. |
| 800 | case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. |
| 801 | // As found in a Summer 2010 model iMac. |
| 802 | case 0x1f: |
| 803 | case 0x2e: // Nehalem EX |
| 804 | CPU = "nehalem" ; |
| 805 | *Type = X86::INTEL_COREI7; |
| 806 | *Subtype = X86::INTEL_COREI7_NEHALEM; |
| 807 | break; |
| 808 | case 0x25: // Intel Core i7, laptop version. |
| 809 | case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All |
| 810 | // processors are manufactured using the 32 nm process. |
| 811 | case 0x2f: // Westmere EX |
| 812 | CPU = "westmere" ; |
| 813 | *Type = X86::INTEL_COREI7; |
| 814 | *Subtype = X86::INTEL_COREI7_WESTMERE; |
| 815 | break; |
| 816 | case 0x2a: // Intel Core i7 processor. All processors are manufactured |
| 817 | // using the 32 nm process. |
| 818 | case 0x2d: |
| 819 | CPU = "sandybridge" ; |
| 820 | *Type = X86::INTEL_COREI7; |
| 821 | *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; |
| 822 | break; |
| 823 | case 0x3a: |
| 824 | case 0x3e: // Ivy Bridge EP |
| 825 | CPU = "ivybridge" ; |
| 826 | *Type = X86::INTEL_COREI7; |
| 827 | *Subtype = X86::INTEL_COREI7_IVYBRIDGE; |
| 828 | break; |
| 829 | |
| 830 | // Haswell: |
| 831 | case 0x3c: |
| 832 | case 0x3f: |
| 833 | case 0x45: |
| 834 | case 0x46: |
| 835 | CPU = "haswell" ; |
| 836 | *Type = X86::INTEL_COREI7; |
| 837 | *Subtype = X86::INTEL_COREI7_HASWELL; |
| 838 | break; |
| 839 | |
| 840 | // Broadwell: |
| 841 | case 0x3d: |
| 842 | case 0x47: |
| 843 | case 0x4f: |
| 844 | case 0x56: |
| 845 | CPU = "broadwell" ; |
| 846 | *Type = X86::INTEL_COREI7; |
| 847 | *Subtype = X86::INTEL_COREI7_BROADWELL; |
| 848 | break; |
| 849 | |
| 850 | // Skylake: |
| 851 | case 0x4e: // Skylake mobile |
| 852 | case 0x5e: // Skylake desktop |
| 853 | case 0x8e: // Kaby Lake mobile |
| 854 | case 0x9e: // Kaby Lake desktop |
| 855 | case 0xa5: // Comet Lake-H/S |
| 856 | case 0xa6: // Comet Lake-U |
| 857 | CPU = "skylake" ; |
| 858 | *Type = X86::INTEL_COREI7; |
| 859 | *Subtype = X86::INTEL_COREI7_SKYLAKE; |
| 860 | break; |
| 861 | |
| 862 | // Rocketlake: |
| 863 | case 0xa7: |
| 864 | CPU = "rocketlake" ; |
| 865 | *Type = X86::INTEL_COREI7; |
| 866 | *Subtype = X86::INTEL_COREI7_ROCKETLAKE; |
| 867 | break; |
| 868 | |
| 869 | // Skylake Xeon: |
| 870 | case 0x55: |
| 871 | *Type = X86::INTEL_COREI7; |
| 872 | if (testFeature(X86::FEATURE_AVX512BF16)) { |
| 873 | CPU = "cooperlake" ; |
| 874 | *Subtype = X86::INTEL_COREI7_COOPERLAKE; |
| 875 | } else if (testFeature(X86::FEATURE_AVX512VNNI)) { |
| 876 | CPU = "cascadelake" ; |
| 877 | *Subtype = X86::INTEL_COREI7_CASCADELAKE; |
| 878 | } else { |
| 879 | CPU = "skylake-avx512" ; |
| 880 | *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; |
| 881 | } |
| 882 | break; |
| 883 | |
| 884 | // Cannonlake: |
| 885 | case 0x66: |
| 886 | CPU = "cannonlake" ; |
| 887 | *Type = X86::INTEL_COREI7; |
| 888 | *Subtype = X86::INTEL_COREI7_CANNONLAKE; |
| 889 | break; |
| 890 | |
| 891 | // Icelake: |
| 892 | case 0x7d: |
| 893 | case 0x7e: |
| 894 | CPU = "icelake-client" ; |
| 895 | *Type = X86::INTEL_COREI7; |
| 896 | *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; |
| 897 | break; |
| 898 | |
| 899 | // Tigerlake: |
| 900 | case 0x8c: |
| 901 | case 0x8d: |
| 902 | CPU = "tigerlake" ; |
| 903 | *Type = X86::INTEL_COREI7; |
| 904 | *Subtype = X86::INTEL_COREI7_TIGERLAKE; |
| 905 | break; |
| 906 | |
| 907 | // Alderlake: |
| 908 | case 0x97: |
| 909 | case 0x9a: |
| 910 | CPU = "alderlake" ; |
| 911 | *Type = X86::INTEL_COREI7; |
| 912 | *Subtype = X86::INTEL_COREI7_ALDERLAKE; |
| 913 | break; |
| 914 | |
| 915 | // Gracemont |
| 916 | case 0xbe: |
| 917 | CPU = "gracemont" ; |
| 918 | *Type = X86::INTEL_COREI7; |
| 919 | *Subtype = X86::INTEL_COREI7_ALDERLAKE; |
| 920 | break; |
| 921 | |
| 922 | // Raptorlake: |
| 923 | case 0xb7: |
| 924 | case 0xba: |
| 925 | case 0xbf: |
| 926 | CPU = "raptorlake" ; |
| 927 | *Type = X86::INTEL_COREI7; |
| 928 | *Subtype = X86::INTEL_COREI7_ALDERLAKE; |
| 929 | break; |
| 930 | |
| 931 | // Meteorlake: |
| 932 | case 0xaa: |
| 933 | case 0xac: |
| 934 | CPU = "meteorlake" ; |
| 935 | *Type = X86::INTEL_COREI7; |
| 936 | *Subtype = X86::INTEL_COREI7_ALDERLAKE; |
| 937 | break; |
| 938 | |
| 939 | // Arrowlake: |
| 940 | case 0xc5: |
| 941 | // Arrowlake U: |
| 942 | case 0xb5: |
| 943 | CPU = "arrowlake" ; |
| 944 | *Type = X86::INTEL_COREI7; |
| 945 | *Subtype = X86::INTEL_COREI7_ARROWLAKE; |
| 946 | break; |
| 947 | |
| 948 | // Arrowlake S: |
| 949 | case 0xc6: |
| 950 | CPU = "arrowlake-s" ; |
| 951 | *Type = X86::INTEL_COREI7; |
| 952 | *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; |
| 953 | break; |
| 954 | |
| 955 | // Lunarlake: |
| 956 | case 0xbd: |
| 957 | CPU = "lunarlake" ; |
| 958 | *Type = X86::INTEL_COREI7; |
| 959 | *Subtype = X86::INTEL_COREI7_ARROWLAKE_S; |
| 960 | break; |
| 961 | |
| 962 | // Pantherlake: |
| 963 | case 0xcc: |
| 964 | CPU = "pantherlake" ; |
| 965 | *Type = X86::INTEL_COREI7; |
| 966 | *Subtype = X86::INTEL_COREI7_PANTHERLAKE; |
| 967 | break; |
| 968 | |
| 969 | // Wildcatlake: |
| 970 | case 0xd5: |
| 971 | CPU = "wildcatlake" ; |
| 972 | *Type = X86::INTEL_COREI7; |
| 973 | *Subtype = X86::INTEL_COREI7_PANTHERLAKE; |
| 974 | break; |
| 975 | |
| 976 | // Graniterapids: |
| 977 | case 0xad: |
| 978 | CPU = "graniterapids" ; |
| 979 | *Type = X86::INTEL_COREI7; |
| 980 | *Subtype = X86::INTEL_COREI7_GRANITERAPIDS; |
| 981 | break; |
| 982 | |
| 983 | // Granite Rapids D: |
| 984 | case 0xae: |
| 985 | CPU = "graniterapids-d" ; |
| 986 | *Type = X86::INTEL_COREI7; |
| 987 | *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D; |
| 988 | break; |
| 989 | |
| 990 | // Icelake Xeon: |
| 991 | case 0x6a: |
| 992 | case 0x6c: |
| 993 | CPU = "icelake-server" ; |
| 994 | *Type = X86::INTEL_COREI7; |
| 995 | *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; |
| 996 | break; |
| 997 | |
| 998 | // Emerald Rapids: |
| 999 | case 0xcf: |
| 1000 | CPU = "emeraldrapids" ; |
| 1001 | *Type = X86::INTEL_COREI7; |
| 1002 | *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; |
| 1003 | break; |
| 1004 | |
| 1005 | // Sapphire Rapids: |
| 1006 | case 0x8f: |
| 1007 | CPU = "sapphirerapids" ; |
| 1008 | *Type = X86::INTEL_COREI7; |
| 1009 | *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; |
| 1010 | break; |
| 1011 | |
| 1012 | case 0x1c: // Most 45 nm Intel Atom processors |
| 1013 | case 0x26: // 45 nm Atom Lincroft |
| 1014 | case 0x27: // 32 nm Atom Medfield |
| 1015 | case 0x35: // 32 nm Atom Midview |
| 1016 | case 0x36: // 32 nm Atom Midview |
| 1017 | CPU = "bonnell" ; |
| 1018 | *Type = X86::INTEL_BONNELL; |
| 1019 | break; |
| 1020 | |
| 1021 | // Atom Silvermont codes from the Intel software optimization guide. |
| 1022 | case 0x37: |
| 1023 | case 0x4a: |
| 1024 | case 0x4d: |
| 1025 | case 0x5a: |
| 1026 | case 0x5d: |
| 1027 | case 0x4c: // really airmont |
| 1028 | CPU = "silvermont" ; |
| 1029 | *Type = X86::INTEL_SILVERMONT; |
| 1030 | break; |
| 1031 | // Goldmont: |
| 1032 | case 0x5c: // Apollo Lake |
| 1033 | case 0x5f: // Denverton |
| 1034 | CPU = "goldmont" ; |
| 1035 | *Type = X86::INTEL_GOLDMONT; |
| 1036 | break; |
| 1037 | case 0x7a: |
| 1038 | CPU = "goldmont-plus" ; |
| 1039 | *Type = X86::INTEL_GOLDMONT_PLUS; |
| 1040 | break; |
| 1041 | case 0x86: |
| 1042 | case 0x8a: // Lakefield |
| 1043 | case 0x96: // Elkhart Lake |
| 1044 | case 0x9c: // Jasper Lake |
| 1045 | CPU = "tremont" ; |
| 1046 | *Type = X86::INTEL_TREMONT; |
| 1047 | break; |
| 1048 | |
| 1049 | // Sierraforest: |
| 1050 | case 0xaf: |
| 1051 | CPU = "sierraforest" ; |
| 1052 | *Type = X86::INTEL_SIERRAFOREST; |
| 1053 | break; |
| 1054 | |
| 1055 | // Grandridge: |
| 1056 | case 0xb6: |
| 1057 | CPU = "grandridge" ; |
| 1058 | *Type = X86::INTEL_GRANDRIDGE; |
| 1059 | break; |
| 1060 | |
| 1061 | // Clearwaterforest: |
| 1062 | case 0xdd: |
| 1063 | CPU = "clearwaterforest" ; |
| 1064 | *Type = X86::INTEL_CLEARWATERFOREST; |
| 1065 | break; |
| 1066 | |
| 1067 | // Xeon Phi (Knights Landing + Knights Mill): |
| 1068 | case 0x57: |
| 1069 | CPU = "knl" ; |
| 1070 | *Type = X86::INTEL_KNL; |
| 1071 | break; |
| 1072 | case 0x85: |
| 1073 | CPU = "knm" ; |
| 1074 | *Type = X86::INTEL_KNM; |
| 1075 | break; |
| 1076 | |
| 1077 | default: // Unknown family 6 CPU, try to guess. |
| 1078 | // Don't both with Type/Subtype here, they aren't used by the caller. |
| 1079 | // They're used above to keep the code in sync with compiler-rt. |
| 1080 | // TODO detect tigerlake host from model |
| 1081 | if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { |
| 1082 | CPU = "tigerlake" ; |
| 1083 | } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { |
| 1084 | CPU = "icelake-client" ; |
| 1085 | } else if (testFeature(X86::FEATURE_AVX512VBMI)) { |
| 1086 | CPU = "cannonlake" ; |
| 1087 | } else if (testFeature(X86::FEATURE_AVX512BF16)) { |
| 1088 | CPU = "cooperlake" ; |
| 1089 | } else if (testFeature(X86::FEATURE_AVX512VNNI)) { |
| 1090 | CPU = "cascadelake" ; |
| 1091 | } else if (testFeature(X86::FEATURE_AVX512VL)) { |
| 1092 | CPU = "skylake-avx512" ; |
| 1093 | } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { |
| 1094 | if (testFeature(X86::FEATURE_SHA)) |
| 1095 | CPU = "goldmont" ; |
| 1096 | else |
| 1097 | CPU = "skylake" ; |
| 1098 | } else if (testFeature(X86::FEATURE_ADX)) { |
| 1099 | CPU = "broadwell" ; |
| 1100 | } else if (testFeature(X86::FEATURE_AVX2)) { |
| 1101 | CPU = "haswell" ; |
| 1102 | } else if (testFeature(X86::FEATURE_AVX)) { |
| 1103 | CPU = "sandybridge" ; |
| 1104 | } else if (testFeature(X86::FEATURE_SSE4_2)) { |
| 1105 | if (testFeature(X86::FEATURE_MOVBE)) |
| 1106 | CPU = "silvermont" ; |
| 1107 | else |
| 1108 | CPU = "nehalem" ; |
| 1109 | } else if (testFeature(X86::FEATURE_SSE4_1)) { |
| 1110 | CPU = "penryn" ; |
| 1111 | } else if (testFeature(X86::FEATURE_SSSE3)) { |
| 1112 | if (testFeature(X86::FEATURE_MOVBE)) |
| 1113 | CPU = "bonnell" ; |
| 1114 | else |
| 1115 | CPU = "core2" ; |
| 1116 | } else if (testFeature(X86::FEATURE_64BIT)) { |
| 1117 | CPU = "core2" ; |
| 1118 | } else if (testFeature(X86::FEATURE_SSE3)) { |
| 1119 | CPU = "yonah" ; |
| 1120 | } else if (testFeature(X86::FEATURE_SSE2)) { |
| 1121 | CPU = "pentium-m" ; |
| 1122 | } else if (testFeature(X86::FEATURE_SSE)) { |
| 1123 | CPU = "pentium3" ; |
| 1124 | } else if (testFeature(X86::FEATURE_MMX)) { |
| 1125 | CPU = "pentium2" ; |
| 1126 | } else { |
| 1127 | CPU = "pentiumpro" ; |
| 1128 | } |
| 1129 | break; |
| 1130 | } |
| 1131 | break; |
| 1132 | case 0xf: { |
| 1133 | if (testFeature(X86::FEATURE_64BIT)) { |
| 1134 | CPU = "nocona" ; |
| 1135 | break; |
| 1136 | } |
| 1137 | if (testFeature(X86::FEATURE_SSE3)) { |
| 1138 | CPU = "prescott" ; |
| 1139 | break; |
| 1140 | } |
| 1141 | CPU = "pentium4" ; |
| 1142 | break; |
| 1143 | } |
| 1144 | case 0x13: |
| 1145 | switch (Model) { |
| 1146 | // Diamond Rapids: |
| 1147 | case 0x01: |
| 1148 | CPU = "diamondrapids" ; |
| 1149 | *Type = X86::INTEL_COREI7; |
| 1150 | *Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS; |
| 1151 | break; |
| 1152 | |
| 1153 | default: // Unknown family 19 CPU. |
| 1154 | break; |
| 1155 | } |
| 1156 | break; |
| 1157 | case 0x12: |
| 1158 | switch (Model) { |
| 1159 | // Novalake: |
| 1160 | case 0x1: |
| 1161 | case 0x3: |
| 1162 | CPU = "novalake" ; |
| 1163 | *Type = X86::INTEL_COREI7; |
| 1164 | *Subtype = X86::INTEL_COREI7_NOVALAKE; |
| 1165 | break; |
| 1166 | default: // Unknown family 0x12 CPU. |
| 1167 | break; |
| 1168 | } |
| 1169 | break; |
| 1170 | |
| 1171 | default: |
| 1172 | break; // Unknown. |
| 1173 | } |
| 1174 | |
| 1175 | return CPU; |
| 1176 | } |
| 1177 | |
| 1178 | static const char *getAMDProcessorTypeAndSubtype(unsigned Family, |
| 1179 | unsigned Model, |
| 1180 | const unsigned *Features, |
| 1181 | unsigned *Type, |
| 1182 | unsigned *Subtype) { |
| 1183 | const char *CPU = nullptr; |
| 1184 | |
| 1185 | switch (Family) { |
| 1186 | case 4: |
| 1187 | CPU = "i486" ; |
| 1188 | break; |
| 1189 | case 5: |
| 1190 | CPU = "pentium" ; |
| 1191 | switch (Model) { |
| 1192 | case 6: |
| 1193 | case 7: |
| 1194 | CPU = "k6" ; |
| 1195 | break; |
| 1196 | case 8: |
| 1197 | CPU = "k6-2" ; |
| 1198 | break; |
| 1199 | case 9: |
| 1200 | case 13: |
| 1201 | CPU = "k6-3" ; |
| 1202 | break; |
| 1203 | case 10: |
| 1204 | CPU = "geode" ; |
| 1205 | break; |
| 1206 | } |
| 1207 | break; |
| 1208 | case 6: |
| 1209 | if (testFeature(X86::FEATURE_SSE)) { |
| 1210 | CPU = "athlon-xp" ; |
| 1211 | break; |
| 1212 | } |
| 1213 | CPU = "athlon" ; |
| 1214 | break; |
| 1215 | case 15: |
| 1216 | if (testFeature(X86::FEATURE_SSE3)) { |
| 1217 | CPU = "k8-sse3" ; |
| 1218 | break; |
| 1219 | } |
| 1220 | CPU = "k8" ; |
| 1221 | break; |
| 1222 | case 16: |
| 1223 | case 18: |
| 1224 | CPU = "amdfam10" ; |
| 1225 | *Type = X86::AMDFAM10H; // "amdfam10" |
| 1226 | switch (Model) { |
| 1227 | case 2: |
| 1228 | *Subtype = X86::AMDFAM10H_BARCELONA; |
| 1229 | break; |
| 1230 | case 4: |
| 1231 | *Subtype = X86::AMDFAM10H_SHANGHAI; |
| 1232 | break; |
| 1233 | case 8: |
| 1234 | *Subtype = X86::AMDFAM10H_ISTANBUL; |
| 1235 | break; |
| 1236 | } |
| 1237 | break; |
| 1238 | case 20: |
| 1239 | CPU = "btver1" ; |
| 1240 | *Type = X86::AMD_BTVER1; |
| 1241 | break; |
| 1242 | case 21: |
| 1243 | CPU = "bdver1" ; |
| 1244 | *Type = X86::AMDFAM15H; |
| 1245 | if (Model >= 0x60 && Model <= 0x7f) { |
| 1246 | CPU = "bdver4" ; |
| 1247 | *Subtype = X86::AMDFAM15H_BDVER4; |
| 1248 | break; // 60h-7Fh: Excavator |
| 1249 | } |
| 1250 | if (Model >= 0x30 && Model <= 0x3f) { |
| 1251 | CPU = "bdver3" ; |
| 1252 | *Subtype = X86::AMDFAM15H_BDVER3; |
| 1253 | break; // 30h-3Fh: Steamroller |
| 1254 | } |
| 1255 | if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { |
| 1256 | CPU = "bdver2" ; |
| 1257 | *Subtype = X86::AMDFAM15H_BDVER2; |
| 1258 | break; // 02h, 10h-1Fh: Piledriver |
| 1259 | } |
| 1260 | if (Model <= 0x0f) { |
| 1261 | *Subtype = X86::AMDFAM15H_BDVER1; |
| 1262 | break; // 00h-0Fh: Bulldozer |
| 1263 | } |
| 1264 | break; |
| 1265 | case 22: |
| 1266 | CPU = "btver2" ; |
| 1267 | *Type = X86::AMD_BTVER2; |
| 1268 | break; |
| 1269 | case 23: |
| 1270 | CPU = "znver1" ; |
| 1271 | *Type = X86::AMDFAM17H; |
| 1272 | if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) || |
| 1273 | (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) || |
| 1274 | (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) || |
| 1275 | (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) || |
| 1276 | (Model >= 0xa0 && Model <= 0xaf)) { |
| 1277 | // Family 17h Models 30h-3Fh (Starship) Zen 2 |
| 1278 | // Family 17h Models 47h (Cardinal) Zen 2 |
| 1279 | // Family 17h Models 60h-67h (Renoir) Zen 2 |
| 1280 | // Family 17h Models 68h-6Fh (Lucienne) Zen 2 |
| 1281 | // Family 17h Models 70h-7Fh (Matisse) Zen 2 |
| 1282 | // Family 17h Models 84h-87h (ProjectX) Zen 2 |
| 1283 | // Family 17h Models 90h-97h (VanGogh) Zen 2 |
| 1284 | // Family 17h Models 98h-9Fh (Mero) Zen 2 |
| 1285 | // Family 17h Models A0h-AFh (Mendocino) Zen 2 |
| 1286 | CPU = "znver2" ; |
| 1287 | *Subtype = X86::AMDFAM17H_ZNVER2; |
| 1288 | break; |
| 1289 | } |
| 1290 | if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) { |
| 1291 | // Family 17h Models 10h-1Fh (Raven1) Zen |
| 1292 | // Family 17h Models 10h-1Fh (Picasso) Zen+ |
| 1293 | // Family 17h Models 20h-2Fh (Raven2 x86) Zen |
| 1294 | *Subtype = X86::AMDFAM17H_ZNVER1; |
| 1295 | break; |
| 1296 | } |
| 1297 | break; |
| 1298 | case 25: |
| 1299 | CPU = "znver3" ; |
| 1300 | *Type = X86::AMDFAM19H; |
| 1301 | if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || |
| 1302 | (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || |
| 1303 | (Model >= 0x50 && Model <= 0x5f)) { |
| 1304 | // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 |
| 1305 | // Family 19h Models 20h-2Fh (Vermeer) Zen 3 |
| 1306 | // Family 19h Models 30h-3Fh (Badami) Zen 3 |
| 1307 | // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+ |
| 1308 | // Family 19h Models 50h-5Fh (Cezanne) Zen 3 |
| 1309 | *Subtype = X86::AMDFAM19H_ZNVER3; |
| 1310 | break; |
| 1311 | } |
| 1312 | if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) || |
| 1313 | (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) || |
| 1314 | (Model >= 0xa0 && Model <= 0xaf)) { |
| 1315 | // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4 |
| 1316 | // Family 19h Models 60h-6Fh (Raphael) Zen 4 |
| 1317 | // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4 |
| 1318 | // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4 |
| 1319 | // Family 19h Models A0h-AFh (Stones-Dense) Zen 4 |
| 1320 | CPU = "znver4" ; |
| 1321 | *Subtype = X86::AMDFAM19H_ZNVER4; |
| 1322 | break; // "znver4" |
| 1323 | } |
| 1324 | break; // family 19h |
| 1325 | case 26: |
| 1326 | CPU = "znver5" ; |
| 1327 | *Type = X86::AMDFAM1AH; |
| 1328 | if (Model <= 0x4f || (Model >= 0x60 && Model <= 0x77) || |
| 1329 | (Model >= 0xd0 && Model <= 0xd7)) { |
| 1330 | // Models 00h-0Fh (Breithorn). |
| 1331 | // Models 10h-1Fh (Breithorn-Dense). |
| 1332 | // Models 20h-2Fh (Strix 1). |
| 1333 | // Models 30h-37h (Strix 2). |
| 1334 | // Models 38h-3Fh (Strix 3). |
| 1335 | // Models 40h-4Fh (Granite Ridge). |
| 1336 | // Models 60h-6Fh (Krackan1). |
| 1337 | // Models 70h-77h (Sarlak). |
| 1338 | // Models D0h-D7h (Annapurna). |
| 1339 | CPU = "znver5" ; |
| 1340 | *Subtype = X86::AMDFAM1AH_ZNVER5; |
| 1341 | break; // "znver5" |
| 1342 | } |
| 1343 | break; |
| 1344 | |
| 1345 | default: |
| 1346 | break; // Unknown AMD CPU. |
| 1347 | } |
| 1348 | |
| 1349 | return CPU; |
| 1350 | } |
| 1351 | |
| 1352 | #undef testFeature |
| 1353 | |
| 1354 | static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, |
| 1355 | unsigned *Features) { |
| 1356 | unsigned EAX, EBX; |
| 1357 | |
| 1358 | auto setFeature = [&](unsigned F) { |
| 1359 | Features[F / 32] |= 1U << (F % 32); |
| 1360 | }; |
| 1361 | |
| 1362 | if ((EDX >> 15) & 1) |
| 1363 | setFeature(X86::FEATURE_CMOV); |
| 1364 | if ((EDX >> 23) & 1) |
| 1365 | setFeature(X86::FEATURE_MMX); |
| 1366 | if ((EDX >> 25) & 1) |
| 1367 | setFeature(X86::FEATURE_SSE); |
| 1368 | if ((EDX >> 26) & 1) |
| 1369 | setFeature(X86::FEATURE_SSE2); |
| 1370 | |
| 1371 | if ((ECX >> 0) & 1) |
| 1372 | setFeature(X86::FEATURE_SSE3); |
| 1373 | if ((ECX >> 1) & 1) |
| 1374 | setFeature(X86::FEATURE_PCLMUL); |
| 1375 | if ((ECX >> 9) & 1) |
| 1376 | setFeature(X86::FEATURE_SSSE3); |
| 1377 | if ((ECX >> 12) & 1) |
| 1378 | setFeature(X86::FEATURE_FMA); |
| 1379 | if ((ECX >> 19) & 1) |
| 1380 | setFeature(X86::FEATURE_SSE4_1); |
| 1381 | if ((ECX >> 20) & 1) { |
| 1382 | setFeature(X86::FEATURE_SSE4_2); |
| 1383 | setFeature(X86::FEATURE_CRC32); |
| 1384 | } |
| 1385 | if ((ECX >> 23) & 1) |
| 1386 | setFeature(X86::FEATURE_POPCNT); |
| 1387 | if ((ECX >> 25) & 1) |
| 1388 | setFeature(X86::FEATURE_AES); |
| 1389 | |
| 1390 | if ((ECX >> 22) & 1) |
| 1391 | setFeature(X86::FEATURE_MOVBE); |
| 1392 | |
| 1393 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV |
| 1394 | // indicates that the AVX registers will be saved and restored on context |
| 1395 | // switch, then we have full AVX support. |
| 1396 | const unsigned AVXBits = (1 << 27) | (1 << 28); |
| 1397 | bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(rEAX: &EAX, rEDX: &EDX) && |
| 1398 | ((EAX & 0x6) == 0x6); |
| 1399 | #if defined(__APPLE__) |
| 1400 | // Darwin lazily saves the AVX512 context on first use: trust that the OS will |
| 1401 | // save the AVX512 context if we use AVX512 instructions, even the bit is not |
| 1402 | // set right now. |
| 1403 | bool HasAVX512Save = true; |
| 1404 | #else |
| 1405 | // AVX512 requires additional context to be saved by the OS. |
| 1406 | bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); |
| 1407 | #endif |
| 1408 | |
| 1409 | if (HasAVX) |
| 1410 | setFeature(X86::FEATURE_AVX); |
| 1411 | |
| 1412 | bool HasLeaf7 = |
| 1413 | MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(value: 0x7, subleaf: 0x0, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 1414 | |
| 1415 | if (HasLeaf7 && ((EBX >> 3) & 1)) |
| 1416 | setFeature(X86::FEATURE_BMI); |
| 1417 | if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) |
| 1418 | setFeature(X86::FEATURE_AVX2); |
| 1419 | if (HasLeaf7 && ((EBX >> 8) & 1)) |
| 1420 | setFeature(X86::FEATURE_BMI2); |
| 1421 | if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) { |
| 1422 | setFeature(X86::FEATURE_AVX512F); |
| 1423 | } |
| 1424 | if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) |
| 1425 | setFeature(X86::FEATURE_AVX512DQ); |
| 1426 | if (HasLeaf7 && ((EBX >> 19) & 1)) |
| 1427 | setFeature(X86::FEATURE_ADX); |
| 1428 | if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) |
| 1429 | setFeature(X86::FEATURE_AVX512IFMA); |
| 1430 | if (HasLeaf7 && ((EBX >> 23) & 1)) |
| 1431 | setFeature(X86::FEATURE_CLFLUSHOPT); |
| 1432 | if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) |
| 1433 | setFeature(X86::FEATURE_AVX512CD); |
| 1434 | if (HasLeaf7 && ((EBX >> 29) & 1)) |
| 1435 | setFeature(X86::FEATURE_SHA); |
| 1436 | if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) |
| 1437 | setFeature(X86::FEATURE_AVX512BW); |
| 1438 | if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) |
| 1439 | setFeature(X86::FEATURE_AVX512VL); |
| 1440 | |
| 1441 | if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) |
| 1442 | setFeature(X86::FEATURE_AVX512VBMI); |
| 1443 | if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) |
| 1444 | setFeature(X86::FEATURE_AVX512VBMI2); |
| 1445 | if (HasLeaf7 && ((ECX >> 8) & 1)) |
| 1446 | setFeature(X86::FEATURE_GFNI); |
| 1447 | if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) |
| 1448 | setFeature(X86::FEATURE_VPCLMULQDQ); |
| 1449 | if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) |
| 1450 | setFeature(X86::FEATURE_AVX512VNNI); |
| 1451 | if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) |
| 1452 | setFeature(X86::FEATURE_AVX512BITALG); |
| 1453 | if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) |
| 1454 | setFeature(X86::FEATURE_AVX512VPOPCNTDQ); |
| 1455 | |
| 1456 | if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) |
| 1457 | setFeature(X86::FEATURE_AVX5124VNNIW); |
| 1458 | if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) |
| 1459 | setFeature(X86::FEATURE_AVX5124FMAPS); |
| 1460 | if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) |
| 1461 | setFeature(X86::FEATURE_AVX512VP2INTERSECT); |
| 1462 | |
| 1463 | // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't |
| 1464 | // return all 0s for invalid subleaves so check the limit. |
| 1465 | bool HasLeaf7Subleaf1 = |
| 1466 | HasLeaf7 && EAX >= 1 && |
| 1467 | !getX86CpuIDAndInfoEx(value: 0x7, subleaf: 0x1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 1468 | if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) |
| 1469 | setFeature(X86::FEATURE_AVX512BF16); |
| 1470 | |
| 1471 | unsigned MaxExtLevel; |
| 1472 | getX86CpuIDAndInfo(value: 0x80000000, rEAX: &MaxExtLevel, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 1473 | |
| 1474 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && |
| 1475 | !getX86CpuIDAndInfo(value: 0x80000001, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 1476 | if (HasExtLeaf1 && ((ECX >> 6) & 1)) |
| 1477 | setFeature(X86::FEATURE_SSE4_A); |
| 1478 | if (HasExtLeaf1 && ((ECX >> 11) & 1)) |
| 1479 | setFeature(X86::FEATURE_XOP); |
| 1480 | if (HasExtLeaf1 && ((ECX >> 16) & 1)) |
| 1481 | setFeature(X86::FEATURE_FMA4); |
| 1482 | |
| 1483 | if (HasExtLeaf1 && ((EDX >> 29) & 1)) |
| 1484 | setFeature(X86::FEATURE_64BIT); |
| 1485 | } |
| 1486 | |
| 1487 | StringRef sys::getHostCPUName() { |
| 1488 | unsigned MaxLeaf = 0; |
| 1489 | const VendorSignatures Vendor = getVendorSignature(MaxLeaf: &MaxLeaf); |
| 1490 | if (Vendor == VendorSignatures::UNKNOWN) |
| 1491 | return "generic" ; |
| 1492 | |
| 1493 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; |
| 1494 | getX86CpuIDAndInfo(value: 0x1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 1495 | |
| 1496 | unsigned Family = 0, Model = 0; |
| 1497 | unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; |
| 1498 | detectX86FamilyModel(EAX, Family: &Family, Model: &Model); |
| 1499 | getAvailableFeatures(ECX, EDX, MaxLeaf, Features); |
| 1500 | |
| 1501 | // These aren't consumed in this file, but we try to keep some source code the |
| 1502 | // same or similar to compiler-rt. |
| 1503 | unsigned Type = 0; |
| 1504 | unsigned Subtype = 0; |
| 1505 | |
| 1506 | StringRef CPU; |
| 1507 | |
| 1508 | if (Vendor == VendorSignatures::GENUINE_INTEL) { |
| 1509 | CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, Type: &Type, |
| 1510 | Subtype: &Subtype); |
| 1511 | } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { |
| 1512 | CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, Type: &Type, |
| 1513 | Subtype: &Subtype); |
| 1514 | } |
| 1515 | |
| 1516 | if (!CPU.empty()) |
| 1517 | return CPU; |
| 1518 | |
| 1519 | return "generic" ; |
| 1520 | } |
| 1521 | |
| 1522 | #elif defined(_M_ARM64) || defined(_M_ARM64EC) |
| 1523 | |
| 1524 | StringRef sys::getHostCPUName() { |
| 1525 | constexpr char CentralProcessorKeyName[] = |
| 1526 | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor" ; |
| 1527 | // Sub keys names are simple numbers ("0", "1", etc.) so 10 chars should be |
| 1528 | // enough for the slash and name. |
| 1529 | constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10; |
| 1530 | |
| 1531 | SmallVector<uint64_t> Values; |
| 1532 | uint64_t PrimaryCpuInfo; |
| 1533 | char PrimaryPartKeyName[SubKeyNameMaxSize]; |
| 1534 | DWORD PrimaryPartKeyNameSize = 0; |
| 1535 | HKEY CentralProcessorKey; |
| 1536 | if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ, |
| 1537 | &CentralProcessorKey) == ERROR_SUCCESS) { |
| 1538 | for (unsigned Index = 0; Index < UINT32_MAX; ++Index) { |
| 1539 | char SubKeyName[SubKeyNameMaxSize]; |
| 1540 | DWORD SubKeySize = SubKeyNameMaxSize; |
| 1541 | HKEY SubKey; |
| 1542 | if ((RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize, |
| 1543 | nullptr, nullptr, nullptr, |
| 1544 | nullptr) == ERROR_SUCCESS) && |
| 1545 | (RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ, |
| 1546 | &SubKey) == ERROR_SUCCESS)) { |
| 1547 | // The "CP 4000" registry key contains a cached copy of the MIDR_EL1 |
| 1548 | // register. |
| 1549 | uint64_t RegValue; |
| 1550 | DWORD ActualType; |
| 1551 | DWORD RegValueSize = sizeof(RegValue); |
| 1552 | if ((RegQueryValueExA(SubKey, "CP 4000" , nullptr, &ActualType, |
| 1553 | (PBYTE)&RegValue, |
| 1554 | &RegValueSize) == ERROR_SUCCESS) && |
| 1555 | (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) { |
| 1556 | // Assume that the part with the "highest" reg key name is the primary |
| 1557 | // part (to match the way that Linux's cpuinfo is written). Win32 |
| 1558 | // makes no guarantees about the order of sub keys, so we have to |
| 1559 | // compare the names. |
| 1560 | if (PrimaryPartKeyNameSize < SubKeySize || |
| 1561 | (PrimaryPartKeyNameSize == SubKeySize && |
| 1562 | ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeySize) > 0)) { |
| 1563 | PrimaryCpuInfo = RegValue; |
| 1564 | ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1); |
| 1565 | PrimaryPartKeyNameSize = SubKeySize; |
| 1566 | } |
| 1567 | if (!llvm::is_contained(Values, RegValue)) { |
| 1568 | Values.push_back(RegValue); |
| 1569 | } |
| 1570 | } |
| 1571 | RegCloseKey(SubKey); |
| 1572 | } else { |
| 1573 | // No more sub keys. |
| 1574 | break; |
| 1575 | } |
| 1576 | } |
| 1577 | RegCloseKey(CentralProcessorKey); |
| 1578 | } |
| 1579 | |
| 1580 | if (Values.empty()) { |
| 1581 | return "generic" ; |
| 1582 | } |
| 1583 | |
| 1584 | // Win32 makes no guarantees about the order of sub keys, so sort to ensure |
| 1585 | // reproducibility. |
| 1586 | llvm::sort(Values); |
| 1587 | |
| 1588 | return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values); |
| 1589 | } |
| 1590 | |
| 1591 | #elif defined(__APPLE__) && defined(__powerpc__) |
| 1592 | StringRef sys::getHostCPUName() { |
| 1593 | host_basic_info_data_t hostInfo; |
| 1594 | mach_msg_type_number_t infoCount; |
| 1595 | |
| 1596 | infoCount = HOST_BASIC_INFO_COUNT; |
| 1597 | mach_port_t hostPort = mach_host_self(); |
| 1598 | host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, |
| 1599 | &infoCount); |
| 1600 | mach_port_deallocate(mach_task_self(), hostPort); |
| 1601 | |
| 1602 | if (hostInfo.cpu_type != CPU_TYPE_POWERPC) |
| 1603 | return "generic" ; |
| 1604 | |
| 1605 | switch (hostInfo.cpu_subtype) { |
| 1606 | case CPU_SUBTYPE_POWERPC_601: |
| 1607 | return "601" ; |
| 1608 | case CPU_SUBTYPE_POWERPC_602: |
| 1609 | return "602" ; |
| 1610 | case CPU_SUBTYPE_POWERPC_603: |
| 1611 | return "603" ; |
| 1612 | case CPU_SUBTYPE_POWERPC_603e: |
| 1613 | return "603e" ; |
| 1614 | case CPU_SUBTYPE_POWERPC_603ev: |
| 1615 | return "603ev" ; |
| 1616 | case CPU_SUBTYPE_POWERPC_604: |
| 1617 | return "604" ; |
| 1618 | case CPU_SUBTYPE_POWERPC_604e: |
| 1619 | return "604e" ; |
| 1620 | case CPU_SUBTYPE_POWERPC_620: |
| 1621 | return "620" ; |
| 1622 | case CPU_SUBTYPE_POWERPC_750: |
| 1623 | return "750" ; |
| 1624 | case CPU_SUBTYPE_POWERPC_7400: |
| 1625 | return "7400" ; |
| 1626 | case CPU_SUBTYPE_POWERPC_7450: |
| 1627 | return "7450" ; |
| 1628 | case CPU_SUBTYPE_POWERPC_970: |
| 1629 | return "970" ; |
| 1630 | default:; |
| 1631 | } |
| 1632 | |
| 1633 | return "generic" ; |
| 1634 | } |
| 1635 | #elif defined(__linux__) && defined(__powerpc__) |
| 1636 | StringRef sys::getHostCPUName() { |
| 1637 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 1638 | StringRef Content = P ? P->getBuffer() : "" ; |
| 1639 | return detail::getHostCPUNameForPowerPC(Content); |
| 1640 | } |
| 1641 | #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) |
| 1642 | StringRef sys::getHostCPUName() { |
| 1643 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 1644 | StringRef Content = P ? P->getBuffer() : "" ; |
| 1645 | return detail::getHostCPUNameForARM(Content); |
| 1646 | } |
| 1647 | #elif defined(__linux__) && defined(__s390x__) |
| 1648 | StringRef sys::getHostCPUName() { |
| 1649 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 1650 | StringRef Content = P ? P->getBuffer() : "" ; |
| 1651 | return detail::getHostCPUNameForS390x(Content); |
| 1652 | } |
| 1653 | #elif defined(__MVS__) |
| 1654 | StringRef sys::getHostCPUName() { |
| 1655 | // Get pointer to Communications Vector Table (CVT). |
| 1656 | // The pointer is located at offset 16 of the Prefixed Save Area (PSA). |
| 1657 | // It is stored as 31 bit pointer and will be zero-extended to 64 bit. |
| 1658 | int *StartToCVTOffset = reinterpret_cast<int *>(0x10); |
| 1659 | // Since its stored as a 31-bit pointer, get the 4 bytes from the start |
| 1660 | // of address. |
| 1661 | int ReadValue = *StartToCVTOffset; |
| 1662 | // Explicitly clear the high order bit. |
| 1663 | ReadValue = (ReadValue & 0x7FFFFFFF); |
| 1664 | char *CVT = reinterpret_cast<char *>(ReadValue); |
| 1665 | // The model number is located in the CVT prefix at offset -6 and stored as |
| 1666 | // signless packed decimal. |
| 1667 | uint16_t Id = *(uint16_t *)&CVT[-6]; |
| 1668 | // Convert number to integer. |
| 1669 | Id = decodePackedBCD<uint16_t>(Id, false); |
| 1670 | // Check for vector support. It's stored in field CVTFLAG5 (offset 244), |
| 1671 | // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector |
| 1672 | // extension can only be used if bit CVTVEF is on. |
| 1673 | bool HaveVectorSupport = CVT[244] & 0x80; |
| 1674 | return getCPUNameFromS390Model(Id, HaveVectorSupport); |
| 1675 | } |
| 1676 | #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) |
| 1677 | // Copied from <mach/machine.h> in the macOS SDK. |
| 1678 | // |
| 1679 | // Also available here, though usually not as up-to-date: |
| 1680 | // https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.41.3/osfmk/mach/machine.h#L403-L452. |
| 1681 | #define CPUFAMILY_UNKNOWN 0 |
| 1682 | #define CPUFAMILY_ARM_9 0xe73283ae |
| 1683 | #define CPUFAMILY_ARM_11 0x8ff620d8 |
| 1684 | #define CPUFAMILY_ARM_XSCALE 0x53b005f5 |
| 1685 | #define CPUFAMILY_ARM_12 0xbd1b0ae9 |
| 1686 | #define CPUFAMILY_ARM_13 0x0cc90e64 |
| 1687 | #define CPUFAMILY_ARM_14 0x96077ef1 |
| 1688 | #define CPUFAMILY_ARM_15 0xa8511bca |
| 1689 | #define CPUFAMILY_ARM_SWIFT 0x1e2d6381 |
| 1690 | #define CPUFAMILY_ARM_CYCLONE 0x37a09642 |
| 1691 | #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e |
| 1692 | #define CPUFAMILY_ARM_TWISTER 0x92fb37c8 |
| 1693 | #define CPUFAMILY_ARM_HURRICANE 0x67ceee93 |
| 1694 | #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6 |
| 1695 | #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f |
| 1696 | #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2 |
| 1697 | #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3 |
| 1698 | #define CPUFAMILY_ARM_BLIZZARD_AVALANCHE 0xda33d83d |
| 1699 | #define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea |
| 1700 | #define CPUFAMILY_ARM_IBIZA 0xfa33415e |
| 1701 | #define CPUFAMILY_ARM_PALMA 0x72015832 |
| 1702 | #define CPUFAMILY_ARM_COLL 0x2876f5b5 |
| 1703 | #define CPUFAMILY_ARM_LOBOS 0x5f4dea93 |
| 1704 | #define CPUFAMILY_ARM_DONAN 0x6f5129ac |
| 1705 | #define CPUFAMILY_ARM_BRAVA 0x17d5b93a |
| 1706 | #define CPUFAMILY_ARM_TAHITI 0x75d4acb9 |
| 1707 | #define CPUFAMILY_ARM_TUPAI 0x204526d0 |
| 1708 | |
| 1709 | StringRef sys::getHostCPUName() { |
| 1710 | uint32_t Family; |
| 1711 | size_t Length = sizeof(Family); |
| 1712 | sysctlbyname("hw.cpufamily" , &Family, &Length, NULL, 0); |
| 1713 | |
| 1714 | // This is found by testing on actual hardware, and by looking at: |
| 1715 | // https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.41.3/osfmk/arm/cpuid.c#L109-L231. |
| 1716 | // |
| 1717 | // Another great resource is |
| 1718 | // https://github.com/AsahiLinux/docs/wiki/Codenames. |
| 1719 | // |
| 1720 | // NOTE: We choose to return `apple-mX` instead of `apple-aX`, since the M1, |
| 1721 | // M2, M3 etc. aliases are more widely known to users than A14, A15, A16 etc. |
| 1722 | // (and this code is basically only used on host macOS anyways). |
| 1723 | switch (Family) { |
| 1724 | case CPUFAMILY_UNKNOWN: |
| 1725 | return "generic" ; |
| 1726 | case CPUFAMILY_ARM_9: |
| 1727 | return "arm920t" ; // or arm926ej-s |
| 1728 | case CPUFAMILY_ARM_11: |
| 1729 | return "arm1136jf-s" ; |
| 1730 | case CPUFAMILY_ARM_XSCALE: |
| 1731 | return "xscale" ; |
| 1732 | case CPUFAMILY_ARM_12: // Seems unused by the kernel |
| 1733 | return "generic" ; |
| 1734 | case CPUFAMILY_ARM_13: |
| 1735 | return "cortex-a8" ; |
| 1736 | case CPUFAMILY_ARM_14: |
| 1737 | return "cortex-a9" ; |
| 1738 | case CPUFAMILY_ARM_15: |
| 1739 | return "cortex-a7" ; |
| 1740 | case CPUFAMILY_ARM_SWIFT: |
| 1741 | return "swift" ; |
| 1742 | case CPUFAMILY_ARM_CYCLONE: |
| 1743 | return "apple-a7" ; |
| 1744 | case CPUFAMILY_ARM_TYPHOON: |
| 1745 | return "apple-a8" ; |
| 1746 | case CPUFAMILY_ARM_TWISTER: |
| 1747 | return "apple-a9" ; |
| 1748 | case CPUFAMILY_ARM_HURRICANE: |
| 1749 | return "apple-a10" ; |
| 1750 | case CPUFAMILY_ARM_MONSOON_MISTRAL: |
| 1751 | return "apple-a11" ; |
| 1752 | case CPUFAMILY_ARM_VORTEX_TEMPEST: |
| 1753 | return "apple-a12" ; |
| 1754 | case CPUFAMILY_ARM_LIGHTNING_THUNDER: |
| 1755 | return "apple-a13" ; |
| 1756 | case CPUFAMILY_ARM_FIRESTORM_ICESTORM: // A14 / M1 |
| 1757 | return "apple-m1" ; |
| 1758 | case CPUFAMILY_ARM_BLIZZARD_AVALANCHE: // A15 / M2 |
| 1759 | return "apple-m2" ; |
| 1760 | case CPUFAMILY_ARM_EVEREST_SAWTOOTH: // A16 |
| 1761 | case CPUFAMILY_ARM_IBIZA: // M3 |
| 1762 | case CPUFAMILY_ARM_PALMA: // M3 Max |
| 1763 | case CPUFAMILY_ARM_LOBOS: // M3 Pro |
| 1764 | return "apple-m3" ; |
| 1765 | case CPUFAMILY_ARM_COLL: // A17 Pro |
| 1766 | return "apple-a17" ; |
| 1767 | case CPUFAMILY_ARM_DONAN: // M4 |
| 1768 | case CPUFAMILY_ARM_BRAVA: // M4 Max |
| 1769 | case CPUFAMILY_ARM_TAHITI: // A18 Pro |
| 1770 | case CPUFAMILY_ARM_TUPAI: // A18 |
| 1771 | return "apple-m4" ; |
| 1772 | default: |
| 1773 | // Default to the newest CPU we know about. |
| 1774 | return "apple-m4" ; |
| 1775 | } |
| 1776 | } |
| 1777 | #elif defined(_AIX) |
| 1778 | StringRef sys::getHostCPUName() { |
| 1779 | switch (_system_configuration.implementation) { |
| 1780 | case POWER_4: |
| 1781 | if (_system_configuration.version == PV_4_3) |
| 1782 | return "970" ; |
| 1783 | return "pwr4" ; |
| 1784 | case POWER_5: |
| 1785 | if (_system_configuration.version == PV_5) |
| 1786 | return "pwr5" ; |
| 1787 | return "pwr5x" ; |
| 1788 | case POWER_6: |
| 1789 | if (_system_configuration.version == PV_6_Compat) |
| 1790 | return "pwr6" ; |
| 1791 | return "pwr6x" ; |
| 1792 | case POWER_7: |
| 1793 | return "pwr7" ; |
| 1794 | case POWER_8: |
| 1795 | return "pwr8" ; |
| 1796 | case POWER_9: |
| 1797 | return "pwr9" ; |
| 1798 | // TODO: simplify this once the macro is available in all OS levels. |
| 1799 | #ifdef POWER_10 |
| 1800 | case POWER_10: |
| 1801 | #else |
| 1802 | case 0x40000: |
| 1803 | #endif |
| 1804 | return "pwr10" ; |
| 1805 | #ifdef POWER_11 |
| 1806 | case POWER_11: |
| 1807 | #else |
| 1808 | case 0x80000: |
| 1809 | #endif |
| 1810 | return "pwr11" ; |
| 1811 | default: |
| 1812 | return "generic" ; |
| 1813 | } |
| 1814 | } |
| 1815 | #elif defined(__loongarch__) |
| 1816 | StringRef sys::getHostCPUName() { |
| 1817 | // Use processor id to detect cpu name. |
| 1818 | uint32_t processor_id; |
| 1819 | __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r" (processor_id)); |
| 1820 | // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h. |
| 1821 | switch (processor_id & 0xf000) { |
| 1822 | case 0xc000: // Loongson 64bit, 4-issue |
| 1823 | return "la464" ; |
| 1824 | case 0xd000: // Loongson 64bit, 6-issue |
| 1825 | return "la664" ; |
| 1826 | // TODO: Others. |
| 1827 | default: |
| 1828 | break; |
| 1829 | } |
| 1830 | return "generic" ; |
| 1831 | } |
| 1832 | #elif defined(__riscv) |
| 1833 | #if defined(__linux__) |
| 1834 | // struct riscv_hwprobe |
| 1835 | struct RISCVHwProbe { |
| 1836 | int64_t Key; |
| 1837 | uint64_t Value; |
| 1838 | }; |
| 1839 | #endif |
| 1840 | |
| 1841 | StringRef sys::getHostCPUName() { |
| 1842 | #if defined(__linux__) |
| 1843 | // Try the hwprobe way first. |
| 1844 | RISCVHwProbe Query[]{{/*RISCV_HWPROBE_KEY_MVENDORID=*/0, 0}, |
| 1845 | {/*RISCV_HWPROBE_KEY_MARCHID=*/1, 0}, |
| 1846 | {/*RISCV_HWPROBE_KEY_MIMPID=*/2, 0}}; |
| 1847 | int Ret = syscall(/*__NR_riscv_hwprobe=*/258, /*pairs=*/Query, |
| 1848 | /*pair_count=*/std::size(Query), /*cpu_count=*/0, |
| 1849 | /*cpus=*/0, /*flags=*/0); |
| 1850 | if (Ret == 0) { |
| 1851 | RISCV::CPUModel Model{static_cast<uint32_t>(Query[0].Value), Query[1].Value, |
| 1852 | Query[2].Value}; |
| 1853 | StringRef Name = RISCV::getCPUNameFromCPUModel(Model); |
| 1854 | if (!Name.empty()) |
| 1855 | return Name; |
| 1856 | } |
| 1857 | |
| 1858 | // Then try the cpuinfo way. |
| 1859 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 1860 | StringRef Content = P ? P->getBuffer() : "" ; |
| 1861 | StringRef Name = detail::getHostCPUNameForRISCV(Content); |
| 1862 | if (!Name.empty()) |
| 1863 | return Name; |
| 1864 | #endif |
| 1865 | #if __riscv_xlen == 64 |
| 1866 | return "generic-rv64" ; |
| 1867 | #elif __riscv_xlen == 32 |
| 1868 | return "generic-rv32" ; |
| 1869 | #else |
| 1870 | #error "Unhandled value of __riscv_xlen" |
| 1871 | #endif |
| 1872 | } |
| 1873 | #elif defined(__sparc__) |
| 1874 | #if defined(__linux__) |
| 1875 | StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { |
| 1876 | SmallVector<StringRef> Lines; |
| 1877 | ProcCpuinfoContent.split(Lines, '\n'); |
| 1878 | |
| 1879 | // Look for cpu line to determine cpu name |
| 1880 | StringRef Cpu; |
| 1881 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
| 1882 | if (Lines[I].starts_with("cpu" )) { |
| 1883 | Cpu = Lines[I].substr(5).ltrim("\t :" ); |
| 1884 | break; |
| 1885 | } |
| 1886 | } |
| 1887 | |
| 1888 | return StringSwitch<const char *>(Cpu) |
| 1889 | .StartsWith("SuperSparc" , "supersparc" ) |
| 1890 | .StartsWith("HyperSparc" , "hypersparc" ) |
| 1891 | .StartsWith("SpitFire" , "ultrasparc" ) |
| 1892 | .StartsWith("BlackBird" , "ultrasparc" ) |
| 1893 | .StartsWith("Sabre" , " ultrasparc" ) |
| 1894 | .StartsWith("Hummingbird" , "ultrasparc" ) |
| 1895 | .StartsWith("Cheetah" , "ultrasparc3" ) |
| 1896 | .StartsWith("Jalapeno" , "ultrasparc3" ) |
| 1897 | .StartsWith("Jaguar" , "ultrasparc3" ) |
| 1898 | .StartsWith("Panther" , "ultrasparc3" ) |
| 1899 | .StartsWith("Serrano" , "ultrasparc3" ) |
| 1900 | .StartsWith("UltraSparc T1" , "niagara" ) |
| 1901 | .StartsWith("UltraSparc T2" , "niagara2" ) |
| 1902 | .StartsWith("UltraSparc T3" , "niagara3" ) |
| 1903 | .StartsWith("UltraSparc T4" , "niagara4" ) |
| 1904 | .StartsWith("UltraSparc T5" , "niagara4" ) |
| 1905 | .StartsWith("LEON" , "leon3" ) |
| 1906 | // niagara7/m8 not supported by LLVM yet. |
| 1907 | .StartsWith("SPARC-M7" , "niagara4" /* "niagara7" */) |
| 1908 | .StartsWith("SPARC-S7" , "niagara4" /* "niagara7" */) |
| 1909 | .StartsWith("SPARC-M8" , "niagara4" /* "m8" */) |
| 1910 | .Default("generic" ); |
| 1911 | } |
| 1912 | #endif |
| 1913 | |
| 1914 | StringRef sys::getHostCPUName() { |
| 1915 | #if defined(__linux__) |
| 1916 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 1917 | StringRef Content = P ? P->getBuffer() : "" ; |
| 1918 | return detail::getHostCPUNameForSPARC(Content); |
| 1919 | #elif defined(__sun__) && defined(__svr4__) |
| 1920 | char *buf = NULL; |
| 1921 | kstat_ctl_t *kc; |
| 1922 | kstat_t *ksp; |
| 1923 | kstat_named_t *brand = NULL; |
| 1924 | |
| 1925 | kc = kstat_open(); |
| 1926 | if (kc != NULL) { |
| 1927 | ksp = kstat_lookup(kc, const_cast<char *>("cpu_info" ), -1, NULL); |
| 1928 | if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && |
| 1929 | ksp->ks_type == KSTAT_TYPE_NAMED) |
| 1930 | brand = |
| 1931 | (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand" )); |
| 1932 | if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) |
| 1933 | buf = KSTAT_NAMED_STR_PTR(brand); |
| 1934 | } |
| 1935 | kstat_close(kc); |
| 1936 | |
| 1937 | return StringSwitch<const char *>(buf) |
| 1938 | .Case("TMS390S10" , "supersparc" ) // Texas Instruments microSPARC I |
| 1939 | .Case("TMS390Z50" , "supersparc" ) // Texas Instruments SuperSPARC I |
| 1940 | .Case("TMS390Z55" , |
| 1941 | "supersparc" ) // Texas Instruments SuperSPARC I with SuperCache |
| 1942 | .Case("MB86904" , "supersparc" ) // Fujitsu microSPARC II |
| 1943 | .Case("MB86907" , "supersparc" ) // Fujitsu TurboSPARC |
| 1944 | .Case("RT623" , "hypersparc" ) // Ross hyperSPARC |
| 1945 | .Case("RT625" , "hypersparc" ) |
| 1946 | .Case("RT626" , "hypersparc" ) |
| 1947 | .Case("UltraSPARC-I" , "ultrasparc" ) |
| 1948 | .Case("UltraSPARC-II" , "ultrasparc" ) |
| 1949 | .Case("UltraSPARC-IIe" , "ultrasparc" ) |
| 1950 | .Case("UltraSPARC-IIi" , "ultrasparc" ) |
| 1951 | .Case("SPARC64-III" , "ultrasparc" ) |
| 1952 | .Case("SPARC64-IV" , "ultrasparc" ) |
| 1953 | .Case("UltraSPARC-III" , "ultrasparc3" ) |
| 1954 | .Case("UltraSPARC-III+" , "ultrasparc3" ) |
| 1955 | .Case("UltraSPARC-IIIi" , "ultrasparc3" ) |
| 1956 | .Case("UltraSPARC-IIIi+" , "ultrasparc3" ) |
| 1957 | .Case("UltraSPARC-IV" , "ultrasparc3" ) |
| 1958 | .Case("UltraSPARC-IV+" , "ultrasparc3" ) |
| 1959 | .Case("SPARC64-V" , "ultrasparc3" ) |
| 1960 | .Case("SPARC64-VI" , "ultrasparc3" ) |
| 1961 | .Case("SPARC64-VII" , "ultrasparc3" ) |
| 1962 | .Case("UltraSPARC-T1" , "niagara" ) |
| 1963 | .Case("UltraSPARC-T2" , "niagara2" ) |
| 1964 | .Case("UltraSPARC-T2" , "niagara2" ) |
| 1965 | .Case("UltraSPARC-T2+" , "niagara2" ) |
| 1966 | .Case("SPARC-T3" , "niagara3" ) |
| 1967 | .Case("SPARC-T4" , "niagara4" ) |
| 1968 | .Case("SPARC-T5" , "niagara4" ) |
| 1969 | // niagara7/m8 not supported by LLVM yet. |
| 1970 | .Case("SPARC-M7" , "niagara4" /* "niagara7" */) |
| 1971 | .Case("SPARC-S7" , "niagara4" /* "niagara7" */) |
| 1972 | .Case("SPARC-M8" , "niagara4" /* "m8" */) |
| 1973 | .Default("generic" ); |
| 1974 | #else |
| 1975 | return "generic" ; |
| 1976 | #endif |
| 1977 | } |
| 1978 | #else |
| 1979 | StringRef sys::getHostCPUName() { return "generic" ; } |
| 1980 | namespace llvm { |
| 1981 | namespace sys { |
| 1982 | namespace detail { |
| 1983 | namespace x86 { |
| 1984 | |
| 1985 | VendorSignatures getVendorSignature(unsigned *MaxLeaf) { |
| 1986 | return VendorSignatures::UNKNOWN; |
| 1987 | } |
| 1988 | |
| 1989 | } // namespace x86 |
| 1990 | } // namespace detail |
| 1991 | } // namespace sys |
| 1992 | } // namespace llvm |
| 1993 | #endif |
| 1994 | |
| 1995 | #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ |
| 1996 | defined(_M_X64)) && \ |
| 1997 | !defined(_M_ARM64EC) |
| 1998 | StringMap<bool> sys::getHostCPUFeatures() { |
| 1999 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; |
| 2000 | unsigned MaxLevel; |
| 2001 | StringMap<bool> Features; |
| 2002 | |
| 2003 | if (getX86CpuIDAndInfo(value: 0, rEAX: &MaxLevel, rEBX: &EBX, rECX: &ECX, rEDX: &EDX) || MaxLevel < 1) |
| 2004 | return Features; |
| 2005 | |
| 2006 | getX86CpuIDAndInfo(value: 1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2007 | |
| 2008 | Features["cx8" ] = (EDX >> 8) & 1; |
| 2009 | Features["cmov" ] = (EDX >> 15) & 1; |
| 2010 | Features["mmx" ] = (EDX >> 23) & 1; |
| 2011 | Features["fxsr" ] = (EDX >> 24) & 1; |
| 2012 | Features["sse" ] = (EDX >> 25) & 1; |
| 2013 | Features["sse2" ] = (EDX >> 26) & 1; |
| 2014 | |
| 2015 | Features["sse3" ] = (ECX >> 0) & 1; |
| 2016 | Features["pclmul" ] = (ECX >> 1) & 1; |
| 2017 | Features["ssse3" ] = (ECX >> 9) & 1; |
| 2018 | Features["cx16" ] = (ECX >> 13) & 1; |
| 2019 | Features["sse4.1" ] = (ECX >> 19) & 1; |
| 2020 | Features["sse4.2" ] = (ECX >> 20) & 1; |
| 2021 | Features["crc32" ] = Features["sse4.2" ]; |
| 2022 | Features["movbe" ] = (ECX >> 22) & 1; |
| 2023 | Features["popcnt" ] = (ECX >> 23) & 1; |
| 2024 | Features["aes" ] = (ECX >> 25) & 1; |
| 2025 | Features["rdrnd" ] = (ECX >> 30) & 1; |
| 2026 | |
| 2027 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV |
| 2028 | // indicates that the AVX registers will be saved and restored on context |
| 2029 | // switch, then we have full AVX support. |
| 2030 | bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(rEAX: &EAX, rEDX: &EDX); |
| 2031 | bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); |
| 2032 | #if defined(__APPLE__) |
| 2033 | // Darwin lazily saves the AVX512 context on first use: trust that the OS will |
| 2034 | // save the AVX512 context if we use AVX512 instructions, even the bit is not |
| 2035 | // set right now. |
| 2036 | bool HasAVX512Save = true; |
| 2037 | #else |
| 2038 | // AVX512 requires additional context to be saved by the OS. |
| 2039 | bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); |
| 2040 | #endif |
| 2041 | // AMX requires additional context to be saved by the OS. |
| 2042 | const unsigned AMXBits = (1 << 17) | (1 << 18); |
| 2043 | bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); |
| 2044 | // APX requires additional context to be saved by the OS. |
| 2045 | bool HasAPXSave = HasXSave && ((EAX >> 19) & 1); |
| 2046 | |
| 2047 | Features["avx" ] = HasAVXSave; |
| 2048 | Features["fma" ] = ((ECX >> 12) & 1) && HasAVXSave; |
| 2049 | // Only enable XSAVE if OS has enabled support for saving YMM state. |
| 2050 | Features["xsave" ] = ((ECX >> 26) & 1) && HasAVXSave; |
| 2051 | Features["f16c" ] = ((ECX >> 29) & 1) && HasAVXSave; |
| 2052 | |
| 2053 | unsigned MaxExtLevel; |
| 2054 | getX86CpuIDAndInfo(value: 0x80000000, rEAX: &MaxExtLevel, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2055 | |
| 2056 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && |
| 2057 | !getX86CpuIDAndInfo(value: 0x80000001, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2058 | Features["sahf" ] = HasExtLeaf1 && ((ECX >> 0) & 1); |
| 2059 | Features["lzcnt" ] = HasExtLeaf1 && ((ECX >> 5) & 1); |
| 2060 | Features["sse4a" ] = HasExtLeaf1 && ((ECX >> 6) & 1); |
| 2061 | Features["prfchw" ] = HasExtLeaf1 && ((ECX >> 8) & 1); |
| 2062 | Features["xop" ] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; |
| 2063 | Features["lwp" ] = HasExtLeaf1 && ((ECX >> 15) & 1); |
| 2064 | Features["fma4" ] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; |
| 2065 | Features["tbm" ] = HasExtLeaf1 && ((ECX >> 21) & 1); |
| 2066 | Features["mwaitx" ] = HasExtLeaf1 && ((ECX >> 29) & 1); |
| 2067 | |
| 2068 | Features["64bit" ] = HasExtLeaf1 && ((EDX >> 29) & 1); |
| 2069 | |
| 2070 | // Miscellaneous memory related features, detected by |
| 2071 | // using the 0x80000008 leaf of the CPUID instruction |
| 2072 | bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && |
| 2073 | !getX86CpuIDAndInfo(value: 0x80000008, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2074 | Features["clzero" ] = HasExtLeaf8 && ((EBX >> 0) & 1); |
| 2075 | Features["rdpru" ] = HasExtLeaf8 && ((EBX >> 4) & 1); |
| 2076 | Features["wbnoinvd" ] = HasExtLeaf8 && ((EBX >> 9) & 1); |
| 2077 | |
| 2078 | bool HasExtLeaf21 = MaxExtLevel >= 0x80000021 && |
| 2079 | !getX86CpuIDAndInfo(value: 0x80000021, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2080 | // AMD cpuid bit for prefetchi is different from Intel |
| 2081 | Features["prefetchi" ] = HasExtLeaf21 && ((EAX >> 20) & 1); |
| 2082 | |
| 2083 | bool HasLeaf7 = |
| 2084 | MaxLevel >= 7 && !getX86CpuIDAndInfoEx(value: 0x7, subleaf: 0x0, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2085 | |
| 2086 | Features["fsgsbase" ] = HasLeaf7 && ((EBX >> 0) & 1); |
| 2087 | Features["sgx" ] = HasLeaf7 && ((EBX >> 2) & 1); |
| 2088 | Features["bmi" ] = HasLeaf7 && ((EBX >> 3) & 1); |
| 2089 | // AVX2 is only supported if we have the OS save support from AVX. |
| 2090 | Features["avx2" ] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; |
| 2091 | Features["bmi2" ] = HasLeaf7 && ((EBX >> 8) & 1); |
| 2092 | Features["invpcid" ] = HasLeaf7 && ((EBX >> 10) & 1); |
| 2093 | Features["rtm" ] = HasLeaf7 && ((EBX >> 11) & 1); |
| 2094 | // AVX512 is only supported if the OS supports the context save for it. |
| 2095 | Features["avx512f" ] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; |
| 2096 | Features["avx512dq" ] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; |
| 2097 | Features["rdseed" ] = HasLeaf7 && ((EBX >> 18) & 1); |
| 2098 | Features["adx" ] = HasLeaf7 && ((EBX >> 19) & 1); |
| 2099 | Features["avx512ifma" ] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; |
| 2100 | Features["clflushopt" ] = HasLeaf7 && ((EBX >> 23) & 1); |
| 2101 | Features["clwb" ] = HasLeaf7 && ((EBX >> 24) & 1); |
| 2102 | Features["avx512cd" ] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; |
| 2103 | Features["sha" ] = HasLeaf7 && ((EBX >> 29) & 1); |
| 2104 | Features["avx512bw" ] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; |
| 2105 | Features["avx512vl" ] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; |
| 2106 | |
| 2107 | Features["avx512vbmi" ] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; |
| 2108 | Features["pku" ] = HasLeaf7 && ((ECX >> 4) & 1); |
| 2109 | Features["waitpkg" ] = HasLeaf7 && ((ECX >> 5) & 1); |
| 2110 | Features["avx512vbmi2" ] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; |
| 2111 | Features["shstk" ] = HasLeaf7 && ((ECX >> 7) & 1); |
| 2112 | Features["gfni" ] = HasLeaf7 && ((ECX >> 8) & 1); |
| 2113 | Features["vaes" ] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; |
| 2114 | Features["vpclmulqdq" ] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; |
| 2115 | Features["avx512vnni" ] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; |
| 2116 | Features["avx512bitalg" ] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; |
| 2117 | Features["avx512vpopcntdq" ] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; |
| 2118 | Features["rdpid" ] = HasLeaf7 && ((ECX >> 22) & 1); |
| 2119 | Features["kl" ] = HasLeaf7 && ((ECX >> 23) & 1); // key locker |
| 2120 | Features["cldemote" ] = HasLeaf7 && ((ECX >> 25) & 1); |
| 2121 | Features["movdiri" ] = HasLeaf7 && ((ECX >> 27) & 1); |
| 2122 | Features["movdir64b" ] = HasLeaf7 && ((ECX >> 28) & 1); |
| 2123 | Features["enqcmd" ] = HasLeaf7 && ((ECX >> 29) & 1); |
| 2124 | |
| 2125 | Features["uintr" ] = HasLeaf7 && ((EDX >> 5) & 1); |
| 2126 | Features["avx512vp2intersect" ] = |
| 2127 | HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; |
| 2128 | Features["serialize" ] = HasLeaf7 && ((EDX >> 14) & 1); |
| 2129 | Features["tsxldtrk" ] = HasLeaf7 && ((EDX >> 16) & 1); |
| 2130 | // There are two CPUID leafs which information associated with the pconfig |
| 2131 | // instruction: |
| 2132 | // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th |
| 2133 | // bit of EDX), while the EAX=0x1b leaf returns information on the |
| 2134 | // availability of specific pconfig leafs. |
| 2135 | // The target feature here only refers to the the first of these two. |
| 2136 | // Users might need to check for the availability of specific pconfig |
| 2137 | // leaves using cpuid, since that information is ignored while |
| 2138 | // detecting features using the "-march=native" flag. |
| 2139 | // For more info, see X86 ISA docs. |
| 2140 | Features["pconfig" ] = HasLeaf7 && ((EDX >> 18) & 1); |
| 2141 | Features["amx-bf16" ] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; |
| 2142 | Features["avx512fp16" ] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save; |
| 2143 | Features["amx-tile" ] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; |
| 2144 | Features["amx-int8" ] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; |
| 2145 | // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't |
| 2146 | // return all 0s for invalid subleaves so check the limit. |
| 2147 | bool HasLeaf7Subleaf1 = |
| 2148 | HasLeaf7 && EAX >= 1 && |
| 2149 | !getX86CpuIDAndInfoEx(value: 0x7, subleaf: 0x1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2150 | Features["sha512" ] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1); |
| 2151 | Features["sm3" ] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1); |
| 2152 | Features["sm4" ] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1); |
| 2153 | Features["raoint" ] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1); |
| 2154 | Features["avxvnni" ] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; |
| 2155 | Features["avx512bf16" ] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; |
| 2156 | Features["amx-fp16" ] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave; |
| 2157 | Features["cmpccxadd" ] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1); |
| 2158 | Features["hreset" ] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); |
| 2159 | Features["avxifma" ] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave; |
| 2160 | Features["movrs" ] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1); |
| 2161 | Features["avxvnniint8" ] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave; |
| 2162 | Features["avxneconvert" ] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave; |
| 2163 | Features["amx-complex" ] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave; |
| 2164 | Features["avxvnniint16" ] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave; |
| 2165 | Features["prefetchi" ] |= HasLeaf7Subleaf1 && ((EDX >> 14) & 1); |
| 2166 | Features["usermsr" ] = HasLeaf7Subleaf1 && ((EDX >> 15) & 1); |
| 2167 | bool HasAVX10 = HasLeaf7Subleaf1 && ((EDX >> 19) & 1); |
| 2168 | bool HasAPXF = HasLeaf7Subleaf1 && ((EDX >> 21) & 1) && HasAPXSave; |
| 2169 | Features["egpr" ] = HasAPXF; |
| 2170 | #ifndef _WIN32 |
| 2171 | // TODO: We may need to check OS or MSVC version once unwinder opcodes |
| 2172 | // support PUSH2/POP2/PPX. |
| 2173 | Features["push2pop2" ] = HasAPXF; |
| 2174 | Features["ppx" ] = HasAPXF; |
| 2175 | #endif |
| 2176 | Features["ndd" ] = HasAPXF; |
| 2177 | Features["ccmp" ] = HasAPXF; |
| 2178 | Features["nf" ] = HasAPXF; |
| 2179 | Features["cf" ] = HasAPXF; |
| 2180 | Features["zu" ] = HasAPXF; |
| 2181 | |
| 2182 | bool HasLeafD = MaxLevel >= 0xd && |
| 2183 | !getX86CpuIDAndInfoEx(value: 0xd, subleaf: 0x1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2184 | |
| 2185 | // Only enable XSAVE if OS has enabled support for saving YMM state. |
| 2186 | Features["xsaveopt" ] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; |
| 2187 | Features["xsavec" ] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; |
| 2188 | Features["xsaves" ] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; |
| 2189 | |
| 2190 | bool HasLeaf14 = MaxLevel >= 0x14 && |
| 2191 | !getX86CpuIDAndInfoEx(value: 0x14, subleaf: 0x0, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2192 | |
| 2193 | Features["ptwrite" ] = HasLeaf14 && ((EBX >> 4) & 1); |
| 2194 | |
| 2195 | bool HasLeaf19 = |
| 2196 | MaxLevel >= 0x19 && !getX86CpuIDAndInfo(value: 0x19, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2197 | Features["widekl" ] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); |
| 2198 | |
| 2199 | bool HasLeaf1E = MaxLevel >= 0x1e && |
| 2200 | !getX86CpuIDAndInfoEx(value: 0x1e, subleaf: 0x1, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2201 | Features["amx-fp8" ] = HasLeaf1E && ((EAX >> 4) & 1) && HasAMXSave; |
| 2202 | Features["amx-tf32" ] = HasLeaf1E && ((EAX >> 6) & 1) && HasAMXSave; |
| 2203 | Features["amx-avx512" ] = HasLeaf1E && ((EAX >> 7) & 1) && HasAMXSave; |
| 2204 | Features["amx-movrs" ] = HasLeaf1E && ((EAX >> 8) & 1) && HasAMXSave; |
| 2205 | |
| 2206 | bool HasLeaf24 = MaxLevel >= 0x24 && |
| 2207 | !getX86CpuIDAndInfoEx(value: 0x24, subleaf: 0x0, rEAX: &EAX, rEBX: &EBX, rECX: &ECX, rEDX: &EDX); |
| 2208 | |
| 2209 | int AVX10Ver = HasLeaf24 ? (EBX & 0xff) : 0; |
| 2210 | Features["avx10.1" ] = HasAVX10 && AVX10Ver >= 1; |
| 2211 | Features["avx10.2" ] = HasAVX10 && AVX10Ver >= 2; |
| 2212 | |
| 2213 | return Features; |
| 2214 | } |
| 2215 | #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) |
| 2216 | StringMap<bool> sys::getHostCPUFeatures() { |
| 2217 | StringMap<bool> Features; |
| 2218 | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
| 2219 | if (!P) |
| 2220 | return Features; |
| 2221 | |
| 2222 | SmallVector<StringRef, 32> Lines; |
| 2223 | P->getBuffer().split(Lines, '\n'); |
| 2224 | |
| 2225 | SmallVector<StringRef, 32> CPUFeatures; |
| 2226 | |
| 2227 | // Look for the CPU features. |
| 2228 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
| 2229 | if (Lines[I].starts_with("Features" )) { |
| 2230 | Lines[I].split(CPUFeatures, ' '); |
| 2231 | break; |
| 2232 | } |
| 2233 | |
| 2234 | #if defined(__aarch64__) |
| 2235 | // All of these are "crypto" features, but we must sift out actual features |
| 2236 | // as the former meaning of "crypto" as a single feature is no more. |
| 2237 | enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; |
| 2238 | uint32_t crypto = 0; |
| 2239 | #endif |
| 2240 | |
| 2241 | for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { |
| 2242 | StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) |
| 2243 | #if defined(__aarch64__) |
| 2244 | .Case("asimd" , "neon" ) |
| 2245 | .Case("fp" , "fp-armv8" ) |
| 2246 | .Case("crc32" , "crc" ) |
| 2247 | .Case("atomics" , "lse" ) |
| 2248 | .Case("rng" , "rand" ) |
| 2249 | .Case("sha3" , "sha3" ) |
| 2250 | .Case("sm4" , "sm4" ) |
| 2251 | .Case("sve" , "sve" ) |
| 2252 | .Case("sve2" , "sve2" ) |
| 2253 | .Case("sveaes" , "sve-aes" ) |
| 2254 | .Case("svesha3" , "sve-sha3" ) |
| 2255 | .Case("svesm4" , "sve-sm4" ) |
| 2256 | #else |
| 2257 | .Case("half" , "fp16" ) |
| 2258 | .Case("neon" , "neon" ) |
| 2259 | .Case("vfpv3" , "vfp3" ) |
| 2260 | .Case("vfpv3d16" , "vfp3d16" ) |
| 2261 | .Case("vfpv4" , "vfp4" ) |
| 2262 | .Case("idiva" , "hwdiv-arm" ) |
| 2263 | .Case("idivt" , "hwdiv" ) |
| 2264 | #endif |
| 2265 | .Default("" ); |
| 2266 | |
| 2267 | #if defined(__aarch64__) |
| 2268 | // We need to check crypto separately since we need all of the crypto |
| 2269 | // extensions to enable the subtarget feature |
| 2270 | if (CPUFeatures[I] == "aes" ) |
| 2271 | crypto |= CAP_AES; |
| 2272 | else if (CPUFeatures[I] == "pmull" ) |
| 2273 | crypto |= CAP_PMULL; |
| 2274 | else if (CPUFeatures[I] == "sha1" ) |
| 2275 | crypto |= CAP_SHA1; |
| 2276 | else if (CPUFeatures[I] == "sha2" ) |
| 2277 | crypto |= CAP_SHA2; |
| 2278 | #endif |
| 2279 | |
| 2280 | if (LLVMFeatureStr != "" ) |
| 2281 | Features[LLVMFeatureStr] = true; |
| 2282 | } |
| 2283 | |
| 2284 | #if defined(__aarch64__) |
| 2285 | // LLVM has decided some AArch64 CPUs have all the instructions they _may_ |
| 2286 | // have, as opposed to all the instructions they _must_ have, so allow runtime |
| 2287 | // information to correct us on that. |
| 2288 | uint32_t Aes = CAP_AES | CAP_PMULL; |
| 2289 | uint32_t Sha2 = CAP_SHA1 | CAP_SHA2; |
| 2290 | Features["aes" ] = (crypto & Aes) == Aes; |
| 2291 | Features["sha2" ] = (crypto & Sha2) == Sha2; |
| 2292 | |
| 2293 | // Even if an underlying core supports SVE, it might not be available if |
| 2294 | // it's disabled by the OS, or some other layer. Disable SVE if we don't |
| 2295 | // detect support at runtime. |
| 2296 | if (!Features.contains("sve" )) |
| 2297 | Features["sve" ] = false; |
| 2298 | |
| 2299 | // Also disable RNG if we can't detect support at runtime. |
| 2300 | if (!Features.contains("rand" )) |
| 2301 | Features["rand" ] = false; |
| 2302 | #endif |
| 2303 | |
| 2304 | return Features; |
| 2305 | } |
| 2306 | #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64) || \ |
| 2307 | defined(__arm64ec__) || defined(_M_ARM64EC)) |
// Fallback definitions for the processor-feature identifiers that are passed
// to IsProcessorFeaturePresent below. Each one is defined only when the
// headers already included have not provided it.
#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
#endif
#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
#endif
#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
#endif
#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
#endif
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
#endif
#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
#endif
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
#endif
#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
#endif
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
#endif
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
#endif
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
#endif
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
#endif
#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
#endif
#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
#endif
#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
#endif
#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
#endif
#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
#endif
#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
#endif
#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
#endif
| 2365 | |
| 2366 | StringMap<bool> sys::getHostCPUFeatures() { |
| 2367 | StringMap<bool> Features; |
| 2368 | |
| 2369 | // If we're asking the OS at runtime, believe what the OS says |
| 2370 | Features["crc" ] = |
| 2371 | IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); |
| 2372 | Features["lse" ] = |
| 2373 | IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE); |
| 2374 | Features["dotprod" ] = |
| 2375 | IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE); |
| 2376 | Features["jsconv" ] = |
| 2377 | IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE); |
| 2378 | Features["rcpc" ] = |
| 2379 | IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE); |
| 2380 | Features["sve" ] = |
| 2381 | IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE); |
| 2382 | Features["sve2" ] = |
| 2383 | IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE); |
| 2384 | Features["sve2p1" ] = |
| 2385 | IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE); |
| 2386 | Features["sve-aes" ] = |
| 2387 | IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE); |
| 2388 | Features["sve-bitperm" ] = |
| 2389 | IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE); |
| 2390 | Features["sve-sha3" ] = |
| 2391 | IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE); |
| 2392 | Features["sve-sm4" ] = |
| 2393 | IsProcessorFeaturePresent(PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE); |
| 2394 | Features["f32mm" ] = |
| 2395 | IsProcessorFeaturePresent(PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE); |
| 2396 | Features["f64mm" ] = |
| 2397 | IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE); |
| 2398 | Features["i8mm" ] = |
| 2399 | IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE); |
| 2400 | Features["fullfp16" ] = |
| 2401 | IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE); |
| 2402 | Features["bf16" ] = |
| 2403 | IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE); |
| 2404 | Features["sme" ] = |
| 2405 | IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE); |
| 2406 | Features["sme2" ] = |
| 2407 | IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE); |
| 2408 | Features["sme-i16i64" ] = |
| 2409 | IsProcessorFeaturePresent(PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE); |
| 2410 | Features["sme-f64f64" ] = |
| 2411 | IsProcessorFeaturePresent(PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE); |
| 2412 | |
| 2413 | // Avoid inferring "crypto" means more than the traditional AES + SHA2 |
| 2414 | bool TradCrypto = |
| 2415 | IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE); |
| 2416 | Features["aes" ] = TradCrypto; |
| 2417 | Features["sha2" ] = TradCrypto; |
| 2418 | |
| 2419 | return Features; |
| 2420 | } |
| 2421 | #elif defined(__linux__) && defined(__loongarch__) |
| 2422 | #include <sys/auxv.h> |
| 2423 | StringMap<bool> sys::getHostCPUFeatures() { |
| 2424 | unsigned long hwcap = getauxval(AT_HWCAP); |
| 2425 | bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU |
| 2426 | uint32_t cpucfg2 = 0x2, cpucfg3 = 0x3; |
| 2427 | __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r" (cpucfg2)); |
| 2428 | __asm__("cpucfg %[cpucfg3], %[cpucfg3]\n\t" : [cpucfg3] "+r" (cpucfg3)); |
| 2429 | |
| 2430 | StringMap<bool> Features; |
| 2431 | |
| 2432 | Features["f" ] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP |
| 2433 | Features["d" ] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP |
| 2434 | |
| 2435 | Features["lsx" ] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX |
| 2436 | Features["lasx" ] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX |
| 2437 | Features["lvz" ] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ |
| 2438 | |
| 2439 | Features["frecipe" ] = cpucfg2 & (1U << 25); // CPUCFG.2.FRECIPE |
| 2440 | Features["div32" ] = cpucfg2 & (1U << 26); // CPUCFG.2.DIV32 |
| 2441 | Features["lam-bh" ] = cpucfg2 & (1U << 27); // CPUCFG.2.LAM_BH |
| 2442 | Features["lamcas" ] = cpucfg2 & (1U << 28); // CPUCFG.2.LAMCAS |
| 2443 | Features["scq" ] = cpucfg2 & (1U << 30); // CPUCFG.2.SCQ |
| 2444 | |
| 2445 | Features["ld-seq-sa" ] = cpucfg3 & (1U << 23); // CPUCFG.3.LD_SEQ_SA |
| 2446 | |
| 2447 | // TODO: Need to complete. |
| 2448 | // Features["llacq-screl"] = cpucfg2 & (1U << 29); // CPUCFG.2.LLACQ_SCREL |
| 2449 | return Features; |
| 2450 | } |
| 2451 | #elif defined(__linux__) && defined(__riscv) |
| 2452 | StringMap<bool> sys::getHostCPUFeatures() { |
| 2453 | RISCVHwProbe Query[]{{/*RISCV_HWPROBE_KEY_BASE_BEHAVIOR=*/3, 0}, |
| 2454 | {/*RISCV_HWPROBE_KEY_IMA_EXT_0=*/4, 0}, |
| 2455 | {/*RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF=*/9, 0}}; |
| 2456 | int Ret = syscall(/*__NR_riscv_hwprobe=*/258, /*pairs=*/Query, |
| 2457 | /*pair_count=*/std::size(Query), /*cpu_count=*/0, |
| 2458 | /*cpus=*/0, /*flags=*/0); |
| 2459 | if (Ret != 0) |
| 2460 | return {}; |
| 2461 | |
| 2462 | StringMap<bool> Features; |
| 2463 | uint64_t BaseMask = Query[0].Value; |
| 2464 | // Check whether RISCV_HWPROBE_BASE_BEHAVIOR_IMA is set. |
| 2465 | if (BaseMask & 1) { |
| 2466 | Features["i" ] = true; |
| 2467 | Features["m" ] = true; |
| 2468 | Features["a" ] = true; |
| 2469 | } |
| 2470 | |
| 2471 | uint64_t ExtMask = Query[1].Value; |
| 2472 | Features["f" ] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD |
| 2473 | Features["d" ] = ExtMask & (1 << 0); // RISCV_HWPROBE_IMA_FD |
| 2474 | Features["c" ] = ExtMask & (1 << 1); // RISCV_HWPROBE_IMA_C |
| 2475 | Features["v" ] = ExtMask & (1 << 2); // RISCV_HWPROBE_IMA_V |
| 2476 | Features["zba" ] = ExtMask & (1 << 3); // RISCV_HWPROBE_EXT_ZBA |
| 2477 | Features["zbb" ] = ExtMask & (1 << 4); // RISCV_HWPROBE_EXT_ZBB |
| 2478 | Features["zbs" ] = ExtMask & (1 << 5); // RISCV_HWPROBE_EXT_ZBS |
| 2479 | Features["zicboz" ] = ExtMask & (1 << 6); // RISCV_HWPROBE_EXT_ZICBOZ |
| 2480 | Features["zbc" ] = ExtMask & (1 << 7); // RISCV_HWPROBE_EXT_ZBC |
| 2481 | Features["zbkb" ] = ExtMask & (1 << 8); // RISCV_HWPROBE_EXT_ZBKB |
| 2482 | Features["zbkc" ] = ExtMask & (1 << 9); // RISCV_HWPROBE_EXT_ZBKC |
| 2483 | Features["zbkx" ] = ExtMask & (1 << 10); // RISCV_HWPROBE_EXT_ZBKX |
| 2484 | Features["zknd" ] = ExtMask & (1 << 11); // RISCV_HWPROBE_EXT_ZKND |
| 2485 | Features["zkne" ] = ExtMask & (1 << 12); // RISCV_HWPROBE_EXT_ZKNE |
| 2486 | Features["zknh" ] = ExtMask & (1 << 13); // RISCV_HWPROBE_EXT_ZKNH |
| 2487 | Features["zksed" ] = ExtMask & (1 << 14); // RISCV_HWPROBE_EXT_ZKSED |
| 2488 | Features["zksh" ] = ExtMask & (1 << 15); // RISCV_HWPROBE_EXT_ZKSH |
| 2489 | Features["zkt" ] = ExtMask & (1 << 16); // RISCV_HWPROBE_EXT_ZKT |
| 2490 | Features["zvbb" ] = ExtMask & (1 << 17); // RISCV_HWPROBE_EXT_ZVBB |
| 2491 | Features["zvbc" ] = ExtMask & (1 << 18); // RISCV_HWPROBE_EXT_ZVBC |
| 2492 | Features["zvkb" ] = ExtMask & (1 << 19); // RISCV_HWPROBE_EXT_ZVKB |
| 2493 | Features["zvkg" ] = ExtMask & (1 << 20); // RISCV_HWPROBE_EXT_ZVKG |
| 2494 | Features["zvkned" ] = ExtMask & (1 << 21); // RISCV_HWPROBE_EXT_ZVKNED |
| 2495 | Features["zvknha" ] = ExtMask & (1 << 22); // RISCV_HWPROBE_EXT_ZVKNHA |
| 2496 | Features["zvknhb" ] = ExtMask & (1 << 23); // RISCV_HWPROBE_EXT_ZVKNHB |
| 2497 | Features["zvksed" ] = ExtMask & (1 << 24); // RISCV_HWPROBE_EXT_ZVKSED |
| 2498 | Features["zvksh" ] = ExtMask & (1 << 25); // RISCV_HWPROBE_EXT_ZVKSH |
| 2499 | Features["zvkt" ] = ExtMask & (1 << 26); // RISCV_HWPROBE_EXT_ZVKT |
| 2500 | Features["zfh" ] = ExtMask & (1 << 27); // RISCV_HWPROBE_EXT_ZFH |
| 2501 | Features["zfhmin" ] = ExtMask & (1 << 28); // RISCV_HWPROBE_EXT_ZFHMIN |
| 2502 | Features["zihintntl" ] = ExtMask & (1 << 29); // RISCV_HWPROBE_EXT_ZIHINTNTL |
| 2503 | Features["zvfh" ] = ExtMask & (1 << 30); // RISCV_HWPROBE_EXT_ZVFH |
| 2504 | Features["zvfhmin" ] = ExtMask & (1ULL << 31); // RISCV_HWPROBE_EXT_ZVFHMIN |
| 2505 | Features["zfa" ] = ExtMask & (1ULL << 32); // RISCV_HWPROBE_EXT_ZFA |
| 2506 | Features["ztso" ] = ExtMask & (1ULL << 33); // RISCV_HWPROBE_EXT_ZTSO |
| 2507 | Features["zacas" ] = ExtMask & (1ULL << 34); // RISCV_HWPROBE_EXT_ZACAS |
| 2508 | Features["zicond" ] = ExtMask & (1ULL << 35); // RISCV_HWPROBE_EXT_ZICOND |
| 2509 | Features["zihintpause" ] = |
| 2510 | ExtMask & (1ULL << 36); // RISCV_HWPROBE_EXT_ZIHINTPAUSE |
| 2511 | Features["zve32x" ] = ExtMask & (1ULL << 37); // RISCV_HWPROBE_EXT_ZVE32X |
| 2512 | Features["zve32f" ] = ExtMask & (1ULL << 38); // RISCV_HWPROBE_EXT_ZVE32F |
| 2513 | Features["zve64x" ] = ExtMask & (1ULL << 39); // RISCV_HWPROBE_EXT_ZVE64X |
| 2514 | Features["zve64f" ] = ExtMask & (1ULL << 40); // RISCV_HWPROBE_EXT_ZVE64F |
| 2515 | Features["zve64d" ] = ExtMask & (1ULL << 41); // RISCV_HWPROBE_EXT_ZVE64D |
| 2516 | Features["zimop" ] = ExtMask & (1ULL << 42); // RISCV_HWPROBE_EXT_ZIMOP |
| 2517 | Features["zca" ] = ExtMask & (1ULL << 43); // RISCV_HWPROBE_EXT_ZCA |
| 2518 | Features["zcb" ] = ExtMask & (1ULL << 44); // RISCV_HWPROBE_EXT_ZCB |
| 2519 | Features["zcd" ] = ExtMask & (1ULL << 45); // RISCV_HWPROBE_EXT_ZCD |
| 2520 | Features["zcf" ] = ExtMask & (1ULL << 46); // RISCV_HWPROBE_EXT_ZCF |
| 2521 | Features["zcmop" ] = ExtMask & (1ULL << 47); // RISCV_HWPROBE_EXT_ZCMOP |
| 2522 | Features["zawrs" ] = ExtMask & (1ULL << 48); // RISCV_HWPROBE_EXT_ZAWRS |
| 2523 | |
| 2524 | // Check whether the processor supports fast misaligned scalar memory access. |
| 2525 | // NOTE: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF is only available on |
| 2526 | // Linux 6.11 or later. If it is not recognized, the key field will be cleared |
| 2527 | // to -1. |
| 2528 | if (Query[2].Key != -1 && |
| 2529 | Query[2].Value == /*RISCV_HWPROBE_MISALIGNED_SCALAR_FAST=*/3) |
| 2530 | Features["unaligned-scalar-mem" ] = true; |
| 2531 | |
| 2532 | return Features; |
| 2533 | } |
| 2534 | #else |
| 2535 | StringMap<bool> sys::getHostCPUFeatures() { return {}; } |
| 2536 | #endif |
| 2537 | |
| 2538 | #if __APPLE__ |
| 2539 | /// \returns the \p triple, but with the Host's arch spliced in. |
| 2540 | static Triple withHostArch(Triple T) { |
| 2541 | #if defined(__arm__) |
| 2542 | T.setArch(Triple::arm); |
| 2543 | T.setArchName("arm" ); |
| 2544 | #elif defined(__arm64e__) |
| 2545 | T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e); |
| 2546 | T.setArchName("arm64e" ); |
| 2547 | #elif defined(__aarch64__) |
| 2548 | T.setArch(Triple::aarch64); |
| 2549 | T.setArchName("arm64" ); |
| 2550 | #elif defined(__x86_64h__) |
| 2551 | T.setArch(Triple::x86_64); |
| 2552 | T.setArchName("x86_64h" ); |
| 2553 | #elif defined(__x86_64__) |
| 2554 | T.setArch(Triple::x86_64); |
| 2555 | T.setArchName("x86_64" ); |
| 2556 | #elif defined(__i386__) |
| 2557 | T.setArch(Triple::x86); |
| 2558 | T.setArchName("i386" ); |
| 2559 | #elif defined(__powerpc__) |
| 2560 | T.setArch(Triple::ppc); |
| 2561 | T.setArchName("powerpc" ); |
| 2562 | #else |
| 2563 | # error "Unimplemented host arch fixup" |
| 2564 | #endif |
| 2565 | return T; |
| 2566 | } |
| 2567 | #endif |
| 2568 | |
| 2569 | std::string sys::getProcessTriple() { |
| 2570 | std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); |
| 2571 | Triple PT(Triple::normalize(Str: TargetTripleString)); |
| 2572 | |
| 2573 | #if __APPLE__ |
| 2574 | /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of |
| 2575 | /// the slices. This fixes that up. |
| 2576 | PT = withHostArch(PT); |
| 2577 | #endif |
| 2578 | |
| 2579 | if (sizeof(void *) == 8 && PT.isArch32Bit()) |
| 2580 | PT = PT.get64BitArchVariant(); |
| 2581 | if (sizeof(void *) == 4 && PT.isArch64Bit()) |
| 2582 | PT = PT.get32BitArchVariant(); |
| 2583 | |
| 2584 | return PT.str(); |
| 2585 | } |
| 2586 | |
| 2587 | void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) { |
| 2588 | #if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO |
| 2589 | std::string CPU = std::string(sys::getHostCPUName()); |
| 2590 | if (CPU == "generic" ) |
| 2591 | CPU = "(unknown)" ; |
| 2592 | OS << " Default target: " << sys::getDefaultTargetTriple() << '\n' |
| 2593 | << " Host CPU: " << CPU << '\n'; |
| 2594 | #endif |
| 2595 | } |
| 2596 | |