//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    // Assume instructions that aren't listed aren't compressible.
    bool Compressed = false;
    switch (Instr.getOpcode()) {
    case RISCV::QC_E_LI:
      // One 48-bit instruction takes the space of 1.5 regular instructions.
      Cost += 150;
      continue;
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.getImm());
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
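    // For example, with the weights below a pair of compressed instructions
    // costs 70 + 70 = 140, slightly more than a single uncompressed
    // instruction at 100, while still rewarding mostly-compressible
    // sequences. (Illustrative arithmetic only.)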
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);

  // Use BSETI for a single bit that can't be expressed by a single LUI or ADDI.
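  // For instance, 0x800 (bit 11) would otherwise need LUI+ADDI, and a high
  // bit such as 1 << 40 would need ADDI+SLLI; with Zbs a single BSETI
  // suffices. (Illustrative examples.)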
  if (STI.hasFeature(RISCV::FeatureStdExtZbs) && isPowerOf2_64(Val) &&
      (!isInt<32>(Val) || Val == 0x800)) {
    Res.emplace_back(RISCV::BSETI, Log2_64(Val));
    return;
  }

  if (!IsRV64 && STI.hasFeature(RISCV::FeatureVendorXqcili)) {
    bool FitsOneStandardInst = ((Val & 0xFFF) == 0) || isInt<12>(Val);

    // 20-bit signed immediates that don't fit into `ADDI` or `LUI` should use
    // `QC.LI` (a single 32-bit instruction).
    if (!FitsOneStandardInst && isInt<20>(Val)) {
      Res.emplace_back(RISCV::QC_LI, Val);
      return;
    }

    // 32-bit signed immediates that don't fit into `ADDI`, `LUI` or `QC.LI`
    // should use `QC.E.LI` (a single 48-bit instruction).
    if (!FitsOneStandardInst && isInt<32>(Val)) {
      Res.emplace_back(RISCV::QC_E_LI, Val);
      return;
    }
  }

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
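    //
    // Worked example (illustrative): v = 0x12345FFF gives Lo12 = -1 and
    // Hi20 = (v + 0x800) >> 12 = 0x12346, so LUI 0x12346 (= 0x12346000)
    // followed by ADDI -1 reconstructs 0x12345FFF; the +0x800 rounding
    // compensates for the sign-extended ADDI.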
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.emplace_back(RISCV::LUI, Hi20);

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = RISCV::ADDI;
      if (IsRV64 && Hi20) {
        // Use ADDIW rather than ADDI only when necessary for correctness. As
        // noted in RISCVOptWInstrs, this helps reduce test differences vs
        // RV32 without being a pessimization.
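        // For example (illustrative), Val = 0x7FFFFFFF yields LUI 0x80000
        // (0xFFFFFFFF80000000 on RV64) and Lo12 = -1; a plain ADDI would
        // produce 0xFFFFFFFF7FFFFFFF, whereas ADDIW sign-extends the 32-bit
        // result and gives the intended 0x7FFFFFFF.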
        int64_t LuiRes = SignExtend64<32>(Hi20 << 12);
        if (!isInt<32>(LuiRes + Lo12))
          AddiOpc = RISCV::ADDIW;
      }
      Res.emplace_back(AddiOpc, Lo12);
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems to be possible by simply
  // emitting the most significant 32 bits (LUI+ADDIW) followed by as many left
  // shifts (SLLI) and immediate additions (ADDI) as needed. However, due to the
  // fact that ADDI performs a sign extended addition, doing it like that would
  // only be possible when at most 11 bits of the ADDI instructions are used.
  // Using all 12 bits of the ADDI instructions, as done by GAS, actually
  // requires that the constant is processed starting with the least significant
  // bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
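  //
  // Worked example (illustrative): Val = 0x7FFFFFFFF (35 ones). Lo12 is -1,
  // so the remaining constant is 0x800000000, which reduces to 1 after
  // shifting right by 35. The emitted sequence is ADDI 1; SLLI 35; ADDI -1,
  // i.e. 0x800000000 - 1 = 0x7FFFFFFFF.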

  int64_t Lo12 = SignExtend64<12>(Val);
  Val = (uint64_t)Val - (uint64_t)Lo12;

  int ShiftAmount = 0;
  bool Unsigned = false;

  // Val might now be valid for LUI without needing a shift.
  if (!isInt<32>(Val)) {
    ShiftAmount = llvm::countr_zero((uint64_t)Val);
    Val >>= ShiftAmount;

    // If the remaining bits don't fit in 12 bits, we might be able to reduce
    // the shift amount in order to use LUI which will zero the lower 12
    // bits.
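    // For example (illustrative), 0x1234500000000000 first reduces to
    // 0x12345 with a shift of 44; lowering the shift to 32 turns the
    // remainder into 0x12345000, which a single LUI covers, giving
    // LUI 0x12345; SLLI 32 instead of LUI+ADDI+SLLI.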
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
      if (isInt<32>((uint64_t)Val << 12)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI.
        ShiftAmount -= 12;
        Val = (uint64_t)Val << 12;
      } else if (isUInt<32>((uint64_t)Val << 12) &&
                 STI.hasFeature(RISCV::FeatureStdExtZba)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
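        // For example (illustrative), 0x0923450000000000 reduces to 0x92345
        // with a shift of 40; 0x92345000 is uint32 but not int32, so LUI
        // 0x92345 materialises 0xFFFFFFFF92345000 and SLLI.UW 28 discards
        // the upper bits while shifting, giving a two-instruction sequence.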
        ShiftAmount -= 12;
        Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
        Unsigned = true;
      }
    }

    // Try to use SLLI_UW for Val when it is uint32 but not int32.
    if (isUInt<32>(Val) && !isInt<32>(Val) &&
        STI.hasFeature(RISCV::FeatureStdExtZba)) {
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI_UW.
      Val = ((uint64_t)Val) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  generateInstSeqImpl(Val, STI, Res);

  // Skip shift if we were able to use LUI directly.
  if (ShiftAmount) {
    unsigned Opc = Unsigned ? RISCV::SLLI_UW : RISCV::SLLI;
    Res.emplace_back(Opc, ShiftAmount);
  }

  if (Lo12)
    Res.emplace_back(RISCV::ADDI, Lo12);
}

static unsigned extractRotateInfo(int64_t Val) {
  // Case 0b111..1..xxxxxx1..1..: leading and trailing ones with at most a
  // 12-bit field in between. Rotating the trailing ones up to the top leaves
  // a simm12 behind.
  unsigned LeadingOnes = llvm::countl_one((uint64_t)Val);
  unsigned TrailingOnes = llvm::countr_one((uint64_t)Val);
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;

  // Case 0bxxx1..1..1...xxx: a run of ones crossing bit 32 with at most 12
  // other bits. Rotating that run up to the top likewise leaves a simm12.
  unsigned UpperTrailingOnes = llvm::countr_one(Hi_32(Val));
  unsigned LowerLeadingOnes = llvm::countl_one(Lo_32(Val));
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;

  return 0;
}

static void generateInstSeqLeadingZeros(int64_t Val, const MCSubtargetInfo &STI,
                                        RISCVMatInt::InstSeq &Res) {
  assert(Val > 0 && "Expected positive val");

  unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
  uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
  // Fill in the bits that will be shifted out with 1s. An example where this
  // helps is trailing one masks with 32 or more ones. This will generate
  // ADDI -1 and an SRLI.
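  // Concretely (illustrative), Val = 0xFFFFFFFFF has 28 leading zeros;
  // shifting it up and filling with ones gives -1, so the whole constant
  // becomes ADDI -1; SRLI 28.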
  ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

  RISCVMatInt::InstSeq TmpSeq;
  generateInstSeqImpl(ShiftedVal, STI, TmpSeq);

  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
      (Res.empty() && TmpSeq.size() < 8)) {
    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
    Res = TmpSeq;
  }

  // Some cases can benefit from filling the lower bits with zeros instead.
  ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
  TmpSeq.clear();
  generateInstSeqImpl(ShiftedVal, STI, TmpSeq);

  // Keep the new sequence if it is an improvement or the original is empty.
  if ((TmpSeq.size() + 1) < Res.size() ||
      (Res.empty() && TmpSeq.size() < 8)) {
    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
    Res = TmpSeq;
  }

  // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
  // the end of the sequence.
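  // For instance (illustrative), 0xA3D70A41 can be built as LUI 0xA3D71;
  // ADDI -0x5bf, which yields the sign-extended 0xFFFFFFFFA3D70A41, and a
  // trailing ADD.UW with x0 (zext.w) then clears the upper 32 bits.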
  if (LeadingZeros == 32 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
    // Try replacing upper bits with 1.
    uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(LeadingOnesVal, STI, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() ||
        (Res.empty() && TmpSeq.size() < 8)) {
      TmpSeq.emplace_back(RISCV::ADD_UW, 0);
      Res = TmpSeq;
    }
  }
}

namespace llvm::RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, STI, Res);

  // If the low 12 bits are non-zero, the first expansion may end with an ADDI
  // or ADDIW. If there are trailing zeros, try generating a sign extended
  // constant with no trailing zeros and use a final SLLI to restore them.
  if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
    unsigned TrailingZeros = llvm::countr_zero((uint64_t)Val);
    int64_t ShiftedVal = Val >> TrailingZeros;
    // If we can use C.LI+C.SLLI instead of LUI+ADDI(W), prefer that since
    // it's more compressible. But only if LUI+ADDI(W) isn't fusable.
    // NOTE: We don't check for C extension to minimize differences in
    // generated code.
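    // For example (illustrative), 0x1800 can be built either as LUI 2;
    // ADDI -0x800 or as ADDI 3; SLLI 11; both are two instructions, but the
    // latter can be encoded as C.LI + C.SLLI.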
    bool IsShiftedCompressible =
        isInt<6>(ShiftedVal) && !STI.hasFeature(RISCV::TuneLUIADDIFusion);
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, STI, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
      TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
      Res = TmpSeq;
    }
  }

  // If we have a 1 or 2 instruction sequence this is the best we can do. This
  // will always be true for RV32 and will often be true for RV64.
  if (Res.size() <= 2)
    return Res;

  assert(STI.hasFeature(RISCV::Feature64Bit) &&
         "Expected RV32 to only need 2 instructions");

  // If the lower 13 bits are something like 0x17ff, try to add 1 to change the
  // lower 13 bits to 0x1800. We can restore this with an ADDI of -1 at the end
  // of the sequence. Call generateInstSeqImpl on the new constant which may
  // subtract 0xfffffffffffff800 to create another ADDI. This will leave a
  // constant with more than 12 trailing zeros for the next recursive step.
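  // E.g. (illustrative) a value whose low 12 bits are 0x7ff (with bit 12 set
  // and bit 11 clear) gives Imm12 = -1, so AdjustedVal = Val + 1 ends in
  // 0x1800, and the trailing ADDI -1 undoes the adjustment.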
  if ((Val & 0xfff) != 0 && (Val & 0x1800) == 0x1000) {
    int64_t Imm12 = -(0x800 - (Val & 0xfff));
    int64_t AdjustedVal = Val - Imm12;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(AdjustedVal, STI, TmpSeq);

    // Keep the new sequence if it is an improvement.
    if ((TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::ADDI, Imm12);
      Res = TmpSeq;
    }
  }

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    generateInstSeqLeadingZeros(Val, STI, Res);
  }

  // If the constant is negative, try inverting it and using our trailing zero
  // optimizations. Use an XORI to invert the final value.
  if (Val < 0 && Res.size() > 3) {
    uint64_t InvertedVal = ~(uint64_t)Val;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqLeadingZeros(InvertedVal, STI, TmpSeq);

    // Keep it if we found a sequence that is smaller after inverting.
    if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
      TmpSeq.emplace_back(RISCV::XORI, -1);
      Res = TmpSeq;
    }
  }

  // If the low and high halves are the same, use PACK. The PACK instruction
  // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
  // lower half and rs2 in the upper half.
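  // For example (illustrative), 0x1234567812345678 can be built as
  // LUI 0x12345; ADDI 0x678; PACK rd, rd, rd, since both 32-bit halves are
  // 0x12345678.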
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbkb)) {
    int64_t LoVal = SignExtend64<32>(Val);
    int64_t HiVal = SignExtend64<32>(Val >> 32);
    if (LoVal == HiVal) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(LoVal, STI, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(RISCV::PACK, 0);
        Res = TmpSeq;
      }
    }
  }

  // Perform optimization with BSETI in the Zbs extension.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
    // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to zero.
    // Xor that with original value to get which bits should be set by BSETI.
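    // For example (illustrative), 0x9000000000001234 splits into Lo = 0x1234
    // (LUI 1; ADDI 0x234) and Hi with bits 60 and 63 set, so two BSETIs
    // finish the job.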
    uint64_t Lo = Val & 0x7fffffff;
    uint64_t Hi = Val ^ Lo;
    assert(Hi != 0);
    RISCVMatInt::InstSeq TmpSeq;

    if (Lo != 0)
      generateInstSeqImpl(Lo, STI, TmpSeq);

    if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
      do {
        TmpSeq.emplace_back(RISCV::BSETI, llvm::countr_zero(Hi));
        Hi &= (Hi - 1); // Clear lowest set bit.
      } while (Hi != 0);
      Res = TmpSeq;
    }

    // Fold LI 1 + SLLI into BSETI.
    if (Res[0].getOpcode() == RISCV::ADDI && Res[0].getImm() == 1 &&
        Res[1].getOpcode() == RISCV::SLLI) {
      Res.erase(Res.begin());                                 // Remove ADDI.
      Res.front() = Inst(RISCV::BSETI, Res.front().getImm()); // Patch SLLI.
    }
  }

  // Perform optimization with BCLRI in the Zbs extension.
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZbs)) {
    // Create a simm32 value for LUI+ADDIW by forcing the upper 33 bits to one.
    // Xor that with original value to get which bits should be cleared by
    // BCLRI.
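    // For example (illustrative), 0xEFFFFFFFFFFFF234 ORed with the upper-ones
    // mask is 0xFFFFFFFFFFFFF234 (LUI 0xFFFFF; ADDI 0x234), and a single
    // BCLRI 60 then clears the one differing bit.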
    uint64_t Lo = Val | 0xffffffff80000000;
    uint64_t Hi = Val ^ Lo;
    assert(Hi != 0);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, STI, TmpSeq);

    if (TmpSeq.size() + llvm::popcount(Hi) < Res.size()) {
      do {
        TmpSeq.emplace_back(RISCV::BCLRI, llvm::countr_zero(Hi));
        Hi &= (Hi - 1); // Clear lowest set bit.
      } while (Hi != 0);
      Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
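  // With rs1 == rs2, SH1ADD, SH2ADD and SH3ADD compute 3*x, 5*x and 9*x
  // respectively, so a constant divisible by 3, 5 or 9 whose quotient fits in
  // 32 bits can reuse the cheaper 32-bit materialisation and one shift-add.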
  if (Res.size() > 2 && STI.hasFeature(RISCV::FeatureStdExtZba)) {
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, STI, TmpSeq);
      if ((TmpSeq.size() + 1) < Res.size()) {
        TmpSeq.emplace_back(Opc, 0);
        Res = TmpSeq;
      }
    } else {
      // Try to use LUI+SH*ADD+ADDI.
      int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
      int64_t Lo12 = SignExtend64<12>(Val);
      Div = 0;
      if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
        Div = 3;
        Opc = RISCV::SH1ADD;
      } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
        Div = 5;
        Opc = RISCV::SH2ADD;
      } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
        Div = 9;
        Opc = RISCV::SH3ADD;
      }
      // Build the new instruction sequence.
      if (Div > 0) {
        // A Val with zero Lo12 (i.e. Val equal to Hi52) should already have
        // been turned into LUI+SH*ADD by the previous optimization.
        assert(Lo12 != 0 &&
               "unexpected instruction sequence for immediate materialisation");
        assert(TmpSeq.empty() && "Expected empty TmpSeq");
        generateInstSeqImpl(Hi52 / Div, STI, TmpSeq);
        if ((TmpSeq.size() + 2) < Res.size()) {
          TmpSeq.emplace_back(Opc, 0);
          TmpSeq.emplace_back(RISCV::ADDI, Lo12);
          Res = TmpSeq;
        }
      }
    }
  }

  // Perform optimization with RORI from the Zbb extension or TH.SRRI from the
  // XTheadBb extension.
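  // For example (illustrative), 0xFFFFFFFF0FFFFFFF rotated left by 36 becomes
  // -16, so ADDI -16 followed by RORI 36 (rotate right by 36) rebuilds the
  // constant in two instructions.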
  if (Res.size() > 2 && (STI.hasFeature(RISCV::FeatureStdExtZbb) ||
                         STI.hasFeature(RISCV::FeatureVendorXTHeadBb))) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
      TmpSeq.emplace_back(STI.hasFeature(RISCV::FeatureStdExtZbb)
                              ? RISCV::RORI
                              : RISCV::TH_SRRI,
                          Rotate);
      Res = TmpSeq;
    }
  }
  return Res;
}

void generateMCInstSeq(int64_t Val, const MCSubtargetInfo &STI,
                       MCRegister DestReg, SmallVectorImpl<MCInst> &Insts) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);

  MCRegister SrcReg = RISCV::X0;
  for (RISCVMatInt::Inst &Inst : Seq) {
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Insts.push_back(MCInstBuilder(Inst.getOpcode())
                          .addReg(DestReg)
                          .addImm(Inst.getImm()));
      break;
    case RISCVMatInt::RegX0:
      Insts.push_back(MCInstBuilder(Inst.getOpcode())
                          .addReg(DestReg)
                          .addReg(SrcReg)
                          .addReg(RISCV::X0));
      break;
    case RISCVMatInt::RegReg:
      Insts.push_back(MCInstBuilder(Inst.getOpcode())
                          .addReg(DestReg)
                          .addReg(SrcReg)
                          .addReg(SrcReg));
      break;
    case RISCVMatInt::RegImm:
      Insts.push_back(MCInstBuilder(Inst.getOpcode())
                          .addReg(DestReg)
                          .addReg(SrcReg)
                          .addImm(Inst.getImm()));
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DestReg;
  }
}

InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI,
                              unsigned &ShiftAmt, unsigned &AddOpc) {
  int64_t LoVal = SignExtend64<32>(Val);
  if (LoVal == 0)
    return RISCVMatInt::InstSeq();

  // Subtract the LoVal to emulate the effect of the final ADD.
  uint64_t Tmp = (uint64_t)Val - (uint64_t)LoVal;
  assert(Tmp != 0);

  // Use trailing zero counts to figure out how far we need to shift LoVal to
  // line up with the remaining constant.
  // TODO: This algorithm assumes all non-zero bits in the low 32 bits of the
  // final constant come from LoVal.
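  // For example (illustrative), Val = 0x0000001000000010 gives LoVal = 0x10
  // and Tmp = 0x1000000000; TzLo = 4 and TzHi = 36 yield ShiftAmt = 32, and
  // Tmp == LoVal << 32, so the caller can ADD the materialised 0x10 to a
  // copy of itself shifted left by 32.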
  unsigned TzLo = llvm::countr_zero((uint64_t)LoVal);
  unsigned TzHi = llvm::countr_zero(Tmp);
  assert(TzLo < 32 && TzHi >= 32);
  ShiftAmt = TzHi - TzLo;
  AddOpc = RISCV::ADD;

  if (Tmp == ((uint64_t)LoVal << ShiftAmt))
    return RISCVMatInt::generateInstSeq(LoVal, STI);

  // If we have Zba, we can use (ADD_UW X, (SLLI X, 32)).
  if (STI.hasFeature(RISCV::FeatureStdExtZba) && Lo_32(Val) == Hi_32(Val)) {
    ShiftAmt = 32;
    AddOpc = RISCV::ADD_UW;
    return RISCVMatInt::generateInstSeq(LoVal, STI);
  }

  return RISCVMatInt::InstSeq();
}

int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI,
                  bool CompressionCost, bool FreeZeroes) {
  bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
  bool HasRVC = CompressionCost && STI.hasFeature(RISCV::FeatureStdExtZca);
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
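  // For example (illustrative), a 128-bit constant is costed as two 64-bit
  // chunks on RV64 (four 32-bit chunks on RV32); with FreeZeroes set,
  // all-zero chunks contribute nothing.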
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    if (FreeZeroes && Chunk.getSExtValue() == 0)
      continue;
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), STI);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(FreeZeroes ? 0 : 1, Cost);
}

OpndKind Inst::getOpndKind() const {
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::LUI:
  case RISCV::QC_LI:
  case RISCV::QC_E_LI:
    return RISCVMatInt::Imm;
  case RISCV::ADD_UW:
    return RISCVMatInt::RegX0;
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
  case RISCV::PACK:
    return RISCVMatInt::RegReg;
  case RISCV::ADDI:
  case RISCV::ADDIW:
  case RISCV::XORI:
  case RISCV::SLLI:
  case RISCV::SRLI:
  case RISCV::SLLI_UW:
  case RISCV::RORI:
  case RISCV::BSETI:
  case RISCV::BCLRI:
  case RISCV::TH_SRRI:
    return RISCVMatInt::RegImm;
  }
}

} // namespace llvm::RISCVMatInt