1//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass does some optimizations for *W instructions at the MI level.
10//
11// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
12// because the sign extended bits aren't consumed or because the input was
13// already sign extended by an earlier instruction.
14//
15// Then:
// 1. Unless explicitly disabled or the target prefers instructions with the
//    W suffix, it replaces the .w suffix with .d on instructions whenever all
//    users depend only on the lower word of the result of the instruction.
19// The cases handled are:
20// * addi.w because it helps reduce test differences between LA32 and LA64
21// w/o being a pessimization.
22//
// 2. Or, if explicitly enabled or the target prefers instructions with the
//    W suffix, it adds the W suffix to the instruction whenever all users
//    depend only on the lower word of the result of the instruction.
26// The cases handled are:
27// * add.d/addi.d/sub.d/mul.d.
28// * slli.d with imm < 32.
29// * ld.d/ld.wu.
30//===---------------------------------------------------------------------===//
31
32#include "LoongArch.h"
33#include "LoongArchMachineFunctionInfo.h"
34#include "LoongArchSubtarget.h"
35#include "llvm/ADT/SmallSet.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/MachineFunctionPass.h"
38#include "llvm/CodeGen/TargetInstrInfo.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-opt-w-instrs"
43#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
44
45STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
46STATISTIC(NumTransformedToWInstrs,
47 "Number of instructions transformed to W-ops");
48
49static cl::opt<bool>
50 DisableSExtWRemoval("loongarch-disable-sextw-removal",
51 cl::desc("Disable removal of sign-extend insn"),
52 cl::init(Val: false), cl::Hidden);
53static cl::opt<bool>
54 DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55 cl::desc("Disable convert to D suffix"),
56 cl::init(Val: false), cl::Hidden);
57
58namespace {
59
60class LoongArchOptWInstrs : public MachineFunctionPass {
61public:
62 static char ID;
63
64 LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
65
66 bool runOnMachineFunction(MachineFunction &MF) override;
67 bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
68 const LoongArchSubtarget &ST,
69 MachineRegisterInfo &MRI);
70 bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
71 const LoongArchSubtarget &ST,
72 MachineRegisterInfo &MRI);
73 bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
74 const LoongArchSubtarget &ST,
75 MachineRegisterInfo &MRI);
76
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesCFG();
79 MachineFunctionPass::getAnalysisUsage(AU);
80 }
81
82 StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
83};
84
85} // end anonymous namespace
86
87char LoongArchOptWInstrs::ID = 0;
88INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
89 false, false)
90
91FunctionPass *llvm::createLoongArchOptWInstrsPass() {
92 return new LoongArchOptWInstrs();
93}
94
95// Checks if all users only demand the lower \p OrigBits of the original
96// instruction's result.
97// TODO: handle multiple interdependent transformations
98static bool hasAllNBitUsers(const MachineInstr &OrigMI,
99 const LoongArchSubtarget &ST,
100 const MachineRegisterInfo &MRI, unsigned OrigBits) {
101
102 SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
103 SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
104
105 Worklist.push_back(Elt: std::make_pair(x: &OrigMI, y&: OrigBits));
106
107 while (!Worklist.empty()) {
108 auto P = Worklist.pop_back_val();
109 const MachineInstr *MI = P.first;
110 unsigned Bits = P.second;
111
112 if (!Visited.insert(V: P).second)
113 continue;
114
115 // Only handle instructions with one def.
116 if (MI->getNumExplicitDefs() != 1)
117 return false;
118
119 Register DestReg = MI->getOperand(i: 0).getReg();
120 if (!DestReg.isVirtual())
121 return false;
122
123 for (auto &UserOp : MRI.use_nodbg_operands(Reg: DestReg)) {
124 const MachineInstr *UserMI = UserOp.getParent();
125 unsigned OpIdx = UserOp.getOperandNo();
126
127 switch (UserMI->getOpcode()) {
128 default:
129 // TODO: Add vector
130 return false;
131
132 case LoongArch::ADD_W:
133 case LoongArch::ADDI_W:
134 case LoongArch::SUB_W:
135 case LoongArch::ALSL_W:
136 case LoongArch::ALSL_WU:
137 case LoongArch::MUL_W:
138 case LoongArch::MULH_W:
139 case LoongArch::MULH_WU:
140 case LoongArch::MULW_D_W:
141 case LoongArch::MULW_D_WU:
142 // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
143 // case LoongArch::DIV_W:
144 // case LoongArch::DIV_WU:
145 // case LoongArch::MOD_W:
146 // case LoongArch::MOD_WU:
147 case LoongArch::SLL_W:
148 case LoongArch::SLLI_W:
149 case LoongArch::SRL_W:
150 case LoongArch::SRLI_W:
151 case LoongArch::SRA_W:
152 case LoongArch::SRAI_W:
153 case LoongArch::ROTR_W:
154 case LoongArch::ROTRI_W:
155 case LoongArch::CLO_W:
156 case LoongArch::CLZ_W:
157 case LoongArch::CTO_W:
158 case LoongArch::CTZ_W:
159 case LoongArch::BYTEPICK_W:
160 case LoongArch::REVB_2H:
161 case LoongArch::BITREV_4B:
162 case LoongArch::BITREV_W:
163 case LoongArch::BSTRINS_W:
164 case LoongArch::BSTRPICK_W:
165 case LoongArch::CRC_W_W_W:
166 case LoongArch::CRCC_W_W_W:
167 case LoongArch::MOVGR2FCSR:
168 case LoongArch::MOVGR2FRH_W:
169 case LoongArch::MOVGR2FR_W_64:
170 if (Bits >= 32)
171 break;
172 return false;
173 case LoongArch::MOVGR2CF:
174 if (Bits >= 1)
175 break;
176 return false;
177 case LoongArch::EXT_W_B:
178 if (Bits >= 8)
179 break;
180 return false;
181 case LoongArch::EXT_W_H:
182 if (Bits >= 16)
183 break;
184 return false;
185
186 case LoongArch::SRLI_D: {
187 // If we are shifting right by less than Bits, and users don't demand
188 // any bits that were shifted into [Bits-1:0], then we can consider this
189 // as an N-Bit user.
190 unsigned ShAmt = UserMI->getOperand(i: 2).getImm();
191 if (Bits > ShAmt) {
192 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y: Bits - ShAmt));
193 break;
194 }
195 return false;
196 }
197
198 // these overwrite higher input bits, otherwise the lower word of output
199 // depends only on the lower word of input. So check their uses read W.
200 case LoongArch::SLLI_D:
201 if (Bits >= (ST.getGRLen() - UserMI->getOperand(i: 2).getImm()))
202 break;
203 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
204 break;
205 case LoongArch::ANDI: {
206 uint64_t Imm = UserMI->getOperand(i: 2).getImm();
207 if (Bits >= (unsigned)llvm::bit_width(Value: Imm))
208 break;
209 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
210 break;
211 }
212 case LoongArch::ORI: {
213 uint64_t Imm = UserMI->getOperand(i: 2).getImm();
214 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(Value: ~Imm))
215 break;
216 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
217 break;
218 }
219
220 case LoongArch::SLL_D:
221 // Operand 2 is the shift amount which uses log2(grlen) bits.
222 if (OpIdx == 2) {
223 if (Bits >= Log2_32(Value: ST.getGRLen()))
224 break;
225 return false;
226 }
227 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
228 break;
229
230 case LoongArch::SRA_D:
231 case LoongArch::SRL_D:
232 case LoongArch::ROTR_D:
233 // Operand 2 is the shift amount which uses 6 bits.
234 if (OpIdx == 2 && Bits >= Log2_32(Value: ST.getGRLen()))
235 break;
236 return false;
237
238 case LoongArch::ST_B:
239 case LoongArch::STX_B:
240 case LoongArch::STGT_B:
241 case LoongArch::STLE_B:
242 case LoongArch::IOCSRWR_B:
243 // The first argument is the value to store.
244 if (OpIdx == 0 && Bits >= 8)
245 break;
246 return false;
247 case LoongArch::ST_H:
248 case LoongArch::STX_H:
249 case LoongArch::STGT_H:
250 case LoongArch::STLE_H:
251 case LoongArch::IOCSRWR_H:
252 // The first argument is the value to store.
253 if (OpIdx == 0 && Bits >= 16)
254 break;
255 return false;
256 case LoongArch::ST_W:
257 case LoongArch::STX_W:
258 case LoongArch::SCREL_W:
259 case LoongArch::STPTR_W:
260 case LoongArch::STGT_W:
261 case LoongArch::STLE_W:
262 case LoongArch::IOCSRWR_W:
263 // The first argument is the value to store.
264 if (OpIdx == 0 && Bits >= 32)
265 break;
266 return false;
267
268 case LoongArch::CRC_W_B_W:
269 case LoongArch::CRCC_W_B_W:
270 if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
271 break;
272 return false;
273 case LoongArch::CRC_W_H_W:
274 case LoongArch::CRCC_W_H_W:
275 if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
276 break;
277 return false;
278 case LoongArch::CRC_W_D_W:
279 case LoongArch::CRCC_W_D_W:
280 if (OpIdx == 2 && Bits >= 32)
281 break;
282 return false;
283
284 // For these, lower word of output in these operations, depends only on
285 // the lower word of input. So, we check all uses only read lower word.
286 case LoongArch::COPY:
287 case LoongArch::PHI:
288 case LoongArch::ADD_D:
289 case LoongArch::ADDI_D:
290 case LoongArch::SUB_D:
291 case LoongArch::MUL_D:
292 case LoongArch::AND:
293 case LoongArch::OR:
294 case LoongArch::NOR:
295 case LoongArch::XOR:
296 case LoongArch::XORI:
297 case LoongArch::ANDN:
298 case LoongArch::ORN:
299 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
300 break;
301
302 case LoongArch::MASKNEZ:
303 case LoongArch::MASKEQZ:
304 if (OpIdx != 1)
305 return false;
306 Worklist.push_back(Elt: std::make_pair(x&: UserMI, y&: Bits));
307 break;
308 }
309 }
310 }
311
312 return true;
313}
314
315static bool hasAllWUsers(const MachineInstr &OrigMI,
316 const LoongArchSubtarget &ST,
317 const MachineRegisterInfo &MRI) {
318 return hasAllNBitUsers(OrigMI, ST, MRI, OrigBits: 32);
319}
320
321// This function returns true if the machine instruction always outputs a value
322// where bits 63:32 match bit 31.
323static bool isSignExtendingOpW(const MachineInstr &MI,
324 const MachineRegisterInfo &MRI, unsigned OpNo) {
325 switch (MI.getOpcode()) {
326 // Normal cases
327 case LoongArch::ADD_W:
328 case LoongArch::SUB_W:
329 case LoongArch::ADDI_W:
330 case LoongArch::ALSL_W:
331 case LoongArch::LU12I_W:
332 case LoongArch::SLT:
333 case LoongArch::SLTU:
334 case LoongArch::SLTI:
335 case LoongArch::SLTUI:
336 case LoongArch::ANDI:
337 case LoongArch::MUL_W:
338 case LoongArch::MULH_W:
339 case LoongArch::MULH_WU:
340 case LoongArch::DIV_W:
341 case LoongArch::MOD_W:
342 case LoongArch::DIV_WU:
343 case LoongArch::MOD_WU:
344 case LoongArch::SLL_W:
345 case LoongArch::SRL_W:
346 case LoongArch::SRA_W:
347 case LoongArch::ROTR_W:
348 case LoongArch::SLLI_W:
349 case LoongArch::SRLI_W:
350 case LoongArch::SRAI_W:
351 case LoongArch::ROTRI_W:
352 case LoongArch::EXT_W_B:
353 case LoongArch::EXT_W_H:
354 case LoongArch::CLO_W:
355 case LoongArch::CLZ_W:
356 case LoongArch::CTO_W:
357 case LoongArch::CTZ_W:
358 case LoongArch::BYTEPICK_W:
359 case LoongArch::REVB_2H:
360 case LoongArch::BITREV_4B:
361 case LoongArch::BITREV_W:
362 case LoongArch::BSTRINS_W:
363 case LoongArch::BSTRPICK_W:
364 case LoongArch::LD_B:
365 case LoongArch::LD_H:
366 case LoongArch::LD_W:
367 case LoongArch::LD_BU:
368 case LoongArch::LD_HU:
369 case LoongArch::LL_W:
370 case LoongArch::LLACQ_W:
371 case LoongArch::RDTIMEL_W:
372 case LoongArch::RDTIMEH_W:
373 case LoongArch::CPUCFG:
374 case LoongArch::LDX_B:
375 case LoongArch::LDX_H:
376 case LoongArch::LDX_W:
377 case LoongArch::LDX_BU:
378 case LoongArch::LDX_HU:
379 case LoongArch::LDPTR_W:
380 case LoongArch::LDGT_B:
381 case LoongArch::LDGT_H:
382 case LoongArch::LDGT_W:
383 case LoongArch::LDLE_B:
384 case LoongArch::LDLE_H:
385 case LoongArch::LDLE_W:
386 case LoongArch::AMSWAP_B:
387 case LoongArch::AMSWAP_H:
388 case LoongArch::AMSWAP_W:
389 case LoongArch::AMADD_B:
390 case LoongArch::AMADD_H:
391 case LoongArch::AMADD_W:
392 case LoongArch::AMAND_W:
393 case LoongArch::AMOR_W:
394 case LoongArch::AMXOR_W:
395 case LoongArch::AMMAX_W:
396 case LoongArch::AMMIN_W:
397 case LoongArch::AMMAX_WU:
398 case LoongArch::AMMIN_WU:
399 case LoongArch::AMSWAP__DB_B:
400 case LoongArch::AMSWAP__DB_H:
401 case LoongArch::AMSWAP__DB_W:
402 case LoongArch::AMADD__DB_B:
403 case LoongArch::AMADD__DB_H:
404 case LoongArch::AMADD__DB_W:
405 case LoongArch::AMAND__DB_W:
406 case LoongArch::AMOR__DB_W:
407 case LoongArch::AMXOR__DB_W:
408 case LoongArch::AMMAX__DB_W:
409 case LoongArch::AMMIN__DB_W:
410 case LoongArch::AMMAX__DB_WU:
411 case LoongArch::AMMIN__DB_WU:
412 case LoongArch::AMCAS_B:
413 case LoongArch::AMCAS_H:
414 case LoongArch::AMCAS_W:
415 case LoongArch::AMCAS__DB_B:
416 case LoongArch::AMCAS__DB_H:
417 case LoongArch::AMCAS__DB_W:
418 case LoongArch::CRC_W_B_W:
419 case LoongArch::CRC_W_H_W:
420 case LoongArch::CRC_W_W_W:
421 case LoongArch::CRC_W_D_W:
422 case LoongArch::CRCC_W_B_W:
423 case LoongArch::CRCC_W_H_W:
424 case LoongArch::CRCC_W_W_W:
425 case LoongArch::CRCC_W_D_W:
426 case LoongArch::IOCSRRD_B:
427 case LoongArch::IOCSRRD_H:
428 case LoongArch::IOCSRRD_W:
429 case LoongArch::MOVFR2GR_S:
430 case LoongArch::MOVFCSR2GR:
431 case LoongArch::MOVCF2GR:
432 case LoongArch::MOVFRH2GR_S:
433 case LoongArch::MOVFR2GR_S_64:
434 // TODO: Add vector
435 return true;
436 // Special cases that require checking operands.
437 // shifting right sufficiently makes the value 32-bit sign-extended
438 case LoongArch::SRAI_D:
439 return MI.getOperand(i: 2).getImm() >= 32;
440 case LoongArch::SRLI_D:
441 return MI.getOperand(i: 2).getImm() > 32;
442 // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
443 case LoongArch::ADDI_D:
444 case LoongArch::ORI:
445 return MI.getOperand(i: 1).isReg() &&
446 MI.getOperand(i: 1).getReg() == LoongArch::R0;
447 // A bits extract is sign extended if the msb is less than 31.
448 case LoongArch::BSTRPICK_D:
449 return MI.getOperand(i: 2).getImm() < 31;
450 // Copying from R0 produces zero.
451 case LoongArch::COPY:
452 return MI.getOperand(i: 1).getReg() == LoongArch::R0;
453 // Ignore the scratch register destination.
454 case LoongArch::PseudoMaskedAtomicSwap32:
455 case LoongArch::PseudoAtomicSwap32:
456 case LoongArch::PseudoMaskedAtomicLoadAdd32:
457 case LoongArch::PseudoMaskedAtomicLoadSub32:
458 case LoongArch::PseudoAtomicLoadNand32:
459 case LoongArch::PseudoMaskedAtomicLoadNand32:
460 case LoongArch::PseudoAtomicLoadAdd32:
461 case LoongArch::PseudoAtomicLoadSub32:
462 case LoongArch::PseudoAtomicLoadAnd32:
463 case LoongArch::PseudoAtomicLoadOr32:
464 case LoongArch::PseudoAtomicLoadXor32:
465 case LoongArch::PseudoMaskedAtomicLoadUMax32:
466 case LoongArch::PseudoMaskedAtomicLoadUMin32:
467 case LoongArch::PseudoCmpXchg32:
468 case LoongArch::PseudoMaskedCmpXchg32:
469 case LoongArch::PseudoMaskedAtomicLoadMax32:
470 case LoongArch::PseudoMaskedAtomicLoadMin32:
471 return OpNo == 0;
472 }
473
474 return false;
475}
476
477static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
478 const MachineRegisterInfo &MRI,
479 SmallPtrSetImpl<MachineInstr *> &FixableDef) {
480 SmallSet<Register, 4> Visited;
481 SmallVector<Register, 4> Worklist;
482
483 auto AddRegToWorkList = [&](Register SrcReg) {
484 if (!SrcReg.isVirtual())
485 return false;
486 Worklist.push_back(Elt: SrcReg);
487 return true;
488 };
489
490 if (!AddRegToWorkList(SrcReg))
491 return false;
492
493 while (!Worklist.empty()) {
494 Register Reg = Worklist.pop_back_val();
495
496 // If we already visited this register, we don't need to check it again.
497 if (!Visited.insert(V: Reg).second)
498 continue;
499
500 MachineInstr *MI = MRI.getVRegDef(Reg);
501 if (!MI)
502 continue;
503
504 int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
505 assert(OpNo != -1 && "Couldn't find register");
506
507 // If this is a sign extending operation we don't need to look any further.
508 if (isSignExtendingOpW(MI: *MI, MRI, OpNo))
509 continue;
510
511 // Is this an instruction that propagates sign extend?
512 switch (MI->getOpcode()) {
513 default:
514 // Unknown opcode, give up.
515 return false;
516 case LoongArch::COPY: {
517 const MachineFunction *MF = MI->getMF();
518 const LoongArchMachineFunctionInfo *LAFI =
519 MF->getInfo<LoongArchMachineFunctionInfo>();
520
521 // If this is the entry block and the register is livein, see if we know
522 // it is sign extended.
523 if (MI->getParent() == &MF->front()) {
524 Register VReg = MI->getOperand(i: 0).getReg();
525 if (MF->getRegInfo().isLiveIn(Reg: VReg) && LAFI->isSExt32Register(Reg: VReg))
526 continue;
527 }
528
529 Register CopySrcReg = MI->getOperand(i: 1).getReg();
530 if (CopySrcReg == LoongArch::R4) {
531 // For a method return value, we check the ZExt/SExt flags in attribute.
532 // We assume the following code sequence for method call.
533 // PseudoCALL @bar, ...
534 // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
535 // %0:gpr = COPY $r4
536 //
537 // We use the PseudoCall to look up the IR function being called to find
538 // its return attributes.
539 const MachineBasicBlock *MBB = MI->getParent();
540 auto II = MI->getIterator();
541 if (II == MBB->instr_begin() ||
542 (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
543 return false;
544
545 const MachineInstr &CallMI = *(--II);
546 if (!CallMI.isCall() || !CallMI.getOperand(i: 0).isGlobal())
547 return false;
548
549 auto *CalleeFn =
550 dyn_cast_if_present<Function>(Val: CallMI.getOperand(i: 0).getGlobal());
551 if (!CalleeFn)
552 return false;
553
554 auto *IntTy = dyn_cast<IntegerType>(Val: CalleeFn->getReturnType());
555 if (!IntTy)
556 return false;
557
558 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
559 unsigned BitWidth = IntTy->getBitWidth();
560 if ((BitWidth <= 32 && Attrs.hasAttribute(Kind: Attribute::SExt)) ||
561 (BitWidth < 32 && Attrs.hasAttribute(Kind: Attribute::ZExt)))
562 continue;
563 }
564
565 if (!AddRegToWorkList(CopySrcReg))
566 return false;
567
568 break;
569 }
570
571 // For these, we just need to check if the 1st operand is sign extended.
572 case LoongArch::MOD_D:
573 case LoongArch::ANDI:
574 case LoongArch::ORI:
575 case LoongArch::XORI:
576 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
577 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
578 // Logical operations use a sign extended 12-bit immediate.
579 if (!AddRegToWorkList(MI->getOperand(i: 1).getReg()))
580 return false;
581
582 break;
583 case LoongArch::MOD_DU:
584 case LoongArch::AND:
585 case LoongArch::OR:
586 case LoongArch::XOR:
587 case LoongArch::ANDN:
588 case LoongArch::ORN:
589 case LoongArch::PHI: {
590 // If all incoming values are sign-extended, the output of AND, OR, XOR,
591 // or PHI is also sign-extended.
592
593 // The input registers for PHI are operand 1, 3, ...
594 // The input registers for others are operand 1 and 2.
595 unsigned B = 1, E = 3, D = 1;
596 switch (MI->getOpcode()) {
597 case LoongArch::PHI:
598 E = MI->getNumOperands();
599 D = 2;
600 break;
601 }
602
603 for (unsigned I = B; I != E; I += D) {
604 if (!MI->getOperand(i: I).isReg())
605 return false;
606
607 if (!AddRegToWorkList(MI->getOperand(i: I).getReg()))
608 return false;
609 }
610
611 break;
612 }
613
614 case LoongArch::MASKEQZ:
615 case LoongArch::MASKNEZ:
616 // Instructions return zero or operand 1. Result is sign extended if
617 // operand 1 is sign extended.
618 if (!AddRegToWorkList(MI->getOperand(i: 1).getReg()))
619 return false;
620 break;
621
622 // With these opcode, we can "fix" them with the W-version
623 // if we know all users of the result only rely on bits 31:0
624 case LoongArch::SLLI_D:
625 // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
626 if (MI->getOperand(i: 2).getImm() >= 32)
627 return false;
628 [[fallthrough]];
629 case LoongArch::ADDI_D:
630 case LoongArch::ADD_D:
631 case LoongArch::LD_D:
632 case LoongArch::LD_WU:
633 case LoongArch::MUL_D:
634 case LoongArch::SUB_D:
635 if (hasAllWUsers(OrigMI: *MI, ST, MRI)) {
636 FixableDef.insert(Ptr: MI);
637 break;
638 }
639 return false;
640 // If all incoming values are sign-extended and all users only use
641 // the lower 32 bits, then convert them to W versions.
642 case LoongArch::DIV_D: {
643 if (!AddRegToWorkList(MI->getOperand(i: 1).getReg()))
644 return false;
645 if (!AddRegToWorkList(MI->getOperand(i: 2).getReg()))
646 return false;
647 if (hasAllWUsers(OrigMI: *MI, ST, MRI)) {
648 FixableDef.insert(Ptr: MI);
649 break;
650 }
651 return false;
652 }
653 }
654 }
655
656 // If we get here, then every node we visited produces a sign extended value
657 // or propagated sign extended values. So the result must be sign extended.
658 return true;
659}
660
661static unsigned getWOp(unsigned Opcode) {
662 switch (Opcode) {
663 case LoongArch::ADDI_D:
664 return LoongArch::ADDI_W;
665 case LoongArch::ADD_D:
666 return LoongArch::ADD_W;
667 case LoongArch::DIV_D:
668 return LoongArch::DIV_W;
669 case LoongArch::LD_D:
670 case LoongArch::LD_WU:
671 return LoongArch::LD_W;
672 case LoongArch::MUL_D:
673 return LoongArch::MUL_W;
674 case LoongArch::SLLI_D:
675 return LoongArch::SLLI_W;
676 case LoongArch::SUB_D:
677 return LoongArch::SUB_W;
678 default:
679 llvm_unreachable("Unexpected opcode for replacement with W variant");
680 }
681}
682
683bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
684 const LoongArchInstrInfo &TII,
685 const LoongArchSubtarget &ST,
686 MachineRegisterInfo &MRI) {
687 if (DisableSExtWRemoval)
688 return false;
689
690 bool MadeChange = false;
691 for (MachineBasicBlock &MBB : MF) {
692 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MBB)) {
693 // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
694 if (!LoongArch::isSEXT_W(MI))
695 continue;
696
697 Register SrcReg = MI.getOperand(i: 1).getReg();
698
699 SmallPtrSet<MachineInstr *, 4> FixableDefs;
700
701 // If all users only use the lower bits, this sext.w is redundant.
702 // Or if all definitions reaching MI sign-extend their output,
703 // then sext.w is redundant.
704 if (!hasAllWUsers(OrigMI: MI, ST, MRI) &&
705 !isSignExtendedW(SrcReg, ST, MRI, FixableDef&: FixableDefs))
706 continue;
707
708 Register DstReg = MI.getOperand(i: 0).getReg();
709 if (!MRI.constrainRegClass(Reg: SrcReg, RC: MRI.getRegClass(Reg: DstReg)))
710 continue;
711
712 // Convert Fixable instructions to their W versions.
713 for (MachineInstr *Fixable : FixableDefs) {
714 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
715 Fixable->setDesc(TII.get(Opcode: getWOp(Opcode: Fixable->getOpcode())));
716 Fixable->clearFlag(Flag: MachineInstr::MIFlag::NoSWrap);
717 Fixable->clearFlag(Flag: MachineInstr::MIFlag::NoUWrap);
718 Fixable->clearFlag(Flag: MachineInstr::MIFlag::IsExact);
719 LLVM_DEBUG(dbgs() << " with " << *Fixable);
720 ++NumTransformedToWInstrs;
721 }
722
723 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
724 MRI.replaceRegWith(FromReg: DstReg, ToReg: SrcReg);
725 MRI.clearKillFlags(Reg: SrcReg);
726 MI.eraseFromParent();
727 ++NumRemovedSExtW;
728 MadeChange = true;
729 }
730 }
731
732 return MadeChange;
733}
734
735bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
736 const LoongArchInstrInfo &TII,
737 const LoongArchSubtarget &ST,
738 MachineRegisterInfo &MRI) {
739 bool MadeChange = false;
740 for (MachineBasicBlock &MBB : MF) {
741 for (MachineInstr &MI : MBB) {
742 unsigned Opc;
743 switch (MI.getOpcode()) {
744 default:
745 continue;
746 case LoongArch::ADDI_W:
747 Opc = LoongArch::ADDI_D;
748 break;
749 }
750
751 if (hasAllWUsers(OrigMI: MI, ST, MRI)) {
752 MI.setDesc(TII.get(Opcode: Opc));
753 MadeChange = true;
754 }
755 }
756 }
757
758 return MadeChange;
759}
760
761bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
762 const LoongArchInstrInfo &TII,
763 const LoongArchSubtarget &ST,
764 MachineRegisterInfo &MRI) {
765 bool MadeChange = false;
766 for (MachineBasicBlock &MBB : MF) {
767 for (MachineInstr &MI : MBB) {
768 unsigned WOpc;
769 // TODO: Add more?
770 switch (MI.getOpcode()) {
771 default:
772 continue;
773 case LoongArch::ADD_D:
774 WOpc = LoongArch::ADD_W;
775 break;
776 case LoongArch::ADDI_D:
777 WOpc = LoongArch::ADDI_W;
778 break;
779 case LoongArch::SUB_D:
780 WOpc = LoongArch::SUB_W;
781 break;
782 case LoongArch::MUL_D:
783 WOpc = LoongArch::MUL_W;
784 break;
785 case LoongArch::SLLI_D:
786 // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
787 if (MI.getOperand(i: 2).getImm() >= 32)
788 continue;
789 WOpc = LoongArch::SLLI_W;
790 break;
791 case LoongArch::LD_D:
792 case LoongArch::LD_WU:
793 WOpc = LoongArch::LD_W;
794 break;
795 }
796
797 if (hasAllWUsers(OrigMI: MI, ST, MRI)) {
798 LLVM_DEBUG(dbgs() << "Replacing " << MI);
799 MI.setDesc(TII.get(Opcode: WOpc));
800 MI.clearFlag(Flag: MachineInstr::MIFlag::NoSWrap);
801 MI.clearFlag(Flag: MachineInstr::MIFlag::NoUWrap);
802 MI.clearFlag(Flag: MachineInstr::MIFlag::IsExact);
803 LLVM_DEBUG(dbgs() << " with " << MI);
804 ++NumTransformedToWInstrs;
805 MadeChange = true;
806 }
807 }
808 }
809
810 return MadeChange;
811}
812
813bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
814 if (skipFunction(F: MF.getFunction()))
815 return false;
816
817 MachineRegisterInfo &MRI = MF.getRegInfo();
818 const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();
819 const LoongArchInstrInfo &TII = *ST.getInstrInfo();
820
821 if (!ST.is64Bit())
822 return false;
823
824 bool MadeChange = false;
825 MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
826
827 if (!(DisableCvtToDSuffix || ST.preferWInst()))
828 MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);
829
830 if (ST.preferWInst())
831 MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);
832
833 return MadeChange;
834}
835