1 | //===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines an instruction selector for the SystemZ target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "SystemZTargetMachine.h" |
14 | #include "SystemZISelLowering.h" |
15 | #include "llvm/Analysis/AliasAnalysis.h" |
16 | #include "llvm/CodeGen/SelectionDAGISel.h" |
17 | #include "llvm/Support/Debug.h" |
18 | #include "llvm/Support/KnownBits.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | |
21 | using namespace llvm; |
22 | |
// Short identifier for this pass; it is handed to INITIALIZE_PASS further
// down in this file.
#define DEBUG_TYPE "systemz-isel"
// Human-readable pass description, likewise handed to INITIALIZE_PASS.
#define PASS_NAME "SystemZ DAG->DAG Pattern Instruction Selection"
25 | |
26 | namespace { |
27 | // Used to build addressing modes. |
28 | struct SystemZAddressingMode { |
29 | // The shape of the address. |
30 | enum AddrForm { |
31 | // base+displacement |
32 | FormBD, |
33 | |
34 | // base+displacement+index for load and store operands |
35 | FormBDXNormal, |
36 | |
37 | // base+displacement+index for load address operands |
38 | FormBDXLA, |
39 | |
40 | // base+displacement+index+ADJDYNALLOC |
41 | FormBDXDynAlloc |
42 | }; |
43 | AddrForm Form; |
44 | |
45 | // The type of displacement. The enum names here correspond directly |
46 | // to the definitions in SystemZOperand.td. We could split them into |
47 | // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it. |
48 | enum DispRange { |
49 | Disp12Only, |
50 | Disp12Pair, |
51 | Disp20Only, |
52 | Disp20Only128, |
53 | Disp20Pair |
54 | }; |
55 | DispRange DR; |
56 | |
57 | // The parts of the address. The address is equivalent to: |
58 | // |
59 | // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0) |
60 | SDValue Base; |
61 | int64_t Disp; |
62 | SDValue Index; |
63 | bool IncludesDynAlloc; |
64 | |
65 | SystemZAddressingMode(AddrForm form, DispRange dr) |
66 | : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {} |
67 | |
68 | // True if the address can have an index register. |
69 | bool hasIndexField() { return Form != FormBD; } |
70 | |
71 | // True if the address can (and must) include ADJDYNALLOC. |
72 | bool isDynAlloc() { return Form == FormBDXDynAlloc; } |
73 | |
74 | void dump(const llvm::SelectionDAG *DAG) { |
75 | errs() << "SystemZAddressingMode " << this << '\n'; |
76 | |
77 | errs() << " Base " ; |
78 | if (Base.getNode()) |
79 | Base.getNode()->dump(G: DAG); |
80 | else |
81 | errs() << "null\n" ; |
82 | |
83 | if (hasIndexField()) { |
84 | errs() << " Index " ; |
85 | if (Index.getNode()) |
86 | Index.getNode()->dump(G: DAG); |
87 | else |
88 | errs() << "null\n" ; |
89 | } |
90 | |
91 | errs() << " Disp " << Disp; |
92 | if (IncludesDynAlloc) |
93 | errs() << " + ADJDYNALLOC" ; |
94 | errs() << '\n'; |
95 | } |
96 | }; |
97 | |
// Return a mask whose Count least-significant bits are set.  Count must not
// exceed 64.
static uint64_t allOnes(unsigned int Count) {
  assert(Count <= 64);
  // Shifting a 64-bit value by 64 is undefined, so the full-width mask is
  // produced directly instead of via the shift.
  return Count > 63 ? UINT64_MAX : (uint64_t(1) << Count) - 1;
}
105 | |
106 | // Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation |
107 | // given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and |
108 | // Rotate (I5). The combined operand value is effectively: |
109 | // |
110 | // (or (rotl Input, Rotate), ~Mask) |
111 | // |
112 | // for RNSBG and: |
113 | // |
114 | // (and (rotl Input, Rotate), Mask) |
115 | // |
116 | // otherwise. The output value has BitSize bits, although Input may be |
117 | // narrower (in which case the upper bits are don't care), or wider (in which |
118 | // case the result will be truncated as part of the operation). |
119 | struct RxSBGOperands { |
120 | RxSBGOperands(unsigned Op, SDValue N) |
121 | : Opcode(Op), BitSize(N.getValueSizeInBits()), |
122 | Mask(allOnes(Count: BitSize)), Input(N), Start(64 - BitSize), End(63), |
123 | Rotate(0) {} |
124 | |
125 | unsigned Opcode; |
126 | unsigned BitSize; |
127 | uint64_t Mask; |
128 | SDValue Input; |
129 | unsigned Start; |
130 | unsigned End; |
131 | unsigned Rotate; |
132 | }; |
133 | |
134 | class SystemZDAGToDAGISel : public SelectionDAGISel { |
135 | const SystemZSubtarget *Subtarget; |
136 | |
137 | // Used by SystemZOperands.td to create integer constants. |
138 | inline SDValue getImm(const SDNode *Node, uint64_t Imm) const { |
139 | return CurDAG->getTargetConstant(Val: Imm, DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0)); |
140 | } |
141 | |
142 | const SystemZTargetMachine &getTargetMachine() const { |
143 | return static_cast<const SystemZTargetMachine &>(TM); |
144 | } |
145 | |
146 | const SystemZInstrInfo *getInstrInfo() const { |
147 | return Subtarget->getInstrInfo(); |
148 | } |
149 | |
150 | // Try to fold more of the base or index of AM into AM, where IsBase |
151 | // selects between the base and index. |
152 | bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const; |
153 | |
154 | // Try to describe N in AM, returning true on success. |
155 | bool selectAddress(SDValue N, SystemZAddressingMode &AM) const; |
156 | |
157 | // Extract individual target operands from matched address AM. |
158 | void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, |
159 | SDValue &Base, SDValue &Disp) const; |
160 | void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, |
161 | SDValue &Base, SDValue &Disp, SDValue &Index) const; |
162 | |
163 | // Try to match Addr as a FormBD address with displacement type DR. |
164 | // Return true on success, storing the base and displacement in |
165 | // Base and Disp respectively. |
166 | bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, |
167 | SDValue &Base, SDValue &Disp) const; |
168 | |
169 | // Try to match Addr as a FormBDX address with displacement type DR. |
170 | // Return true on success and if the result had no index. Store the |
171 | // base and displacement in Base and Disp respectively. |
172 | bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, |
173 | SDValue &Base, SDValue &Disp) const; |
174 | |
175 | // Try to match Addr as a FormBDX* address of form Form with |
176 | // displacement type DR. Return true on success, storing the base, |
177 | // displacement and index in Base, Disp and Index respectively. |
178 | bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, |
179 | SystemZAddressingMode::DispRange DR, SDValue Addr, |
180 | SDValue &Base, SDValue &Disp, SDValue &Index) const; |
181 | |
182 | // PC-relative address matching routines used by SystemZOperands.td. |
183 | bool selectPCRelAddress(SDValue Addr, SDValue &Target) const { |
184 | if (SystemZISD::isPCREL(Opcode: Addr.getOpcode())) { |
185 | Target = Addr.getOperand(i: 0); |
186 | return true; |
187 | } |
188 | return false; |
189 | } |
190 | |
191 | // BD matching routines used by SystemZOperands.td. |
192 | bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
193 | return selectBDAddr(DR: SystemZAddressingMode::Disp12Only, Addr, Base, Disp); |
194 | } |
195 | bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
196 | return selectBDAddr(DR: SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); |
197 | } |
198 | bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
199 | return selectBDAddr(DR: SystemZAddressingMode::Disp20Only, Addr, Base, Disp); |
200 | } |
201 | bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
202 | return selectBDAddr(DR: SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); |
203 | } |
204 | |
205 | // MVI matching routines used by SystemZOperands.td. |
206 | bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
207 | return selectMVIAddr(DR: SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); |
208 | } |
209 | bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const { |
210 | return selectMVIAddr(DR: SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); |
211 | } |
212 | |
213 | // BDX matching routines used by SystemZOperands.td. |
214 | bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, |
215 | SDValue &Index) const { |
216 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXNormal, |
217 | DR: SystemZAddressingMode::Disp12Only, |
218 | Addr, Base, Disp, Index); |
219 | } |
220 | bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, |
221 | SDValue &Index) const { |
222 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXNormal, |
223 | DR: SystemZAddressingMode::Disp12Pair, |
224 | Addr, Base, Disp, Index); |
225 | } |
226 | bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, |
227 | SDValue &Index) const { |
228 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXDynAlloc, |
229 | DR: SystemZAddressingMode::Disp12Only, |
230 | Addr, Base, Disp, Index); |
231 | } |
232 | bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, |
233 | SDValue &Index) const { |
234 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXNormal, |
235 | DR: SystemZAddressingMode::Disp20Only, |
236 | Addr, Base, Disp, Index); |
237 | } |
238 | bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, |
239 | SDValue &Index) const { |
240 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXNormal, |
241 | DR: SystemZAddressingMode::Disp20Only128, |
242 | Addr, Base, Disp, Index); |
243 | } |
244 | bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, |
245 | SDValue &Index) const { |
246 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXNormal, |
247 | DR: SystemZAddressingMode::Disp20Pair, |
248 | Addr, Base, Disp, Index); |
249 | } |
250 | bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, |
251 | SDValue &Index) const { |
252 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXLA, |
253 | DR: SystemZAddressingMode::Disp12Pair, |
254 | Addr, Base, Disp, Index); |
255 | } |
256 | bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, |
257 | SDValue &Index) const { |
258 | return selectBDXAddr(Form: SystemZAddressingMode::FormBDXLA, |
259 | DR: SystemZAddressingMode::Disp20Pair, |
260 | Addr, Base, Disp, Index); |
261 | } |
262 | |
263 | // Try to match Addr as an address with a base, 12-bit displacement |
264 | // and index, where the index is element Elem of a vector. |
265 | // Return true on success, storing the base, displacement and vector |
266 | // in Base, Disp and Index respectively. |
267 | bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, |
268 | SDValue &Disp, SDValue &Index) const; |
269 | |
270 | // Check whether (or Op (and X InsertMask)) is effectively an insertion |
271 | // of X into bits InsertMask of some Y != Op. Return true if so and |
272 | // set Op to that Y. |
273 | bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const; |
274 | |
275 | // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used. |
276 | // Return true on success. |
277 | bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const; |
278 | |
279 | // Try to fold some of RxSBG.Input into other fields of RxSBG. |
280 | // Return true on success. |
281 | bool expandRxSBG(RxSBGOperands &RxSBG) const; |
282 | |
283 | // Return an undefined value of type VT. |
284 | SDValue getUNDEF(const SDLoc &DL, EVT VT) const; |
285 | |
286 | // Convert N to VT, if it isn't already. |
287 | SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const; |
288 | |
289 | // Try to implement AND or shift node N using RISBG with the zero flag set. |
290 | // Return the selected node on success, otherwise return null. |
291 | bool tryRISBGZero(SDNode *N); |
292 | |
293 | // Try to use RISBG or Opcode to implement OR or XOR node N. |
294 | // Return the selected node on success, otherwise return null. |
295 | bool tryRxSBG(SDNode *N, unsigned Opcode); |
296 | |
297 | // If Op0 is null, then Node is a constant that can be loaded using: |
298 | // |
299 | // (Opcode UpperVal LowerVal) |
300 | // |
301 | // If Op0 is nonnull, then Node can be implemented using: |
302 | // |
303 | // (Opcode (Opcode Op0 UpperVal) LowerVal) |
304 | void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, |
305 | uint64_t UpperVal, uint64_t LowerVal); |
306 | |
307 | void loadVectorConstant(const SystemZVectorConstantInfo &VCI, |
308 | SDNode *Node); |
309 | |
310 | SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL); |
311 | |
312 | // Try to use gather instruction Opcode to implement vector insertion N. |
313 | bool tryGather(SDNode *N, unsigned Opcode); |
314 | |
315 | // Try to use scatter instruction Opcode to implement store Store. |
316 | bool tryScatter(StoreSDNode *Store, unsigned Opcode); |
317 | |
318 | // Change a chain of {load; op; store} of the same value into a simple op |
319 | // through memory of that value, if the uses of the modified value and its |
320 | // address are suitable. |
321 | bool tryFoldLoadStoreIntoMemOperand(SDNode *Node); |
322 | |
323 | // Return true if Load and Store are loads and stores of the same size |
324 | // and are guaranteed not to overlap. Such operations can be implemented |
325 | // using block (SS-format) instructions. |
326 | // |
327 | // Partial overlap would lead to incorrect code, since the block operations |
328 | // are logically bytewise, even though they have a fast path for the |
329 | // non-overlapping case. We also need to avoid full overlap (i.e. two |
330 | // addresses that might be equal at run time) because although that case |
331 | // would be handled correctly, it might be implemented by millicode. |
332 | bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const; |
333 | |
334 | // N is a (store (load Y), X) pattern. Return true if it can use an MVC |
335 | // from Y to X. |
336 | bool storeLoadCanUseMVC(SDNode *N) const; |
337 | |
338 | // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true |
339 | // if A[1 - I] == X and if N can use a block operation like NC from A[I] |
340 | // to X. |
341 | bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; |
342 | |
343 | // Return true if N (a load or a store) fullfills the alignment |
344 | // requirements for a PC-relative access. |
345 | bool storeLoadIsAligned(SDNode *N) const; |
346 | |
347 | // Return the load extension type of a load or atomic load. |
348 | ISD::LoadExtType getLoadExtType(SDNode *N) const; |
349 | |
350 | // Try to expand a boolean SELECT_CCMASK using an IPM sequence. |
351 | SDValue expandSelectBoolean(SDNode *Node); |
352 | |
353 | // Return true if the flags of N and the subtarget allows for |
354 | // reassociation, in which case a reg/reg opcode is needed as input to the |
355 | // MachineCombiner. |
356 | bool shouldSelectForReassoc(SDNode *N) const; |
357 | |
358 | public: |
359 | SystemZDAGToDAGISel() = delete; |
360 | |
361 | SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel) |
362 | : SelectionDAGISel(TM, OptLevel) {} |
363 | |
364 | bool runOnMachineFunction(MachineFunction &MF) override { |
365 | const Function &F = MF.getFunction(); |
366 | if (F.getFnAttribute(Kind: "fentry-call" ).getValueAsString() != "true" ) { |
367 | if (F.hasFnAttribute(Kind: "mnop-mcount" )) |
368 | report_fatal_error(reason: "mnop-mcount only supported with fentry-call" ); |
369 | if (F.hasFnAttribute(Kind: "mrecord-mcount" )) |
370 | report_fatal_error(reason: "mrecord-mcount only supported with fentry-call" ); |
371 | } |
372 | |
373 | Subtarget = &MF.getSubtarget<SystemZSubtarget>(); |
374 | return SelectionDAGISel::runOnMachineFunction(mf&: MF); |
375 | } |
376 | |
377 | // Override SelectionDAGISel. |
378 | void Select(SDNode *Node) override; |
379 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
380 | InlineAsm::ConstraintCode ConstraintID, |
381 | std::vector<SDValue> &OutOps) override; |
382 | bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; |
383 | void PreprocessISelDAG() override; |
384 | |
385 | // Include the pieces autogenerated from the target description. |
386 | #include "SystemZGenDAGISel.inc" |
387 | }; |
388 | |
389 | class SystemZDAGToDAGISelLegacy : public SelectionDAGISelLegacy { |
390 | public: |
391 | static char ID; |
392 | explicit SystemZDAGToDAGISelLegacy(SystemZTargetMachine &TM, |
393 | CodeGenOptLevel OptLevel) |
394 | : SelectionDAGISelLegacy( |
395 | ID, std::make_unique<SystemZDAGToDAGISel>(args&: TM, args&: OptLevel)) {} |
396 | }; |
397 | } // end anonymous namespace |
398 | |
// Out-of-class definition of the static pass identifier declared in
// SystemZDAGToDAGISelLegacy.
char SystemZDAGToDAGISelLegacy::ID = 0;

// Register the legacy pass using DEBUG_TYPE as its argument name and
// PASS_NAME as its description.  NOTE(review): the two trailing `false`
// flags are presumably "CFG-only" and "is analysis" -- confirm against the
// INITIALIZE_PASS macro definition.
INITIALIZE_PASS(SystemZDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
402 | |
403 | FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, |
404 | CodeGenOptLevel OptLevel) { |
405 | return new SystemZDAGToDAGISelLegacy(TM, OptLevel); |
406 | } |
407 | |
408 | // Return true if Val should be selected as a displacement for an address |
409 | // with range DR. Here we're interested in the range of both the instruction |
410 | // described by DR and of any pairing instruction. |
411 | static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { |
412 | switch (DR) { |
413 | case SystemZAddressingMode::Disp12Only: |
414 | return isUInt<12>(x: Val); |
415 | |
416 | case SystemZAddressingMode::Disp12Pair: |
417 | case SystemZAddressingMode::Disp20Only: |
418 | case SystemZAddressingMode::Disp20Pair: |
419 | return isInt<20>(x: Val); |
420 | |
421 | case SystemZAddressingMode::Disp20Only128: |
422 | return isInt<20>(x: Val) && isInt<20>(x: Val + 8); |
423 | } |
424 | llvm_unreachable("Unhandled displacement range" ); |
425 | } |
426 | |
427 | // Change the base or index in AM to Value, where IsBase selects |
428 | // between the base and index. |
429 | static void changeComponent(SystemZAddressingMode &AM, bool IsBase, |
430 | SDValue Value) { |
431 | if (IsBase) |
432 | AM.Base = Value; |
433 | else |
434 | AM.Index = Value; |
435 | } |
436 | |
437 | // The base or index of AM is equivalent to Value + ADJDYNALLOC, |
438 | // where IsBase selects between the base and index. Try to fold the |
439 | // ADJDYNALLOC into AM. |
440 | static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, |
441 | SDValue Value) { |
442 | if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { |
443 | changeComponent(AM, IsBase, Value); |
444 | AM.IncludesDynAlloc = true; |
445 | return true; |
446 | } |
447 | return false; |
448 | } |
449 | |
450 | // The base of AM is equivalent to Base + Index. Try to use Index as |
451 | // the index register. |
452 | static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, |
453 | SDValue Index) { |
454 | if (AM.hasIndexField() && !AM.Index.getNode()) { |
455 | AM.Base = Base; |
456 | AM.Index = Index; |
457 | return true; |
458 | } |
459 | return false; |
460 | } |
461 | |
462 | // The base or index of AM is equivalent to Op0 + Op1, where IsBase selects |
463 | // between the base and index. Try to fold Op1 into AM's displacement. |
464 | static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, |
465 | SDValue Op0, uint64_t Op1) { |
466 | // First try adjusting the displacement. |
467 | int64_t TestDisp = AM.Disp + Op1; |
468 | if (selectDisp(DR: AM.DR, Val: TestDisp)) { |
469 | changeComponent(AM, IsBase, Value: Op0); |
470 | AM.Disp = TestDisp; |
471 | return true; |
472 | } |
473 | |
474 | // We could consider forcing the displacement into a register and |
475 | // using it as an index, but it would need to be carefully tuned. |
476 | return false; |
477 | } |
478 | |
479 | bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, |
480 | bool IsBase) const { |
481 | SDValue N = IsBase ? AM.Base : AM.Index; |
482 | unsigned Opcode = N.getOpcode(); |
483 | // Look through no-op truncations. |
484 | if (Opcode == ISD::TRUNCATE && N.getOperand(i: 0).getValueSizeInBits() <= 64) { |
485 | N = N.getOperand(i: 0); |
486 | Opcode = N.getOpcode(); |
487 | } |
488 | if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(Op: N)) { |
489 | SDValue Op0 = N.getOperand(i: 0); |
490 | SDValue Op1 = N.getOperand(i: 1); |
491 | |
492 | unsigned Op0Code = Op0->getOpcode(); |
493 | unsigned Op1Code = Op1->getOpcode(); |
494 | |
495 | if (Op0Code == SystemZISD::ADJDYNALLOC) |
496 | return expandAdjDynAlloc(AM, IsBase, Value: Op1); |
497 | if (Op1Code == SystemZISD::ADJDYNALLOC) |
498 | return expandAdjDynAlloc(AM, IsBase, Value: Op0); |
499 | |
500 | if (Op0Code == ISD::Constant) |
501 | return expandDisp(AM, IsBase, Op0: Op1, |
502 | Op1: cast<ConstantSDNode>(Val&: Op0)->getSExtValue()); |
503 | if (Op1Code == ISD::Constant) |
504 | return expandDisp(AM, IsBase, Op0, |
505 | Op1: cast<ConstantSDNode>(Val&: Op1)->getSExtValue()); |
506 | |
507 | if (IsBase && expandIndex(AM, Base: Op0, Index: Op1)) |
508 | return true; |
509 | } |
510 | if (Opcode == SystemZISD::PCREL_OFFSET) { |
511 | SDValue Full = N.getOperand(i: 0); |
512 | SDValue Base = N.getOperand(i: 1); |
513 | SDValue Anchor = Base.getOperand(i: 0); |
514 | uint64_t Offset = (cast<GlobalAddressSDNode>(Val&: Full)->getOffset() - |
515 | cast<GlobalAddressSDNode>(Val&: Anchor)->getOffset()); |
516 | return expandDisp(AM, IsBase, Op0: Base, Op1: Offset); |
517 | } |
518 | return false; |
519 | } |
520 | |
521 | // Return true if an instruction with displacement range DR should be |
522 | // used for displacement value Val. selectDisp(DR, Val) must already hold. |
523 | static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { |
524 | assert(selectDisp(DR, Val) && "Invalid displacement" ); |
525 | switch (DR) { |
526 | case SystemZAddressingMode::Disp12Only: |
527 | case SystemZAddressingMode::Disp20Only: |
528 | case SystemZAddressingMode::Disp20Only128: |
529 | return true; |
530 | |
531 | case SystemZAddressingMode::Disp12Pair: |
532 | // Use the other instruction if the displacement is too large. |
533 | return isUInt<12>(x: Val); |
534 | |
535 | case SystemZAddressingMode::Disp20Pair: |
536 | // Use the other instruction if the displacement is small enough. |
537 | return !isUInt<12>(x: Val); |
538 | } |
539 | llvm_unreachable("Unhandled displacement range" ); |
540 | } |
541 | |
542 | // Return true if Base + Disp + Index should be performed by LA(Y). |
543 | static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { |
544 | // Don't use LA(Y) for constants. |
545 | if (!Base) |
546 | return false; |
547 | |
548 | // Always use LA(Y) for frame addresses, since we know that the destination |
549 | // register is almost always (perhaps always) going to be different from |
550 | // the frame register. |
551 | if (Base->getOpcode() == ISD::FrameIndex) |
552 | return true; |
553 | |
554 | if (Disp) { |
555 | // Always use LA(Y) if there is a base, displacement and index. |
556 | if (Index) |
557 | return true; |
558 | |
559 | // Always use LA if the displacement is small enough. It should always |
560 | // be no worse than AGHI (and better if it avoids a move). |
561 | if (isUInt<12>(x: Disp)) |
562 | return true; |
563 | |
564 | // For similar reasons, always use LAY if the constant is too big for AGHI. |
565 | // LAY should be no worse than AGFI. |
566 | if (!isInt<16>(x: Disp)) |
567 | return true; |
568 | } else { |
569 | // Don't use LA for plain registers. |
570 | if (!Index) |
571 | return false; |
572 | |
573 | // Don't use LA for plain addition if the index operand is only used |
574 | // once. It should be a natural two-operand addition in that case. |
575 | if (Index->hasOneUse()) |
576 | return false; |
577 | |
578 | // Prefer addition if the second operation is sign-extended, in the |
579 | // hope of using AGF. |
580 | unsigned IndexOpcode = Index->getOpcode(); |
581 | if (IndexOpcode == ISD::SIGN_EXTEND || |
582 | IndexOpcode == ISD::SIGN_EXTEND_INREG) |
583 | return false; |
584 | } |
585 | |
586 | // Don't use LA for two-operand addition if either operand is only |
587 | // used once. The addition instructions are better in that case. |
588 | if (Base->hasOneUse()) |
589 | return false; |
590 | |
591 | return true; |
592 | } |
593 | |
594 | // Return true if Addr is suitable for AM, updating AM if so. |
595 | bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, |
596 | SystemZAddressingMode &AM) const { |
597 | // Start out assuming that the address will need to be loaded separately, |
598 | // then try to extend it as much as we can. |
599 | AM.Base = Addr; |
600 | |
601 | // First try treating the address as a constant. |
602 | if (Addr.getOpcode() == ISD::Constant && |
603 | expandDisp(AM, IsBase: true, Op0: SDValue(), |
604 | Op1: cast<ConstantSDNode>(Val&: Addr)->getSExtValue())) |
605 | ; |
606 | // Also see if it's a bare ADJDYNALLOC. |
607 | else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC && |
608 | expandAdjDynAlloc(AM, IsBase: true, Value: SDValue())) |
609 | ; |
610 | else |
611 | // Otherwise try expanding each component. |
612 | while (expandAddress(AM, IsBase: true) || |
613 | (AM.Index.getNode() && expandAddress(AM, IsBase: false))) |
614 | continue; |
615 | |
616 | // Reject cases where it isn't profitable to use LA(Y). |
617 | if (AM.Form == SystemZAddressingMode::FormBDXLA && |
618 | !shouldUseLA(Base: AM.Base.getNode(), Disp: AM.Disp, Index: AM.Index.getNode())) |
619 | return false; |
620 | |
621 | // Reject cases where the other instruction in a pair should be used. |
622 | if (!isValidDisp(DR: AM.DR, Val: AM.Disp)) |
623 | return false; |
624 | |
625 | // Make sure that ADJDYNALLOC is included where necessary. |
626 | if (AM.isDynAlloc() && !AM.IncludesDynAlloc) |
627 | return false; |
628 | |
629 | LLVM_DEBUG(AM.dump(CurDAG)); |
630 | return true; |
631 | } |
632 | |
633 | // Insert a node into the DAG at least before Pos. This will reposition |
634 | // the node as needed, and will assign it a node ID that is <= Pos's ID. |
635 | // Note that this does *not* preserve the uniqueness of node IDs! |
636 | // The selection DAG must no longer depend on their uniqueness when this |
637 | // function is used. |
638 | static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { |
639 | if (N->getNodeId() == -1 || |
640 | (SelectionDAGISel::getUninvalidatedNodeId(N: N.getNode()) > |
641 | SelectionDAGISel::getUninvalidatedNodeId(N: Pos))) { |
642 | DAG->RepositionNode(Position: Pos->getIterator(), N: N.getNode()); |
643 | // Mark Node as invalid for pruning as after this it may be a successor to a |
644 | // selected node but otherwise be in the same position of Pos. |
645 | // Conservatively mark it with the same -abs(Id) to assure node id |
646 | // invariant is preserved. |
647 | N->setNodeId(Pos->getNodeId()); |
648 | SelectionDAGISel::InvalidateNodeId(N: N.getNode()); |
649 | } |
650 | } |
651 | |
652 | void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, |
653 | EVT VT, SDValue &Base, |
654 | SDValue &Disp) const { |
655 | Base = AM.Base; |
656 | if (!Base.getNode()) |
657 | // Register 0 means "no base". This is mostly useful for shifts. |
658 | Base = CurDAG->getRegister(Reg: 0, VT); |
659 | else if (Base.getOpcode() == ISD::FrameIndex) { |
660 | // Lower a FrameIndex to a TargetFrameIndex. |
661 | int64_t FrameIndex = cast<FrameIndexSDNode>(Val&: Base)->getIndex(); |
662 | Base = CurDAG->getTargetFrameIndex(FI: FrameIndex, VT); |
663 | } else if (Base.getValueType() != VT) { |
664 | // Truncate values from i64 to i32, for shifts. |
665 | assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && |
666 | "Unexpected truncation" ); |
667 | SDLoc DL(Base); |
668 | SDValue Trunc = CurDAG->getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Base); |
669 | insertDAGNode(DAG: CurDAG, Pos: Base.getNode(), N: Trunc); |
670 | Base = Trunc; |
671 | } |
672 | |
673 | // Lower the displacement to a TargetConstant. |
674 | Disp = CurDAG->getTargetConstant(Val: AM.Disp, DL: SDLoc(Base), VT); |
675 | } |
676 | |
677 | void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, |
678 | EVT VT, SDValue &Base, |
679 | SDValue &Disp, |
680 | SDValue &Index) const { |
681 | getAddressOperands(AM, VT, Base, Disp); |
682 | |
683 | Index = AM.Index; |
684 | if (!Index.getNode()) |
685 | // Register 0 means "no index". |
686 | Index = CurDAG->getRegister(Reg: 0, VT); |
687 | } |
688 | |
689 | bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, |
690 | SDValue Addr, SDValue &Base, |
691 | SDValue &Disp) const { |
692 | SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); |
693 | if (!selectAddress(Addr, AM)) |
694 | return false; |
695 | |
696 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp); |
697 | return true; |
698 | } |
699 | |
700 | bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, |
701 | SDValue Addr, SDValue &Base, |
702 | SDValue &Disp) const { |
703 | SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); |
704 | if (!selectAddress(Addr, AM) || AM.Index.getNode()) |
705 | return false; |
706 | |
707 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp); |
708 | return true; |
709 | } |
710 | |
711 | bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, |
712 | SystemZAddressingMode::DispRange DR, |
713 | SDValue Addr, SDValue &Base, |
714 | SDValue &Disp, SDValue &Index) const { |
715 | SystemZAddressingMode AM(Form, DR); |
716 | if (!selectAddress(Addr, AM)) |
717 | return false; |
718 | |
719 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp, Index); |
720 | return true; |
721 | } |
722 | |
723 | bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, |
724 | SDValue &Base, |
725 | SDValue &Disp, |
726 | SDValue &Index) const { |
727 | SDValue Regs[2]; |
728 | if (selectBDXAddr12Only(Addr, Base&: Regs[0], Disp, Index&: Regs[1]) && |
729 | Regs[0].getNode() && Regs[1].getNode()) { |
730 | for (unsigned int I = 0; I < 2; ++I) { |
731 | Base = Regs[I]; |
732 | Index = Regs[1 - I]; |
733 | // We can't tell here whether the index vector has the right type |
734 | // for the access; the caller needs to do that instead. |
735 | if (Index.getOpcode() == ISD::ZERO_EXTEND) |
736 | Index = Index.getOperand(i: 0); |
737 | if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
738 | Index.getOperand(i: 1) == Elem) { |
739 | Index = Index.getOperand(i: 0); |
740 | return true; |
741 | } |
742 | } |
743 | } |
744 | return false; |
745 | } |
746 | |
747 | bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, |
748 | uint64_t InsertMask) const { |
749 | // We're only interested in cases where the insertion is into some operand |
750 | // of Op, rather than into Op itself. The only useful case is an AND. |
751 | if (Op.getOpcode() != ISD::AND) |
752 | return false; |
753 | |
754 | // We need a constant mask. |
755 | auto *MaskNode = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1).getNode()); |
756 | if (!MaskNode) |
757 | return false; |
758 | |
759 | // It's not an insertion of Op.getOperand(0) if the two masks overlap. |
760 | uint64_t AndMask = MaskNode->getZExtValue(); |
761 | if (InsertMask & AndMask) |
762 | return false; |
763 | |
764 | // It's only an insertion if all bits are covered or are known to be zero. |
765 | // The inner check covers all cases but is more expensive. |
766 | uint64_t Used = allOnes(Count: Op.getValueSizeInBits()); |
767 | if (Used != (AndMask | InsertMask)) { |
768 | KnownBits Known = CurDAG->computeKnownBits(Op: Op.getOperand(i: 0)); |
769 | if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue())) |
770 | return false; |
771 | } |
772 | |
773 | Op = Op.getOperand(i: 0); |
774 | return true; |
775 | } |
776 | |
777 | bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, |
778 | uint64_t Mask) const { |
779 | const SystemZInstrInfo *TII = getInstrInfo(); |
780 | if (RxSBG.Rotate != 0) |
781 | Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)); |
782 | Mask &= RxSBG.Mask; |
783 | if (TII->isRxSBGMask(Mask, BitSize: RxSBG.BitSize, Start&: RxSBG.Start, End&: RxSBG.End)) { |
784 | RxSBG.Mask = Mask; |
785 | return true; |
786 | } |
787 | return false; |
788 | } |
789 | |
790 | // Return true if any bits of (RxSBG.Input & Mask) are significant. |
791 | static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) { |
792 | // Rotate the mask in the same way as RxSBG.Input is rotated. |
793 | if (RxSBG.Rotate != 0) |
794 | Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate))); |
795 | return (Mask & RxSBG.Mask) != 0; |
796 | } |
797 | |
798 | bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { |
799 | SDValue N = RxSBG.Input; |
800 | unsigned Opcode = N.getOpcode(); |
801 | switch (Opcode) { |
802 | case ISD::TRUNCATE: { |
803 | if (RxSBG.Opcode == SystemZ::RNSBG) |
804 | return false; |
805 | if (N.getOperand(i: 0).getValueSizeInBits() > 64) |
806 | return false; |
807 | uint64_t BitSize = N.getValueSizeInBits(); |
808 | uint64_t Mask = allOnes(Count: BitSize); |
809 | if (!refineRxSBGMask(RxSBG, Mask)) |
810 | return false; |
811 | RxSBG.Input = N.getOperand(i: 0); |
812 | return true; |
813 | } |
814 | case ISD::AND: { |
815 | if (RxSBG.Opcode == SystemZ::RNSBG) |
816 | return false; |
817 | |
818 | auto *MaskNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode()); |
819 | if (!MaskNode) |
820 | return false; |
821 | |
822 | SDValue Input = N.getOperand(i: 0); |
823 | uint64_t Mask = MaskNode->getZExtValue(); |
824 | if (!refineRxSBGMask(RxSBG, Mask)) { |
825 | // If some bits of Input are already known zeros, those bits will have |
826 | // been removed from the mask. See if adding them back in makes the |
827 | // mask suitable. |
828 | KnownBits Known = CurDAG->computeKnownBits(Op: Input); |
829 | Mask |= Known.Zero.getZExtValue(); |
830 | if (!refineRxSBGMask(RxSBG, Mask)) |
831 | return false; |
832 | } |
833 | RxSBG.Input = Input; |
834 | return true; |
835 | } |
836 | |
837 | case ISD::OR: { |
838 | if (RxSBG.Opcode != SystemZ::RNSBG) |
839 | return false; |
840 | |
841 | auto *MaskNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode()); |
842 | if (!MaskNode) |
843 | return false; |
844 | |
845 | SDValue Input = N.getOperand(i: 0); |
846 | uint64_t Mask = ~MaskNode->getZExtValue(); |
847 | if (!refineRxSBGMask(RxSBG, Mask)) { |
848 | // If some bits of Input are already known ones, those bits will have |
849 | // been removed from the mask. See if adding them back in makes the |
850 | // mask suitable. |
851 | KnownBits Known = CurDAG->computeKnownBits(Op: Input); |
852 | Mask &= ~Known.One.getZExtValue(); |
853 | if (!refineRxSBGMask(RxSBG, Mask)) |
854 | return false; |
855 | } |
856 | RxSBG.Input = Input; |
857 | return true; |
858 | } |
859 | |
860 | case ISD::ROTL: { |
861 | // Any 64-bit rotate left can be merged into the RxSBG. |
862 | if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64) |
863 | return false; |
864 | auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode()); |
865 | if (!CountNode) |
866 | return false; |
867 | |
868 | RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63; |
869 | RxSBG.Input = N.getOperand(i: 0); |
870 | return true; |
871 | } |
872 | |
873 | case ISD::ANY_EXTEND: |
874 | // Bits above the extended operand are don't-care. |
875 | RxSBG.Input = N.getOperand(i: 0); |
876 | return true; |
877 | |
878 | case ISD::ZERO_EXTEND: |
879 | if (RxSBG.Opcode != SystemZ::RNSBG) { |
880 | // Restrict the mask to the extended operand. |
881 | unsigned InnerBitSize = N.getOperand(i: 0).getValueSizeInBits(); |
882 | if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: InnerBitSize))) |
883 | return false; |
884 | |
885 | RxSBG.Input = N.getOperand(i: 0); |
886 | return true; |
887 | } |
888 | [[fallthrough]]; |
889 | |
890 | case ISD::SIGN_EXTEND: { |
891 | // Check that the extension bits are don't-care (i.e. are masked out |
892 | // by the final mask). |
893 | unsigned BitSize = N.getValueSizeInBits(); |
894 | unsigned InnerBitSize = N.getOperand(i: 0).getValueSizeInBits(); |
895 | if (maskMatters(RxSBG, Mask: allOnes(Count: BitSize) - allOnes(Count: InnerBitSize))) { |
896 | // In the case where only the sign bit is active, increase Rotate with |
897 | // the extension width. |
898 | if (RxSBG.Mask == 1 && RxSBG.Rotate == 1) |
899 | RxSBG.Rotate += (BitSize - InnerBitSize); |
900 | else |
901 | return false; |
902 | } |
903 | |
904 | RxSBG.Input = N.getOperand(i: 0); |
905 | return true; |
906 | } |
907 | |
908 | case ISD::SHL: { |
909 | auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode()); |
910 | if (!CountNode) |
911 | return false; |
912 | |
913 | uint64_t Count = CountNode->getZExtValue(); |
914 | unsigned BitSize = N.getValueSizeInBits(); |
915 | if (Count < 1 || Count >= BitSize) |
916 | return false; |
917 | |
918 | if (RxSBG.Opcode == SystemZ::RNSBG) { |
919 | // Treat (shl X, count) as (rotl X, size-count) as long as the bottom |
920 | // count bits from RxSBG.Input are ignored. |
921 | if (maskMatters(RxSBG, Mask: allOnes(Count))) |
922 | return false; |
923 | } else { |
924 | // Treat (shl X, count) as (and (rotl X, count), ~0<<count). |
925 | if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: BitSize - Count) << Count)) |
926 | return false; |
927 | } |
928 | |
929 | RxSBG.Rotate = (RxSBG.Rotate + Count) & 63; |
930 | RxSBG.Input = N.getOperand(i: 0); |
931 | return true; |
932 | } |
933 | |
934 | case ISD::SRL: |
935 | case ISD::SRA: { |
936 | auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode()); |
937 | if (!CountNode) |
938 | return false; |
939 | |
940 | uint64_t Count = CountNode->getZExtValue(); |
941 | unsigned BitSize = N.getValueSizeInBits(); |
942 | if (Count < 1 || Count >= BitSize) |
943 | return false; |
944 | |
945 | if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) { |
946 | // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top |
947 | // count bits from RxSBG.Input are ignored. |
948 | if (maskMatters(RxSBG, Mask: allOnes(Count) << (BitSize - Count))) |
949 | return false; |
950 | } else { |
951 | // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count), |
952 | // which is similar to SLL above. |
953 | if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: BitSize - Count))) |
954 | return false; |
955 | } |
956 | |
957 | RxSBG.Rotate = (RxSBG.Rotate - Count) & 63; |
958 | RxSBG.Input = N.getOperand(i: 0); |
959 | return true; |
960 | } |
961 | default: |
962 | return false; |
963 | } |
964 | } |
965 | |
966 | SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const { |
967 | SDNode *N = CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT); |
968 | return SDValue(N, 0); |
969 | } |
970 | |
971 | SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT, |
972 | SDValue N) const { |
973 | if (N.getValueType() == MVT::i32 && VT == MVT::i64) |
974 | return CurDAG->getTargetInsertSubreg(SRIdx: SystemZ::subreg_l32, |
975 | DL, VT, Operand: getUNDEF(DL, VT: MVT::i64), Subreg: N); |
976 | if (N.getValueType() == MVT::i64 && VT == MVT::i32) |
977 | return CurDAG->getTargetExtractSubreg(SRIdx: SystemZ::subreg_l32, DL, VT, Operand: N); |
978 | assert(N.getValueType() == VT && "Unexpected value types" ); |
979 | return N; |
980 | } |
981 | |
982 | bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { |
983 | SDLoc DL(N); |
984 | EVT VT = N->getValueType(ResNo: 0); |
985 | if (!VT.isInteger() || VT.getSizeInBits() > 64) |
986 | return false; |
987 | RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0)); |
988 | unsigned Count = 0; |
989 | while (expandRxSBG(RxSBG&: RISBG)) |
990 | // The widening or narrowing is expected to be free. |
991 | // Counting widening or narrowing as a saved operation will result in |
992 | // preferring an R*SBG over a simple shift/logical instruction. |
993 | if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND && |
994 | RISBG.Input.getOpcode() != ISD::TRUNCATE) |
995 | Count += 1; |
996 | if (Count == 0 || isa<ConstantSDNode>(Val: RISBG.Input)) |
997 | return false; |
998 | |
999 | // Prefer to use normal shift instructions over RISBG, since they can handle |
1000 | // all cases and are sometimes shorter. |
1001 | if (Count == 1 && N->getOpcode() != ISD::AND) |
1002 | return false; |
1003 | |
1004 | // Prefer register extensions like LLC over RISBG. Also prefer to start |
1005 | // out with normal ANDs if one instruction would be enough. We can convert |
1006 | // these ANDs into an RISBG later if a three-address instruction is useful. |
1007 | if (RISBG.Rotate == 0) { |
1008 | bool PreferAnd = false; |
1009 | // Prefer AND for any 32-bit and-immediate operation. |
1010 | if (VT == MVT::i32) |
1011 | PreferAnd = true; |
1012 | // As well as for any 64-bit operation that can be implemented via LLC(R), |
1013 | // LLH(R), LLGT(R), or one of the and-immediate instructions. |
1014 | else if (RISBG.Mask == 0xff || |
1015 | RISBG.Mask == 0xffff || |
1016 | RISBG.Mask == 0x7fffffff || |
1017 | SystemZ::isImmLF(Val: ~RISBG.Mask) || |
1018 | SystemZ::isImmHF(Val: ~RISBG.Mask)) |
1019 | PreferAnd = true; |
1020 | // And likewise for the LLZRGF instruction, which doesn't have a register |
1021 | // to register version. |
1022 | else if (auto *Load = dyn_cast<LoadSDNode>(Val&: RISBG.Input)) { |
1023 | if (Load->getMemoryVT() == MVT::i32 && |
1024 | (Load->getExtensionType() == ISD::EXTLOAD || |
1025 | Load->getExtensionType() == ISD::ZEXTLOAD) && |
1026 | RISBG.Mask == 0xffffff00 && |
1027 | Subtarget->hasLoadAndZeroRightmostByte()) |
1028 | PreferAnd = true; |
1029 | } |
1030 | if (PreferAnd) { |
1031 | // Replace the current node with an AND. Note that the current node |
1032 | // might already be that same AND, in which case it is already CSE'd |
1033 | // with it, and we must not call ReplaceNode. |
1034 | SDValue In = convertTo(DL, VT, N: RISBG.Input); |
1035 | SDValue Mask = CurDAG->getConstant(Val: RISBG.Mask, DL, VT); |
1036 | SDValue New = CurDAG->getNode(Opcode: ISD::AND, DL, VT, N1: In, N2: Mask); |
1037 | if (N != New.getNode()) { |
1038 | insertDAGNode(DAG: CurDAG, Pos: N, N: Mask); |
1039 | insertDAGNode(DAG: CurDAG, Pos: N, N: New); |
1040 | ReplaceNode(F: N, T: New.getNode()); |
1041 | N = New.getNode(); |
1042 | } |
1043 | // Now, select the machine opcode to implement this operation. |
1044 | if (!N->isMachineOpcode()) |
1045 | SelectCode(N); |
1046 | return true; |
1047 | } |
1048 | } |
1049 | |
1050 | unsigned Opcode = SystemZ::RISBG; |
1051 | // Prefer RISBGN if available, since it does not clobber CC. |
1052 | if (Subtarget->hasMiscellaneousExtensions()) |
1053 | Opcode = SystemZ::RISBGN; |
1054 | EVT OpcodeVT = MVT::i64; |
1055 | if (VT == MVT::i32 && Subtarget->hasHighWord() && |
1056 | // We can only use the 32-bit instructions if all source bits are |
1057 | // in the low 32 bits without wrapping, both after rotation (because |
1058 | // of the smaller range for Start and End) and before rotation |
1059 | // (because the input value is truncated). |
1060 | RISBG.Start >= 32 && RISBG.End >= RISBG.Start && |
1061 | ((RISBG.Start + RISBG.Rotate) & 63) >= 32 && |
1062 | ((RISBG.End + RISBG.Rotate) & 63) >= |
1063 | ((RISBG.Start + RISBG.Rotate) & 63)) { |
1064 | Opcode = SystemZ::RISBMux; |
1065 | OpcodeVT = MVT::i32; |
1066 | RISBG.Start &= 31; |
1067 | RISBG.End &= 31; |
1068 | } |
1069 | SDValue Ops[5] = { |
1070 | getUNDEF(DL, VT: OpcodeVT), |
1071 | convertTo(DL, VT: OpcodeVT, N: RISBG.Input), |
1072 | CurDAG->getTargetConstant(Val: RISBG.Start, DL, VT: MVT::i32), |
1073 | CurDAG->getTargetConstant(Val: RISBG.End | 128, DL, VT: MVT::i32), |
1074 | CurDAG->getTargetConstant(Val: RISBG.Rotate, DL, VT: MVT::i32) |
1075 | }; |
1076 | SDValue New = convertTo( |
1077 | DL, VT, N: SDValue(CurDAG->getMachineNode(Opcode, dl: DL, VT: OpcodeVT, Ops), 0)); |
1078 | ReplaceNode(F: N, T: New.getNode()); |
1079 | return true; |
1080 | } |
1081 | |
1082 | bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { |
1083 | SDLoc DL(N); |
1084 | EVT VT = N->getValueType(ResNo: 0); |
1085 | if (!VT.isInteger() || VT.getSizeInBits() > 64) |
1086 | return false; |
1087 | // Try treating each operand of N as the second operand of the RxSBG |
1088 | // and see which goes deepest. |
1089 | RxSBGOperands RxSBG[] = { |
1090 | RxSBGOperands(Opcode, N->getOperand(Num: 0)), |
1091 | RxSBGOperands(Opcode, N->getOperand(Num: 1)) |
1092 | }; |
1093 | unsigned Count[] = { 0, 0 }; |
1094 | for (unsigned I = 0; I < 2; ++I) |
1095 | while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG&: RxSBG[I])) |
1096 | // In cases of multiple users it seems better to keep the simple |
1097 | // instruction as they are one cycle faster, and it also helps in cases |
1098 | // where both inputs share a common node. |
1099 | // The widening or narrowing is expected to be free. Counting widening |
1100 | // or narrowing as a saved operation will result in preferring an R*SBG |
1101 | // over a simple shift/logical instruction. |
1102 | if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND && |
1103 | RxSBG[I].Input.getOpcode() != ISD::TRUNCATE) |
1104 | Count[I] += 1; |
1105 | |
1106 | // Do nothing if neither operand is suitable. |
1107 | if (Count[0] == 0 && Count[1] == 0) |
1108 | return false; |
1109 | |
1110 | // Pick the deepest second operand. |
1111 | unsigned I = Count[0] > Count[1] ? 0 : 1; |
1112 | SDValue Op0 = N->getOperand(Num: I ^ 1); |
1113 | |
1114 | // Prefer IC for character insertions from memory. |
1115 | if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) |
1116 | if (auto *Load = dyn_cast<LoadSDNode>(Val: Op0.getNode())) |
1117 | if (Load->getMemoryVT() == MVT::i8) |
1118 | return false; |
1119 | |
1120 | // See whether we can avoid an AND in the first operand by converting |
1121 | // ROSBG to RISBG. |
1122 | if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op&: Op0, InsertMask: RxSBG[I].Mask)) { |
1123 | Opcode = SystemZ::RISBG; |
1124 | // Prefer RISBGN if available, since it does not clobber CC. |
1125 | if (Subtarget->hasMiscellaneousExtensions()) |
1126 | Opcode = SystemZ::RISBGN; |
1127 | } |
1128 | |
1129 | SDValue Ops[5] = { |
1130 | convertTo(DL, VT: MVT::i64, N: Op0), |
1131 | convertTo(DL, VT: MVT::i64, N: RxSBG[I].Input), |
1132 | CurDAG->getTargetConstant(Val: RxSBG[I].Start, DL, VT: MVT::i32), |
1133 | CurDAG->getTargetConstant(Val: RxSBG[I].End, DL, VT: MVT::i32), |
1134 | CurDAG->getTargetConstant(Val: RxSBG[I].Rotate, DL, VT: MVT::i32) |
1135 | }; |
1136 | SDValue New = convertTo( |
1137 | DL, VT, N: SDValue(CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::i64, Ops), 0)); |
1138 | ReplaceNode(F: N, T: New.getNode()); |
1139 | return true; |
1140 | } |
1141 | |
1142 | void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, |
1143 | SDValue Op0, uint64_t UpperVal, |
1144 | uint64_t LowerVal) { |
1145 | EVT VT = Node->getValueType(ResNo: 0); |
1146 | SDLoc DL(Node); |
1147 | SDValue Upper = CurDAG->getConstant(Val: UpperVal, DL, VT); |
1148 | if (Op0.getNode()) |
1149 | Upper = CurDAG->getNode(Opcode, DL, VT, N1: Op0, N2: Upper); |
1150 | |
1151 | { |
1152 | // When we haven't passed in Op0, Upper will be a constant. In order to |
1153 | // prevent folding back to the large immediate in `Or = getNode(...)` we run |
1154 | // SelectCode first and end up with an opaque machine node. This means that |
1155 | // we need to use a handle to keep track of Upper in case it gets CSE'd by |
1156 | // SelectCode. |
1157 | // |
1158 | // Note that in the case where Op0 is passed in we could just call |
1159 | // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing |
1160 | // the handle at all, but it's fine to do it here. |
1161 | // |
1162 | // TODO: This is a pretty hacky way to do this. Can we do something that |
1163 | // doesn't require a two paragraph explanation? |
1164 | HandleSDNode Handle(Upper); |
1165 | SelectCode(N: Upper.getNode()); |
1166 | Upper = Handle.getValue(); |
1167 | } |
1168 | |
1169 | SDValue Lower = CurDAG->getConstant(Val: LowerVal, DL, VT); |
1170 | SDValue Or = CurDAG->getNode(Opcode, DL, VT, N1: Upper, N2: Lower); |
1171 | |
1172 | ReplaceNode(F: Node, T: Or.getNode()); |
1173 | |
1174 | SelectCode(N: Or.getNode()); |
1175 | } |
1176 | |
1177 | void SystemZDAGToDAGISel::loadVectorConstant( |
1178 | const SystemZVectorConstantInfo &VCI, SDNode *Node) { |
1179 | assert((VCI.Opcode == SystemZISD::BYTE_MASK || |
1180 | VCI.Opcode == SystemZISD::REPLICATE || |
1181 | VCI.Opcode == SystemZISD::ROTATE_MASK) && |
1182 | "Bad opcode!" ); |
1183 | assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type" ); |
1184 | EVT VT = Node->getValueType(ResNo: 0); |
1185 | SDLoc DL(Node); |
1186 | SmallVector<SDValue, 2> Ops; |
1187 | for (unsigned OpVal : VCI.OpVals) |
1188 | Ops.push_back(Elt: CurDAG->getTargetConstant(Val: OpVal, DL, VT: MVT::i32)); |
1189 | SDValue Op = CurDAG->getNode(Opcode: VCI.Opcode, DL, VT: VCI.VecVT, Ops); |
1190 | |
1191 | if (VCI.VecVT == VT.getSimpleVT()) |
1192 | ReplaceNode(F: Node, T: Op.getNode()); |
1193 | else if (VT.getSizeInBits() == 128) { |
1194 | SDValue BitCast = CurDAG->getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1195 | ReplaceNode(F: Node, T: BitCast.getNode()); |
1196 | SelectCode(N: BitCast.getNode()); |
1197 | } else { // float or double |
1198 | unsigned SubRegIdx = |
1199 | (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64); |
1200 | ReplaceNode( |
1201 | F: Node, T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: Op).getNode()); |
1202 | } |
1203 | SelectCode(N: Op.getNode()); |
1204 | } |
1205 | |
1206 | SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) { |
1207 | SDNode *ResNode; |
1208 | assert (VT.getSizeInBits() == 128); |
1209 | |
1210 | SDValue CP = CurDAG->getTargetConstantPool( |
1211 | C: ConstantInt::get(Ty: Type::getInt128Ty(C&: *CurDAG->getContext()), V: Val), |
1212 | VT: TLI->getPointerTy(DL: CurDAG->getDataLayout())); |
1213 | |
1214 | EVT PtrVT = CP.getValueType(); |
1215 | SDValue Ops[] = { |
1216 | SDValue(CurDAG->getMachineNode(Opcode: SystemZ::LARL, dl: DL, VT: PtrVT, Op1: CP), 0), |
1217 | CurDAG->getTargetConstant(Val: 0, DL, VT: PtrVT), |
1218 | CurDAG->getRegister(Reg: 0, VT: PtrVT), |
1219 | CurDAG->getEntryNode() |
1220 | }; |
1221 | ResNode = CurDAG->getMachineNode(Opcode: SystemZ::VL, dl: DL, VT1: VT, VT2: MVT::Other, Ops); |
1222 | |
1223 | // Annotate ResNode with memory operand information so that MachineInstr |
1224 | // queries work properly. This e.g. gives the register allocation the |
1225 | // required information for rematerialization. |
1226 | MachineFunction& MF = CurDAG->getMachineFunction(); |
1227 | MachineMemOperand *MemOp = |
1228 | MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF), |
1229 | F: MachineMemOperand::MOLoad, Size: 16, BaseAlignment: Align(8)); |
1230 | |
1231 | CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp}); |
1232 | return ResNode; |
1233 | } |
1234 | |
1235 | bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { |
1236 | SDValue ElemV = N->getOperand(Num: 2); |
1237 | auto *ElemN = dyn_cast<ConstantSDNode>(Val&: ElemV); |
1238 | if (!ElemN) |
1239 | return false; |
1240 | |
1241 | unsigned Elem = ElemN->getZExtValue(); |
1242 | EVT VT = N->getValueType(ResNo: 0); |
1243 | if (Elem >= VT.getVectorNumElements()) |
1244 | return false; |
1245 | |
1246 | auto *Load = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 1)); |
1247 | if (!Load || !Load->hasNUsesOfValue(NUses: 1, Value: 0)) |
1248 | return false; |
1249 | if (Load->getMemoryVT().getSizeInBits() != |
1250 | Load->getValueType(ResNo: 0).getSizeInBits()) |
1251 | return false; |
1252 | |
1253 | SDValue Base, Disp, Index; |
1254 | if (!selectBDVAddr12Only(Addr: Load->getBasePtr(), Elem: ElemV, Base, Disp, Index) || |
1255 | Index.getValueType() != VT.changeVectorElementTypeToInteger()) |
1256 | return false; |
1257 | |
1258 | SDLoc DL(Load); |
1259 | SDValue Ops[] = { |
1260 | N->getOperand(Num: 0), Base, Disp, Index, |
1261 | CurDAG->getTargetConstant(Val: Elem, DL, VT: MVT::i32), Load->getChain() |
1262 | }; |
1263 | SDNode *Res = CurDAG->getMachineNode(Opcode, dl: DL, VT1: VT, VT2: MVT::Other, Ops); |
1264 | ReplaceUses(F: SDValue(Load, 1), T: SDValue(Res, 1)); |
1265 | ReplaceNode(F: N, T: Res); |
1266 | return true; |
1267 | } |
1268 | |
1269 | bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { |
1270 | SDValue Value = Store->getValue(); |
1271 | if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
1272 | return false; |
1273 | if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits()) |
1274 | return false; |
1275 | |
1276 | SDValue ElemV = Value.getOperand(i: 1); |
1277 | auto *ElemN = dyn_cast<ConstantSDNode>(Val&: ElemV); |
1278 | if (!ElemN) |
1279 | return false; |
1280 | |
1281 | SDValue Vec = Value.getOperand(i: 0); |
1282 | EVT VT = Vec.getValueType(); |
1283 | unsigned Elem = ElemN->getZExtValue(); |
1284 | if (Elem >= VT.getVectorNumElements()) |
1285 | return false; |
1286 | |
1287 | SDValue Base, Disp, Index; |
1288 | if (!selectBDVAddr12Only(Addr: Store->getBasePtr(), Elem: ElemV, Base, Disp, Index) || |
1289 | Index.getValueType() != VT.changeVectorElementTypeToInteger()) |
1290 | return false; |
1291 | |
1292 | SDLoc DL(Store); |
1293 | SDValue Ops[] = { |
1294 | Vec, Base, Disp, Index, CurDAG->getTargetConstant(Val: Elem, DL, VT: MVT::i32), |
1295 | Store->getChain() |
1296 | }; |
1297 | ReplaceNode(F: Store, T: CurDAG->getMachineNode(Opcode, dl: DL, VT: MVT::Other, Ops)); |
1298 | return true; |
1299 | } |
1300 | |
1301 | // Check whether or not the chain ending in StoreNode is suitable for doing |
1302 | // the {load; op; store} to modify transformation. |
1303 | static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, |
1304 | SDValue StoredVal, SelectionDAG *CurDAG, |
1305 | LoadSDNode *&LoadNode, |
1306 | SDValue &InputChain) { |
1307 | // Is the stored value result 0 of the operation? |
1308 | if (StoredVal.getResNo() != 0) |
1309 | return false; |
1310 | |
1311 | // Are there other uses of the loaded value than the operation? |
1312 | if (!StoredVal.getNode()->hasNUsesOfValue(NUses: 1, Value: 0)) |
1313 | return false; |
1314 | |
1315 | // Is the store non-extending and non-indexed? |
1316 | if (!ISD::isNormalStore(N: StoreNode) || StoreNode->isNonTemporal()) |
1317 | return false; |
1318 | |
1319 | SDValue Load = StoredVal->getOperand(Num: 0); |
1320 | // Is the stored value a non-extending and non-indexed load? |
1321 | if (!ISD::isNormalLoad(N: Load.getNode())) |
1322 | return false; |
1323 | |
1324 | // Return LoadNode by reference. |
1325 | LoadNode = cast<LoadSDNode>(Val&: Load); |
1326 | |
1327 | // Is store the only read of the loaded value? |
1328 | if (!Load.hasOneUse()) |
1329 | return false; |
1330 | |
1331 | // Is the address of the store the same as the load? |
1332 | if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || |
1333 | LoadNode->getOffset() != StoreNode->getOffset()) |
1334 | return false; |
1335 | |
1336 | // Check if the chain is produced by the load or is a TokenFactor with |
1337 | // the load output chain as an operand. Return InputChain by reference. |
1338 | SDValue Chain = StoreNode->getChain(); |
1339 | |
1340 | bool ChainCheck = false; |
1341 | if (Chain == Load.getValue(R: 1)) { |
1342 | ChainCheck = true; |
1343 | InputChain = LoadNode->getChain(); |
1344 | } else if (Chain.getOpcode() == ISD::TokenFactor) { |
1345 | SmallVector<SDValue, 4> ChainOps; |
1346 | SmallVector<const SDNode *, 4> LoopWorklist; |
1347 | SmallPtrSet<const SDNode *, 16> Visited; |
1348 | const unsigned int Max = 1024; |
1349 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { |
1350 | SDValue Op = Chain.getOperand(i); |
1351 | if (Op == Load.getValue(R: 1)) { |
1352 | ChainCheck = true; |
1353 | // Drop Load, but keep its chain. No cycle check necessary. |
1354 | ChainOps.push_back(Elt: Load.getOperand(i: 0)); |
1355 | continue; |
1356 | } |
1357 | LoopWorklist.push_back(Elt: Op.getNode()); |
1358 | ChainOps.push_back(Elt: Op); |
1359 | } |
1360 | |
1361 | if (ChainCheck) { |
1362 | // Add the other operand of StoredVal to worklist. |
1363 | for (SDValue Op : StoredVal->ops()) |
1364 | if (Op.getNode() != LoadNode) |
1365 | LoopWorklist.push_back(Elt: Op.getNode()); |
1366 | |
1367 | // Check if Load is reachable from any of the nodes in the worklist. |
1368 | if (SDNode::hasPredecessorHelper(N: Load.getNode(), Visited, Worklist&: LoopWorklist, MaxSteps: Max, |
1369 | TopologicalPrune: true)) |
1370 | return false; |
1371 | |
1372 | // Make a new TokenFactor with all the other input chains except |
1373 | // for the load. |
1374 | InputChain = CurDAG->getNode(Opcode: ISD::TokenFactor, DL: SDLoc(Chain), |
1375 | VT: MVT::Other, Ops: ChainOps); |
1376 | } |
1377 | } |
1378 | if (!ChainCheck) |
1379 | return false; |
1380 | |
1381 | return true; |
1382 | } |
1383 | |
1384 | // Change a chain of {load; op; store} of the same value into a simple op |
1385 | // through memory of that value, if the uses of the modified value and its |
1386 | // address are suitable. |
1387 | // |
1388 | // The tablegen pattern memory operand pattern is currently not able to match |
1389 | // the case where the CC on the original operation are used. |
1390 | // |
1391 | // See the equivalent routine in X86ISelDAGToDAG for further comments. |
1392 | bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) { |
1393 | StoreSDNode *StoreNode = cast<StoreSDNode>(Val: Node); |
1394 | SDValue StoredVal = StoreNode->getOperand(Num: 1); |
1395 | unsigned Opc = StoredVal->getOpcode(); |
1396 | SDLoc DL(StoreNode); |
1397 | |
1398 | // Before we try to select anything, make sure this is memory operand size |
1399 | // and opcode we can handle. Note that this must match the code below that |
1400 | // actually lowers the opcodes. |
1401 | EVT MemVT = StoreNode->getMemoryVT(); |
1402 | unsigned NewOpc = 0; |
1403 | bool NegateOperand = false; |
1404 | switch (Opc) { |
1405 | default: |
1406 | return false; |
1407 | case SystemZISD::SSUBO: |
1408 | NegateOperand = true; |
1409 | [[fallthrough]]; |
1410 | case SystemZISD::SADDO: |
1411 | if (MemVT == MVT::i32) |
1412 | NewOpc = SystemZ::ASI; |
1413 | else if (MemVT == MVT::i64) |
1414 | NewOpc = SystemZ::AGSI; |
1415 | else |
1416 | return false; |
1417 | break; |
1418 | case SystemZISD::USUBO: |
1419 | NegateOperand = true; |
1420 | [[fallthrough]]; |
1421 | case SystemZISD::UADDO: |
1422 | if (MemVT == MVT::i32) |
1423 | NewOpc = SystemZ::ALSI; |
1424 | else if (MemVT == MVT::i64) |
1425 | NewOpc = SystemZ::ALGSI; |
1426 | else |
1427 | return false; |
1428 | break; |
1429 | } |
1430 | |
1431 | LoadSDNode *LoadNode = nullptr; |
1432 | SDValue InputChain; |
1433 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode, |
1434 | InputChain)) |
1435 | return false; |
1436 | |
1437 | SDValue Operand = StoredVal.getOperand(i: 1); |
1438 | auto *OperandC = dyn_cast<ConstantSDNode>(Val&: Operand); |
1439 | if (!OperandC) |
1440 | return false; |
1441 | auto OperandV = OperandC->getAPIntValue(); |
1442 | if (NegateOperand) |
1443 | OperandV = -OperandV; |
1444 | if (OperandV.getSignificantBits() > 8) |
1445 | return false; |
1446 | Operand = CurDAG->getTargetConstant(Val: OperandV, DL, VT: MemVT); |
1447 | |
1448 | SDValue Base, Disp; |
1449 | if (!selectBDAddr20Only(Addr: StoreNode->getBasePtr(), Base, Disp)) |
1450 | return false; |
1451 | |
1452 | SDValue Ops[] = { Base, Disp, Operand, InputChain }; |
1453 | MachineSDNode *Result = |
1454 | CurDAG->getMachineNode(Opcode: NewOpc, dl: DL, VT1: MVT::i32, VT2: MVT::Other, Ops); |
1455 | CurDAG->setNodeMemRefs( |
1456 | N: Result, NewMemRefs: {StoreNode->getMemOperand(), LoadNode->getMemOperand()}); |
1457 | |
1458 | ReplaceUses(F: SDValue(StoreNode, 0), T: SDValue(Result, 1)); |
1459 | ReplaceUses(F: SDValue(StoredVal.getNode(), 1), T: SDValue(Result, 0)); |
1460 | CurDAG->RemoveDeadNode(N: Node); |
1461 | return true; |
1462 | } |
1463 | |
1464 | bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, |
1465 | LoadSDNode *Load) const { |
1466 | // Check that the two memory operands have the same size. |
1467 | if (Load->getMemoryVT() != Store->getMemoryVT()) |
1468 | return false; |
1469 | |
1470 | // Volatility stops an access from being decomposed. |
1471 | if (Load->isVolatile() || Store->isVolatile()) |
1472 | return false; |
1473 | |
1474 | // There's no chance of overlap if the load is invariant. |
1475 | if (Load->isInvariant() && Load->isDereferenceable()) |
1476 | return true; |
1477 | |
1478 | // Otherwise we need to check whether there's an alias. |
1479 | const Value *V1 = Load->getMemOperand()->getValue(); |
1480 | const Value *V2 = Store->getMemOperand()->getValue(); |
1481 | if (!V1 || !V2) |
1482 | return false; |
1483 | |
1484 | // Reject equality. |
1485 | uint64_t Size = Load->getMemoryVT().getStoreSize(); |
1486 | int64_t End1 = Load->getSrcValueOffset() + Size; |
1487 | int64_t End2 = Store->getSrcValueOffset() + Size; |
1488 | if (V1 == V2 && End1 == End2) |
1489 | return false; |
1490 | |
1491 | return AA->isNoAlias(LocA: MemoryLocation(V1, End1, Load->getAAInfo()), |
1492 | LocB: MemoryLocation(V2, End2, Store->getAAInfo())); |
1493 | } |
1494 | |
1495 | bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { |
1496 | auto *Store = cast<StoreSDNode>(Val: N); |
1497 | auto *Load = cast<LoadSDNode>(Val: Store->getValue()); |
1498 | |
1499 | // Prefer not to use MVC if either address can use ... RELATIVE LONG |
1500 | // instructions. |
1501 | uint64_t Size = Load->getMemoryVT().getStoreSize(); |
1502 | if (Size > 1 && Size <= 8) { |
1503 | // Prefer LHRL, LRL and LGRL. |
1504 | if (SystemZISD::isPCREL(Opcode: Load->getBasePtr().getOpcode())) |
1505 | return false; |
1506 | // Prefer STHRL, STRL and STGRL. |
1507 | if (SystemZISD::isPCREL(Opcode: Store->getBasePtr().getOpcode())) |
1508 | return false; |
1509 | } |
1510 | |
1511 | return canUseBlockOperation(Store, Load); |
1512 | } |
1513 | |
1514 | bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, |
1515 | unsigned I) const { |
1516 | auto *StoreA = cast<StoreSDNode>(Val: N); |
1517 | auto *LoadA = cast<LoadSDNode>(Val: StoreA->getValue().getOperand(i: 1 - I)); |
1518 | auto *LoadB = cast<LoadSDNode>(Val: StoreA->getValue().getOperand(i: I)); |
1519 | return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() && |
1520 | canUseBlockOperation(Store: StoreA, Load: LoadB); |
1521 | } |
1522 | |
1523 | bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const { |
1524 | |
1525 | auto *MemAccess = cast<MemSDNode>(Val: N); |
1526 | auto *LdSt = dyn_cast<LSBaseSDNode>(Val: MemAccess); |
1527 | TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize(); |
1528 | SDValue BasePtr = MemAccess->getBasePtr(); |
1529 | MachineMemOperand *MMO = MemAccess->getMemOperand(); |
1530 | assert(MMO && "Expected a memory operand." ); |
1531 | |
1532 | // The memory access must have a proper alignment and no index register. |
1533 | // Only load and store nodes have the offset operand (atomic loads do not). |
1534 | if (MemAccess->getAlign().value() < StoreSize || |
1535 | (LdSt && !LdSt->getOffset().isUndef())) |
1536 | return false; |
1537 | |
1538 | // The MMO must not have an unaligned offset. |
1539 | if (MMO->getOffset() % StoreSize != 0) |
1540 | return false; |
1541 | |
1542 | // An access to GOT or the Constant Pool is aligned. |
1543 | if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) |
1544 | if ((PSV->isGOT() || PSV->isConstantPool())) |
1545 | return true; |
1546 | |
1547 | // Check the alignment of a Global Address. |
1548 | if (BasePtr.getNumOperands()) |
1549 | if (GlobalAddressSDNode *GA = |
1550 | dyn_cast<GlobalAddressSDNode>(Val: BasePtr.getOperand(i: 0))) { |
1551 | // The immediate offset must be aligned. |
1552 | if (GA->getOffset() % StoreSize != 0) |
1553 | return false; |
1554 | |
1555 | // The alignment of the symbol itself must be at least the store size. |
1556 | const GlobalValue *GV = GA->getGlobal(); |
1557 | const DataLayout &DL = GV->getDataLayout(); |
1558 | if (GV->getPointerAlignment(DL).value() < StoreSize) |
1559 | return false; |
1560 | } |
1561 | |
1562 | return true; |
1563 | } |
1564 | |
1565 | ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const { |
1566 | ISD::LoadExtType ETy; |
1567 | if (auto *L = dyn_cast<LoadSDNode>(Val: N)) |
1568 | ETy = L->getExtensionType(); |
1569 | else if (auto *AL = dyn_cast<AtomicSDNode>(Val: N)) |
1570 | ETy = AL->getExtensionType(); |
1571 | else |
1572 | llvm_unreachable("Unkown load node type." ); |
1573 | return ETy; |
1574 | } |
1575 | |
// Per-node selection hook.  A node that already carries a machine opcode only
// gets its node id reset to -1.  For every other node the switch below gives
// a handful of opcodes target-specific treatment: a case that deals with the
// node completely returns directly, whereas a case that leaves the switch via
// `break` (and every opcode that is not listed) ends up in SelectCode().
void SystemZDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== " ; Node->dump(CurDAG); errs() << "\n" );
    Node->setNodeId(-1);
    return;
  }

  unsigned Opcode = Node->getOpcode();
  switch (Opcode) {
  case ISD::OR:
    // With a non-constant second operand, give tryRxSBG() a chance first.
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(N: Node, Opcode: SystemZ::ROSBG))
        return;
    // OR shares the immediate handling below with XOR.
    goto or_xor;

  case ISD::XOR:
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(N: Node, Opcode: SystemZ::RXSBG))
        return;
    // Fall through.
  or_xor:
    // If this is a 64-bit operation in which both 32-bit halves are nonzero,
    // split the operation into two. If both operands here happen to be
    // constant, leave this to common code to optimize.
    if (Node->getValueType(ResNo: 0) == MVT::i64 &&
        Node->getOperand(Num: 0).getOpcode() != ISD::Constant)
      if (auto *Op1 = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
        uint64_t Val = Op1->getZExtValue();
        // Don't split the operation if we can match one of the combined
        // logical operations provided by miscellaneous-extensions-3.
        if (Subtarget->hasMiscellaneousExtensions3()) {
          unsigned ChildOpcode = Node->getOperand(Num: 0).getOpcode();
          // Check whether this expression matches NAND/NOR/NXOR.
          if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
            if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
                ChildOpcode == ISD::XOR)
              break;
          // Check whether this expression matches OR-with-complement
          // (or matches an alternate pattern for NXOR).
          if (ChildOpcode == ISD::XOR) {
            auto Op0 = Node->getOperand(Num: 0);
            if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Val: Op0->getOperand(Num: 1)))
              if (Op0Op1->getZExtValue() == (uint64_t)-1)
                break;
          }
        }
        // Don't split an XOR with -1 as LCGR/AGHI is more compact.
        if (Opcode == ISD::XOR && Op1->isAllOnes())
          break;
        // Split only if the immediate passes neither the isImmLF() nor the
        // isImmHF() test.  The upper part keeps bits 32-63 of Val, the lower
        // part bits 0-31.
        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
          splitLargeImmediate(Opcode, Node, Op0: Node->getOperand(Num: 0),
                              UpperVal: Val - uint32_t(Val), LowerVal: uint32_t(Val));
          return;
        }
      }
    break;

  case ISD::AND:
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(N: Node, Opcode: SystemZ::RNSBG))
        return;
    [[fallthrough]];
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::ZERO_EXTEND:
    if (tryRISBGZero(N: Node))
      return;
    break;

  case ISD::BSWAP:
    // An i128 byte swap is done as a VPERM on the value viewed as v16i8, with
    // a permute mask obtained from loadPoolVectorConstant().  Other types are
    // left to SelectCode().
    if (Node->getValueType(ResNo: 0) == MVT::i128) {
      SDLoc DL(Node);
      SDValue Src = Node->getOperand(Num: 0);
      Src = CurDAG->getNode(Opcode: ISD::BITCAST, DL, VT: MVT::v16i8, Operand: Src);

      uint64_t Bytes[2] = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL };
      SDNode *Mask = loadPoolVectorConstant(Val: APInt(128, Bytes), VT: MVT::v16i8, DL);
      SDValue Ops[] = { Src, Src, SDValue(Mask, 0) };
      SDValue Res = SDValue(CurDAG->getMachineNode(Opcode: SystemZ::VPERM, dl: DL,
                                                   VT: MVT::v16i8, Ops), 0);

      Res = CurDAG->getNode(Opcode: ISD::BITCAST, DL, VT: MVT::i128, Operand: Res);
      SDNode *ResNode = Res.getNode();
      ReplaceNode(F: Node, T: ResNode);
      // Run SelectCode() explicitly on the two nodes produced by the getNode()
      // calls above.
      SelectCode(N: Src.getNode());
      SelectCode(N: ResNode);
      return;
    }
    break;

  case ISD::Constant:
    // If this is a 64-bit constant that is out of the range of LLILF,
    // LLIHF and LGFI, split it into two 32-bit pieces.
    if (Node->getValueType(ResNo: 0) == MVT::i64) {
      uint64_t Val = Node->getAsZExtVal();
      if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(x: Val)) {
        splitLargeImmediate(Opcode: ISD::OR, Node, Op0: SDValue(), UpperVal: Val - uint32_t(Val),
                            LowerVal: uint32_t(Val));
        return;
      }
    }
    // A 128-bit constant is either built as a legal vector constant or, when
    // that is not possible, loaded from the literal pool.
    if (Node->getValueType(ResNo: 0) == MVT::i128) {
      const APInt &Val = Node->getAsAPIntVal();
      SystemZVectorConstantInfo VCI(Val);
      if (VCI.isVectorConstantLegal(Subtarget: *Subtarget)) {
        loadVectorConstant(VCI, Node);
        return;
      }
      // If we can't materialize the constant we need to use a literal pool.
      SDNode *ResNode = loadPoolVectorConstant(Val, VT: MVT::i128, DL: SDLoc(Node));
      ReplaceNode(F: Node, T: ResNode);
      return;
    }
    break;

  case SystemZISD::SELECT_CCMASK: {
    SDValue Op0 = Node->getOperand(Num: 0);
    SDValue Op1 = Node->getOperand(Num: 1);
    // Prefer to put any load first, so that it can be matched as a
    // conditional load. Likewise for constants in range for LOCHI.
    if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
        (Subtarget->hasLoadStoreOnCond2() &&
         Node->getValueType(ResNo: 0).isInteger() &&
         Node->getValueType(ResNo: 0).getSizeInBits() <= 64 &&
         Op1.getOpcode() == ISD::Constant &&
         isInt<16>(x: cast<ConstantSDNode>(Val&: Op1)->getSExtValue()) &&
         !(Op0.getOpcode() == ISD::Constant &&
           isInt<16>(x: cast<ConstantSDNode>(Val&: Op0)->getSExtValue())))) {
      SDValue CCValid = Node->getOperand(Num: 2);
      SDValue CCMask = Node->getOperand(Num: 3);
      uint64_t ConstCCValid = CCValid.getNode()->getAsZExtVal();
      uint64_t ConstCCMask = CCMask.getNode()->getAsZExtVal();
      // Invert the condition.
      CCMask = CurDAG->getTargetConstant(Val: ConstCCValid ^ ConstCCMask,
                                         DL: SDLoc(Node), VT: CCMask.getValueType());
      SDValue Op4 = Node->getOperand(Num: 4);
      // Swap the two value operands to go with the inverted mask.
      SDNode *UpdatedNode =
        CurDAG->UpdateNodeOperands(N: Node, Op1, Op2: Op0, Op3: CCValid, Op4: CCMask, Op5: Op4);
      if (UpdatedNode != Node) {
        // In case this node already exists then replace Node with it.
        ReplaceNode(F: Node, T: UpdatedNode);
        Node = UpdatedNode;
      }
    }
    // The (possibly updated) node is selected by SelectCode() below.
    break;
  }

  case ISD::INSERT_VECTOR_ELT: {
    // Try tryGather() with the opcode matching the element size (32 or 64
    // bits); other element sizes go straight to SelectCode().
    EVT VT = Node->getValueType(ResNo: 0);
    unsigned ElemBitSize = VT.getScalarSizeInBits();
    if (ElemBitSize == 32) {
      if (tryGather(N: Node, Opcode: SystemZ::VGEF))
        return;
    } else if (ElemBitSize == 64) {
      if (tryGather(N: Node, Opcode: SystemZ::VGEG))
        return;
    }
    break;
  }

  case ISD::BUILD_VECTOR: {
    auto *BVN = cast<BuildVectorSDNode>(Val: Node);
    SystemZVectorConstantInfo VCI(BVN);
    if (VCI.isVectorConstantLegal(Subtarget: *Subtarget)) {
      loadVectorConstant(VCI, Node);
      return;
    }
    break;
  }

  case ISD::ConstantFP: {
    // Zero of either sign is left to SelectCode(); every other FP immediate
    // that reaches this point is expected to be a legal vector constant.
    APFloat Imm = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
    if (Imm.isZero() || Imm.isNegZero())
      break;
    SystemZVectorConstantInfo VCI(Imm);
    bool Success = VCI.isVectorConstantLegal(Subtarget: *Subtarget); (void)Success;
    assert(Success && "Expected legal FP immediate" );
    loadVectorConstant(VCI, Node);
    return;
  }

  case ISD::STORE: {
    if (tryFoldLoadStoreIntoMemOperand(Node))
      return;
    // Otherwise try tryScatter() with the opcode matching the size of the
    // stored value (32 or 64 bits).
    auto *Store = cast<StoreSDNode>(Val: Node);
    unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
    if (ElemBitSize == 32) {
      if (tryScatter(Store, Opcode: SystemZ::VSCEF))
        return;
    } else if (ElemBitSize == 64) {
      if (tryScatter(Store, Opcode: SystemZ::VSCEG))
        return;
    }
    break;
  }

  case ISD::ATOMIC_STORE: {
    auto *AtomOp = cast<AtomicSDNode>(Val: Node);
    // Replace the atomic_store with a regular store and select it. This is
    // ok since we know all store instructions <= 8 bytes are atomic, and the
    // 16 byte case is already handled during lowering.
    StoreSDNode *St = cast<StoreSDNode>(Val: CurDAG->getTruncStore(
         Chain: AtomOp->getChain(), dl: SDLoc(AtomOp), Val: AtomOp->getVal(),
         Ptr: AtomOp->getBasePtr(), SVT: AtomOp->getMemoryVT(), MMO: AtomOp->getMemOperand()));
    assert(St->getMemOperand()->isAtomic() && "Broken MMO." );
    SDNode *Chain = St;
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
      Chain = CurDAG->getMachineNode(Opcode: SystemZ::Serialize, dl: SDLoc(AtomOp),
                                     VT: MVT::Other, Op1: SDValue(Chain, 0));
    // Users of the atomic store now see the end of the new chain (the store
    // or the serialization); the store itself still has to be selected.
    ReplaceNode(F: Node, T: Chain);
    SelectCode(N: St);
    return;
  }
  }

  SelectCode(N: Node);
}
1797 | |
1798 | bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand( |
1799 | const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, |
1800 | std::vector<SDValue> &OutOps) { |
1801 | SystemZAddressingMode::AddrForm Form; |
1802 | SystemZAddressingMode::DispRange DispRange; |
1803 | SDValue Base, Disp, Index; |
1804 | |
1805 | switch(ConstraintID) { |
1806 | default: |
1807 | llvm_unreachable("Unexpected asm memory constraint" ); |
1808 | case InlineAsm::ConstraintCode::i: |
1809 | case InlineAsm::ConstraintCode::Q: |
1810 | case InlineAsm::ConstraintCode::ZQ: |
1811 | // Accept an address with a short displacement, but no index. |
1812 | Form = SystemZAddressingMode::FormBD; |
1813 | DispRange = SystemZAddressingMode::Disp12Only; |
1814 | break; |
1815 | case InlineAsm::ConstraintCode::R: |
1816 | case InlineAsm::ConstraintCode::ZR: |
1817 | // Accept an address with a short displacement and an index. |
1818 | Form = SystemZAddressingMode::FormBDXNormal; |
1819 | DispRange = SystemZAddressingMode::Disp12Only; |
1820 | break; |
1821 | case InlineAsm::ConstraintCode::S: |
1822 | case InlineAsm::ConstraintCode::ZS: |
1823 | // Accept an address with a long displacement, but no index. |
1824 | Form = SystemZAddressingMode::FormBD; |
1825 | DispRange = SystemZAddressingMode::Disp20Only; |
1826 | break; |
1827 | case InlineAsm::ConstraintCode::T: |
1828 | case InlineAsm::ConstraintCode::m: |
1829 | case InlineAsm::ConstraintCode::o: |
1830 | case InlineAsm::ConstraintCode::p: |
1831 | case InlineAsm::ConstraintCode::ZT: |
1832 | // Accept an address with a long displacement and an index. |
1833 | // m works the same as T, as this is the most general case. |
1834 | // We don't really have any special handling of "offsettable" |
1835 | // memory addresses, so just treat o the same as m. |
1836 | Form = SystemZAddressingMode::FormBDXNormal; |
1837 | DispRange = SystemZAddressingMode::Disp20Only; |
1838 | break; |
1839 | } |
1840 | |
1841 | if (selectBDXAddr(Form, DR: DispRange, Addr: Op, Base, Disp, Index)) { |
1842 | const TargetRegisterClass *TRC = |
1843 | Subtarget->getRegisterInfo()->getPointerRegClass(MF: *MF); |
1844 | SDLoc DL(Base); |
1845 | SDValue RC = CurDAG->getTargetConstant(Val: TRC->getID(), DL, VT: MVT::i32); |
1846 | |
1847 | // Make sure that the base address doesn't go into %r0. |
1848 | // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything. |
1849 | if (Base.getOpcode() != ISD::TargetFrameIndex && |
1850 | Base.getOpcode() != ISD::Register) { |
1851 | Base = |
1852 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
1853 | dl: DL, VT: Base.getValueType(), |
1854 | Op1: Base, Op2: RC), 0); |
1855 | } |
1856 | |
1857 | // Make sure that the index register isn't assigned to %r0 either. |
1858 | if (Index.getOpcode() != ISD::Register) { |
1859 | Index = |
1860 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
1861 | dl: DL, VT: Index.getValueType(), |
1862 | Op1: Index, Op2: RC), 0); |
1863 | } |
1864 | |
1865 | OutOps.push_back(x: Base); |
1866 | OutOps.push_back(x: Disp); |
1867 | OutOps.push_back(x: Index); |
1868 | return false; |
1869 | } |
1870 | |
1871 | return true; |
1872 | } |
1873 | |
1874 | // IsProfitableToFold - Returns true if is profitable to fold the specific |
1875 | // operand node N of U during instruction selection that starts at Root. |
1876 | bool |
1877 | SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, |
1878 | SDNode *Root) const { |
1879 | // We want to avoid folding a LOAD into an ICMP node if as a result |
1880 | // we would be forced to spill the condition code into a GPR. |
1881 | if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) { |
1882 | if (!N.hasOneUse() || !U->hasOneUse()) |
1883 | return false; |
1884 | |
1885 | // The user of the CC value will usually be a CopyToReg into the |
1886 | // physical CC register, which in turn is glued and chained to the |
1887 | // actual instruction that uses the CC value. Bail out if we have |
1888 | // anything else than that. |
1889 | SDNode *CCUser = *U->use_begin(); |
1890 | SDNode *CCRegUser = nullptr; |
1891 | if (CCUser->getOpcode() == ISD::CopyToReg || |
1892 | cast<RegisterSDNode>(Val: CCUser->getOperand(Num: 1))->getReg() == SystemZ::CC) { |
1893 | for (auto *U : CCUser->uses()) { |
1894 | if (CCRegUser == nullptr) |
1895 | CCRegUser = U; |
1896 | else if (CCRegUser != U) |
1897 | return false; |
1898 | } |
1899 | } |
1900 | if (CCRegUser == nullptr) |
1901 | return false; |
1902 | |
1903 | // If the actual instruction is a branch, the only thing that remains to be |
1904 | // checked is whether the CCUser chain is a predecessor of the load. |
1905 | if (CCRegUser->isMachineOpcode() && |
1906 | CCRegUser->getMachineOpcode() == SystemZ::BRC) |
1907 | return !N->isPredecessorOf(N: CCUser->getOperand(Num: 0).getNode()); |
1908 | |
1909 | // Otherwise, the instruction may have multiple operands, and we need to |
1910 | // verify that none of them are a predecessor of the load. This is exactly |
1911 | // the same check that would be done by common code if the CC setter were |
1912 | // glued to the CC user, so simply invoke that check here. |
1913 | if (!IsLegalToFold(N, U, Root: CCRegUser, OptLevel, IgnoreChains: false)) |
1914 | return false; |
1915 | } |
1916 | |
1917 | return true; |
1918 | } |
1919 | |
namespace {
// Describes how to turn an IPM result X into a single extracted bit:
//   (((X ^ XORValue) + AddValue) >> Bit)
struct IPMConversion {
  int64_t XORValue; // Value X is XORed with first.
  int64_t AddValue; // Value added after the XOR.
  unsigned Bit;     // Position of the bit that holds the result.

  // Note that the XOR value is taken as an unsigned quantity, so it is
  // zero-extended into the 64-bit member.
  IPMConversion(unsigned XorArg, int64_t AddArg, unsigned BitArg)
      : XORValue(XorArg), AddValue(AddArg), Bit(BitArg) {}
};
} // end anonymous namespace
1932 | |
1933 | // Return a sequence for getting a 1 from an IPM result when CC has a |
1934 | // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. |
1935 | // The handling of CC values outside CCValid doesn't matter. |
1936 | static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { |
1937 | // Deal with cases where the result can be taken directly from a bit |
1938 | // of the IPM result. |
1939 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) |
1940 | return IPMConversion(0, 0, SystemZ::IPM_CC); |
1941 | if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) |
1942 | return IPMConversion(0, 0, SystemZ::IPM_CC + 1); |
1943 | |
1944 | // Deal with cases where we can add a value to force the sign bit |
1945 | // to contain the right value. Putting the bit in 31 means we can |
1946 | // use SRL rather than RISBG(L), and also makes it easier to get a |
1947 | // 0/-1 value, so it has priority over the other tests below. |
1948 | // |
1949 | // These sequences rely on the fact that the upper two bits of the |
1950 | // IPM result are zero. |
1951 | uint64_t TopBit = uint64_t(1) << 31; |
1952 | if (CCMask == (CCValid & SystemZ::CCMASK_0)) |
1953 | return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); |
1954 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) |
1955 | return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); |
1956 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1957 | | SystemZ::CCMASK_1 |
1958 | | SystemZ::CCMASK_2))) |
1959 | return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); |
1960 | if (CCMask == (CCValid & SystemZ::CCMASK_3)) |
1961 | return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); |
1962 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 |
1963 | | SystemZ::CCMASK_2 |
1964 | | SystemZ::CCMASK_3))) |
1965 | return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); |
1966 | |
1967 | // Next try inverting the value and testing a bit. 0/1 could be |
1968 | // handled this way too, but we dealt with that case above. |
1969 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) |
1970 | return IPMConversion(-1, 0, SystemZ::IPM_CC); |
1971 | |
1972 | // Handle cases where adding a value forces a non-sign bit to contain |
1973 | // the right value. |
1974 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) |
1975 | return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); |
1976 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) |
1977 | return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); |
1978 | |
1979 | // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are |
1980 | // can be done by inverting the low CC bit and applying one of the |
1981 | // sign-based extractions above. |
1982 | if (CCMask == (CCValid & SystemZ::CCMASK_1)) |
1983 | return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); |
1984 | if (CCMask == (CCValid & SystemZ::CCMASK_2)) |
1985 | return IPMConversion(1 << SystemZ::IPM_CC, |
1986 | TopBit - (3 << SystemZ::IPM_CC), 31); |
1987 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1988 | | SystemZ::CCMASK_1 |
1989 | | SystemZ::CCMASK_3))) |
1990 | return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); |
1991 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1992 | | SystemZ::CCMASK_2 |
1993 | | SystemZ::CCMASK_3))) |
1994 | return IPMConversion(1 << SystemZ::IPM_CC, |
1995 | TopBit - (1 << SystemZ::IPM_CC), 31); |
1996 | |
1997 | llvm_unreachable("Unexpected CC combination" ); |
1998 | } |
1999 | |
2000 | SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { |
2001 | auto *TrueOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 0)); |
2002 | auto *FalseOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1)); |
2003 | if (!TrueOp || !FalseOp) |
2004 | return SDValue(); |
2005 | if (FalseOp->getZExtValue() != 0) |
2006 | return SDValue(); |
2007 | if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1) |
2008 | return SDValue(); |
2009 | |
2010 | auto *CCValidOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 2)); |
2011 | auto *CCMaskOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 3)); |
2012 | if (!CCValidOp || !CCMaskOp) |
2013 | return SDValue(); |
2014 | int CCValid = CCValidOp->getZExtValue(); |
2015 | int CCMask = CCMaskOp->getZExtValue(); |
2016 | |
2017 | SDLoc DL(Node); |
2018 | SDValue CCReg = Node->getOperand(Num: 4); |
2019 | IPMConversion IPM = getIPMConversion(CCValid, CCMask); |
2020 | SDValue Result = CurDAG->getNode(Opcode: SystemZISD::IPM, DL, VT: MVT::i32, Operand: CCReg); |
2021 | |
2022 | if (IPM.XORValue) |
2023 | Result = CurDAG->getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Result, |
2024 | N2: CurDAG->getConstant(Val: IPM.XORValue, DL, VT: MVT::i32)); |
2025 | |
2026 | if (IPM.AddValue) |
2027 | Result = CurDAG->getNode(Opcode: ISD::ADD, DL, VT: MVT::i32, N1: Result, |
2028 | N2: CurDAG->getConstant(Val: IPM.AddValue, DL, VT: MVT::i32)); |
2029 | |
2030 | EVT VT = Node->getValueType(ResNo: 0); |
2031 | if (VT == MVT::i32 && IPM.Bit == 31) { |
2032 | unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA; |
2033 | Result = CurDAG->getNode(Opcode: ShiftOp, DL, VT: MVT::i32, N1: Result, |
2034 | N2: CurDAG->getConstant(Val: IPM.Bit, DL, VT: MVT::i32)); |
2035 | } else { |
2036 | if (VT != MVT::i32) |
2037 | Result = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Result); |
2038 | |
2039 | if (TrueOp->getSExtValue() == 1) { |
2040 | // The SHR/AND sequence should get optimized to an RISBG. |
2041 | Result = CurDAG->getNode(Opcode: ISD::SRL, DL, VT, N1: Result, |
2042 | N2: CurDAG->getConstant(Val: IPM.Bit, DL, VT: MVT::i32)); |
2043 | Result = CurDAG->getNode(Opcode: ISD::AND, DL, VT, N1: Result, |
2044 | N2: CurDAG->getConstant(Val: 1, DL, VT)); |
2045 | } else { |
2046 | // Sign-extend from IPM.Bit using a pair of shifts. |
2047 | int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit; |
2048 | int SraAmt = VT.getSizeInBits() - 1; |
2049 | Result = CurDAG->getNode(Opcode: ISD::SHL, DL, VT, N1: Result, |
2050 | N2: CurDAG->getConstant(Val: ShlAmt, DL, VT: MVT::i32)); |
2051 | Result = CurDAG->getNode(Opcode: ISD::SRA, DL, VT, N1: Result, |
2052 | N2: CurDAG->getConstant(Val: SraAmt, DL, VT: MVT::i32)); |
2053 | } |
2054 | } |
2055 | |
2056 | return Result; |
2057 | } |
2058 | |
2059 | bool SystemZDAGToDAGISel::shouldSelectForReassoc(SDNode *N) const { |
2060 | EVT VT = N->getValueType(ResNo: 0); |
2061 | assert(VT.isFloatingPoint() && "Expected FP SDNode" ); |
2062 | return N->getFlags().hasAllowReassociation() && |
2063 | N->getFlags().hasNoSignedZeros() && Subtarget->hasVector() && |
2064 | (VT != MVT::f32 || Subtarget->hasVectorEnhancements1()) && |
2065 | !N->isStrictFPOpcode(); |
2066 | } |
2067 | |
2068 | void SystemZDAGToDAGISel::PreprocessISelDAG() { |
2069 | // If we have conditional immediate loads, we always prefer |
2070 | // using those over an IPM sequence. |
2071 | if (Subtarget->hasLoadStoreOnCond2()) |
2072 | return; |
2073 | |
2074 | bool MadeChange = false; |
2075 | |
2076 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), |
2077 | E = CurDAG->allnodes_end(); |
2078 | I != E;) { |
2079 | SDNode *N = &*I++; |
2080 | if (N->use_empty()) |
2081 | continue; |
2082 | |
2083 | SDValue Res; |
2084 | switch (N->getOpcode()) { |
2085 | default: break; |
2086 | case SystemZISD::SELECT_CCMASK: |
2087 | Res = expandSelectBoolean(Node: N); |
2088 | break; |
2089 | } |
2090 | |
2091 | if (Res) { |
2092 | LLVM_DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: " ); |
2093 | LLVM_DEBUG(N->dump(CurDAG)); |
2094 | LLVM_DEBUG(dbgs() << "\nNew: " ); |
2095 | LLVM_DEBUG(Res.getNode()->dump(CurDAG)); |
2096 | LLVM_DEBUG(dbgs() << "\n" ); |
2097 | |
2098 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); |
2099 | MadeChange = true; |
2100 | } |
2101 | } |
2102 | |
2103 | if (MadeChange) |
2104 | CurDAG->RemoveDeadNodes(); |
2105 | } |
2106 | |