1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/CodeGen/TargetLowering.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/Analysis/VectorUtils.h"
17#include "llvm/CodeGen/Analysis.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/CodeGenCommonISel.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/SDPatternMatch.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/TargetRegisterInfo.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/GlobalVariable.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
33#include "llvm/Support/DivisionByConstantInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/KnownBits.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Target/TargetMachine.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
/// NOTE: The TargetMachine owns TLOF.
/// Constructor simply forwards to TargetLoweringBase; all shared lowering
/// state is initialized there.
TargetLowering::TargetLowering(const TargetMachine &tm,
                               const TargetSubtargetInfo &STI)
    : TargetLoweringBase(tm, STI) {}
47
// Define the virtual destructor out-of-line for build efficiency (anchors the
// vtable in this translation unit instead of emitting it in every user).
TargetLowering::~TargetLowering() = default;
50
/// Base implementation: targets override this to provide printable names for
/// their target-specific DAG opcodes; by default no name is known.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
54
/// Returns true when the current TargetMachine is configured for
/// position-independent code; convenience forwarder used throughout lowering.
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
61bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
62 SDValue &Chain) const {
63 const Function &F = DAG.getMachineFunction().getFunction();
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute(Kind: "disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Val: Attr);
78
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
83 if (CallerAttrs.contains(A: Attribute::ZExt) ||
84 CallerAttrs.contains(A: Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
91bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(RegMask: CallerPreservedMask, PhysReg: Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(i: 0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Val: Value->getOperand(Num: 1))->getReg();
112 if (MRI.getLiveInPhysReg(VReg: ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  // Mirror each relevant parameter attribute of Call's ArgIdx-th argument
  // into the corresponding ArgListEntry flag.
  IsSExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgNo: ArgIdx);
  IndirectType = nullptr;
  // byval / preallocated / inalloca / sret are mutually exclusive ABI
  // attributes; at most one may appear on a given argument.
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgNo: ArgIdx);
    // For byval, fall back to the parameter alignment when no explicit stack
    // alignment was given.
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgNo: ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgNo: ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgNo: ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgNo: ArgIdx);
}
151
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT. Returns the pair (call result, output chain).
/// \p CallOptions controls signedness-based extension of arguments/result and
/// carries the pre-softening types when this call implements a soft-float op.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            EVT RetVT, ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError(reason: "unsupported library call operation");

  // With no explicit incoming chain, hang the call off the DAG entry node.
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(n: Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    // Honor a per-operand IR type override when one was supplied; otherwise
    // derive the IR type from the operand's EVT.
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(Context&: *DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    // When softening, remember the operand's original (pre-softening) type.
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(Context&: *DAG.getContext());

    // Arguments are either sign- or zero-extended according to the target's
    // convention for this type and the requested signedness.
    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Ty: Entry.Ty, IsSigned: CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    // Softened operands that the target doesn't want extended get neither.
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(Type: CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(x: Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  // The result extension mirrors the argument policy above.
  bool signExtend = shouldSignExtendTypeInLibCall(Ty: RetTy, IsSigned: CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(Context&: *DAG.getContext());
    if (!shouldExtendTypeInLibCall(Type: CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetTy, OrigResultType: OrigRetTy,
                    Target: Callee, ArgsList: std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
216
/// Determine a sequence of value types (appended to MemOps) with which to
/// implement the memory operation Op, respecting alignment constraints and
/// the per-call Limit on the number of load/store pairs. Returns false if no
/// legal sequence within Limit exists.
/// NOTE(review): the LargestVT out-parameter is never written anywhere in
/// this body -- confirm whether callers expect it to be populated here.
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, EVT *LargestVT) const {
  // A memcpy with fixed destination alignment whose source is less aligned
  // can't be expanded profitably unless the caller imposed no limit.
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  // Let the target pick its preferred type first.
  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(VT: LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(VT: LVT))
      VT = LVT;
  }

  // Greedily emit operations of the current VT, shrinking VT whenever the
  // remaining size is smaller than one VT-sized chunk.
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        // Step down from vector/FP types to a plain integer of similar width.
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(Op: ISD::STORE, VT: NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(Op: ISD::STORE, VT: MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        // Otherwise walk down the integer types until one is safe; i8 is the
        // unconditional floor.
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              Flags: MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(x: VT);
    Size -= VTSize;
  }

  return true;
}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
310void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, DL: dl, OldLHS,
317 OldRHS, Chain);
318}
319
/// Soften the operands of a floating-point comparison by expanding it into
/// one or two soft-float comparison libcalls, rewriting NewLHS/NewRHS/CCCode
/// into an integer comparison against the libcall result(s) and updating
/// Chain with the calls' output chain.
/// NOTE(review): the IsSignaling parameter is not referenced in this body --
/// confirm whether signaling-compare semantics are handled elsewhere.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
  // not supporting it. We can update this code when libgcc provides such
  // functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s). LC2 is only used for the
  // two-call predicates (SETONE / SETUEQ); ShouldInvertCC records that the
  // libcall computes the inverse of the requested predicate.
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    // SETO is the inverse of SETUO.
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    // SETUEQ = UO || OEQ, so this needs two libcalls combined below.
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  // Record the original (pre-softening) operand types for ABI purposes.
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  // First libcall: result is compared against zero below.
  auto Call = makeLibCall(DAG, LC: LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(Val: 0, DL: dl, VT: RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(Call: LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    reportFatalUsageError(
        reason: "no libcall available to soften floating-point compare");
  }

  // The libcall implementation defines which integer predicate interprets
  // its return value.
  CCCode = getSoftFloatCmpLibcallPredicate(Call: LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    // Two-call form: combine the unordered check with the second compare.
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(Call: LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      reportFatalUsageError(
          reason: "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: RetVT);
    // When booleans are 0/1, assert the high bits are zero so the setcc below
    // can treat the libcall result as an i1.
    if (getBooleanContents(Type: RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: RetVT, N1: Call.first,
                           N2: DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: NewLHS, RHS: NewRHS, Cond: CCCode);
    auto Call2 = makeLibCall(DAG, LC: LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(Call: LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(Operation: CCCode, Type: RetVT);
    NewLHS = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Call2.first, RHS: NewRHS, Cond: CCCode);
    // Merge the two calls' chains when the caller threads a chain through.
    if (Chain)
      Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Call.second,
                          N2: Call2.second);
    // AND for the inverted (SETONE) form, OR for SETUEQ.
    NewLHS = DAG.getNode(Opcode: ShouldInvertCC ? ISD::AND : ISD::OR, DL: dl,
                         VT: Tmp.getValueType(), N1: Tmp, N2: NewLHS);
    NewRHS = SDValue();
  }
}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
482unsigned TargetLowering::getJumpTableEncoding() const {
483 // In non-pic modes, just use the address of a block.
484 if (!isPositionIndependent())
485 return MachineJumpTableInfo::EK_BlockAddress;
486
487 // Otherwise, use a label difference.
488 return MachineJumpTableInfo::EK_LabelDifference32;
489}
490
/// Base implementation: by default the PIC jump-table relocation base is the
/// jump-table address itself; targets with a dedicated PIC base override this.
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
498const MCExpr *
499TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(Symbol: MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
505SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
510 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, DL: dl);
512 }
513 return DAG.getNode(Opcode: ISD::BRIND, DL: dl, VT: MVT::Other, N1: Chain, N2: Addr);
514}
515
516bool
517TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
527 if (isPositionIndependent())
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node, leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    // Only handle binary logic ops with a non-opaque constant RHS.
    auto *Op1C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(RHS: C))
      return false;

    // Mask the constant down to the demanded bits when it has extra bits set.
    if (!C.isSubsetOf(RHS: DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(Val: DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, N1: Op.getOperand(i: 0), N2: NewC,
                                      Flags: Op->getFlags());
      return TLO.CombineTo(O: Op, N: NewOp);
    }

    break;
  }
  }

  return false;
}
587
588bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
593 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: SmallVTBits);
    if (isTruncateFree(Val: Op, VT2: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      // Truncate both operands, perform the op in the narrow type, then
      // any-extend back to the original width.
      SDValue X = DAG.getNode(
          Opcode, DL: dl, VT: SmallVT,
          N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
          N2: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: X);
      return TLO.CombineTo(O: Op, N: Z);
    }
  }
  return false;
}
657
658bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(N: Op.getNode());
668 DCI.CommitTargetLoweringOpt(TLO);
669 }
670 return Simplified;
671}
672
673bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(N: Op.getNode());
685 DCI.CommitTargetLoweringOpt(TLO);
686 }
687 return Simplified;
688}
689
690bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
691 KnownBits &Known,
692 TargetLoweringOpt &TLO,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
701 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
708SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
714 if (Depth >= SelectionDAG::MaxRecursionDepth)
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(V: Op.getOperand(i: 0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
742 if (NumSrcEltBits == NumDstEltBits)
743 if (SDValue V = SimplifyMultipleUseDemandedBits(
744 Op: Src, DemandedBits, DemandedElts, DAG, Depth: Depth + 1))
745 return DAG.getBitcast(VT: DstVT, V);
746
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that maps to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
760
761 if (SDValue V = SimplifyMultipleUseDemandedBits(
762 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
763 return DAG.getBitcast(VT: DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
779 if (SDValue V = SimplifyMultipleUseDemandedBits(
780 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG, Depth: Depth + 1))
781 return DAG.getBitcast(VT: DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(i: 0);
795 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(i: 1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(i: 0);
808 if (DemandedBits.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(i: 1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHS: RHSKnown.Zero))
819 return Op.getOperand(i: 0);
820 if (DemandedBits.isSubsetOf(RHS: LHSKnown.Zero))
821 return Op.getOperand(i: 1);
822 break;
823 }
824 case ISD::ADD: {
825 RHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 1), DemandedElts, Depth: Depth + 1);
826 if (RHSKnown.isZero())
827 return Op.getOperand(i: 0);
828
829 LHSKnown = DAG.computeKnownBits(Op: Op.getOperand(i: 0), DemandedElts, Depth: Depth + 1);
830 if (LHSKnown.isZero())
831 return Op.getOperand(i: 1);
832 break;
833 }
834 case ISD::SHL: {
835 // If we are only demanding sign bits then we can use the shift source
836 // directly.
837 if (std::optional<unsigned> MaxSA =
838 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
839 SDValue Op0 = Op.getOperand(i: 0);
840 unsigned ShAmt = *MaxSA;
841 unsigned NumSignBits =
842 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
843 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
844 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
845 return Op0;
846 }
847 break;
848 }
849 case ISD::SRL: {
850 // If we are only demanding sign bits then we can use the shift source
851 // directly.
852 if (std::optional<unsigned> MaxSA =
853 DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
854 SDValue Op0 = Op.getOperand(i: 0);
855 unsigned ShAmt = *MaxSA;
856 // Must already be signbits in DemandedBits bounds, and can't demand any
857 // shifted in zeroes.
858 if (DemandedBits.countl_zero() >= ShAmt) {
859 unsigned NumSignBits =
860 DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
861 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
862 return Op0;
863 }
864 }
865 break;
866 }
867 case ISD::SETCC: {
868 SDValue Op0 = Op.getOperand(i: 0);
869 SDValue Op1 = Op.getOperand(i: 1);
870 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
871 // If (1) we only need the sign-bit, (2) the setcc operands are the same
872 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
873 // -1, we may be able to bypass the setcc.
874 if (DemandedBits.isSignMask() &&
875 Op0.getScalarValueSizeInBits() == BitWidth &&
876 getBooleanContents(Type: Op0.getValueType()) ==
877 BooleanContent::ZeroOrNegativeOneBooleanContent) {
878 // If we're testing X < 0, then this compare isn't needed - just use X!
879 // FIXME: We're limiting to integer types here, but this should also work
880 // if we don't care about FP signed-zero. The use of SETLT with FP means
881 // that we don't care about NaNs.
882 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
883 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
884 return Op0;
885 }
886 break;
887 }
888 case ISD::SIGN_EXTEND_INREG: {
889 // If none of the extended bits are demanded, eliminate the sextinreg.
890 SDValue Op0 = Op.getOperand(i: 0);
891 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
892 unsigned ExBits = ExVT.getScalarSizeInBits();
893 if (DemandedBits.getActiveBits() <= ExBits &&
894 shouldRemoveRedundantExtend(Op))
895 return Op0;
896 // If the input is already sign extended, just drop the extension.
897 unsigned NumSignBits = DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
898 if (NumSignBits >= (BitWidth - ExBits + 1))
899 return Op0;
900 break;
901 }
902 case ISD::ANY_EXTEND_VECTOR_INREG:
903 case ISD::SIGN_EXTEND_VECTOR_INREG:
904 case ISD::ZERO_EXTEND_VECTOR_INREG: {
905 if (VT.isScalableVector())
906 return SDValue();
907
908 // If we only want the lowest element and none of extended bits, then we can
909 // return the bitcasted source vector.
910 SDValue Src = Op.getOperand(i: 0);
911 EVT SrcVT = Src.getValueType();
912 EVT DstVT = Op.getValueType();
913 if (IsLE && DemandedElts == 1 &&
914 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
915 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
916 return DAG.getBitcast(VT: DstVT, V: Src);
917 }
918 break;
919 }
920 case ISD::INSERT_VECTOR_ELT: {
921 if (VT.isScalableVector())
922 return SDValue();
923
924 // If we don't demand the inserted element, return the base vector.
925 SDValue Vec = Op.getOperand(i: 0);
926 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
927 EVT VecVT = Vec.getValueType();
928 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements()) &&
929 !DemandedElts[CIdx->getZExtValue()])
930 return Vec;
931 break;
932 }
933 case ISD::INSERT_SUBVECTOR: {
934 if (VT.isScalableVector())
935 return SDValue();
936
937 SDValue Vec = Op.getOperand(i: 0);
938 SDValue Sub = Op.getOperand(i: 1);
939 uint64_t Idx = Op.getConstantOperandVal(i: 2);
940 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
941 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
942 // If we don't demand the inserted subvector, return the base vector.
943 if (DemandedSubElts == 0)
944 return Vec;
945 break;
946 }
947 case ISD::VECTOR_SHUFFLE: {
948 assert(!VT.isScalableVector());
949 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
950
951 // If all the demanded elts are from one operand and are inline,
952 // then we can use the operand directly.
953 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
954 for (unsigned i = 0; i != NumElts; ++i) {
955 int M = ShuffleMask[i];
956 if (M < 0 || !DemandedElts[i])
957 continue;
958 AllUndef = false;
959 IdentityLHS &= (M == (int)i);
960 IdentityRHS &= ((M - NumElts) == i);
961 }
962
963 if (AllUndef)
964 return DAG.getUNDEF(VT: Op.getValueType());
965 if (IdentityLHS)
966 return Op.getOperand(i: 0);
967 if (IdentityRHS)
968 return Op.getOperand(i: 1);
969 break;
970 }
971 default:
972 // TODO: Probably okay to remove after audit; here to reduce change size
973 // in initial enablement patch for scalable vectors
974 if (VT.isScalableVector())
975 return SDValue();
976
977 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
978 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
979 Op, DemandedBits, DemandedElts, DAG, Depth))
980 return V;
981 break;
982 }
983 return SDValue();
984}
985
986SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
987 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
988 unsigned Depth) const {
989 EVT VT = Op.getValueType();
990 // Since the number of lanes in a scalable vector is unknown at compile time,
991 // we track one bit which is implicitly broadcast to all lanes. This means
992 // that all lanes in a scalable vector are considered demanded.
993 APInt DemandedElts = VT.isFixedLengthVector()
994 ? APInt::getAllOnes(numBits: VT.getVectorNumElements())
995 : APInt(1, 1);
996 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
997 Depth);
998}
999
1000SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
1001 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1002 unsigned Depth) const {
1003 APInt DemandedBits = APInt::getAllOnes(numBits: Op.getScalarValueSizeInBits());
1004 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1005 Depth);
1006}
1007
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// The extensions are not explicit in the DAG; instead known sign/zero bits
// prove the add operands fit a narrower type, so we can truncate into that
// type, emit AVGFLOOR/AVGCEIL, and extend the result back out.
//
// \param Op           the SRL/SRA node being simplified.
// \param TLO          combiner helper; provides the SelectionDAG.
// \param TLI          target lowering info, used for legality queries.
// \param DemandedBits bits of Op demanded by its users.
// \param DemandedElts vector lanes of Op demanded by its users.
// \param Depth        current recursion depth for known-bits queries.
// \returns the replacement value, or SDValue() if no transform applies.
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as a avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(i: 0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(i: 0);
  SDValue ExtOpB = Add.getOperand(i: 1);
  SDValue Add2;
  // Helper for the avgceil forms: if Op2 or Op3 is the '+1' constant, bind
  // the remaining two values to ExtOpA/ExtOpB and record the inner add in
  // Add2 (used later for the overflow check).
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(N: Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(N: Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  // Try both commutations of the nested add to recognize the '+1' (avgceil)
  // pattern; on a miss, ExtOpA/ExtOpB keep the plain avgfloor operands.
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(i: 0), ExtOpA.getOperand(i: 1), ExtOpB, ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(i: 0), ExtOpB.getOperand(i: 1), ExtOpA, ExtOpB));

  // If the shift is signed (sra):
  // - Needs >= 2 sign bit for both operands.
  // - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  // - Needs >= 1 zero bit for both operands.
  // - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits; // Leading bits known redundant in both add operands.
  unsigned NumSignedA = DAG.ComputeNumSignBits(Op: ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(Op: ExtOpB, DemandedElts, Depth);
  // Conservatively drop one sign bit to allow for carry out of the add.
  unsigned NumSigned = std::min(a: NumSignedA, b: NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(Op: ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(Op: ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(a: NumZeroA, b: NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    // Prefer the unsigned form when it proves strictly more known bits.
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    // The signed form is only valid for srl if the sign bit isn't demanded.
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(a: VT.getScalarSizeInBits() - KnownBits, b: 8);
  EVT NVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: llvm::bit_ceil(Value: MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: NVT, EC: VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(Op: AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, N0: Add.getOperand(i: 0),
                               N1: Add.getOperand(i: 1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, N0: Add2.getOperand(i: 0),
                                         N1: Add2.getOperand(i: 1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create a AVGFLOOR node with a scalar constant unless its legal as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(Op: AVGOpc, VT: NVT) &&
      (isa<ConstantSDNode>(Val: ExtOpA) || isa<ConstantSDNode>(Val: ExtOpB)))
    return SDValue();

  // Truncate (or extend) the operands into NVT, emit the AVG node, then
  // extend the result back to VT with the matching (sign/zero) extension.
  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(Opcode: AVGOpc, DL, VT: NVT, N1: DAG.getExtOrTrunc(IsSigned, Op: ExtOpA, DL, VT: NVT),
                  N2: DAG.getExtOrTrunc(IsSigned, Op: ExtOpB, DL, VT: NVT));
  return DAG.getExtOrTrunc(IsSigned, Op: ResultAVG, DL, VT);
}
1149
1150/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151/// result of Op are ever used downstream. If we can use this information to
1152/// simplify Op, create a new simplified DAG node and return true, returning the
1153/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154/// return a mask of Known bits for the expression (used to simplify the
1155/// caller). The Known bits may only be accurate for those bits in the
1156/// OriginalDemandedBits and OriginalDemandedElts.
1157bool TargetLowering::SimplifyDemandedBits(
1158 SDValue Op, const APInt &OriginalDemandedBits,
1159 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160 unsigned Depth, bool AssumeSingleUse) const {
1161 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163 "Mask size mismatches value type size!");
1164
1165 // Don't know anything.
1166 Known = KnownBits(BitWidth);
1167
1168 EVT VT = Op.getValueType();
1169 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1172 "Unexpected vector size");
1173
1174 APInt DemandedBits = OriginalDemandedBits;
1175 APInt DemandedElts = OriginalDemandedElts;
1176 SDLoc dl(Op);
1177
1178 // Undef operand.
1179 if (Op.isUndef())
1180 return false;
1181
1182 // We can't simplify target constants.
1183 if (Op.getOpcode() == ISD::TargetConstant)
1184 return false;
1185
1186 if (Op.getOpcode() == ISD::Constant) {
1187 // We know all of the bits for a constant!
1188 Known = KnownBits::makeConstant(C: Op->getAsAPIntVal());
1189 return false;
1190 }
1191
1192 if (Op.getOpcode() == ISD::ConstantFP) {
1193 // We know all of the bits for a floating point constant!
1194 Known = KnownBits::makeConstant(
1195 C: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF().bitcastToAPInt());
1196 return false;
1197 }
1198
1199 // Other users may use these bits.
1200 bool HasMultiUse = false;
1201 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1202 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1203 // Limit search depth.
1204 return false;
1205 }
1206 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1207 DemandedBits = APInt::getAllOnes(numBits: BitWidth);
1208 DemandedElts = APInt::getAllOnes(numBits: NumElts);
1209 HasMultiUse = true;
1210 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1211 // Not demanding any bits/elts from Op.
1212 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1213 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214 // Limit search depth.
1215 return false;
1216 }
1217
1218 KnownBits Known2;
1219 switch (Op.getOpcode()) {
1220 case ISD::SCALAR_TO_VECTOR: {
1221 if (VT.isScalableVector())
1222 return false;
1223 if (!DemandedElts[0])
1224 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
1225
1226 KnownBits SrcKnown;
1227 SDValue Src = Op.getOperand(i: 0);
1228 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229 APInt SrcDemandedBits = DemandedBits.zext(width: SrcBitWidth);
1230 if (SimplifyDemandedBits(Op: Src, DemandedBits: SrcDemandedBits, Known&: SrcKnown, TLO, Depth: Depth + 1))
1231 return true;
1232
1233 // Upper elements are undef, so only get the knownbits if we just demand
1234 // the bottom element.
1235 if (DemandedElts == 1)
1236 Known = SrcKnown.anyextOrTrunc(BitWidth);
1237 break;
1238 }
1239 case ISD::BUILD_VECTOR:
1240 // Collect the known bits that are shared by every demanded element.
1241 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243 return false; // Don't fall through, will infinitely loop.
1244 case ISD::SPLAT_VECTOR: {
1245 SDValue Scl = Op.getOperand(i: 0);
1246 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: Scl.getValueSizeInBits());
1247 KnownBits KnownScl;
1248 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1249 return true;
1250
1251 // Implicitly truncate the bits to match the official semantics of
1252 // SPLAT_VECTOR.
1253 Known = KnownScl.trunc(BitWidth);
1254 break;
1255 }
1256 case ISD::LOAD: {
1257 auto *LD = cast<LoadSDNode>(Val&: Op);
1258 if (getTargetConstantFromLoad(LD)) {
1259 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260 return false; // Don't fall through, will infinitely loop.
1261 }
1262 if (ISD::isZEXTLoad(N: Op.getNode()) && Op.getResNo() == 0) {
1263 // If this is a ZEXTLoad and we are looking at the loaded value.
1264 EVT MemVT = LD->getMemoryVT();
1265 unsigned MemBits = MemVT.getScalarSizeInBits();
1266 Known.Zero.setBitsFrom(MemBits);
1267 return false; // Don't fall through, will infinitely loop.
1268 }
1269 break;
1270 }
1271 case ISD::INSERT_VECTOR_ELT: {
1272 if (VT.isScalableVector())
1273 return false;
1274 SDValue Vec = Op.getOperand(i: 0);
1275 SDValue Scl = Op.getOperand(i: 1);
1276 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
1277 EVT VecVT = Vec.getValueType();
1278
1279 // If index isn't constant, assume we need all vector elements AND the
1280 // inserted element.
1281 APInt DemandedVecElts(DemandedElts);
1282 if (CIdx && CIdx->getAPIntValue().ult(RHS: VecVT.getVectorNumElements())) {
1283 unsigned Idx = CIdx->getZExtValue();
1284 DemandedVecElts.clearBit(BitPosition: Idx);
1285
1286 // Inserted element is not required.
1287 if (!DemandedElts[Idx])
1288 return TLO.CombineTo(O: Op, N: Vec);
1289 }
1290
1291 KnownBits KnownScl;
1292 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293 APInt DemandedSclBits = DemandedBits.zextOrTrunc(width: NumSclBits);
1294 if (SimplifyDemandedBits(Op: Scl, DemandedBits: DemandedSclBits, Known&: KnownScl, TLO, Depth: Depth + 1))
1295 return true;
1296
1297 Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299 KnownBits KnownVec;
1300 if (SimplifyDemandedBits(Op: Vec, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedVecElts, Known&: KnownVec, TLO,
1301 Depth: Depth + 1))
1302 return true;
1303
1304 if (!!DemandedVecElts)
1305 Known = Known.intersectWith(RHS: KnownVec);
1306
1307 return false;
1308 }
1309 case ISD::INSERT_SUBVECTOR: {
1310 if (VT.isScalableVector())
1311 return false;
1312 // Demand any elements from the subvector and the remainder from the src its
1313 // inserted into.
1314 SDValue Src = Op.getOperand(i: 0);
1315 SDValue Sub = Op.getOperand(i: 1);
1316 uint64_t Idx = Op.getConstantOperandVal(i: 2);
1317 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
1319 APInt DemandedSrcElts = DemandedElts;
1320 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
1321
1322 KnownBits KnownSub, KnownSrc;
1323 if (SimplifyDemandedBits(Op: Sub, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts, Known&: KnownSub, TLO,
1324 Depth: Depth + 1))
1325 return true;
1326 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known&: KnownSrc, TLO,
1327 Depth: Depth + 1))
1328 return true;
1329
1330 Known.setAllConflict();
1331 if (!!DemandedSubElts)
1332 Known = Known.intersectWith(RHS: KnownSub);
1333 if (!!DemandedSrcElts)
1334 Known = Known.intersectWith(RHS: KnownSrc);
1335
1336 // Attempt to avoid multi-use src if we don't need anything from it.
1337 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1338 !DemandedSrcElts.isAllOnes()) {
1339 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1340 Op: Sub, DemandedBits, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
1341 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1342 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1343 if (NewSub || NewSrc) {
1344 NewSub = NewSub ? NewSub : Sub;
1345 NewSrc = NewSrc ? NewSrc : Src;
1346 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: NewSrc, N2: NewSub,
1347 N3: Op.getOperand(i: 2));
1348 return TLO.CombineTo(O: Op, N: NewOp);
1349 }
1350 }
1351 break;
1352 }
1353 case ISD::EXTRACT_SUBVECTOR: {
1354 if (VT.isScalableVector())
1355 return false;
1356 // Offset the demanded elts by the subvector index.
1357 SDValue Src = Op.getOperand(i: 0);
1358 if (Src.getValueType().isScalableVector())
1359 break;
1360 uint64_t Idx = Op.getConstantOperandVal(i: 1);
1361 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
1363
1364 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSrcElts, Known, TLO,
1365 Depth: Depth + 1))
1366 return true;
1367
1368 // Attempt to avoid multi-use src if we don't need anything from it.
1369 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1370 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1371 Op: Src, DemandedBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
1372 if (DemandedSrc) {
1373 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc,
1374 N2: Op.getOperand(i: 1));
1375 return TLO.CombineTo(O: Op, N: NewOp);
1376 }
1377 }
1378 break;
1379 }
1380 case ISD::CONCAT_VECTORS: {
1381 if (VT.isScalableVector())
1382 return false;
1383 Known.setAllConflict();
1384 EVT SubVT = Op.getOperand(i: 0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
1390 if (SimplifyDemandedBits(Op: Op.getOperand(i), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedSubElts,
1391 Known&: Known2, TLO, Depth: Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(RHS: Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(i: 0);
1411 SDValue Op1 = Op.getOperand(i: 1);
1412
1413 Known.setAllConflict();
1414 if (!!DemandedLHS) {
1415 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedLHS, Known&: Known2, TLO,
1416 Depth: Depth + 1))
1417 return true;
1418 Known = Known.intersectWith(RHS: Known2);
1419 }
1420 if (!!DemandedRHS) {
1421 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedRHS, Known&: Known2, TLO,
1422 Depth: Depth + 1))
1423 return true;
1424 Known = Known.intersectWith(RHS: Known2);
1425 }
1426
1427 // Attempt to avoid multi-use ops if we don't need anything from them.
1428 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1429 Op: Op0, DemandedBits, DemandedElts: DemandedLHS, DAG&: TLO.DAG, Depth: Depth + 1);
1430 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1431 Op: Op1, DemandedBits, DemandedElts: DemandedRHS, DAG&: TLO.DAG, Depth: Depth + 1);
1432 if (DemandedOp0 || DemandedOp1) {
1433 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, N1: Op0, N2: Op1, Mask: ShuffleMask);
1436 return TLO.CombineTo(O: Op, N: NewOp);
1437 }
1438 }
1439 break;
1440 }
1441 case ISD::AND: {
1442 SDValue Op0 = Op.getOperand(i: 0);
1443 SDValue Op1 = Op.getOperand(i: 1);
1444
1445 // If the RHS is a constant, check to see if the LHS would be zero without
1446 // using the bits from the RHS. Below, we use knowledge about the RHS to
1447 // simplify the LHS, here we're using information from the LHS to simplify
1448 // the RHS.
1449 if (ConstantSDNode *RHSC = isConstOrConstSplat(N: Op1, DemandedElts)) {
1450 // Do not increment Depth here; that can cause an infinite loop.
1451 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth);
1452 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1453 if ((LHSKnown.Zero & DemandedBits) ==
1454 (~RHSC->getAPIntValue() & DemandedBits))
1455 return TLO.CombineTo(O: Op, N: Op0);
1456
1457 // If any of the set bits in the RHS are known zero on the LHS, shrink
1458 // the constant.
1459 if (ShrinkDemandedConstant(Op, DemandedBits: ~LHSKnown.Zero & DemandedBits,
1460 DemandedElts, TLO))
1461 return true;
1462
1463 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464 // constant, but if this 'and' is only clearing bits that were just set by
1465 // the xor, then this 'and' can be eliminated by shrinking the mask of
1466 // the xor. For example, for a 32-bit X:
1467 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468 if (isBitwiseNot(V: Op0) && Op0.hasOneUse() &&
1469 LHSKnown.One == ~RHSC->getAPIntValue()) {
1470 SDValue Xor = TLO.DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1471 return TLO.CombineTo(O: Op, N: Xor);
1472 }
1473 }
1474
1475 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1476 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1477 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1478 (Op0.getOperand(i: 0).isUndef() ||
1479 ISD::isBuildVectorOfConstantSDNodes(N: Op0.getOperand(i: 0).getNode())) &&
1480 Op0->hasOneUse()) {
1481 unsigned NumSubElts =
1482 Op0.getOperand(i: 1).getValueType().getVectorNumElements();
1483 unsigned SubIdx = Op0.getConstantOperandVal(i: 2);
1484 APInt DemandedSub =
1485 APInt::getBitsSet(numBits: NumElts, loBit: SubIdx, hiBit: SubIdx + NumSubElts);
1486 KnownBits KnownSubMask =
1487 TLO.DAG.computeKnownBits(Op: Op1, DemandedElts: DemandedSub & DemandedElts, Depth: Depth + 1);
1488 if (DemandedBits.isSubsetOf(RHS: KnownSubMask.One)) {
1489 SDValue NewAnd =
1490 TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: Op1);
1491 SDValue NewInsert =
1492 TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL: dl, VT, N1: NewAnd,
1493 N2: Op0.getOperand(i: 1), N3: Op0.getOperand(i: 2));
1494 return TLO.CombineTo(O: Op, N: NewInsert);
1495 }
1496 }
1497
1498 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1499 Depth: Depth + 1))
1500 return true;
1501 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.Zero & DemandedBits, OriginalDemandedElts: DemandedElts,
1502 Known&: Known2, TLO, Depth: Depth + 1))
1503 return true;
1504
1505 // If all of the demanded bits are known one on one side, return the other.
1506 // These bits cannot contribute to the result of the 'and'.
1507 if (DemandedBits.isSubsetOf(RHS: Known2.Zero | Known.One))
1508 return TLO.CombineTo(O: Op, N: Op0);
1509 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.One))
1510 return TLO.CombineTo(O: Op, N: Op1);
1511 // If all of the demanded bits in the inputs are known zeros, return zero.
1512 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1513 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: dl, VT));
1514 // If the RHS is a constant, see if we can simplify it.
1515 if (ShrinkDemandedConstant(Op, DemandedBits: ~Known2.Zero & DemandedBits, DemandedElts,
1516 TLO))
1517 return true;
1518 // If the operation can be done in a smaller type, do so.
1519 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1520 return true;
1521
1522 // Attempt to avoid multi-use ops if we don't need anything from them.
1523 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1524 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1525 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1526 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1527 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1528 if (DemandedOp0 || DemandedOp1) {
1529 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1530 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1531 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1532 return TLO.CombineTo(O: Op, N: NewOp);
1533 }
1534 }
1535
1536 Known &= Known2;
1537 break;
1538 }
1539 case ISD::OR: {
1540 SDValue Op0 = Op.getOperand(i: 0);
1541 SDValue Op1 = Op.getOperand(i: 1);
1542 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1543 Depth: Depth + 1)) {
1544 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1545 return true;
1546 }
1547
1548 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1549 Known&: Known2, TLO, Depth: Depth + 1)) {
1550 Op->dropFlags(Mask: SDNodeFlags::Disjoint);
1551 return true;
1552 }
1553
1554 // If all of the demanded bits are known zero on one side, return the other.
1555 // These bits cannot contribute to the result of the 'or'.
1556 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1557 return TLO.CombineTo(O: Op, N: Op0);
1558 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1559 return TLO.CombineTo(O: Op, N: Op1);
1560 // If the RHS is a constant, see if we can simplify it.
1561 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1562 return true;
1563 // If the operation can be done in a smaller type, do so.
1564 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1565 return true;
1566
1567 // Attempt to avoid multi-use ops if we don't need anything from them.
1568 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1569 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1570 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1571 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1572 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1573 if (DemandedOp0 || DemandedOp1) {
1574 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1575 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1576 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1577 return TLO.CombineTo(O: Op, N: NewOp);
1578 }
1579 }
1580
1581 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1582 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1583 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1584 Op0->hasOneUse() && Op1->hasOneUse()) {
1585 // Attempt to match all commutations - m_c_Or would've been useful!
1586 for (int I = 0; I != 2; ++I) {
1587 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1588 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1589 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1590 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1591 if (Alt.getOpcode() == ISD::OR) {
1592 for (int J = 0; J != 2; ++J) {
1593 if (X == Alt.getOperand(i: J)) {
1594 SDValue Y = Alt.getOperand(i: 1 - J);
1595 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1596 Ops: {C1, C2})) {
1597 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1598 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1599 return TLO.CombineTo(
1600 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1601 }
1602 }
1603 }
1604 }
1605 }
1606 }
1607
1608 Known |= Known2;
1609 break;
1610 }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    // Simplify each operand against the demanded bits; Known holds Op1's
    // known bits and Known2 holds Op0's after these calls.
    if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
                             Depth: Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                             Depth: Depth + 1))
      return true;

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(RHS: Known.Zero))
      return TLO.CombineTo(O: Op, N: Op0);
    if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
      return TLO.CombineTo(O: Op, N: Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));

    // Constant (or constant splat) RHS enables several folds below.
    ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
        return TLO.CombineTo(O: Op, N: New);
      }

      unsigned Op0Opcode = Op0.getOpcode();
      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
        if (ConstantSDNode *ShiftC =
                isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
          // Don't crash on an oversized shift. We can not guarantee that a
          // bogus shift has been simplified to undef.
          if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            // Ones = mask of bits the shifted value can still populate.
            APInt Ones = APInt::getAllOnes(numBits: BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
                                         : Ones.lshr(shiftAmt: ShiftAmt);
            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
                isDesirableToCommuteXorWithShift(N: Op.getNode())) {
              // If the xor constant is a demanded mask, do a 'not' before the
              // shift:
              // xor (X << ShiftC), XorC --> (not X) << ShiftC
              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
              SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
              return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
                                                     N2: Op0.getOperand(i: 1)));
            }
          }
        }
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }

    // Combine the known bits of both operands through the XOR.
    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    // Simplify the false (operand 2) and true (operand 1) values against the
    // demanded bits. The condition (operand 0) is not bit-simplified here.
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known, TLO, Depth: Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known&: Known2, TLO, Depth: Depth + 1))
      return true;

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(RHS: Known2);
    break;
  case ISD::VSELECT:
    // Same as SELECT but with a per-element vector condition: simplify both
    // value operands; either may be chosen per lane, so known bits must hold
    // for both. (No ShrinkDemandedConstant here, unlike SELECT/SELECT_CC.)
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known, TLO, Depth: Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known&: Known2, TLO, Depth: Depth + 1))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(RHS: Known2);
    break;
  case ISD::SELECT_CC:
    // SELECT_CC carries the compared values in operands 0/1; the selected
    // results are operands 2 (true) and 3 (false) - simplify only those.
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known, TLO, Depth: Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known&: Known2, TLO, Depth: Depth + 1))
      return true;

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(RHS: Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
    // If we're testing X < 0, X >= 0, X <= -1 or X > -1
    // (X is of integer type) then we only need the sign mask of the previous
    // result
    if (Op1.getValueType().isInteger() &&
        (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(V: Op1)) ||
         ((CC == ISD::SETLE || CC == ISD::SETGT) &&
          isAllOnesOrAllOnesSplat(V: Op1)))) {
      KnownBits KnownOp0;
      // Only the sign bit of Op0 feeds these sign tests.
      if (SimplifyDemandedBits(
              Op: Op0, OriginalDemandedBits: APInt::getSignMask(BitWidth: Op0.getScalarValueSizeInBits()),
              OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1))
        return true;
      // If (1) we only need the sign-bit, (2) the setcc operands are the same
      // width as the setcc result, and (3) the result of a setcc conforms to 0
      // or -1, we may be able to bypass the setcc.
      if (DemandedBits.isSignMask() &&
          Op0.getScalarValueSizeInBits() == BitWidth &&
          getBooleanContents(Type: Op0.getValueType()) ==
              BooleanContent::ZeroOrNegativeOneBooleanContent) {
        // If we remove a >= 0 or > -1 (for integers), we need to introduce a
        // NOT Operation
        if (CC == ISD::SETGE || CC == ISD::SETGT) {
          SDLoc DL(Op);
          EVT VT = Op0.getValueType();
          SDValue NotOp0 = TLO.DAG.getNOT(DL, Val: Op0, VT);
          return TLO.CombineTo(O: Op, N: NotOp0);
        }
        // X < 0 / X <= -1: the sign bit of Op0 IS the result.
        return TLO.CombineTo(O: Op, N: Op0);
      }
    }
    // With zero-or-one booleans, all bits above bit 0 are known zero.
    if (getBooleanContents(Type: Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    EVT ShiftVT = Op1.getValueType();

    // Uniform, in-range shift amount: the demanded mask can be shifted
    // through precisely.
    if (std::optional<unsigned> KnownSA =
            TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(O: Op, N: Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
          if (std::optional<unsigned> InnerSA =
                  TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
            unsigned C1 = *InnerSA;
            // Net shift is |ShAmt - C1|; direction depends on the sign.
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
            return TLO.CombineTo(
                O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(i: 0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
          SDValue NarrowShl = TLO.DAG.getNode(
              Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
              N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
          return TLO.CombineTo(
              O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
            InnerOp.hasOneUse()) {
          if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
                  V: InnerOp, DemandedElts, Depth: Depth + 2)) {
            unsigned InnerShAmt = *SA2;
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countr_zero() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
                                               Operand: InnerOp.getOperand(i: 0));
              return TLO.CombineTo(
                  O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
            }
          }
        }
      }

      // Bits demanded from the source are the demanded bits shifted back down.
      APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
      if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
                               Depth: Depth + 1)) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Op->dropFlags(Mask: SDNodeFlags::NoWrap);
        return true;
      }
      Known <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }

      // TODO: Can we merge this fold with the one below?
      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
          Op.getNode()->hasOneUse()) {
        // Search for the smallest integer type with free casts to and from
        // Op's type. For expedience, just check power-of-2 integer types.
        unsigned DemandedSize = DemandedBits.getActiveBits();
        for (unsigned SmallVTBits = llvm::bit_ceil(Value: DemandedSize);
             SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(A: SmallVTBits)) {
          EVT SmallVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: SmallVTBits);
          if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: SmallVT) &&
              isTypeDesirableForOp(ISD::SHL, VT: SmallVT) &&
              isTruncateFree(FromVT: VT, ToVT: SmallVT) && isZExtFree(FromTy: SmallVT, ToTy: VT) &&
              (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: SmallVT))) {
            assert(DemandedSize <= SmallVTBits &&
                   "Narrowed below demanded bits?");
            // We found a type with free casts.
            SDValue NarrowShl = TLO.DAG.getNode(
                Opcode: ISD::SHL, DL: dl, VT: SmallVT,
                N1: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: SmallVT, Operand: Op.getOperand(i: 0)),
                N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: SmallVT, DL: dl));
            return TLO.CombineTo(
                O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
          }
        }
      }

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
      // Only do this if we demand the upper half so the knownbits are correct.
      unsigned HalfWidth = BitWidth / 2;
      if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
          DemandedBits.countLeadingOnes() >= HalfWidth) {
        EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
        if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
            isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
            isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
            (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
          // If we're demanding the upper bits at all, we must ensure
          // that the upper bits of the shift result are known to be zero,
          // which is equivalent to the narrow shift being NUW.
          if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
            bool IsNSW = Known.countMinSignBits() > HalfWidth;
            SDNodeFlags Flags;
            Flags.setNoSignedWrap(IsNSW);
            Flags.setNoUnsignedWrap(IsNUW);
            SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
            SDValue NewShiftAmt =
                TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
            SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
                                               N2: NewShiftAmt, Flags);
            SDValue NewExt =
                TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
            return TLO.CombineTo(O: Op, N: NewExt);
          }
        }
      }
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedBits.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
        if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
                                 Depth: Depth + 1)) {
          // Disable the nsw and nuw flags. We can no longer guarantee that we
          // won't wrap after simplification.
          Op->dropFlags(Mask: SDNodeFlags::NoWrap);
          return true;
        }
        // Variable shift amount: Known from the source is not valid for the
        // shifted result, so discard it.
        Known.resetAll();
      }
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(O: Op, N: Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    EVT ShiftVT = Op1.getValueType();

    // Uniform, in-range shift amount: shift the demanded mask through exactly.
    if (std::optional<unsigned> KnownSA =
            TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(O: Op, N: Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
          if (std::optional<unsigned> InnerSA =
                  TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
            unsigned C1 = *InnerSA;
            // Net shift is |ShAmt - C1|; direction depends on the sign.
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
            return TLO.CombineTo(
                O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
          }
        }
      }

      // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
      // single sra. We can do this if the top bits are never demanded.
      if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
        if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
          if (std::optional<unsigned> InnerSA =
                  TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
            unsigned C1 = *InnerSA;
            // Clamp the combined shift amount if it exceeds the bit width.
            unsigned Combined = std::min(a: C1 + ShAmt, b: BitWidth - 1);
            SDValue NewSA = TLO.DAG.getConstant(Val: Combined, DL: dl, VT: ShiftVT);
            return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT,
                                                   N1: Op0.getOperand(i: 0), N2: NewSA));
          }
        }
      }

      // Bits demanded from the source are the demanded bits shifted back up.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
      if ((BitWidth % 2) == 0 && !VT.isVector()) {
        APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
        EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
        if (isNarrowingProfitable(N: Op.getNode(), SrcVT: VT, DestVT: HalfVT) &&
            isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
            isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
            (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
            ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
             TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
          SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
          SDValue NewShiftAmt =
              TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: HalfVT, DL: dl);
          SDValue NewShift =
              TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
          return TLO.CombineTo(
              O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
                               Depth: Depth + 1))
        return true;
      Known >>= ShAmt;
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }
    } else {
      // Use generic knownbits computation as it has support for non-uniform
      // shift amounts.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            TLO.DAG.getValidMaximumShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return TLO.CombineTo(O: Op, N: Op0);
      }
    }

    // Try to match AVG patterns (after shift simplification).
    if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
                                        DemandedElts, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: AVG);

    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
    if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(O: Op, N: Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOne())
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));

    // Uniform, in-range shift amount: shift the demanded mask through exactly.
    if (std::optional<unsigned> KnownSA =
            TLO.DAG.getValidShiftAmount(V: Op, DemandedElts, Depth: Depth + 1)) {
      unsigned ShAmt = *KnownSA;
      if (ShAmt == 0)
        return TLO.CombineTo(O: Op, N: Op0);

      // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
      // supports sext_inreg.
      if (Op0.getOpcode() == ISD::SHL) {
        if (std::optional<unsigned> InnerSA =
                TLO.DAG.getValidShiftAmount(V: Op0, DemandedElts, Depth: Depth + 2)) {
          unsigned LowBits = BitWidth - ShAmt;
          EVT ExtVT = VT.changeElementType(
              Context&: *TLO.DAG.getContext(),
              EltVT: EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits));

          if (*InnerSA == ShAmt) {
            if (!TLO.LegalOperations() ||
                getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
              return TLO.CombineTo(
                  O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
                                      N1: Op0.getOperand(i: 0),
                                      N2: TLO.DAG.getValueType(ExtVT)));

            // Even if we can't convert to sext_inreg, we might be able to
            // remove this shift pair if the input is already sign extended.
            unsigned NumSignBits =
                TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
            if (NumSignBits > ShAmt)
              return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
          }
        }
      }

      // Bits demanded from the source are the demanded bits shifted back up.
      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countl_zero() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
                               Depth: Depth + 1))
        return true;
      Known >>= ShAmt;

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countl_zero() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
      }

      // If exactly one bit is demanded, it must be a copy of the sign bit, so
      // a bigger logical shift right can produce it directly.
      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }
    }

    // Try to match AVG patterns (after shift simplification).
    if (SDValue AVG = combineShiftToAVG(Op, TLO, TLI: *this, DemandedBits,
                                        DemandedElts, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: AVG);

    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    SDValue Op2 = Op.getOperand(i: 2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    // Constant (or splat) funnel shift amount, reduced modulo the bit width.
    if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
                                 Known, TLO, Depth: Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                               Depth: Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
                               Depth: Depth + 1))
        return true;

      // Shift each operand's known bits into position and merge; the two
      // halves cover disjoint bit ranges of the result.
      Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
      Known = Known.unionWith(RHS: Known2);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
          !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
            Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
        if (DemandedOp0 || DemandedOp1) {
          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
          SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
                                          N2: DemandedOp1, N3: Op2);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }
    }

    if (isPowerOf2_32(Value: BitWidth)) {
      // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
      // iff we're guaranteed not to use Op0.
      // TODO: Add FSHL equivalent?
      if (!IsFSHL && !DemandedBits.isAllOnes() &&
          (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT))) {
        KnownBits KnownAmt =
            TLO.DAG.computeKnownBits(Op: Op2, DemandedElts, Depth: Depth + 1);
        unsigned MaxShiftAmt =
            KnownAmt.getMaxValue().getLimitedValue(Limit: BitWidth - 1);
        // Check we don't demand any shifted bits outside Op1.
        if (DemandedBits.countl_zero() >= MaxShiftAmt) {
          EVT AmtVT = Op2.getValueType();
          // Mask the amount so the SRL matches the funnel shift's modulo
          // semantics.
          SDValue NewAmt =
              TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Op2,
                              N2: TLO.DAG.getConstant(Val: BitWidth - 1, DL: dl, VT: AmtVT));
          SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op1, N2: NewAmt);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }

      // For pow-2 bitwidths we only demand the bottom modulo amt bits.
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                               Depth: Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: Op0);

    // Constant (or splat) rotate amount, reduced modulo the bit width.
    if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
      APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                               Depth: Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(O: Op, N: Op0);

      // See if we don't demand either half of the rotated bits.
      // If only the "shifted up" half is demanded, a plain SHL suffices...
      if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
          DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
      }
      // ...and if only the "shifted down" half is demanded, a plain SRL.
      if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
          DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(Value: BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                               Depth: Depth + 1))
        return true;
    }
    break;
  }
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: {
    unsigned Opc = Op.getOpcode();
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    // If we're only demanding signbits, then we can simplify to OR/AND node.
    unsigned BitOp =
        (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
    unsigned NumSignBits =
        std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
                 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
    if (NumSignBits >= NumDemandedUpperBits)
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));

    // Check if one arg is always less/greater than (or equal) to the other arg.
    // If the comparison result is provable from known bits, the min/max
    // collapses to one of its operands.
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
    switch (Opc) {
    case ISD::SMIN:
      if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
      if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
      Known = KnownBits::smin(LHS: Known0, RHS: Known1);
      break;
    case ISD::SMAX:
      if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
      if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
      Known = KnownBits::smax(LHS: Known0, RHS: Known1);
      break;
    case ISD::UMIN:
      if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
      if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
      Known = KnownBits::umin(LHS: Known0, RHS: Known1);
      break;
    case ISD::UMAX:
      if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
      if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
        return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
      Known = KnownBits::umax(LHS: Known0, RHS: Known1);
      break;
    }
    break;
  }
  case ISD::BITREVERSE: {
    // Bit-reversal is its own inverse: reverse the demanded mask into the
    // source's bit order, simplify, then reverse the known bits back.
    SDValue Src = Op.getOperand(i: 0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                             Depth: Depth + 1))
      return true;
    Known = Known2.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(i: 0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
    unsigned NLZ = DemandedBits.countl_zero();
    unsigned NTZ = DemandedBits.countr_zero();

    // Round NTZ down to the next byte. If we have 11 trailing zeros, then
    // we need all the bits down to bit 8. Likewise, round NLZ. If we
    // have 14 leading zeros, round to 8.
    NLZ = alignDown(Value: NLZ, Align: 8);
    NTZ = alignDown(Value: NTZ, Align: 8);
    // If we need exactly one byte, we can do this transformation.
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
        SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }

    // Otherwise, byte-swap the demanded mask onto the source, simplify, and
    // byte-swap the resulting known bits back into the result's order.
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                             Depth: Depth + 1))
      return true;
    Known = Known2.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
                                                 Operand: Op.getOperand(i: 0)));

    // No further simplification; just report the known bits of the popcount.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(i: 0);
    // ExVT is the (narrower) type whose sign bit is being replicated.
    EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    // Shifting the sign bit of the inner value to the top with SHL is enough.
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        SDValue ShiftAmt =
            TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
        return TLO.CombineTo(O: Op,
                             N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(O: Op, N: Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
                             Depth: Depth + 1))
      return true;

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));

    // Propagate the known bits: either all top bits are ones (sign known set)
    // or the top bits are unknown (mask both sets down to the inner width).
    APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    // A BUILD_PAIR concatenates two half-width values; split the demanded
    // mask into its low and high halves and simplify each operand.
    EVT HalfVT = Op.getOperand(i: 0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
      return true;

    // Recombine the per-half known bits into full-width knowledge.
    Known = KnownHi.concat(Lo: KnownLo);
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ZERO_EXTEND: {
    SDValue Src = Op.getOperand(i: 0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
    }

    // Narrow the demanded bits/elts to the source width and simplify it.
    APInt InDemandedBits = DemandedBits.trunc(width: InBits);
    APInt InDemandedElts = DemandedElts.zext(width: InElts);
    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
                             Depth: Depth + 1)) {
      // The simplified source may no longer be provably non-negative.
      Op->dropFlags(Mask: SDNodeFlags::NonNeg);
      return true;
    }
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::SIGN_EXTEND: {
    SDValue Src = Op.getOperand(i: 0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    APInt InDemandedElts = DemandedElts.zext(width: InElts);
    APInt InDemandedBits = DemandedBits.trunc(width: InBits);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));

      // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
      if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
          TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
              InBits) {
        unsigned Opc =
            IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
        if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
          return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
      }
    }

    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
                             Depth: Depth + 1))
      return true;
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
        SDNodeFlags Flags;
        // The nonneg flag only applies to the scalar/full-vector form.
        if (!IsVecInReg)
          Flags |= SDNodeFlags::NonNeg;
        return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
      }
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ANY_EXTEND: {
    SDValue Src = Op.getOperand(i: 0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsLE && IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits())
      return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));

    // Narrow the demanded bits/elts to the source width and simplify it;
    // the extended high bits are undefined, so anyext the known bits.
    APInt InDemandedBits = DemandedBits.trunc(width: InBits);
    APInt InDemandedElts = DemandedElts.zext(width: InElts);
    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
                             Depth: Depth + 1))
      return true;
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(i: 0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
                             Depth: Depth + 1)) {
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Op->dropFlags(Mask: SDNodeFlags::NoWrap);
      return true;
    }
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
      return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    switch (Src.getOpcode()) {
    default:
      break;
    case ISD::SRL:
      // Shrink SRL by a constant if none of the high bits shifted in are
      // demanded.
      if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
        // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
        // undesirable.
        break;

      if (Src.getNode()->hasOneUse()) {
        if (isTruncateFree(Val: Src, VT2: VT) &&
            !isTruncateFree(FromVT: Src.getValueType(), ToVT: VT)) {
          // If truncate is only free at trunc(srl), do not turn it into
          // srl(trunc). The check is done by first check the truncate is free
          // at Src's opcode(srl), then check the truncate is not done by
          // referencing sub-register. In test, if both trunc(srl) and
          // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
          // trunc(srl)'s trunc is free, trunc(srl) is better.
          break;
        }

        // Only handle shift amounts that are provably valid constants.
        std::optional<unsigned> ShAmtC =
            TLO.DAG.getValidShiftAmount(V: Src, DemandedElts, Depth: Depth + 2);
        if (!ShAmtC || *ShAmtC >= BitWidth)
          break;
        unsigned ShVal = *ShAmtC;

        // Compute which bits of the truncated result came from the wide
        // operand's high (truncated-away) bits after the shift.
        APInt HighBits =
            APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShiftAmt: ShVal);
        HighBits = HighBits.trunc(width: BitWidth);
        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed. Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl);
          SDValue NewTrunc =
              TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
          return TLO.CombineTo(
              O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
        }
      }
      break;
    }

    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
    APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
                             TLO, Depth: Depth + 1))
      return true;

    // The asserted high bits are zero; keep Known.One consistent with that.
    Known.Zero |= ~InMask;
    Known.One &= (~Known.Zero);
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(i: 0);
    SDValue Idx = Op.getOperand(i: 1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    // Scalable vectors have no fixed element count to reason about.
    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
      if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);

    if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
                             Depth: Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return false;
    SDValue Src = Op.getOperand(i: 0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT: MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
        return TLO.CombineTo(O: Op,
                             N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      // Wide destination scalar/elt is built from Scale narrower src elts.
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        DemandedSrcBits |= DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
      }
      // Recursive calls below may turn not demanded elements into poison, so we
      // need to demand all smaller source elements that maps to a demanded
      // destination element.
      APInt DemandedSrcElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
                                     KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
                               Known&: KnownSrcBits, TLO, Depth: Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      // Each src elt splits into Scale narrower destination elts.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
                                       KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
                               Known&: KnownSrcBits, TLO, Depth: Depth + 1))
        return true;

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
                Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
          SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
          return TLO.CombineTo(O: Op, N: NewOp);
        }
      }
    }

    // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::MUL:
    if (DemandedBits.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
      // odd (has LSB set), then the left-shifted low bit of X is the answer.
      unsigned CTZ = DemandedBits.countr_zero();
      ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
      if (C && C->getAPIntValue().countr_zero() == CTZ) {
        SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
        SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
        return TLO.CombineTo(O: Op, N: Shl);
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
    if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
      SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
      SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
      return TLO.CombineTo(O: Op, N: And1);
    }
    [[fallthrough]];
  case ISD::PTRADD:
    if (Op.getOperand(i: 0).getValueType() != Op.getOperand(i: 1).getValueType())
      break;
    // PTRADD behaves like ADD if pointers are represented as integers.
    [[fallthrough]];
  case ISD::ADD:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countl_zero();
    APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
    KnownBits KnownOp0, KnownOp1;
    // For MUL, trailing zeros in the RHS shift the LHS's contribution up, so
    // correspondingly fewer high bits of the LHS are demanded.
    auto GetDemandedBitsLHSMask = [&](APInt Demanded,
                                      const KnownBits &KnownRHS) {
      if (Op.getOpcode() == ISD::MUL)
        Demanded.clearHighBits(hiBits: KnownRHS.countMinTrailingZeros());
      return Demanded;
    };
    if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
                             Depth: Depth + 1) ||
        SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: GetDemandedBitsLHSMask(LoMask, KnownOp1),
                             OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO, Depth: Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Op->dropFlags(Mask: SDNodeFlags::NoWrap);
      return true;
    }

    // neg x with only low bit demanded is simply x.
    if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
        isNullConstant(V: Op0))
      return TLO.CombineTo(O: Op, N: Op1);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1,
                                        Flags: Flags & ~SDNodeFlags::NoWrap);
        return TLO.CombineTo(O: Op, N: NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(N: Op1);
    APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
    if (C && !C->isAllOnes() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnes()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1,
                                      Flags: Flags & ~SDNodeFlags::NoWrap);
      return TLO.CombineTo(O: Op, N: NewOp);
    }

    // Match a multiply with a disguised negated-power-of-2 and convert to a
    // an equivalent shift-left amount.
    // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
    // Returns 0 when the pattern does not match.
    auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
      if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
        return 0;

      // Don't touch opaque constants. Also, ignore zero and power-of-2
      // multiplies. Those will get folded later.
      ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
      if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
          !MulC->getAPIntValue().isPowerOf2()) {
        APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
        if (UnmaskedC.isNegatedPowerOf2())
          return (-UnmaskedC).logBase2();
      }
      return 0;
    };

    // Build "Y <op> (X << ShlAmt)" and replace Op with it.
    auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
                       unsigned ShlAmt) {
      SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
      SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
      SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
      return TLO.CombineTo(O: Op, N: Res);
    };

    if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
      if (Op.getOpcode() == ISD::ADD) {
        // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op0))
          return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
        // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op1))
          return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
      }
      if (Op.getOpcode() == ISD::SUB) {
        // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
        if (unsigned ShAmt = getShiftLeftAmt(Op1))
          return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
      }
    }

    if (Op.getOpcode() == ISD::MUL) {
      Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
    } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
      Known = KnownBits::computeForAddSub(
          Add: Op.getOpcode() != ISD::SUB, NSW: Flags.hasNoSignedWrap(),
          NUW: Flags.hasNoUnsignedWrap(), LHS: KnownOp0, RHS: KnownOp1);
    }
    break;
  }
  case ISD::FABS: {
    SDValue Op0 = Op.getOperand(i: 0);
    APInt SignMask = APInt::getSignMask(BitWidth);

    // FABS only changes the sign bit; if it isn't demanded, drop the op.
    if (!DemandedBits.intersects(RHS: SignMask))
      return TLO.CombineTo(O: Op, N: Op0);

    if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
                             Depth: Depth + 1))
      return true;

    // fabs(x) is x if x is known non-negative, -x if x is known negative.
    if (Known.isNonNegative())
      return TLO.CombineTo(O: Op, N: Op0);
    if (Known.isNegative())
      return TLO.CombineTo(
          O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));

    // The result's sign bit is always cleared.
    Known.Zero |= SignMask;
    Known.One &= ~SignMask;

    break;
  }
  case ISD::FCOPYSIGN: {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    // The operands may have different widths; only each one's own sign bit
    // participates in the copysign.
    unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
    unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
    APInt SignMask0 = APInt::getSignMask(BitWidth: BitWidth0);
    APInt SignMask1 = APInt::getSignMask(BitWidth: BitWidth1);

    // FCOPYSIGN only changes Op0's sign bit; if it isn't demanded, drop it.
    if (!DemandedBits.intersects(RHS: SignMask0))
      return TLO.CombineTo(O: Op, N: Op0);

    // From Op0 we need the demanded magnitude bits; from Op1 only its sign.
    if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~SignMask0 & DemandedBits, OriginalDemandedElts: DemandedElts,
                             Known, TLO, Depth: Depth + 1) ||
        SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: SignMask1, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
                             Depth: Depth + 1))
      return true;

    // Known sign of Op1 turns the copysign into fabs / fneg(fabs).
    if (Known2.isNonNegative())
      return TLO.CombineTo(
          O: Op, N: TLO.DAG.getNode(Opcode: ISD::FABS, DL: dl, VT, Operand: Op0, Flags: Op->getFlags()));

    if (Known2.isNegative())
      return TLO.CombineTo(
          O: Op, N: TLO.DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT,
                                Operand: TLO.DAG.getNode(Opcode: ISD::FABS, DL: SDLoc(Op0), VT, Operand: Op0)));

    // The sign bit of the result is unknown.
    Known.Zero &= ~SignMask0;
    Known.One &= ~SignMask0;
    break;
  }
3064 case ISD::FNEG: {
3065 SDValue Op0 = Op.getOperand(i: 0);
3066 APInt SignMask = APInt::getSignMask(BitWidth);
3067
3068 if (!DemandedBits.intersects(RHS: SignMask))
3069 return TLO.CombineTo(O: Op, N: Op0);
3070
3071 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
3072 Depth: Depth + 1))
3073 return true;
3074
3075 if (!Known.isSignUnknown()) {
3076 Known.Zero ^= SignMask;
3077 Known.One ^= SignMask;
3078 }
3079
3080 break;
3081 }
3082 default:
3083 // We also ask the target about intrinsics (which could be specific to it).
3084 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3085 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3086 // TODO: Probably okay to remove after audit; here to reduce change size
3087 // in initial enablement patch for scalable vectors
3088 if (Op.getValueType().isScalableVector())
3089 break;
3090 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3091 Known, TLO, Depth))
3092 return true;
3093 break;
3094 }
3095
3096 // Just use computeKnownBits to compute output bits.
3097 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3098 break;
3099 }
3100
3101 // If we know the value of all of the demanded bits, return this as a
3102 // constant.
3103 if (!isTargetCanonicalConstantNode(Op) &&
3104 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
3105 // Avoid folding to a constant if any OpaqueConstant is involved.
3106 if (llvm::any_of(Range: Op->ops(), P: [](SDValue V) {
3107 auto *C = dyn_cast<ConstantSDNode>(Val&: V);
3108 return C && C->isOpaque();
3109 }))
3110 return false;
3111 if (VT.isInteger())
3112 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
3113 if (VT.isFloatingPoint())
3114 return TLO.CombineTo(
3115 O: Op, N: TLO.DAG.getConstantFP(Val: APFloat(VT.getFltSemantics(), Known.One),
3116 DL: dl, VT));
3117 }
3118
3119 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3120 // Try again just for the original demanded elts.
3121 // Ensure we do this AFTER constant folding above.
3122 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3123 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
3124
3125 return false;
3126}
3127
3128bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3129 const APInt &DemandedElts,
3130 DAGCombinerInfo &DCI) const {
3131 SelectionDAG &DAG = DCI.DAG;
3132 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3133 !DCI.isBeforeLegalizeOps());
3134
3135 APInt KnownUndef, KnownZero;
3136 bool Simplified =
3137 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
3138 if (Simplified) {
3139 DCI.AddToWorklist(N: Op.getNode());
3140 DCI.CommitTargetLoweringOpt(TLO);
3141 }
3142
3143 return Simplified;
3144}
3145
3146/// Given a vector binary operation and known undefined elements for each input
3147/// operand, compute whether each element of the output is undefined.
3148static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3149 const APInt &UndefOp0,
3150 const APInt &UndefOp1) {
3151 EVT VT = BO.getValueType();
3152 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3153 "Vector binop only");
3154
3155 EVT EltVT = VT.getVectorElementType();
3156 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3157 assert(UndefOp0.getBitWidth() == NumElts &&
3158 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3159
3160 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3161 const APInt &UndefVals) {
3162 if (UndefVals[Index])
3163 return DAG.getUNDEF(VT: EltVT);
3164
3165 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
3166 // Try hard to make sure that the getNode() call is not creating temporary
3167 // nodes. Ignore opaque integers because they do not constant fold.
3168 SDValue Elt = BV->getOperand(Num: Index);
3169 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
3170 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3171 return Elt;
3172 }
3173
3174 return SDValue();
3175 };
3176
3177 APInt KnownUndef = APInt::getZero(numBits: NumElts);
3178 for (unsigned i = 0; i != NumElts; ++i) {
3179 // If both inputs for this element are either constant or undef and match
3180 // the element type, compute the constant/undef result for this element of
3181 // the vector.
3182 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3183 // not handle FP constants. The code within getNode() should be refactored
3184 // to avoid the danger of creating a bogus temporary node here.
3185 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
3186 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
3187 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3188 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
3189 KnownUndef.setBit(i);
3190 }
3191 return KnownUndef;
3192}
3193
3194bool TargetLowering::SimplifyDemandedVectorElts(
3195 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3196 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3197 bool AssumeSingleUse) const {
3198 EVT VT = Op.getValueType();
3199 unsigned Opcode = Op.getOpcode();
3200 APInt DemandedElts = OriginalDemandedElts;
3201 unsigned NumElts = DemandedElts.getBitWidth();
3202 assert(VT.isVector() && "Expected vector op");
3203
3204 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3205
3206 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3207 return false;
3208
3209 // TODO: For now we assume we know nothing about scalable vectors.
3210 if (VT.isScalableVector())
3211 return false;
3212
3213 assert(VT.getVectorNumElements() == NumElts &&
3214 "Mask size mismatches value type element count!");
3215
3216 // Undef operand.
3217 if (Op.isUndef()) {
3218 KnownUndef.setAllBits();
3219 return false;
3220 }
3221
3222 // If Op has other users, assume that all elements are needed.
3223 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3224 DemandedElts.setAllBits();
3225
3226 // Not demanding any elements from Op.
3227 if (DemandedElts == 0) {
3228 KnownUndef.setAllBits();
3229 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3230 }
3231
3232 // Limit search depth.
3233 if (Depth >= SelectionDAG::MaxRecursionDepth)
3234 return false;
3235
3236 SDLoc DL(Op);
3237 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3238 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3239
3240 // Helper for demanding the specified elements and all the bits of both binary
3241 // operands.
3242 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3243 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3244 DAG&: TLO.DAG, Depth: Depth + 1);
3245 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3246 DAG&: TLO.DAG, Depth: Depth + 1);
3247 if (NewOp0 || NewOp1) {
3248 SDValue NewOp =
3249 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3250 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3251 return TLO.CombineTo(O: Op, N: NewOp);
3252 }
3253 return false;
3254 };
3255
3256 switch (Opcode) {
3257 case ISD::SCALAR_TO_VECTOR: {
3258 if (!DemandedElts[0]) {
3259 KnownUndef.setAllBits();
3260 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3261 }
3262 KnownUndef.setHighBits(NumElts - 1);
3263 break;
3264 }
3265 case ISD::BITCAST: {
3266 SDValue Src = Op.getOperand(i: 0);
3267 EVT SrcVT = Src.getValueType();
3268
3269 if (!SrcVT.isVector()) {
3270 // TODO - bigendian once we have test coverage.
3271 if (IsLE) {
3272 APInt DemandedSrcBits = APInt::getZero(numBits: SrcVT.getSizeInBits());
3273 unsigned EltSize = VT.getScalarSizeInBits();
3274 for (unsigned I = 0; I != NumElts; ++I) {
3275 if (DemandedElts[I]) {
3276 unsigned Offset = I * EltSize;
3277 DemandedSrcBits.setBits(loBit: Offset, hiBit: Offset + EltSize);
3278 }
3279 }
3280 KnownBits Known;
3281 if (SimplifyDemandedBits(Op: Src, DemandedBits: DemandedSrcBits, Known, TLO, Depth: Depth + 1))
3282 return true;
3283 }
3284 break;
3285 }
3286
3287 // Fast handling of 'identity' bitcasts.
3288 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3289 if (NumSrcElts == NumElts)
3290 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3291 KnownZero, TLO, Depth: Depth + 1);
3292
3293 APInt SrcDemandedElts, SrcZero, SrcUndef;
3294
3295 // Bitcast from 'large element' src vector to 'small element' vector, we
3296 // must demand a source element if any DemandedElt maps to it.
3297 if ((NumElts % NumSrcElts) == 0) {
3298 unsigned Scale = NumElts / NumSrcElts;
3299 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3300 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3301 TLO, Depth: Depth + 1))
3302 return true;
3303
3304 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3305 // of the large element.
3306 // TODO - bigendian once we have test coverage.
3307 if (IsLE) {
3308 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3309 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3310 for (unsigned i = 0; i != NumElts; ++i)
3311 if (DemandedElts[i]) {
3312 unsigned Ofs = (i % Scale) * EltSizeInBits;
3313 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3314 }
3315
3316 KnownBits Known;
3317 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3318 TLO, Depth: Depth + 1))
3319 return true;
3320
3321 // The bitcast has split each wide element into a number of
3322 // narrow subelements. We have just computed the Known bits
3323 // for wide elements. See if element splitting results in
3324 // some subelements being zero. Only for demanded elements!
3325 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3326 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3327 .isAllOnes())
3328 continue;
3329 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3330 unsigned Elt = Scale * SrcElt + SubElt;
3331 if (DemandedElts[Elt])
3332 KnownZero.setBit(Elt);
3333 }
3334 }
3335 }
3336
3337 // If the src element is zero/undef then all the output elements will be -
3338 // only demanded elements are guaranteed to be correct.
3339 for (unsigned i = 0; i != NumSrcElts; ++i) {
3340 if (SrcDemandedElts[i]) {
3341 if (SrcZero[i])
3342 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3343 if (SrcUndef[i])
3344 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3345 }
3346 }
3347 }
3348
3349 // Bitcast from 'small element' src vector to 'large element' vector, we
3350 // demand all smaller source elements covered by the larger demanded element
3351 // of this vector.
3352 if ((NumSrcElts % NumElts) == 0) {
3353 unsigned Scale = NumSrcElts / NumElts;
3354 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3355 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3356 TLO, Depth: Depth + 1))
3357 return true;
3358
3359 // If all the src elements covering an output element are zero/undef, then
3360 // the output element will be as well, assuming it was demanded.
3361 for (unsigned i = 0; i != NumElts; ++i) {
3362 if (DemandedElts[i]) {
3363 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3364 KnownZero.setBit(i);
3365 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3366 KnownUndef.setBit(i);
3367 }
3368 }
3369 }
3370 break;
3371 }
3372 case ISD::FREEZE: {
3373 SDValue N0 = Op.getOperand(i: 0);
3374 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(Op: N0, DemandedElts,
3375 /*PoisonOnly=*/false,
3376 Depth: Depth + 1))
3377 return TLO.CombineTo(O: Op, N: N0);
3378
3379 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3380 // freeze(op(x, ...)) -> op(freeze(x), ...).
3381 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3382 return TLO.CombineTo(
3383 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT,
3384 Operand: TLO.DAG.getFreeze(V: N0.getOperand(i: 0))));
3385 break;
3386 }
3387 case ISD::BUILD_VECTOR: {
3388 // Check all elements and simplify any unused elements with UNDEF.
3389 if (!DemandedElts.isAllOnes()) {
3390 // Don't simplify BROADCASTS.
3391 if (llvm::any_of(Range: Op->op_values(),
3392 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3393 SmallVector<SDValue, 32> Ops(Op->ops());
3394 bool Updated = false;
3395 for (unsigned i = 0; i != NumElts; ++i) {
3396 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3397 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3398 KnownUndef.setBit(i);
3399 Updated = true;
3400 }
3401 }
3402 if (Updated)
3403 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3404 }
3405 }
3406 for (unsigned i = 0; i != NumElts; ++i) {
3407 SDValue SrcOp = Op.getOperand(i);
3408 if (SrcOp.isUndef()) {
3409 KnownUndef.setBit(i);
3410 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3411 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3412 KnownZero.setBit(i);
3413 }
3414 }
3415 break;
3416 }
3417 case ISD::CONCAT_VECTORS: {
3418 EVT SubVT = Op.getOperand(i: 0).getValueType();
3419 unsigned NumSubVecs = Op.getNumOperands();
3420 unsigned NumSubElts = SubVT.getVectorNumElements();
3421 for (unsigned i = 0; i != NumSubVecs; ++i) {
3422 SDValue SubOp = Op.getOperand(i);
3423 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3424 APInt SubUndef, SubZero;
3425 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3426 Depth: Depth + 1))
3427 return true;
3428 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3429 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3430 }
3431
3432 // Attempt to avoid multi-use ops if we don't need anything from them.
3433 if (!DemandedElts.isAllOnes()) {
3434 bool FoundNewSub = false;
3435 SmallVector<SDValue, 2> DemandedSubOps;
3436 for (unsigned i = 0; i != NumSubVecs; ++i) {
3437 SDValue SubOp = Op.getOperand(i);
3438 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3439 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3440 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3441 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3442 FoundNewSub = NewSubOp ? true : FoundNewSub;
3443 }
3444 if (FoundNewSub) {
3445 SDValue NewOp =
3446 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3447 return TLO.CombineTo(O: Op, N: NewOp);
3448 }
3449 }
3450 break;
3451 }
3452 case ISD::INSERT_SUBVECTOR: {
3453 // Demand any elements from the subvector and the remainder from the src it
3454 // is inserted into.
3455 SDValue Src = Op.getOperand(i: 0);
3456 SDValue Sub = Op.getOperand(i: 1);
3457 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3458 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3459 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3460 APInt DemandedSrcElts = DemandedElts;
3461 DemandedSrcElts.clearBits(LoBit: Idx, HiBit: Idx + NumSubElts);
3462
3463 // If none of the sub operand elements are demanded, bypass the insert.
3464 if (!DemandedSubElts)
3465 return TLO.CombineTo(O: Op, N: Src);
3466
3467 APInt SubUndef, SubZero;
3468 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3469 Depth: Depth + 1))
3470 return true;
3471
3472 // If none of the src operand elements are demanded, replace it with undef.
3473 if (!DemandedSrcElts && !Src.isUndef())
3474 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3475 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3476 N3: Op.getOperand(i: 2)));
3477
3478 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3479 TLO, Depth: Depth + 1))
3480 return true;
3481 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3482 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3483
3484 // Attempt to avoid multi-use ops if we don't need anything from them.
3485 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3486 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3487 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3488 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3489 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3490 if (NewSrc || NewSub) {
3491 NewSrc = NewSrc ? NewSrc : Src;
3492 NewSub = NewSub ? NewSub : Sub;
3493 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3494 N2: NewSub, N3: Op.getOperand(i: 2));
3495 return TLO.CombineTo(O: Op, N: NewOp);
3496 }
3497 }
3498 break;
3499 }
3500 case ISD::EXTRACT_SUBVECTOR: {
3501 // Offset the demanded elts by the subvector index.
3502 SDValue Src = Op.getOperand(i: 0);
3503 if (Src.getValueType().isScalableVector())
3504 break;
3505 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3506 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3507 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3508
3509 APInt SrcUndef, SrcZero;
3510 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3511 Depth: Depth + 1))
3512 return true;
3513 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3514 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3515
3516 // Attempt to avoid multi-use ops if we don't need anything from them.
3517 if (!DemandedElts.isAllOnes()) {
3518 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3519 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3520 if (NewSrc) {
3521 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3522 N2: Op.getOperand(i: 1));
3523 return TLO.CombineTo(O: Op, N: NewOp);
3524 }
3525 }
3526 break;
3527 }
3528 case ISD::INSERT_VECTOR_ELT: {
3529 SDValue Vec = Op.getOperand(i: 0);
3530 SDValue Scl = Op.getOperand(i: 1);
3531 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3532
3533 // For a legal, constant insertion index, if we don't need this insertion
3534 // then strip it, else remove it from the demanded elts.
3535 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3536 unsigned Idx = CIdx->getZExtValue();
3537 if (!DemandedElts[Idx])
3538 return TLO.CombineTo(O: Op, N: Vec);
3539
3540 APInt DemandedVecElts(DemandedElts);
3541 DemandedVecElts.clearBit(BitPosition: Idx);
3542 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3543 KnownZero, TLO, Depth: Depth + 1))
3544 return true;
3545
3546 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3547
3548 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3549 break;
3550 }
3551
3552 APInt VecUndef, VecZero;
3553 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3554 Depth: Depth + 1))
3555 return true;
3556 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3557 break;
3558 }
3559 case ISD::VSELECT: {
3560 SDValue Sel = Op.getOperand(i: 0);
3561 SDValue LHS = Op.getOperand(i: 1);
3562 SDValue RHS = Op.getOperand(i: 2);
3563
3564 // Try to transform the select condition based on the current demanded
3565 // elements.
3566 APInt UndefSel, ZeroSel;
3567 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3568 Depth: Depth + 1))
3569 return true;
3570
3571 // See if we can simplify either vselect operand.
3572 APInt DemandedLHS(DemandedElts);
3573 APInt DemandedRHS(DemandedElts);
3574 APInt UndefLHS, ZeroLHS;
3575 APInt UndefRHS, ZeroRHS;
3576 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3577 Depth: Depth + 1))
3578 return true;
3579 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3580 Depth: Depth + 1))
3581 return true;
3582
3583 KnownUndef = UndefLHS & UndefRHS;
3584 KnownZero = ZeroLHS & ZeroRHS;
3585
3586 // If we know that the selected element is always zero, we don't need the
3587 // select value element.
3588 APInt DemandedSel = DemandedElts & ~KnownZero;
3589 if (DemandedSel != DemandedElts)
3590 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3591 Depth: Depth + 1))
3592 return true;
3593
3594 break;
3595 }
3596 case ISD::VECTOR_SHUFFLE: {
3597 SDValue LHS = Op.getOperand(i: 0);
3598 SDValue RHS = Op.getOperand(i: 1);
3599 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3600
3601 // Collect demanded elements from shuffle operands..
3602 APInt DemandedLHS(NumElts, 0);
3603 APInt DemandedRHS(NumElts, 0);
3604 for (unsigned i = 0; i != NumElts; ++i) {
3605 int M = ShuffleMask[i];
3606 if (M < 0 || !DemandedElts[i])
3607 continue;
3608 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3609 if (M < (int)NumElts)
3610 DemandedLHS.setBit(M);
3611 else
3612 DemandedRHS.setBit(M - NumElts);
3613 }
3614
3615 // If either side isn't demanded, replace it by UNDEF. We handle this
3616 // explicitly here to also simplify in case of multiple uses (on the
3617 // contrary to the SimplifyDemandedVectorElts calls below).
3618 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3619 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3620 if (FoldLHS || FoldRHS) {
3621 LHS = FoldLHS ? TLO.DAG.getUNDEF(VT: LHS.getValueType()) : LHS;
3622 RHS = FoldRHS ? TLO.DAG.getUNDEF(VT: RHS.getValueType()) : RHS;
3623 SDValue NewOp =
3624 TLO.DAG.getVectorShuffle(VT, dl: SDLoc(Op), N1: LHS, N2: RHS, Mask: ShuffleMask);
3625 return TLO.CombineTo(O: Op, N: NewOp);
3626 }
3627
3628 // See if we can simplify either shuffle operand.
3629 APInt UndefLHS, ZeroLHS;
3630 APInt UndefRHS, ZeroRHS;
3631 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3632 Depth: Depth + 1))
3633 return true;
3634 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3635 Depth: Depth + 1))
3636 return true;
3637
3638 // Simplify mask using undef elements from LHS/RHS.
3639 bool Updated = false;
3640 bool IdentityLHS = true, IdentityRHS = true;
3641 SmallVector<int, 32> NewMask(ShuffleMask);
3642 for (unsigned i = 0; i != NumElts; ++i) {
3643 int &M = NewMask[i];
3644 if (M < 0)
3645 continue;
3646 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3647 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3648 Updated = true;
3649 M = -1;
3650 }
3651 IdentityLHS &= (M < 0) || (M == (int)i);
3652 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3653 }
3654
3655 // Update legal shuffle masks based on demanded elements if it won't reduce
3656 // to Identity which can cause premature removal of the shuffle mask.
3657 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3658 SDValue LegalShuffle =
3659 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3660 if (LegalShuffle)
3661 return TLO.CombineTo(O: Op, N: LegalShuffle);
3662 }
3663
3664 // Propagate undef/zero elements from LHS/RHS.
3665 for (unsigned i = 0; i != NumElts; ++i) {
3666 int M = ShuffleMask[i];
3667 if (M < 0) {
3668 KnownUndef.setBit(i);
3669 } else if (M < (int)NumElts) {
3670 if (UndefLHS[M])
3671 KnownUndef.setBit(i);
3672 if (ZeroLHS[M])
3673 KnownZero.setBit(i);
3674 } else {
3675 if (UndefRHS[M - NumElts])
3676 KnownUndef.setBit(i);
3677 if (ZeroRHS[M - NumElts])
3678 KnownZero.setBit(i);
3679 }
3680 }
3681 break;
3682 }
3683 case ISD::ANY_EXTEND_VECTOR_INREG:
3684 case ISD::SIGN_EXTEND_VECTOR_INREG:
3685 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3686 APInt SrcUndef, SrcZero;
3687 SDValue Src = Op.getOperand(i: 0);
3688 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3689 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3690 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3691 Depth: Depth + 1))
3692 return true;
3693 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3694 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3695
3696 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3697 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3698 DemandedSrcElts == 1) {
3699 // aext - if we just need the bottom element then we can bitcast.
3700 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3701 }
3702
3703 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3704 // zext(undef) upper bits are guaranteed to be zero.
3705 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3706 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3707 KnownUndef.clearAllBits();
3708
3709 // zext - if we just need the bottom element then we can mask:
3710 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3711 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3712 Op->isOnlyUserOf(N: Src.getNode()) &&
3713 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3714 SDLoc DL(Op);
3715 EVT SrcVT = Src.getValueType();
3716 EVT SrcSVT = SrcVT.getScalarType();
3717
3718 // If we're after type legalization and SrcSVT is not legal, use the
3719 // promoted type for creating constants to avoid creating nodes with
3720 // illegal types.
3721 if (AfterLegalizeTypes)
3722 SrcSVT = getLegalTypeToTransformTo(Context&: *TLO.DAG.getContext(), VT: SrcSVT);
3723
3724 SmallVector<SDValue> MaskElts;
3725 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3726 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3727 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3728 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3729 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3730 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3731 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3732 }
3733 }
3734 }
3735 break;
3736 }
3737
3738 // TODO: There are more binop opcodes that could be handled here - MIN,
3739 // MAX, saturated math, etc.
3740 case ISD::ADD: {
3741 SDValue Op0 = Op.getOperand(i: 0);
3742 SDValue Op1 = Op.getOperand(i: 1);
3743 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3744 APInt UndefLHS, ZeroLHS;
3745 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3746 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3747 return true;
3748 }
3749 [[fallthrough]];
3750 }
3751 case ISD::AVGCEILS:
3752 case ISD::AVGCEILU:
3753 case ISD::AVGFLOORS:
3754 case ISD::AVGFLOORU:
3755 case ISD::OR:
3756 case ISD::XOR:
3757 case ISD::SUB:
3758 case ISD::FADD:
3759 case ISD::FSUB:
3760 case ISD::FMUL:
3761 case ISD::FDIV:
3762 case ISD::FREM: {
3763 SDValue Op0 = Op.getOperand(i: 0);
3764 SDValue Op1 = Op.getOperand(i: 1);
3765
3766 APInt UndefRHS, ZeroRHS;
3767 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3768 Depth: Depth + 1))
3769 return true;
3770 APInt UndefLHS, ZeroLHS;
3771 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3772 Depth: Depth + 1))
3773 return true;
3774
3775 KnownZero = ZeroLHS & ZeroRHS;
3776 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3777
3778 // Attempt to avoid multi-use ops if we don't need anything from them.
3779 // TODO - use KnownUndef to relax the demandedelts?
3780 if (!DemandedElts.isAllOnes())
3781 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3782 return true;
3783 break;
3784 }
3785 case ISD::SHL:
3786 case ISD::SRL:
3787 case ISD::SRA:
3788 case ISD::ROTL:
3789 case ISD::ROTR: {
3790 SDValue Op0 = Op.getOperand(i: 0);
3791 SDValue Op1 = Op.getOperand(i: 1);
3792
3793 APInt UndefRHS, ZeroRHS;
3794 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3795 Depth: Depth + 1))
3796 return true;
3797 APInt UndefLHS, ZeroLHS;
3798 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3799 Depth: Depth + 1))
3800 return true;
3801
3802 KnownZero = ZeroLHS;
3803 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3804
3805 // Attempt to avoid multi-use ops if we don't need anything from them.
3806 // TODO - use KnownUndef to relax the demandedelts?
3807 if (!DemandedElts.isAllOnes())
3808 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3809 return true;
3810 break;
3811 }
3812 case ISD::MUL:
3813 case ISD::MULHU:
3814 case ISD::MULHS:
3815 case ISD::AND: {
3816 SDValue Op0 = Op.getOperand(i: 0);
3817 SDValue Op1 = Op.getOperand(i: 1);
3818
3819 APInt SrcUndef, SrcZero;
3820 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3821 Depth: Depth + 1))
3822 return true;
3823 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3824 // to demand it in Op0 - its guaranteed to be zero. There is however a
3825 // restriction, as we must not make any of the originally demanded elements
3826 // more poisonous. We could reduce amount of elements demanded, but then we
3827 // also need a to inform SimplifyDemandedVectorElts that some elements must
3828 // not be made more poisonous.
3829 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef, KnownZero,
3830 TLO, Depth: Depth + 1))
3831 return true;
3832
3833 KnownUndef &= DemandedElts;
3834 KnownZero &= DemandedElts;
3835
3836 // If every element pair has a zero/undef/poison then just fold to zero.
3837 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3838 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3839 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3840 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3841
3842 // If either side has a zero element, then the result element is zero, even
3843 // if the other is an UNDEF.
3844 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3845 // and then handle 'and' nodes with the rest of the binop opcodes.
3846 KnownZero |= SrcZero;
3847 KnownUndef &= SrcUndef;
3848 KnownUndef &= ~KnownZero;
3849
3850 // Attempt to avoid multi-use ops if we don't need anything from them.
3851 if (!DemandedElts.isAllOnes())
3852 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3853 return true;
3854 break;
3855 }
3856 case ISD::TRUNCATE:
3857 case ISD::SIGN_EXTEND:
3858 case ISD::ZERO_EXTEND:
3859 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3860 KnownZero, TLO, Depth: Depth + 1))
3861 return true;
3862
3863 if (!DemandedElts.isAllOnes())
3864 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3865 Op: Op.getOperand(i: 0), DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
3866 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, Operand: NewOp));
3867
3868 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3869 // zext(undef) upper bits are guaranteed to be zero.
3870 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3871 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3872 KnownUndef.clearAllBits();
3873 }
3874 break;
3875 case ISD::SINT_TO_FP:
3876 case ISD::UINT_TO_FP:
3877 case ISD::FP_TO_SINT:
3878 case ISD::FP_TO_UINT:
3879 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3880 KnownZero, TLO, Depth: Depth + 1))
3881 return true;
3882 // Don't fall through to generic undef -> undef handling.
3883 return false;
3884 default: {
3885 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3886 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3887 KnownZero, TLO, Depth))
3888 return true;
3889 } else {
3890 KnownBits Known;
3891 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3892 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3893 TLO, Depth, AssumeSingleUse))
3894 return true;
3895 }
3896 break;
3897 }
3898 }
3899 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3900
3901 // Constant fold all undef cases.
3902 // TODO: Handle zero cases as well.
3903 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3904 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3905
3906 return false;
3907}
3908
3909/// Determine which of the bits specified in Mask are known to be either zero or
3910/// one and return them in the Known.
3911void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3912 KnownBits &Known,
3913 const APInt &DemandedElts,
3914 const SelectionDAG &DAG,
3915 unsigned Depth) const {
3916 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3917 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3918 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3919 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3920 "Should use MaskedValueIsZero if you don't know whether Op"
3921 " is a target node!");
3922 Known.resetAll();
3923}
3924
// GlobalISel counterpart of computeKnownBitsForTargetNode: targets override
// this hook to report known bits for target-specific generic instructions.
void TargetLowering::computeKnownBitsForTargetInstr(
    GISelValueTracking &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: nothing is known about any bit.
  Known.resetAll();
}
3931
// GlobalISel hook for floating-point class analysis of target-specific
// instructions; the default claims no knowledge.
void TargetLowering::computeKnownFPClassForTargetInstr(
    GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: any FP class is possible.
  Known.resetAll();
}
3938
// Derive known bits for a frame-index value from its object's alignment.
void TargetLowering::computeKnownBitsForFrameIndex(
    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  // The low bits are known zero if the pointer is aligned: an object aligned
  // to 2^K has K trailing zero bits in its address.
  Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
}
3944
// GlobalISel hook for alignment analysis of target-specific instructions.
Align TargetLowering::computeKnownAlignForTargetInstr(
    GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  // Conservative default: only byte alignment can be assumed.
  return Align(1);
}
3950
3951/// This method can be implemented by targets that want to expose additional
3952/// information about sign bits to the DAG Combiner.
3953unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3954 const APInt &,
3955 const SelectionDAG &,
3956 unsigned Depth) const {
3957 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3958 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3959 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3960 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3961 "Should use ComputeNumSignBits if you don't know whether Op"
3962 " is a target node!");
3963 return 1;
3964}
3965
// GlobalISel counterpart of ComputeNumSignBitsForTargetNode.
unsigned TargetLowering::computeNumSignBitsForTargetInstr(
    GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
    const MachineRegisterInfo &MRI, unsigned Depth) const {
  // Conservative default: only the sign bit itself is guaranteed.
  return 1;
}
3971
3972bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3973 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3974 TargetLoweringOpt &TLO, unsigned Depth) const {
3975 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3976 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3977 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3978 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3979 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3980 " is a target node!");
3981 return false;
3982}
3983
3984bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3985 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3986 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3987 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3988 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3989 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3990 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3991 "Should use SimplifyDemandedBits if you don't know whether Op"
3992 " is a target node!");
3993 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3994 return false;
3995}
3996
3997SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3998 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3999 SelectionDAG &DAG, unsigned Depth) const {
4000 assert(
4001 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4002 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4003 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4004 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4005 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4006 " is a target node!");
4007 return SDValue();
4008}
4009
4010SDValue
4011TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4012 SDValue N1, MutableArrayRef<int> Mask,
4013 SelectionDAG &DAG) const {
4014 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4015 if (!LegalMask) {
4016 std::swap(a&: N0, b&: N1);
4017 ShuffleVectorSDNode::commuteMask(Mask);
4018 LegalMask = isShuffleMaskLegal(Mask, VT);
4019 }
4020
4021 if (!LegalMask)
4022 return SDValue();
4023
4024 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
4025}
4026
// Targets override this to recognize loads from constant pools (or similar)
// and return the loaded Constant; the default recognizes nothing.
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
  return nullptr;
}
4030
4031bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4032 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4033 bool PoisonOnly, unsigned Depth) const {
4034 assert(
4035 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4036 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4037 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4038 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4039 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4040 " is a target node!");
4041
4042 // If Op can't create undef/poison and none of its operands are undef/poison
4043 // then Op is never undef/poison.
4044 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4045 /*ConsiderFlags*/ true, Depth) &&
4046 all_of(Range: Op->ops(), P: [&](SDValue V) {
4047 return DAG.isGuaranteedNotToBeUndefOrPoison(Op: V, PoisonOnly,
4048 Depth: Depth + 1);
4049 });
4050}
4051
4052bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4053 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4054 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4055 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4056 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4057 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4058 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4059 "Should use canCreateUndefOrPoison if you don't know whether Op"
4060 " is a target node!");
4061 // Be conservative and return true.
4062 return true;
4063}
4064
4065bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4066 const APInt &DemandedElts,
4067 const SelectionDAG &DAG,
4068 bool SNaN,
4069 unsigned Depth) const {
4070 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4071 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4072 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4073 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4074 "Should use isKnownNeverNaN if you don't know whether Op"
4075 " is a target node!");
4076 return false;
4077}
4078
4079bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4080 const APInt &DemandedElts,
4081 APInt &UndefElts,
4082 const SelectionDAG &DAG,
4083 unsigned Depth) const {
4084 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4085 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4086 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4087 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4088 "Should use isSplatValue if you don't know whether Op"
4089 " is a target node!");
4090 return false;
4091}
4092
4093// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4094// work with truncating build vectors and vectors with elements of less than
4095// 8 bits.
4096bool TargetLowering::isConstTrueVal(SDValue N) const {
4097 if (!N)
4098 return false;
4099
4100 unsigned EltWidth;
4101 APInt CVal;
4102 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4103 /*AllowTruncation=*/true)) {
4104 CVal = CN->getAPIntValue();
4105 EltWidth = N.getValueType().getScalarSizeInBits();
4106 } else
4107 return false;
4108
4109 // If this is a truncating splat, truncate the splat value.
4110 // Otherwise, we may fail to match the expected values below.
4111 if (EltWidth < CVal.getBitWidth())
4112 CVal = CVal.trunc(width: EltWidth);
4113
4114 switch (getBooleanContents(Type: N.getValueType())) {
4115 case UndefinedBooleanContent:
4116 return CVal[0];
4117 case ZeroOrOneBooleanContent:
4118 return CVal.isOne();
4119 case ZeroOrNegativeOneBooleanContent:
4120 return CVal.isAllOnes();
4121 }
4122
4123 llvm_unreachable("Invalid boolean contents");
4124}
4125
4126bool TargetLowering::isConstFalseVal(SDValue N) const {
4127 if (!N)
4128 return false;
4129
4130 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
4131 if (!CN) {
4132 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
4133 if (!BV)
4134 return false;
4135
4136 // Only interested in constant splats, we don't care about undef
4137 // elements in identifying boolean constants and getConstantSplatNode
4138 // returns NULL if all ops are undef;
4139 CN = BV->getConstantSplatNode();
4140 if (!CN)
4141 return false;
4142 }
4143
4144 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
4145 return !CN->getAPIntValue()[0];
4146
4147 return CN->isZero();
4148}
4149
4150bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4151 bool SExt) const {
4152 if (VT == MVT::i1)
4153 return N->isOne();
4154
4155 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
4156 switch (Cnt) {
4157 case TargetLowering::ZeroOrOneBooleanContent:
4158 // An extended value of 1 is always true, unless its original type is i1,
4159 // in which case it will be sign extended to -1.
4160 return (N->isOne() && !SExt) || (SExt && (N->getValueType(ResNo: 0) != MVT::i1));
4161 case TargetLowering::UndefinedBooleanContent:
4162 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4163 return N->isAllOnes() && SExt;
4164 }
4165 llvm_unreachable("Unexpected enumeration.");
4166}
4167
4168/// This helper function of SimplifySetCC tries to optimize the comparison when
4169/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Canonicalize so that any AND operand sits on the LHS (N0).
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(a&: N0, b&: N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only integer [in]equality comparisons against an AND are handled here.
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X & Y) != 0 --> zextOrTrunc(X & Y)
  // iff everything but LSB is known zero:
  if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
      (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
       getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
    unsigned NumEltBits = OpVT.getScalarSizeInBits();
    APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
    if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
      return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
  }

  // Try to eliminate a power-of-2 mask constant by converting to a signbit
  // test in a narrow type that we can truncate to with no cost. Examples:
  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
  // TODO: This conservatively checks for type legality on the source and
  //       destination types. That may inhibit optimizations, but it also
  //       allows setcc->shift transforms that may be more beneficial.
  auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
  if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
      isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
    // getActiveBits() of a power of 2 gives the position just above the set
    // bit, so the narrow type keeps that bit as its sign bit.
    EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                     BitWidth: AndC->getAPIntValue().getActiveBits());
    if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
      SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
      SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
      return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
                          Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
    }
  }

  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  SDValue X, Y;
  if (N0.getOperand(i: 0) == N1) {
    X = N0.getOperand(i: 1);
    Y = N0.getOperand(i: 0);
  } else if (N0.getOperand(i: 1) == N1) {
    X = N0.getOperand(i: 0);
    Y = N0.getOperand(i: 1);
  } else {
    return SDValue();
  }

  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
  // it is liable to create an infinite loop.
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
      DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(V: Y))
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
    SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
    return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
  }

  return SDValue();
}
4262
4263/// This helper function of SimplifySetCC tries to optimize the comparison when
4264/// either operand of the SetCC node is a bitwise-or instruction.
4265/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &DL,
                                        DAGCombinerInfo &DCI) const {
  // Canonicalize so that any OR operand sits on the LHS (N0).
  if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
    std::swap(a&: N0, b&: N1);

  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  // Only single-use integer [in]equality comparisons are handled.
  if (!N0.hasOneUse() || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  // (X | Y) == Y
  // (X | Y) != Y
  SDValue X;
  if (sd_match(N: N0, P: m_Or(L: m_Value(N&: X), R: m_Specific(N: N1))) && hasAndNotCompare(Y: X)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    if (isNullConstant(V: N1))
      return SDValue();

    // Transform this into: X & ~Y ==/!= 0.
    SDValue NotY = DAG.getNOT(DL: SDLoc(N1), Val: N1, VT: OpVT);
    SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: X, N2: NotY);
    return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
  }

  return SDValue();
}
4298
4299/// There are multiple IR patterns that could be checking whether certain
4300/// truncation of a signed number would be lossy or not. The pattern which is
4301/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4302/// We are looking for the following pattern: (KeptBits is a constant)
4303/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4304/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4305/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4306/// We will unfold it into the natural trunc+sext pattern:
4307/// ((%x << C) a>> C) dstcond %x
4308/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
    const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
    return SDValue();

  // N0 should be: add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
    return SDValue();

  SDValue X = N0->getOperand(Num: 0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  // Map the unsigned comparison onto eq/ne against a canonical power-of-two
  // constant. ULE/UGT compare against (1 << KeptBits) - 1, so bump the
  // constant by one to reach the power of two itself.
  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else
    return SDValue();

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  // Magic!
  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
    return SDValue();

  // Unfold into: sext_inreg(%x) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue SExtInReg = DAG.getNode(
      Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: XVT, N1: X,
      N2: DAG.getValueType(EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: KeptBits)));
  return DAG.getSetCC(DL, VT: SCCVT, LHS: SExtInReg, RHS: X, Cond: NewCond);
}
4390
4391// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;

  // Look for '(C l>>/<< Y)'. On success sets C, Y and NewShiftOpcode (the
  // opposite logical shift, which will be applied to X instead). Note that
  // the lambda reads X, so X must be assigned before Match is invoked.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(i: 0);
    ConstantSDNode *CC =
        isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(i: 1);

    // Let the target decide whether hoisting the constant is profitable.
    ConstantSDNode *XC =
        isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(i: 0);
  SDValue Mask = N0.getOperand(i: 1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(a&: X, b&: Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
  SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
  SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
  return T2;
}
4459
4460/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4461/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4462/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(i: 0);
  SDValue Y = N0.getOperand(i: 1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);

  // Nothing to fold unless the RHS matches the binop's second operand.
  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  SDValue One = DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL);
  SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(N: YShl1.getNode());
  return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
}
4500
/// Fold comparisons of (ctpop X) against a constant C1 into cheaper bit
/// tricks when the target's ctpop is not fast. Returns a null SDValue when no
/// profitable rewrite applies.
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() >
          Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(i: 0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(i: 0);

  // Expand a power-of-2-or-zero comparison based on ctpop:
  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    // Keep the CTPOP if it is a cheap vector op.
    if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    // Each clear-lowest-set-bit pass costs one add and one and; let the
    // target cap how many passes are worth emitting.
    unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
    if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    // Repeatedly clear the lowest set bit: x &= (x - 1).
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
      Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
  }

  // Expand a power-of-2 comparison based on ctpop
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // Keep the CTPOP if it is cheap.
    if (TLI.isCtpopFast(VT: CTVT))
      return SDValue();

    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
    assert(CTVT.isInteger());
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);

    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
    // check before emitting a potentially unnecessary op.
    if (DAG.isKnownNeverZero(Op: CTOp)) {
      // (ctpop x) == 1 --> (x & x-1) == 0
      // (ctpop x) != 1 --> (x & x-1) != 0
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
      SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
      return RHS;
    }

    // (ctpop x) == 1 --> (x ^ x-1) > x-1
    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
    ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
    return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
  }

  return SDValue();
}
4574
/// Fold equality comparisons of a rotated value against 0 or -1: rotation
/// never changes whether all bits are clear or all bits are set, so the
/// rotate can be peeked through.
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &dl,
                                   SelectionDAG &DAG) {
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
    return SDValue();

  // Returns the rotated value's source, or a null SDValue if X isn't a rotate.
  auto getRotateSource = [](SDValue X) {
    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
      return X.getOperand(i: 0);
    return SDValue();
  };

  // Peek through a rotated value compared against 0 or -1:
  // (rot X, Y) == 0/-1 --> X == 0/-1
  // (rot X, Y) != 0/-1 --> X != 0/-1
  if (SDValue R = getRotateSource(N0))
    return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);

  // Peek through an 'or' of a rotated value compared against 0:
  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
  //
  // TODO: Add the 'and' with -1 sibling.
  // TODO: Recurse through a series of 'or' ops to find the rotate.
  EVT OpVT = N0.getValueType();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
    if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
      SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
      return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
    }
    if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
      SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
      return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
    }
  }

  return SDValue();
}
4617
/// Fold equality-with-zero comparisons of a funnel shift whose two inputs
/// share an operand through an 'or', replacing the funnel shift with a plain
/// shift + or (bit order is irrelevant when testing for all-bits-clear).
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
                                        ISD::CondCode Cond, const SDLoc &dl,
                                        SelectionDAG &DAG) {
  // If we are testing for all-bits-clear, we might be able to do that with
  // less shifting since bit-order does not matter.
  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
    return SDValue();

  auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
  if (!C1 || !C1->isZero())
    return SDValue();

  if (!N0.hasOneUse() ||
      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
    return SDValue();

  // Funnel shift amounts are defined modulo the bit width.
  unsigned BitWidth = N0.getScalarValueSizeInBits();
  auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
  if (!ShAmtC)
    return SDValue();

  uint64_t ShAmt = ShAmtC->getAPIntValue().urem(RHS: BitWidth);
  if (ShAmt == 0)
    return SDValue();

  // Canonicalize fshr as fshl to reduce pattern-matching.
  if (N0.getOpcode() == ISD::FSHR)
    ShAmt = BitWidth - ShAmt;

  // Match an 'or' with a specific operand 'Other' in either commuted variant.
  // On success, X is the shared operand and Y the other 'or' operand.
  SDValue X, Y;
  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
      return false;
    if (Or.getOperand(i: 0) == Other) {
      X = Or.getOperand(i: 0);
      Y = Or.getOperand(i: 1);
      return true;
    }
    if (Or.getOperand(i: 1) == Other) {
      X = Or.getOperand(i: 1);
      Y = Or.getOperand(i: 0);
      return true;
    }
    return false;
  };

  EVT OpVT = N0.getValueType();
  EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
  SDValue F0 = N0.getOperand(i: 0);
  SDValue F1 = N0.getOperand(i: 1);
  if (matchOr(F0, F1)) {
    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
    SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
    SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
    return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
  }
  if (matchOr(F1, F0)) {
    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
    SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
    SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
    SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
    return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
  }

  return SDValue();
}
4686
4687/// Try to simplify a setcc built with the specified operands and cc. If it is
4688/// unable to simplify it, return a null SDValue.
4689SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4690 ISD::CondCode Cond, bool foldBooleans,
4691 DAGCombinerInfo &DCI,
4692 const SDLoc &dl) const {
4693 SelectionDAG &DAG = DCI.DAG;
4694 const DataLayout &Layout = DAG.getDataLayout();
4695 EVT OpVT = N0.getValueType();
4696 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4697
4698 // Constant fold or commute setcc.
4699 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4700 return Fold;
4701
4702 bool N0ConstOrSplat =
4703 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4704 bool N1ConstOrSplat =
4705 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncate*/ AllowTruncation: true);
4706
4707 // Canonicalize toward having the constant on the RHS.
4708 // TODO: Handle non-splat vector constants. All undef causes trouble.
4709 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4710 // infinite loop here when we encounter one.
4711 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4712 if (N0ConstOrSplat && !N1ConstOrSplat &&
4713 (DCI.isBeforeLegalizeOps() ||
4714 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4715 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4716
4717 // If we have a subtract with the same 2 non-constant operands as this setcc
4718 // -- but in reverse order -- then try to commute the operands of this setcc
4719 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4720 // instruction on some targets.
4721 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4722 (DCI.isBeforeLegalizeOps() ||
4723 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4724 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4725 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4726 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4727
4728 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4729 return V;
4730
4731 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4732 return V;
4733
4734 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4735 const APInt &C1 = N1C->getAPIntValue();
4736
4737 // Optimize some CTPOP cases.
4738 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4739 return V;
4740
4741 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4742 // X * Y == 0 --> (X == 0) || (Y == 0)
4743 // X * Y != 0 --> (X != 0) && (Y != 0)
4744 // TODO: This bails out if minsize is set, but if the target doesn't have a
4745 // single instruction multiply for this type, it would likely be
4746 // smaller to decompose.
4747 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4748 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4749 (N0->getFlags().hasNoUnsignedWrap() ||
4750 N0->getFlags().hasNoSignedWrap()) &&
4751 !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
4752 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4753 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4754 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4755 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4756 }
4757
4758 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4759 // equality comparison, then we're just comparing whether X itself is
4760 // zero.
4761 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4762 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4763 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4764 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4765 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4766 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4767 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4768 // (srl (ctlz x), 5) == 0 -> X != 0
4769 // (srl (ctlz x), 5) != 1 -> X != 0
4770 Cond = ISD::SETNE;
4771 } else {
4772 // (srl (ctlz x), 5) != 0 -> X == 0
4773 // (srl (ctlz x), 5) == 1 -> X == 0
4774 Cond = ISD::SETEQ;
4775 }
4776 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4777 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4778 Cond);
4779 }
4780 }
4781 }
4782 }
4783
4784 // setcc X, 0, setlt --> X (when X is all sign bits)
4785 // setcc X, 0, setne --> X (when X is all sign bits)
4786 //
4787 // When we know that X has 0 or -1 in each element (or scalar), this
4788 // comparison will produce X. This is only true when boolean contents are
4789 // represented via 0s and -1s.
4790 if (VT == OpVT &&
4791 // Check that the result of setcc is 0 and -1.
4792 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent &&
4793 // Match only for checks X < 0 and X != 0
4794 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(V: N1) &&
4795 // The identity holds iff we know all sign bits for all lanes.
4796 DAG.ComputeNumSignBits(Op: N0) == N0.getScalarValueSizeInBits())
4797 return N0;
4798
4799 // FIXME: Support vectors.
4800 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4801 const APInt &C1 = N1C->getAPIntValue();
4802
4803 // (zext x) == C --> x == (trunc C)
4804 // (sext x) == C --> x == (trunc C)
4805 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4806 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4807 unsigned MinBits = N0.getValueSizeInBits();
4808 SDValue PreExt;
4809 bool Signed = false;
4810 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4811 // ZExt
4812 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4813 PreExt = N0->getOperand(Num: 0);
4814 } else if (N0->getOpcode() == ISD::AND) {
4815 // DAGCombine turns costly ZExts into ANDs
4816 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4817 if ((C->getAPIntValue()+1).isPowerOf2()) {
4818 MinBits = C->getAPIntValue().countr_one();
4819 PreExt = N0->getOperand(Num: 0);
4820 }
4821 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4822 // SExt
4823 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4824 PreExt = N0->getOperand(Num: 0);
4825 Signed = true;
4826 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4827 // ZEXTLOAD / SEXTLOAD
4828 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4829 MinBits = LN0->getMemoryVT().getSizeInBits();
4830 PreExt = N0;
4831 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4832 Signed = true;
4833 MinBits = LN0->getMemoryVT().getSizeInBits();
4834 PreExt = N0;
4835 }
4836 }
4837
4838 // Figure out how many bits we need to preserve this constant.
4839 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4840
4841 // Make sure we're not losing bits from the constant.
4842 if (MinBits > 0 &&
4843 MinBits < C1.getBitWidth() &&
4844 MinBits >= ReqdBits) {
4845 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4846 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4847 // Will get folded away.
4848 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4849 if (MinBits == 1 && C1 == 1)
4850 // Invert the condition.
4851 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i1),
4852 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4853 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4854 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4855 }
4856
4857 // If truncating the setcc operands is not desirable, we can still
4858 // simplify the expression in some cases:
4859 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4860 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4861 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4862 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4863 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4864 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4865 SDValue TopSetCC = N0->getOperand(Num: 0);
4866 unsigned N0Opc = N0->getOpcode();
4867 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4868 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4869 TopSetCC.getOpcode() == ISD::SETCC &&
4870 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4871 (isConstFalseVal(N: N1) ||
4872 isExtendedTrueVal(N: N1C, VT: N0->getValueType(ResNo: 0), SExt))) {
4873
4874 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4875 (!N1C->isZero() && Cond == ISD::SETNE);
4876
4877 if (!Inverse)
4878 return TopSetCC;
4879
4880 ISD::CondCode InvCond = ISD::getSetCCInverse(
4881 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4882 Type: TopSetCC.getOperand(i: 0).getValueType());
4883 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4884 RHS: TopSetCC.getOperand(i: 1),
4885 Cond: InvCond);
4886 }
4887 }
4888 }
4889
4890 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4891 // equality or unsigned, and all 1 bits of the const are in the same
4892 // partial word, see if we can shorten the load.
4893 if (DCI.isBeforeLegalize() &&
4894 !ISD::isSignedIntSetCC(Code: Cond) &&
4895 N0.getOpcode() == ISD::AND && C1 == 0 &&
4896 N0.getNode()->hasOneUse() &&
4897 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4898 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4899 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4900 auto *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4901 APInt bestMask;
4902 unsigned bestWidth = 0, bestOffset = 0;
4903 if (Lod->isSimple() && Lod->isUnindexed() &&
4904 (Lod->getMemoryVT().isByteSized() ||
4905 isPaddedAtMostSignificantBitsWhenStored(VT: Lod->getMemoryVT()))) {
4906 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4907 unsigned origWidth = N0.getValueSizeInBits();
4908 unsigned maskWidth = origWidth;
4909 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4910 // 8 bits, but have to be careful...
4911 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4912 origWidth = Lod->getMemoryVT().getSizeInBits();
4913 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
        // Only consider power-of-2 widths (and at least one byte) as candidates
4915 // for the narrowed load.
4916 for (unsigned width = 8; width < origWidth; width *= 2) {
4917 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: width);
4918 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4919 // Avoid accessing any padding here for now (we could use memWidth
4920 // instead of origWidth here otherwise).
4921 unsigned maxOffset = origWidth - width;
4922 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4923 if (Mask.isSubsetOf(RHS: newMask)) {
4924 unsigned ptrOffset =
4925 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4926 unsigned IsFast = 0;
4927 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4928 Align NewAlign = commonAlignment(A: Lod->getAlign(), Offset: ptrOffset / 8);
4929 if (shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT,
4930 ByteOffset: ptrOffset / 8) &&
4931 allowsMemoryAccess(
4932 Context&: *DAG.getContext(), DL: Layout, VT: newVT, AddrSpace: Lod->getAddressSpace(),
4933 Alignment: NewAlign, Flags: Lod->getMemOperand()->getFlags(), Fast: &IsFast) &&
4934 IsFast) {
4935 bestOffset = ptrOffset / 8;
4936 bestMask = Mask.lshr(shiftAmt: offset);
4937 bestWidth = width;
4938 break;
4939 }
4940 }
4941 newMask <<= 8;
4942 }
4943 if (bestWidth)
4944 break;
4945 }
4946 }
4947 if (bestWidth) {
4948 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4949 SDValue Ptr = Lod->getBasePtr();
4950 if (bestOffset != 0)
4951 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset));
4952 SDValue NewLoad =
4953 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4954 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4955 Alignment: Lod->getBaseAlign());
4956 SDValue And =
4957 DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4958 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth), DL: dl, VT: newVT));
4959 return DAG.getSetCC(DL: dl, VT, LHS: And, RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4960 }
4961 }
4962
4963 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4964 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4965 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4966
4967 // If the comparison constant has bits in the upper part, the
4968 // zero-extended value could never match.
4969 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4970 hiBitsSet: C1.getBitWidth() - InSize))) {
4971 switch (Cond) {
4972 case ISD::SETUGT:
4973 case ISD::SETUGE:
4974 case ISD::SETEQ:
4975 return DAG.getConstant(Val: 0, DL: dl, VT);
4976 case ISD::SETULT:
4977 case ISD::SETULE:
4978 case ISD::SETNE:
4979 return DAG.getConstant(Val: 1, DL: dl, VT);
4980 case ISD::SETGT:
4981 case ISD::SETGE:
4982 // True if the sign bit of C1 is set.
4983 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4984 case ISD::SETLT:
4985 case ISD::SETLE:
4986 // True if the sign bit of C1 isn't set.
4987 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4988 default:
4989 break;
4990 }
4991 }
4992
4993 // Otherwise, we can perform the comparison with the low bits.
4994 switch (Cond) {
4995 case ISD::SETEQ:
4996 case ISD::SETNE:
4997 case ISD::SETUGT:
4998 case ISD::SETUGE:
4999 case ISD::SETULT:
5000 case ISD::SETULE: {
5001 EVT newVT = N0.getOperand(i: 0).getValueType();
5002 // FIXME: Should use isNarrowingProfitable.
5003 if (DCI.isBeforeLegalizeOps() ||
5004 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
5005 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()) &&
5006 isTypeDesirableForOp(ISD::SETCC, VT: newVT))) {
5007 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
5008 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
5009
5010 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
5011 RHS: NewConst, Cond);
5012 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
5013 }
5014 break;
5015 }
5016 default:
5017 break; // todo, be more careful with signed comparisons
5018 }
5019 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5020 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5021 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
5022 ToTy: OpVT)) {
5023 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
5024 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5025 EVT ExtDstTy = N0.getValueType();
5026 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5027
5028 // If the constant doesn't fit into the number of bits for the source of
5029 // the sign extension, it is impossible for both sides to be equal.
5030 if (C1.getSignificantBits() > ExtSrcTyBits)
5031 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
5032
5033 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5034 ExtDstTy != ExtSrcTy && "Unexpected types!");
5035 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
5036 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
5037 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
5038 if (!DCI.isCalledByLegalizer())
5039 DCI.AddToWorklist(N: ZextOp.getNode());
5040 // Otherwise, make this a use of a zext.
5041 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
5042 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
5043 } else if ((N1C->isZero() || N1C->isOne()) &&
5044 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5045 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5046 // excluded as they are handled below whilst checking for foldBooleans.
5047 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5048 isTypeLegal(VT) && VT.bitsLE(VT: N0.getValueType()) &&
5049 (N0.getValueType() == MVT::i1 ||
5050 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5051 DAG.MaskedValueIsZero(
5052 Op: N0, Mask: APInt::getBitsSetFrom(numBits: N0.getValueSizeInBits(), loBit: 1))) {
5053 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5054 if (TrueWhenTrue)
5055 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
5056 // Invert the condition.
5057 if (N0.getOpcode() == ISD::SETCC) {
5058 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
5059 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
5060 if (DCI.isBeforeLegalizeOps() ||
5061 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
5062 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
5063 }
5064 }
5065
5066 if ((N0.getOpcode() == ISD::XOR ||
5067 (N0.getOpcode() == ISD::AND &&
5068 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
5069 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
5070 isOneConstant(V: N0.getOperand(i: 1))) {
5071 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5072 // can only do this if the top bits are known zero.
5073 unsigned BitWidth = N0.getValueSizeInBits();
5074 if (DAG.MaskedValueIsZero(Op: N0,
5075 Mask: APInt::getHighBitsSet(numBits: BitWidth,
5076 hiBitsSet: BitWidth-1))) {
5077 // Okay, get the un-inverted input value.
5078 SDValue Val;
5079 if (N0.getOpcode() == ISD::XOR) {
5080 Val = N0.getOperand(i: 0);
5081 } else {
5082 assert(N0.getOpcode() == ISD::AND &&
5083 N0.getOperand(0).getOpcode() == ISD::XOR);
5084 // ((X^1)&1)^1 -> X & 1
5085 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
5086 N1: N0.getOperand(i: 0).getOperand(i: 0),
5087 N2: N0.getOperand(i: 1));
5088 }
5089
5090 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
5091 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5092 }
5093 } else if (N1C->isOne()) {
5094 SDValue Op0 = N0;
5095 if (Op0.getOpcode() == ISD::TRUNCATE)
5096 Op0 = Op0.getOperand(i: 0);
5097
5098 if ((Op0.getOpcode() == ISD::XOR) &&
5099 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
5100 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
5101 SDValue XorLHS = Op0.getOperand(i: 0);
5102 SDValue XorRHS = Op0.getOperand(i: 1);
5103 // Ensure that the input setccs return an i1 type or 0/1 value.
5104 if (Op0.getValueType() == MVT::i1 ||
5105 (getBooleanContents(Type: XorLHS.getOperand(i: 0).getValueType()) ==
5106 ZeroOrOneBooleanContent &&
5107 getBooleanContents(Type: XorRHS.getOperand(i: 0).getValueType()) ==
5108 ZeroOrOneBooleanContent)) {
5109 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5110 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5111 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
5112 }
5113 }
5114 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
5115 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5116 if (Op0.getValueType().bitsGT(VT))
5117 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5118 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5119 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5120 else if (Op0.getValueType().bitsLT(VT))
5121 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
5122 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
5123 N2: DAG.getConstant(Val: 1, DL: dl, VT));
5124
5125 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5126 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5127 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5128 }
5129 if (Op0.getOpcode() == ISD::AssertZext &&
5130 cast<VTSDNode>(Val: Op0.getOperand(i: 1))->getVT() == MVT::i1)
5131 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
5132 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
5133 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5134 }
5135 }
5136
5137 // Given:
5138 // icmp eq/ne (urem %x, %y), 0
5139 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5140 // icmp eq/ne %x, 0
5141 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5142 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5143 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
5144 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
5145 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5146 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
5147 }
5148
5149 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5150 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5151 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5152 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
5153 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
5154 N1C->isAllOnes()) {
5155 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
5156 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
5157 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5158 }
5159
5160 // fold (setcc (trunc x) c) -> (setcc x c)
5161 if (N0.getOpcode() == ISD::TRUNCATE &&
5162 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Code: Cond)) ||
5163 (N0->getFlags().hasNoSignedWrap() &&
5164 !ISD::isUnsignedIntSetCC(Code: Cond))) &&
5165 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5166 EVT NewVT = N0.getOperand(i: 0).getValueType();
5167 SDValue NewConst = DAG.getConstant(
5168 Val: (N0->getFlags().hasNoSignedWrap() && !ISD::isUnsignedIntSetCC(Code: Cond))
5169 ? C1.sext(width: NewVT.getSizeInBits())
5170 : C1.zext(width: NewVT.getSizeInBits()),
5171 DL: dl, VT: NewVT);
5172 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NewConst, Cond);
5173 }
5174
5175 if (SDValue V =
5176 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
5177 return V;
5178 }
5179
5180 // These simplifications apply to splat vectors as well.
5181 // TODO: Handle more splat vector cases.
5182 if (auto *N1C = isConstOrConstSplat(N: N1)) {
5183 const APInt &C1 = N1C->getAPIntValue();
5184
5185 APInt MinVal, MaxVal;
5186 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
5187 if (ISD::isSignedIntSetCC(Code: Cond)) {
5188 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
5189 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
5190 } else {
5191 MinVal = APInt::getMinValue(numBits: OperandBitSize);
5192 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
5193 }
5194
5195 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5196 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5197 // X >= MIN --> true
5198 if (C1 == MinVal)
5199 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5200
5201 if (!VT.isVector()) { // TODO: Support this for vectors.
5202 // X >= C0 --> X > (C0 - 1)
5203 APInt C = C1 - 1;
5204 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5205 if ((DCI.isBeforeLegalizeOps() ||
5206 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5207 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5208 isLegalICmpImmediate(C.getSExtValue())))) {
5209 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5210 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5211 Cond: NewCC);
5212 }
5213 }
5214 }
5215
5216 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5217 // X <= MAX --> true
5218 if (C1 == MaxVal)
5219 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
5220
5221 // X <= C0 --> X < (C0 + 1)
5222 if (!VT.isVector()) { // TODO: Support this for vectors.
5223 APInt C = C1 + 1;
5224 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5225 if ((DCI.isBeforeLegalizeOps() ||
5226 isCondCodeLegal(CC: NewCC, VT: OpVT.getSimpleVT())) &&
5227 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5228 isLegalICmpImmediate(C.getSExtValue())))) {
5229 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5230 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
5231 Cond: NewCC);
5232 }
5233 }
5234 }
5235
5236 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5237 if (C1 == MinVal)
5238 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
5239
5240 // TODO: Support this for vectors after legalize ops.
5241 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5242 // Canonicalize setlt X, Max --> setne X, Max
5243 if (C1 == MaxVal)
5244 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5245
5246 // If we have setult X, 1, turn it into seteq X, 0
5247 if (C1 == MinVal+1)
5248 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5249 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
5250 Cond: ISD::SETEQ);
5251 }
5252 }
5253
5254 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5255 if (C1 == MaxVal)
5256 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
5257
5258 // TODO: Support this for vectors after legalize ops.
5259 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5260 // Canonicalize setgt X, Min --> setne X, Min
5261 if (C1 == MinVal)
5262 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
5263
5264 // If we have setugt X, Max-1, turn it into seteq X, Max
5265 if (C1 == MaxVal-1)
5266 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5267 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
5268 Cond: ISD::SETEQ);
5269 }
5270 }
5271
5272 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5273 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5274 if (C1.isZero())
5275 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5276 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
5277 return CC;
5278
5279 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5280 // For example, when high 32-bits of i64 X are known clear:
5281 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5282 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5283 bool CmpZero = N1C->isZero();
5284 bool CmpNegOne = N1C->isAllOnes();
5285 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5286 // Match or(lo,shl(hi,bw/2)) pattern.
5287 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5288 unsigned EltBits = V.getScalarValueSizeInBits();
5289 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5290 return false;
5291 SDValue LHS = V.getOperand(i: 0);
5292 SDValue RHS = V.getOperand(i: 1);
5293 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
5294 // Unshifted element must have zero upperbits.
5295 if (RHS.getOpcode() == ISD::SHL &&
5296 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
5297 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5298 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
5299 Lo = LHS;
5300 Hi = RHS.getOperand(i: 0);
5301 return true;
5302 }
5303 if (LHS.getOpcode() == ISD::SHL &&
5304 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
5305 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
5306 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
5307 Lo = RHS;
5308 Hi = LHS.getOperand(i: 0);
5309 return true;
5310 }
5311 return false;
5312 };
5313
5314 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5315 unsigned EltBits = N0.getScalarValueSizeInBits();
5316 unsigned HalfBits = EltBits / 2;
5317 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
5318 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
5319 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
5320 SDValue NewN0 =
5321 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
5322 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
5323 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
5324 };
5325
5326 SDValue Lo, Hi;
5327 if (IsConcat(N0, Lo, Hi))
5328 return MergeConcat(Lo, Hi);
5329
5330 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5331 SDValue Lo0, Lo1, Hi0, Hi1;
5332 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
5333 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
5334 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
5335 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
5336 }
5337 }
5338 }
5339 }
5340
5341 // If we have "setcc X, C0", check to see if we can shrink the immediate
5342 // by changing cc.
5343 // TODO: Support this for vectors after legalize ops.
5344 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5345 // SETUGT X, SINTMAX -> SETLT X, 0
5346 // SETUGE X, SINTMIN -> SETLT X, 0
5347 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5348 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5349 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5350 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5351 Cond: ISD::SETLT);
5352
5353 // SETULT X, SINTMIN -> SETGT X, -1
5354 // SETULE X, SINTMAX -> SETGT X, -1
5355 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5356 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5357 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5358 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5359 Cond: ISD::SETGT);
5360 }
5361 }
5362
5363 // Back to non-vector simplifications.
5364 // TODO: Can we do these for vector splats?
5365 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5366 const APInt &C1 = N1C->getAPIntValue();
5367 EVT ShValTy = N0.getValueType();
5368
5369 // Fold bit comparisons when we can. This will result in an
5370 // incorrect value when boolean false is negative one, unless
5371 // the bitsize is 1 in which case the false value is the same
5372 // in practice regardless of the representation.
5373 if ((VT.getSizeInBits() == 1 ||
5374 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5375 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5376 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5377 N0.getOpcode() == ISD::AND) {
5378 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5379 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5380 // Perform the xform if the AND RHS is a single bit.
5381 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5382 if (AndRHS->getAPIntValue().isPowerOf2() &&
5383 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5384 return DAG.getNode(
5385 Opcode: ISD::TRUNCATE, DL: dl, VT,
5386 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5387 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5388 }
5389 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5390 // (X & 8) == 8 --> (X & 8) >> 3
5391 // Perform the xform if C1 is a single bit.
5392 unsigned ShCt = C1.logBase2();
5393 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5394 return DAG.getNode(
5395 Opcode: ISD::TRUNCATE, DL: dl, VT,
5396 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5397 N2: DAG.getShiftAmountConstant(Val: ShCt, VT: ShValTy, DL: dl)));
5398 }
5399 }
5400 }
5401 }
5402
5403 if (C1.getSignificantBits() <= 64 &&
5404 !isLegalICmpImmediate(C1.getSExtValue())) {
5405 // (X & -256) == 256 -> (X >> 8) == 1
5406 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5407 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5408 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5409 const APInt &AndRHSC = AndRHS->getAPIntValue();
5410 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(RHS: AndRHSC)) {
5411 unsigned ShiftBits = AndRHSC.countr_zero();
5412 if (!shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5413 // If using an unsigned shift doesn't yield a legal compare
5414 // immediate, try using sra instead.
5415 APInt NewC = C1.lshr(shiftAmt: ShiftBits);
5416 if (NewC.getSignificantBits() <= 64 &&
5417 !isLegalICmpImmediate(NewC.getSExtValue())) {
5418 APInt SignedC = C1.ashr(ShiftAmt: ShiftBits);
5419 if (SignedC.getSignificantBits() <= 64 &&
5420 isLegalICmpImmediate(SignedC.getSExtValue())) {
5421 SDValue Shift = DAG.getNode(
5422 Opcode: ISD::SRA, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5423 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5424 SDValue CmpRHS = DAG.getConstant(Val: SignedC, DL: dl, VT: ShValTy);
5425 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5426 }
5427 }
5428 SDValue Shift = DAG.getNode(
5429 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5430 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5431 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5432 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5433 }
5434 }
5435 }
5436 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5437 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5438 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5439 // X < 0x100000000 -> (X >> 32) < 1
5440 // X >= 0x100000000 -> (X >> 32) >= 1
5441 // X <= 0x0ffffffff -> (X >> 32) < 1
5442 // X > 0x0ffffffff -> (X >> 32) >= 1
5443 unsigned ShiftBits;
5444 APInt NewC = C1;
5445 ISD::CondCode NewCond = Cond;
5446 if (AdjOne) {
5447 ShiftBits = C1.countr_one();
5448 NewC = NewC + 1;
5449 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5450 } else {
5451 ShiftBits = C1.countr_zero();
5452 }
5453 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5454 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5455 isLegalICmpImmediate(NewC.getSExtValue()) &&
5456 !shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5457 SDValue Shift =
5458 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5459 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl));
5460 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5461 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5462 }
5463 }
5464 }
5465 }
5466
5467 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5468 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5469 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5470
5471 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5472 // constant if knowing that the operand is non-nan is enough. We prefer to
5473 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5474 // materialize 0.0.
5475 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5476 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5477
5478 // setcc (fneg x), C -> setcc swap(pred) x, -C
5479 if (N0.getOpcode() == ISD::FNEG) {
5480 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5481 if (DCI.isBeforeLegalizeOps() ||
5482 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5483 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5484 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5485 }
5486 }
5487
5488 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5489 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5490 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5491 bool IsFabs = N0.getOpcode() == ISD::FABS;
5492 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5493 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5494 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5495 : (IsFabs ? fcInf : fcPosInf);
5496 if (Cond == ISD::SETUEQ)
5497 Flag |= fcNan;
5498 return DAG.getNode(Opcode: ISD::IS_FPCLASS, DL: dl, VT, N1: Op,
5499 N2: DAG.getTargetConstant(Val: Flag, DL: dl, VT: MVT::i32));
5500 }
5501 }
5502
5503 // If the condition is not legal, see if we can find an equivalent one
5504 // which is legal.
5505 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5506 // If the comparison was an awkward floating-point == or != and one of
5507 // the comparison operands is infinity or negative infinity, convert the
5508 // condition to a less-awkward <= or >=.
5509 if (CFP->getValueAPF().isInfinity()) {
5510 bool IsNegInf = CFP->getValueAPF().isNegative();
5511 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5512 switch (Cond) {
5513 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5514 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5515 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5516 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5517 default: break;
5518 }
5519 if (NewCond != ISD::SETCC_INVALID &&
5520 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5521 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5522 }
5523 }
5524 }
5525
5526 if (N0 == N1) {
5527 // The sext(setcc()) => setcc() optimization relies on the appropriate
5528 // constant being emitted.
5529 assert(!N0.getValueType().isInteger() &&
5530 "Integer types should be handled by FoldSetCC");
5531
5532 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5533 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5534 if (UOF == 2) // FP operators that are undefined on NaNs.
5535 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5536 if (UOF == unsigned(EqTrue))
5537 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5538 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5539 // if it is not already.
5540 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5541 if (NewCond != Cond &&
5542 (DCI.isBeforeLegalizeOps() ||
5543 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5544 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5545 }
5546
5547 // ~X > ~Y --> Y > X
5548 // ~X < ~Y --> Y < X
5549 // ~X < C --> X > ~C
5550 // ~X > C --> X < ~C
5551 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5552 N0.getValueType().isInteger()) {
5553 if (isBitwiseNot(V: N0)) {
5554 if (isBitwiseNot(V: N1))
5555 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5556
5557 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5558 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5559 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5560 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5561 }
5562 }
5563 }
5564
5565 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5566 N0.getValueType().isInteger()) {
5567 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5568 N0.getOpcode() == ISD::XOR) {
5569 // Simplify (X+Y) == (X+Z) --> Y == Z
5570 if (N0.getOpcode() == N1.getOpcode()) {
5571 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5572 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5573 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5574 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5575 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5576 // If X op Y == Y op X, try other combinations.
5577 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5578 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5579 Cond);
5580 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5581 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5582 Cond);
5583 }
5584 }
5585
5586 // If RHS is a legal immediate value for a compare instruction, we need
5587 // to be careful about increasing register pressure needlessly.
5588 bool LegalRHSImm = false;
5589
5590 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5591 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5592 // Turn (X+C1) == C2 --> X == C2-C1
5593 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5594 return DAG.getSetCC(
5595 DL: dl, VT, LHS: N0.getOperand(i: 0),
5596 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5597 DL: dl, VT: N0.getValueType()),
5598 Cond);
5599
5600 // Turn (X^C1) == C2 --> X == C1^C2
5601 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5602 return DAG.getSetCC(
5603 DL: dl, VT, LHS: N0.getOperand(i: 0),
5604 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5605 DL: dl, VT: N0.getValueType()),
5606 Cond);
5607 }
5608
5609 // Turn (C1-X) == C2 --> X == C1-C2
5610 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5611 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5612 return DAG.getSetCC(
5613 DL: dl, VT, LHS: N0.getOperand(i: 1),
5614 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5615 DL: dl, VT: N0.getValueType()),
5616 Cond);
5617
5618 // Could RHSC fold directly into a compare?
5619 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5620 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5621 }
5622
5623 // (X+Y) == X --> Y == 0 and similar folds.
5624 // Don't do this if X is an immediate that can fold into a cmp
5625 // instruction and X+Y has other uses. It could be an induction variable
5626 // chain, and the transform would increase register pressure.
5627 if (!LegalRHSImm || N0.hasOneUse())
5628 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5629 return V;
5630 }
5631
5632 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5633 N1.getOpcode() == ISD::XOR)
5634 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5635 return V;
5636
5637 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5638 return V;
5639
5640 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, DL: dl, DCI))
5641 return V;
5642 }
5643
5644 // Fold remainder of division by a constant.
5645 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5646 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5647 // When division is cheap or optimizing for minimum size,
5648 // fall through to DIVREM creation by skipping this fold.
5649 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Kind: Attribute::MinSize)) {
5650 if (N0.getOpcode() == ISD::UREM) {
5651 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5652 return Folded;
5653 } else if (N0.getOpcode() == ISD::SREM) {
5654 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5655 return Folded;
5656 }
5657 }
5658 }
5659
5660 // Fold away ALL boolean setcc's.
5661 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5662 SDValue Temp;
5663 switch (Cond) {
5664 default: llvm_unreachable("Unknown integer setcc!");
5665 case ISD::SETEQ: // X == Y -> ~(X^Y)
5666 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5667 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5668 if (!DCI.isCalledByLegalizer())
5669 DCI.AddToWorklist(N: Temp.getNode());
5670 break;
5671 case ISD::SETNE: // X != Y --> (X^Y)
5672 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5673 break;
5674 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5675 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5676 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5677 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5678 if (!DCI.isCalledByLegalizer())
5679 DCI.AddToWorklist(N: Temp.getNode());
5680 break;
5681 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5682 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5683 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5684 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5685 if (!DCI.isCalledByLegalizer())
5686 DCI.AddToWorklist(N: Temp.getNode());
5687 break;
5688 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5689 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5690 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5691 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5692 if (!DCI.isCalledByLegalizer())
5693 DCI.AddToWorklist(N: Temp.getNode());
5694 break;
5695 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5696 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5697 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5698 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5699 break;
5700 }
5701 if (VT.getScalarType() != MVT::i1) {
5702 if (!DCI.isCalledByLegalizer())
5703 DCI.AddToWorklist(N: N0.getNode());
5704 // FIXME: If running after legalize, we probably can't do this.
5705 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5706 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5707 }
5708 return N0;
5709 }
5710
5711 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5712 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5713 N0.getOperand(i: 0).getValueType() == N1.getOperand(i: 0).getValueType() &&
5714 ((!ISD::isSignedIntSetCC(Code: Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5715 N1->getFlags().hasNoUnsignedWrap()) ||
5716 (!ISD::isUnsignedIntSetCC(Code: Cond) && N0->getFlags().hasNoSignedWrap() &&
5717 N1->getFlags().hasNoSignedWrap())) &&
5718 isTypeDesirableForOp(ISD::SETCC, VT: N0.getOperand(i: 0).getValueType())) {
5719 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5720 }
5721
5722 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5723 // TODO: Remove that .isVector() check
5724 if (VT.isVector() && isZeroOrZeroSplat(N: N1) && N0.getOpcode() == ISD::SUB &&
5725 N0->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Code: Cond)) {
5726 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond);
5727 }
5728
5729 // Could not fold it.
5730 return SDValue();
5731}
5732
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
///
/// NOTE: \p Offset is accumulated into (+=), not assigned, so nested adds
/// fold their constants together across the recursion; callers are expected
/// to initialize it. On failure the out parameters may have been partially
/// updated by an inner recursive call.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {

  // Strip any target-specific address wrapper before inspecting the node.
  SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();

  // Base case: a plain global address. Report it, folding in any offset
  // already baked into the node.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // Add-like node: one operand must itself be GA+offset and the other a
  // constant, which is accumulated into Offset. Both operand orders are
  // tried since the constant may be on either side.
  if (N->isAnyAdd()) {
    SDValue N1 = N->getOperand(Num: 0);
    SDValue N2 = N->getOperand(Num: 1);
    if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5764
/// Hook for targets to combine target-specific DAG nodes during DAG
/// combining. The base implementation performs no optimization; returning an
/// empty SDValue tells the combiner that nothing was changed.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
5770
5771//===----------------------------------------------------------------------===//
5772// Inline Assembler Implementation Methods
5773//===----------------------------------------------------------------------===//
5774
5775TargetLowering::ConstraintType
5776TargetLowering::getConstraintType(StringRef Constraint) const {
5777 unsigned S = Constraint.size();
5778
5779 if (S == 1) {
5780 switch (Constraint[0]) {
5781 default: break;
5782 case 'r':
5783 return C_RegisterClass;
5784 case 'm': // memory
5785 case 'o': // offsetable
5786 case 'V': // not offsetable
5787 return C_Memory;
5788 case 'p': // Address.
5789 return C_Address;
5790 case 'n': // Simple Integer
5791 case 'E': // Floating Point Constant
5792 case 'F': // Floating Point Constant
5793 return C_Immediate;
5794 case 'i': // Simple Integer or Relocatable Constant
5795 case 's': // Relocatable Constant
5796 case 'X': // Allow ANY value.
5797 case 'I': // Target registers.
5798 case 'J':
5799 case 'K':
5800 case 'L':
5801 case 'M':
5802 case 'N':
5803 case 'O':
5804 case 'P':
5805 case '<':
5806 case '>':
5807 return C_Other;
5808 }
5809 }
5810
5811 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5812 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5813 return C_Memory;
5814 return C_Register;
5815 }
5816 return C_Unknown;
5817}
5818
5819/// Try to replace an X constraint, which matches anything, with another that
5820/// has more specific requirements based on the type of the corresponding
5821/// operand.
5822const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5823 if (ConstraintVT.isInteger())
5824 return "r";
5825 if (ConstraintVT.isFloatingPoint())
5826 return "f"; // works for many targets
5827 return nullptr;
5828}
5829
/// Hook allowing targets to lower an inline-asm output operand for a
/// constraint themselves. The default implementation declines by returning
/// an empty SDValue, leaving the generic lowering in charge.
SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
5835
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; multi-letter ones are
  // the target's responsibility.
  if (Constraint.size() > 1)
    return;

  // NOTE(review): an empty constraint would index out of bounds here —
  // presumably callers never pass one; confirm at the call sites.
  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    // Running sum of constants peeled off ADD/SUB nodes below; folded into
    // whichever base (constant/global/blockaddress) terminates the walk.
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      // A plain constant satisfies every letter except 's' (relocatable).
      if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(Type: MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            x: DAG.getTargetConstant(Val: Offset + ExtVal, DL: SDLoc(C), VT: MVT::i64));
        return;
      }
      // Symbolic bases are valid for every letter except 'n' (pure integer).
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
                                                   VT: GA->getValueType(ResNo: 0),
                                                   offset: Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
          Ops.push_back(x: DAG.getTargetBlockAddress(
              BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
              Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Val: Op)) {
          Ops.push_back(x: Op);
          return;
        }
      }
      // Peel one ADD/SUB layer: accumulate the constant operand into Offset
      // and continue the walk on the non-constant operand.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
          Op = Op.getOperand(i: 1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
          Op = Op.getOperand(i: 0);
        else
          return;
        // SUB negates the peeled constant; unsigned wraparound is intended.
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      // Not a recognized shape: add nothing, signalling an invalid operand.
      return;
    }
    break;
  }
  }
}
5914
/// Hook for targets to append extra SDValue operands when lowering a call to
/// a target intrinsic. The default implementation adds nothing.
void TargetLowering::CollectTargetIntrinsicOperands(
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
5918
5919std::pair<unsigned, const TargetRegisterClass *>
5920TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5921 StringRef Constraint,
5922 MVT VT) const {
5923 if (!Constraint.starts_with(Prefix: "{"))
5924 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5925 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5926
5927 // Remove the braces from around the name.
5928 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5929
5930 std::pair<unsigned, const TargetRegisterClass *> R =
5931 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5932
5933 // Figure out which register class contains this reg.
5934 for (const TargetRegisterClass *RC : RI->regclasses()) {
5935 // If none of the value types for this register class are valid, we
5936 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5937 if (!isLegalRC(TRI: *RI, RC: *RC))
5938 continue;
5939
5940 for (const MCPhysReg &PR : *RC) {
5941 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5942 std::pair<unsigned, const TargetRegisterClass *> S =
5943 std::make_pair(x: PR, y&: RC);
5944
5945 // If this register class has the requested value type, return it,
5946 // otherwise keep searching and return the first class found
5947 // if no other is found which explicitly has the requested type.
5948 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5949 return S;
5950 if (!R.second)
5951 R = S;
5952 }
5953 }
5954 }
5955
5956 return R;
5957}
5958
5959//===----------------------------------------------------------------------===//
5960// Constraint Selection.
5961
5962/// Return true of this is an input operand that is a matching constraint like
5963/// "4".
5964bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5965 assert(!ConstraintCode.empty() && "No known constraint!");
5966 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5967}
5968
5969/// If this is an input matching constraint, this method returns the output
5970/// operand it matches.
5971unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5972 assert(!ConstraintCode.empty() && "No known constraint!");
5973 return atoi(nptr: ConstraintCode.c_str());
5974}
5975
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  // Phase 1: materialize an AsmOperandInfo per parsed constraint and compute
  // each operand's value type.
  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(args: std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput: {
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      EVT VT;
      if (auto *STy = dyn_cast<StructType>(Val: Call.getType())) {
        // Multiple outputs come back as a struct; pick this output's element.
        VT = getAsmOperandValueType(DL, Ty: STy->getElementType(N: ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        VT = getAsmOperandValueType(DL, Ty: Call.getType());
      }
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ++ResNo;
      break;
    }
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
      ++LabelNo;
      // Labels do not consume a call argument; skip the type computation.
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(N: 0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      // Only operands that actually consumed a call argument advance ArgNo.
      ArgNo++;
    }
  }

  // Phase 2: if we have multiple alternative constraints, select the best
  // alternative by maximizing the summed match weights.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
          if (weight == -1) {
            // One invalid operand disqualifies this whole alternative.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(index: bestMAIndex);
    }
  }

  // Phase 3: check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
                                         VT: OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
                                         VT: Input.ConstraintVT);
        const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
                                    OpInfo.ConstraintVT.isFloatingPoint();
        const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
                                   Input.ConstraintVT.isFloatingPoint();
        // Tied operands must agree on int-vs-FP class and register class.
        if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error(reason: "Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
6166
6167/// Return a number indicating our preference for chosing a type of constraint
6168/// over another, for the purpose of sorting them. Immediates are almost always
6169/// preferrable (when they can be emitted). A higher return value means a
6170/// stronger preference for one constraint type relative to another.
6171/// FIXME: We should prefer registers over memory but doing so may lead to
6172/// unrecoverable register exhaustion later.
6173/// https://github.com/llvm/llvm-project/issues/20571
6174static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6175 switch (CT) {
6176 case TargetLowering::C_Immediate:
6177 case TargetLowering::C_Other:
6178 return 4;
6179 case TargetLowering::C_Memory:
6180 case TargetLowering::C_Address:
6181 return 3;
6182 case TargetLowering::C_RegisterClass:
6183 return 2;
6184 case TargetLowering::C_Register:
6185 return 1;
6186 case TargetLowering::C_Unknown:
6187 return 0;
6188 }
6189 llvm_unreachable("Invalid constraint type");
6190}
6191
6192/// Examine constraint type and operand type and determine a weight value.
6193/// This object must already have been set up with the operand type
6194/// and the current alternative constraint selected.
6195TargetLowering::ConstraintWeight
6196 TargetLowering::getMultipleConstraintMatchWeight(
6197 AsmOperandInfo &info, int maIndex) const {
6198 InlineAsm::ConstraintCodeVector *rCodes;
6199 if (maIndex >= (int)info.multipleAlternatives.size())
6200 rCodes = &info.Codes;
6201 else
6202 rCodes = &info.multipleAlternatives[maIndex].Codes;
6203 ConstraintWeight BestWeight = CW_Invalid;
6204
6205 // Loop over the options, keeping track of the most general one.
6206 for (const std::string &rCode : *rCodes) {
6207 ConstraintWeight weight =
6208 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
6209 if (weight > BestWeight)
6210 BestWeight = weight;
6211 }
6212
6213 return BestWeight;
6214}
6215
6216/// Examine constraint type and operand type and determine a weight value.
6217/// This object must already have been set up with the operand type
6218/// and the current alternative constraint selected.
6219TargetLowering::ConstraintWeight
6220 TargetLowering::getSingleConstraintMatchWeight(
6221 AsmOperandInfo &info, const char *constraint) const {
6222 ConstraintWeight weight = CW_Invalid;
6223 Value *CallOperandVal = info.CallOperandVal;
6224 // If we don't have a value, we can't do a match,
6225 // but allow it at the lowest weight.
6226 if (!CallOperandVal)
6227 return CW_Default;
6228 // Look at the constraint type.
6229 switch (*constraint) {
6230 case 'i': // immediate integer.
6231 case 'n': // immediate integer with a known value.
6232 if (isa<ConstantInt>(Val: CallOperandVal))
6233 weight = CW_Constant;
6234 break;
6235 case 's': // non-explicit intregal immediate.
6236 if (isa<GlobalValue>(Val: CallOperandVal))
6237 weight = CW_Constant;
6238 break;
6239 case 'E': // immediate float if host format.
6240 case 'F': // immediate float.
6241 if (isa<ConstantFP>(Val: CallOperandVal))
6242 weight = CW_Constant;
6243 break;
6244 case '<': // memory operand with autodecrement.
6245 case '>': // memory operand with autoincrement.
6246 case 'm': // memory operand.
6247 case 'o': // offsettable memory operand
6248 case 'V': // non-offsettable memory operand
6249 weight = CW_Memory;
6250 break;
6251 case 'r': // general register.
6252 case 'g': // general register, memory operand or immediate integer.
6253 // note: Clang converts "g" to "imr".
6254 if (CallOperandVal->getType()->isIntegerTy())
6255 weight = CW_Register;
6256 break;
6257 case 'X': // any operand.
6258 default:
6259 weight = CW_Default;
6260 break;
6261 }
6262 return weight;
6263}
6264
6265/// If there are multiple different constraints that we could pick for this
6266/// operand (e.g. "imr") try to pick the 'best' one.
6267/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6268/// into seven classes:
6269/// Register -> one specific register
6270/// RegisterClass -> a group of regs
6271/// Memory -> memory
6272/// Address -> a symbolic memory reference
6273/// Immediate -> immediate values
6274/// Other -> magic values (such as "Flag Output Operands")
6275/// Unknown -> something we don't recognize yet and can't handle
6276/// Ideally, we would pick the most specific constraint possible: if we have
6277/// something that fits into a register, we would pick it. The problem here
6278/// is that if we have something that could either be in a register or in
6279/// memory that use of the register could cause selection of *other*
6280/// operands to fail: they might only succeed if we pick memory. Because of
6281/// this the heuristic we use is:
6282///
6283/// 1) If there is an 'other' constraint, and if the operand is valid for
6284/// that constraint, use it. This makes us take advantage of 'i'
6285/// constraints when available.
6286/// 2) Otherwise, pick the most general constraint present. This prefers
6287/// 'm' over 'r', for example.
6288///
6289TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6290 TargetLowering::AsmOperandInfo &OpInfo) const {
6291 ConstraintGroup Ret;
6292
6293 Ret.reserve(N: OpInfo.Codes.size());
6294 for (StringRef Code : OpInfo.Codes) {
6295 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
6296
6297 // Indirect 'other' or 'immediate' constraints are not allowed.
6298 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6299 CType == TargetLowering::C_Register ||
6300 CType == TargetLowering::C_RegisterClass))
6301 continue;
6302
6303 // Things with matching constraints can only be registers, per gcc
6304 // documentation. This mainly affects "g" constraints.
6305 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6306 continue;
6307
6308 Ret.emplace_back(Args&: Code, Args&: CType);
6309 }
6310
6311 llvm::stable_sort(Range&: Ret, C: [](ConstraintPair a, ConstraintPair b) {
6312 return getConstraintPiority(CT: a.second) > getConstraintPiority(CT: b.second);
6313 });
6314
6315 return Ret;
6316}
6317
6318/// If we have an immediate, see if we can lower it. Return true if we can,
6319/// false otherwise.
6320static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6321 SDValue Op, SelectionDAG *DAG,
6322 const TargetLowering &TLI) {
6323
6324 assert((P.second == TargetLowering::C_Other ||
6325 P.second == TargetLowering::C_Immediate) &&
6326 "need immediate or other");
6327
6328 if (!Op.getNode())
6329 return false;
6330
6331 std::vector<SDValue> ResultOps;
6332 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
6333 return !ResultOps.empty();
6334}
6335
6336/// Determines the constraint code and constraint type to use for the specific
6337/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6338void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6339 SDValue Op,
6340 SelectionDAG *DAG) const {
6341 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6342
6343 // Single-letter constraints ('r') are very common.
6344 if (OpInfo.Codes.size() == 1) {
6345 OpInfo.ConstraintCode = OpInfo.Codes[0];
6346 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6347 } else {
6348 ConstraintGroup G = getConstraintPreferences(OpInfo);
6349 if (G.empty())
6350 return;
6351
6352 unsigned BestIdx = 0;
6353 for (const unsigned E = G.size();
6354 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6355 G[BestIdx].second == TargetLowering::C_Immediate);
6356 ++BestIdx) {
6357 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
6358 break;
6359 // If we're out of constraints, just pick the first one.
6360 if (BestIdx + 1 == E) {
6361 BestIdx = 0;
6362 break;
6363 }
6364 }
6365
6366 OpInfo.ConstraintCode = G[BestIdx].first;
6367 OpInfo.ConstraintType = G[BestIdx].second;
6368 }
6369
6370 // 'X' matches anything.
6371 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6372 // Constants are handled elsewhere. For Functions, the type here is the
6373 // type of the result, which is not what we want to look at; leave them
6374 // alone.
6375 Value *v = OpInfo.CallOperandVal;
6376 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6377 return;
6378 }
6379
6380 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6381 OpInfo.ConstraintCode = "i";
6382 return;
6383 }
6384
6385 // Otherwise, try to resolve it to something we know about by looking at
6386 // the actual operand type.
6387 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6388 OpInfo.ConstraintCode = Repl;
6389 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6390 }
6391 }
6392}
6393
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// Because the division is known to be exact, dividing by
/// D = 2^Shift * OddD is equivalent to an arithmetic shift right by Shift
/// followed by a multiply with the modular multiplicative inverse of OddD
/// (which exists because OddD is odd).
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);
  EVT VT = N->getValueType(ResNo: 0);
  EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element worker: derive the shift amount and inverse factor for one
  // constant divisor. Returns false (not foldable) for a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    EVT CT = C->getValueType(ResNo: 0);
    APInt Divisor = C->getAPIntValue();
    // Peel the power-of-two part of the divisor into an arithmetic shift.
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(ShiftAmt: Shift);
      UseSRA = true;
    }
    // Divisor is now odd, so its inverse modulo 2^BitWidth exists.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
    Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-materialize the collected shifts/factors in the same shape as the
  // divisor operand (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;
  // Shift out the power-of-two part first; 'exact' guarantees no bits are
  // lost by the shift.
  if (UseSRA) {
    Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
    Created.push_back(Elt: Res.getNode());
  }

  // Multiply by the inverse of the odd part to finish the division.
  return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
}
6455
/// Given an exact UDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
///
/// Unsigned analogue of BuildExactSDIV: an exact divide by
/// D = 2^Shift * OddD becomes a logical shift right by Shift followed by a
/// multiply with the modular multiplicative inverse of OddD.
/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  EVT VT = N->getValueType(ResNo: 0);
  EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRL = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per-element worker: derive the shift amount and inverse factor for one
  // constant divisor. Returns false (not foldable) for a zero divisor.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;

    EVT CT = C->getValueType(ResNo: 0);
    APInt Divisor = C->getAPIntValue();
    // Peel the power-of-two part of the divisor into a logical shift.
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.lshrInPlace(ShiftAmt: Shift);
      UseSRL = true;
    }
    // Calculate the multiplicative inverse modulo BW.
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
    Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: CT));
    return true;
  };

  SDValue Op1 = N->getOperand(Num: 1);

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-materialize the collected shifts/factors in the same shape as the
  // divisor operand (build_vector, splat, or scalar).
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = N->getOperand(Num: 0);
  // Shift out the power-of-two part first; 'exact' guarantees no bits are
  // lost by the shift.
  if (UseSRL) {
    Res = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Res, N2: Shift, Flags: SDNodeFlags::Exact);
    Created.push_back(Elt: Res.getNode());
  }

  // Multiply by the inverse of the odd part to finish the division.
  return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
}
6518
6519SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6520 SelectionDAG &DAG,
6521 SmallVectorImpl<SDNode *> &Created) const {
6522 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6523 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6524 return SDValue(N, 0); // Lower SDIV as SDIV
6525 return SDValue();
6526}
6527
6528SDValue
6529TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6530 SelectionDAG &DAG,
6531 SmallVectorImpl<SDNode *> &Created) const {
6532 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6533 if (isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6534 return SDValue(N, 0); // Lower SREM as SREM
6535 return SDValue();
6536}
6537
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
SDValue TargetLowering::buildSDIVPow2WithCMov(
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  // k = log2(|divisor|); the divisor is +/-2**k, so its magnitude has exactly
  // Lg2 trailing zero bits.
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(ResNo: 0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(Num: 0);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  // Mask of the low Lg2 bits, i.e. 2**k - 1 (the rounding bias).
  APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right
  // so the arithmetic shift rounds towards zero rather than towards -inf.
  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
  SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
  SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);

  // Report every intermediate node so the caller can add them to the worklist.
  Created.push_back(Elt: Cmp.getNode());
  Created.push_back(Elt: Add.getNode());
  Created.push_back(Elt: CMov.getNode());

  // Divide by pow2.
  SDValue SRA = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov,
                            N2: DAG.getShiftAmountConstant(Val: Lg2, VT, DL));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(Elt: SRA.getNode());
  // neg res, res: implemented as 0 - res.
  return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
}
6580
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// The generated sequence is:
///   q = MULHS(n, magic)          // high half of the widened product
///   q += n * factor              // optional +/- numerator fixup
///   q >>= shift                  // arithmetic post-shift
///   q += (q >> bits-1) & mask    // add 1 when the quotient is negative
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type used for the multiply when VT itself is illegal.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(Op: ISD::MUL, VT: MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // Per-element worker: compute the magic constant, fixup factor, and shift
  // amounts for one constant divisor. Returns false for a zero divisor.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(width: EltBits);
    SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOne() || Divisor.isAllOnes()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.Magic = 0;
      magics.ShiftAmount = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
    Factors.push_back(Elt: DAG.getSignedConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
    Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
    ShiftMasks.push_back(Elt: DAG.getSignedConstant(Val: ShiftMask, DL: dl, VT: SVT));
    return true;
  };

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-materialize the collected constants in the same shape as the divisor
  // operand (build_vector, splat, or scalar).
  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
    Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
    Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
    ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
    Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high EltBits of X*Y (signed), or an empty SDValue when no
  // usable multiply form exists for this target/type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }

    if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
      return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
    if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
      // Result 1 of SMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = VT.changeElementType(
        Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2));
    // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::SDIV, VT) &&
         isOperationCustom(Op: ISD::SDIVREM, VT: VT.getScalarType())) ||
        isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
      X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Elt: Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
  Created.push_back(Elt: Factor.getNode());
  Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
  Created.push_back(Elt: Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
  Created.push_back(Elt: Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
  SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
  Created.push_back(Elt: T.getNode());
  T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
  Created.push_back(Elt: T.getNode());
  return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
}
6748
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// The generated sequence is:
///   q  = (n >> preshift)         // optional pre-shift
///   q  = MULHU(q, magic)         // high half of the widened product
///   q += (n - q) >> 1            // optional "NPQ" fixup when magic overflows
///   q >>= postshift              // logical post-shift
///   select(d == 1, n, q)         // divide-by-one needs the raw numerator
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  bool IsAfterLegalTypes,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  // Wider type used for the multiply when VT itself is illegal.
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(Op: ISD::MUL, VT: MulVT))
      return SDValue();
  }

  // If the udiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactUDIV(TLI: *this, N, dl, DAG, Created);

  SDValue N0 = N->getOperand(Num: 0);
  SDValue N1 = N->getOperand(Num: 1);

  // Try to use leading zeros of the dividend to reduce the multiplier and
  // avoid expensive fixups.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();

  // If we're after type legalization and SVT is not legal, use the
  // promoted type for creating constants to avoid creating nodes with
  // illegal types.
  if (IsAfterLegalTypes && VT.isVector()) {
    SVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: SVT);
    if (SVT.bitsLT(VT: VT.getScalarType()))
      return SDValue();
    ShSVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT: ShSVT);
    if (ShSVT.bitsLT(VT: ShVT.getScalarType()))
      return SDValue();
  }
  const unsigned SVTBits = SVT.getSizeInBits();

  bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // Per-element worker: compute the magic constant and pre/post shifts for
  // one constant divisor. Returns false for a zero divisor.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    // Truncate the divisor to the target scalar type in case it was promoted
    // during type legalization.
    APInt Divisor = C->getAPIntValue().trunc(width: EltBits);

    SDValue PreShift, MagicFactor, NPQFactor, PostShift;

    // Magic algorithm doesn't work for division by 1. We need to emit a select
    // at the end.
    if (Divisor.isOne()) {
      // These lanes are never consumed (masked out by the final select), so
      // undef placeholders are fine.
      PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
      MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
    } else {
      UnsignedDivisionByConstantInfo magics =
          UnsignedDivisionByConstantInfo::get(
              D: Divisor, LeadingZeros: std::min(a: KnownLeadingZeros, b: Divisor.countl_zero()));

      MagicFactor = DAG.getConstant(Val: magics.Magic.zext(width: SVTBits), DL: dl, VT: SVT);

      assert(magics.PreShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert(magics.PostShift < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      assert((!magics.IsAdd || magics.PreShift == 0) &&
             "Unexpected pre-shift");
      PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
      PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
      // For the vector NPQ path the factor is 2^(bits-1) so that MULHU acts
      // as a logical shift-right by one, or zero for non-NPQ lanes.
      NPQFactor = DAG.getConstant(
          Val: magics.IsAdd ? APInt::getOneBitSet(numBits: SVTBits, BitNo: EltBits - 1)
                        : APInt::getZero(numBits: SVTBits),
          DL: dl, VT: SVT);
      UseNPQ |= magics.IsAdd;
      UsePreShift |= magics.PreShift != 0;
      UsePostShift |= magics.PostShift != 0;
    }

    PreShifts.push_back(Elt: PreShift);
    MagicFactors.push_back(Elt: MagicFactor);
    NPQFactors.push_back(Elt: NPQFactor);
    PostShifts.push_back(Elt: PostShift);
    return true;
  };

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern, /*AllowUndefs=*/false,
                                /*AllowTruncation=*/true))
    return SDValue();

  // Re-materialize the collected constants in the same shape as the divisor
  // operand (build_vector, splat, or scalar). NPQFactor is only needed on
  // the vector paths; the scalar NPQ path uses a plain SRL-by-1 instead.
  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
    MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
    PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
    PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  if (UsePreShift) {
    Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
    Created.push_back(Elt: Q.getNode());
  }

  // FIXME: We should support doing a MUL in a wider type.
  // Returns the high EltBits of X*Y (unsigned), or an empty SDValue when no
  // usable multiply form exists for this target/type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }

    if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
      return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
    if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
      // Result 1 of UMUL_LOHI is the high half.
      return SDValue(LoHi.getNode(), 1);
    }
    // If type twice as wide legal, widen and use a mul plus a shift.
    unsigned Size = VT.getScalarSizeInBits();
    EVT WideVT = VT.changeElementType(
        Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2));
    // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
    // custom lowered. This is very expensive so avoid it at all costs for
    // constant divisors.
    if ((!IsAfterLegalTypes && isOperationExpand(Op: ISD::UDIV, VT) &&
         isOperationCustom(Op: ISD::UDIVREM, VT: VT.getScalarType())) ||
        isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
      X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
      Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
      Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
      Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
                      N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Elt: Q.getNode());

  // "NPQ" fixup: when the magic constant overflowed, the true quotient is
  // q + ((n - q) >> 1) >> (postshift - 1), folded here as in Hacker's
  // Delight fig. 10-2.
  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
    Created.push_back(Elt: NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));

    Created.push_back(Elt: NPQ.getNode());

    Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
    Created.push_back(Elt: Q.getNode());
  }

  if (UsePostShift) {
    Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
    Created.push_back(Elt: Q.getNode());
  }

  EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

  // The magic algorithm is undefined for a divisor of 1; select the raw
  // numerator for those lanes.
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
  SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
  return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
}
6962
6963/// If all values in Values that *don't* match the predicate are same 'splat'
6964/// value, then replace all values with that splat value.
6965/// Else, if AlternativeReplacement was provided, then replace all values that
6966/// do match predicate with AlternativeReplacement value.
6967static void
6968turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6969 std::function<bool(SDValue)> Predicate,
6970 SDValue AlternativeReplacement = SDValue()) {
6971 SDValue Replacement;
6972 // Is there a value for which the Predicate does *NOT* match? What is it?
6973 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6974 if (SplatValue != Values.end()) {
6975 // Does Values consist only of SplatValue's and values matching Predicate?
6976 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6977 return Value == *SplatValue || Predicate(Value);
6978 })) // Then we shall replace values matching predicate with SplatValue.
6979 Replacement = *SplatValue;
6980 }
6981 if (!Replacement) {
6982 // Oops, we did not find the "baseline" splat value.
6983 if (!AlternativeReplacement)
6984 return; // Nothing to do.
6985 // Let's replace with provided value then.
6986 Replacement = AlternativeReplacement;
6987 }
6988 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6989}
6990
6991/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6992/// where the divisor is constant and the comparison target is zero,
6993/// return a DAG expression that will generate the same comparison result
6994/// using only multiplications, additions and shifts/rotations.
6995/// Ref: "Hacker's Delight" 10-17.
6996SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6997 SDValue CompTargetNode,
6998 ISD::CondCode Cond,
6999 DAGCombinerInfo &DCI,
7000 const SDLoc &DL) const {
7001 SmallVector<SDNode *, 5> Built;
7002 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7003 DCI, DL, Created&: Built)) {
7004 for (SDNode *N : Built)
7005 DCI.AddToWorklist(N);
7006 return Folded;
7007 }
7008
7009 return SDValue();
7010}
7011
/// Worker for buildUREMEqFold: attempt to rewrite
///   (seteq/setne (urem N, D), CompTarget)
/// into a mul+rotate+compare sequence, recording every newly created node in
/// \p Created. Returns an empty SDValue when the fold does not apply or is
/// judged unprofitable; in that case some already-created (dead) nodes may
/// remain in \p Created.
SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return SDValue();

  // Per-lane analysis results, accumulated by BuildUREMPattern below.
  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts;

  // Per-lane worker: derives the P/K/Q constants for one (divisor, compare
  // target) pair and appends them to PAmts/KAmts/QAmts.
  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isZero())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isZero();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(RHS: Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isZero())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(shiftAmt: K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(RHS: R))
      Q -= 1;

    // K is emitted as a ShSVT constant below, so it must be representable.
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
    KAmts.push_back(
        Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                                 /*implicitTrunc=*/true),
                          DL, VT: ShSVT));
    QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(i: 0);
  SDValue D = REMNode.getOperand(i: 1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants to match the shape of the divisor
  // operand (build_vector / splat_vector / scalar).
  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
    KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
    QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTORs");
    PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
    KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
  } else {
    // Scalar divisor.
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // When comparing with a non-zero target, fold it into the LHS first:
  // (urem N, D) == C2  <-->  (urem (sub N, C2), D') pattern via N - C2.
  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
  Created.push_back(Elt: Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
                   Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(Elt: NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
  Created.push_back(Elt: TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETEQ ? false : true,
                                              DL, VT: SETCCVT, OpVT: SETCCVT);
    return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
                       N2: Replacement, N3: NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the note above about VSELECT and illegal types.
  if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
    return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
                       N2: TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
7232
7233/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7234/// where the divisor is constant and the comparison target is zero,
7235/// return a DAG expression that will generate the same comparison result
7236/// using only multiplications, additions and shifts/rotations.
7237/// Ref: "Hacker's Delight" 10-17.
7238SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7239 SDValue CompTargetNode,
7240 ISD::CondCode Cond,
7241 DAGCombinerInfo &DCI,
7242 const SDLoc &DL) const {
7243 SmallVector<SDNode *, 7> Built;
7244 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7245 DCI, DL, Created&: Built)) {
7246 assert(Built.size() <= 7 && "Max size prediction failed.");
7247 for (SDNode *N : Built)
7248 DCI.AddToWorklist(N);
7249 return Folded;
7250 }
7251
7252 return SDValue();
7253}
7254
/// Worker for buildSREMEqFold: attempt to rewrite
///   (seteq/setne (srem N, D), 0)
/// into a mul+add+rotate+compare sequence, recording every newly created node
/// in \p Created. Returns an empty SDValue when the fold does not apply or is
/// judged unprofitable; in that case some already-created (dead) nodes may
/// remain in \p Created.
SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  //
  // When D is a power of two (and thus D0 is 1), the normal
  // formula for A and Q don't apply, because the derivation
  // depends on D not dividing 2^(W-1), and thus theorem ZRS
  // does not apply. This specifically fails when N = INT_MIN.
  //
  // Instead, for power-of-two D, we use:
  // - A = 2^(W-1)
  //   |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
  // - Q = 2^(W-K) - 1
  //   |-> Test that the top K bits are zero after rotation
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
  if (!CompTarget || !CompTarget->isZero())
    return SDValue();

  // Per-lane analysis results, accumulated by BuildSREMPattern below.
  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  // Per-lane worker: derives the P/A/K/Q constants for one divisor and
  // appends them to PAmts/AAmts/KAmts/QAmts.
  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isZero())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`

    // NOTE: negating INT_MIN leaves it unchanged, hence the special-casing
    // below.
    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOne();
    AllDivisorsAreOnes &= D.isOne();

    // Decompose D into D0 * 2^K
    unsigned K = D.countr_zero();
    assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(shiftAmt: K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOne();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.multiplicativeInverse();
    assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
    A.clearLowBits(loBits: K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));

    // A and K are emitted as SVT/ShSVT constants below, so they must be
    // representable.
    assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If D was a power of two, apply the alternate constant derivation.
    if (D0.isOne()) {
      // A = 2^(W-1)
      A = APInt::getSignedMinValue(numBits: W);
      // - Q = 2^(W-K) - 1
      Q = APInt::getAllOnes(numBits: W - K).zext(width: W);
    }

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0 <--> true <--> x u<= -1
      Q = -1;
    }

    PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
    AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
    KAmts.push_back(
        Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
                                 /*implicitTrunc=*/true),
                          DL, VT: ShSVT));
    QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(i: 0);
  SDValue D = REMNode.getOperand(i: 1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
    return SDValue();

  // If this is a srem by one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  // Materialize the per-lane constants to match the shape of the divisor
  // operand (build_vector / splat_vector / scalar).
  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
    AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
    KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
    QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
    KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
  Created.push_back(Elt: Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
    Created.push_back(Elt: Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
                   Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
      !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
      !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
      !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
    return SDValue();

  Created.push_back(Elt: Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
  Created.push_back(Elt: DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
  Created.push_back(Elt: Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
  Created.push_back(Elt: MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
                                N2: MaskedIsZero, N3: Fold);

  return Blended;
}
7526
7527SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7528 const DenormalMode &Mode,
7529 SDNodeFlags Flags) const {
7530 SDLoc DL(Op);
7531 EVT VT = Op.getValueType();
7532 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7533 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7534
7535 // This is specifically a check for the handling of denormal inputs, not the
7536 // result.
7537 if (Mode.Input == DenormalMode::PreserveSign ||
7538 Mode.Input == DenormalMode::PositiveZero) {
7539 // Test = X == 0.0
7540 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ, /*Chain=*/{},
7541 /*Signaling=*/IsSignaling: false, Flags);
7542 }
7543
7544 // Testing it with denormal inputs to avoid wrong estimate.
7545 //
7546 // Test = fabs(X) < SmallestNormal
7547 const fltSemantics &FltSem = VT.getFltSemantics();
7548 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7549 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7550 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op, Flags);
7551 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT, /*Chain=*/{},
7552 /*Signaling=*/IsSignaling: false, Flags);
7553}
7554
7555SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7556 bool LegalOps, bool OptForSize,
7557 NegatibleCost &Cost,
7558 unsigned Depth) const {
7559 // fneg is removable even if it has multiple uses.
7560 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7561 Cost = NegatibleCost::Cheaper;
7562 return Op.getOperand(i: 0);
7563 }
7564
7565 // Don't recurse exponentially.
7566 if (Depth > SelectionDAG::MaxRecursionDepth)
7567 return SDValue();
7568
7569 // Pre-increment recursion depth for use in recursive calls.
7570 ++Depth;
7571 const SDNodeFlags Flags = Op->getFlags();
7572 EVT VT = Op.getValueType();
7573 unsigned Opcode = Op.getOpcode();
7574
7575 // Don't allow anything with multiple uses unless we know it is free.
7576 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7577 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7578 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7579 if (!IsFreeExtend)
7580 return SDValue();
7581 }
7582
7583 auto RemoveDeadNode = [&](SDValue N) {
7584 if (N && N.getNode()->use_empty())
7585 DAG.RemoveDeadNode(N: N.getNode());
7586 };
7587
7588 SDLoc DL(Op);
7589
7590 // Because getNegatedExpression can delete nodes we need a handle to keep
7591 // temporary nodes alive in case the recursion manages to create an identical
7592 // node.
7593 std::list<HandleSDNode> Handles;
7594
7595 switch (Opcode) {
7596 case ISD::ConstantFP: {
7597 // Don't invert constant FP values after legalization unless the target says
7598 // the negated constant is legal.
7599 bool IsOpLegal =
7600 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7601 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7602 ForCodeSize: OptForSize);
7603
7604 if (LegalOps && !IsOpLegal)
7605 break;
7606
7607 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7608 V.changeSign();
7609 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7610
7611 // If we already have the use of the negated floating constant, it is free
7612 // to negate it even it has multiple uses.
7613 if (!Op.hasOneUse() && CFP.use_empty())
7614 break;
7615 Cost = NegatibleCost::Neutral;
7616 return CFP;
7617 }
7618 case ISD::SPLAT_VECTOR: {
7619 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7620 SDValue X = Op.getOperand(i: 0);
7621 if (!isOperationLegal(Op: ISD::SPLAT_VECTOR, VT))
7622 break;
7623
7624 SDValue NegX = getCheaperNegatedExpression(Op: X, DAG, LegalOps, OptForSize);
7625 if (!NegX)
7626 break;
7627 Cost = NegatibleCost::Cheaper;
7628 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL, VT, Operand: NegX);
7629 }
7630 case ISD::BUILD_VECTOR: {
7631 // Only permit BUILD_VECTOR of constants.
7632 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7633 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7634 }))
7635 break;
7636
7637 bool IsOpLegal =
7638 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7639 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7640 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7641 return N.isUndef() ||
7642 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7643 ForCodeSize: OptForSize);
7644 });
7645
7646 if (LegalOps && !IsOpLegal)
7647 break;
7648
7649 SmallVector<SDValue, 4> Ops;
7650 for (SDValue C : Op->op_values()) {
7651 if (C.isUndef()) {
7652 Ops.push_back(Elt: C);
7653 continue;
7654 }
7655 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7656 V.changeSign();
7657 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7658 }
7659 Cost = NegatibleCost::Neutral;
7660 return DAG.getBuildVector(VT, DL, Ops);
7661 }
7662 case ISD::FADD: {
7663 if (!Flags.hasNoSignedZeros())
7664 break;
7665
7666 // After operation legalization, it might not be legal to create new FSUBs.
7667 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7668 break;
7669 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7670
7671 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7672 NegatibleCost CostX = NegatibleCost::Expensive;
7673 SDValue NegX =
7674 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7675 // Prevent this node from being deleted by the next call.
7676 if (NegX)
7677 Handles.emplace_back(args&: NegX);
7678
7679 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7680 NegatibleCost CostY = NegatibleCost::Expensive;
7681 SDValue NegY =
7682 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7683
7684 // We're done with the handles.
7685 Handles.clear();
7686
7687 // Negate the X if its cost is less or equal than Y.
7688 if (NegX && (CostX <= CostY)) {
7689 Cost = CostX;
7690 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7691 if (NegY != N)
7692 RemoveDeadNode(NegY);
7693 return N;
7694 }
7695
7696 // Negate the Y if it is not expensive.
7697 if (NegY) {
7698 Cost = CostY;
7699 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7700 if (NegX != N)
7701 RemoveDeadNode(NegX);
7702 return N;
7703 }
7704 break;
7705 }
7706 case ISD::FSUB: {
7707 // We can't turn -(A-B) into B-A when we honor signed zeros.
7708 if (!Flags.hasNoSignedZeros())
7709 break;
7710
7711 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7712 // fold (fneg (fsub 0, Y)) -> Y
7713 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7714 if (C->isZero()) {
7715 Cost = NegatibleCost::Cheaper;
7716 return Y;
7717 }
7718
7719 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7720 Cost = NegatibleCost::Neutral;
7721 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7722 }
7723 case ISD::FMUL:
7724 case ISD::FDIV: {
7725 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7726
7727 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7728 NegatibleCost CostX = NegatibleCost::Expensive;
7729 SDValue NegX =
7730 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7731 // Prevent this node from being deleted by the next call.
7732 if (NegX)
7733 Handles.emplace_back(args&: NegX);
7734
7735 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7736 NegatibleCost CostY = NegatibleCost::Expensive;
7737 SDValue NegY =
7738 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7739
7740 // We're done with the handles.
7741 Handles.clear();
7742
7743 // Negate the X if its cost is less or equal than Y.
7744 if (NegX && (CostX <= CostY)) {
7745 Cost = CostX;
7746 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7747 if (NegY != N)
7748 RemoveDeadNode(NegY);
7749 return N;
7750 }
7751
7752 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7753 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7754 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7755 break;
7756
7757 // Negate the Y if it is not expensive.
7758 if (NegY) {
7759 Cost = CostY;
7760 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7761 if (NegX != N)
7762 RemoveDeadNode(NegX);
7763 return N;
7764 }
7765 break;
7766 }
7767 case ISD::FMA:
7768 case ISD::FMULADD:
7769 case ISD::FMAD: {
7770 if (!Flags.hasNoSignedZeros())
7771 break;
7772
7773 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7774 NegatibleCost CostZ = NegatibleCost::Expensive;
7775 SDValue NegZ =
7776 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
7777 // Give up if fail to negate the Z.
7778 if (!NegZ)
7779 break;
7780
7781 // Prevent this node from being deleted by the next two calls.
7782 Handles.emplace_back(args&: NegZ);
7783
7784 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7785 NegatibleCost CostX = NegatibleCost::Expensive;
7786 SDValue NegX =
7787 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7788 // Prevent this node from being deleted by the next call.
7789 if (NegX)
7790 Handles.emplace_back(args&: NegX);
7791
7792 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7793 NegatibleCost CostY = NegatibleCost::Expensive;
7794 SDValue NegY =
7795 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7796
7797 // We're done with the handles.
7798 Handles.clear();
7799
7800 // Negate the X if its cost is less or equal than Y.
7801 if (NegX && (CostX <= CostY)) {
7802 Cost = std::min(a: CostX, b: CostZ);
7803 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7804 if (NegY != N)
7805 RemoveDeadNode(NegY);
7806 return N;
7807 }
7808
7809 // Negate the Y if it is not expensive.
7810 if (NegY) {
7811 Cost = std::min(a: CostY, b: CostZ);
7812 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7813 if (NegX != N)
7814 RemoveDeadNode(NegX);
7815 return N;
7816 }
7817 break;
7818 }
7819
7820 case ISD::FP_EXTEND:
7821 case ISD::FSIN:
7822 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7823 OptForSize, Cost, Depth))
7824 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7825 break;
7826 case ISD::FP_ROUND:
7827 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7828 OptForSize, Cost, Depth))
7829 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7830 break;
7831 case ISD::SELECT:
7832 case ISD::VSELECT: {
7833 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7834 // iff at least one cost is cheaper and the other is neutral/cheaper
7835 SDValue LHS = Op.getOperand(i: 1);
7836 NegatibleCost CostLHS = NegatibleCost::Expensive;
7837 SDValue NegLHS =
7838 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7839 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7840 RemoveDeadNode(NegLHS);
7841 break;
7842 }
7843
7844 // Prevent this node from being deleted by the next call.
7845 Handles.emplace_back(args&: NegLHS);
7846
7847 SDValue RHS = Op.getOperand(i: 2);
7848 NegatibleCost CostRHS = NegatibleCost::Expensive;
7849 SDValue NegRHS =
7850 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7851
7852 // We're done with the handles.
7853 Handles.clear();
7854
7855 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7856 (CostLHS != NegatibleCost::Cheaper &&
7857 CostRHS != NegatibleCost::Cheaper)) {
7858 RemoveDeadNode(NegLHS);
7859 RemoveDeadNode(NegRHS);
7860 break;
7861 }
7862
7863 Cost = std::min(a: CostLHS, b: CostRHS);
7864 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7865 }
7866 }
7867
7868 return SDValue();
7869}
7870
7871//===----------------------------------------------------------------------===//
7872// Legalization Utilities
7873//===----------------------------------------------------------------------===//
7874
/// Expand a wide multiply (MUL, UMUL_LOHI or SMUL_LOHI) of type VT into
/// operations on the half-width type HiLoVT.
///
/// \param Opcode  ISD::MUL, ISD::UMUL_LOHI or ISD::SMUL_LOHI.
/// \param LHS,RHS The full-width operands.
/// \param Result  Receives the expansion: Lo then Hi of the (first) result,
///                and for the *_LOHI opcodes also Lo/Hi of the second result.
/// \param Kind    Whether half-width MULH*/[SU]MUL_LOHI may be used
///                unconditionally or only when legal/custom.
/// \param LL,LH,RL,RH Optional pre-split halves of LHS/RHS; either all four
///                are provided or none are.
/// \returns true on success.
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Which half-width multiply forms we may emit (forced on by
  // MulExpansionKind::Always, otherwise gated on legality).
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  // Emit a half-width Lo/Hi multiply of L and R: prefer the single
  // [SU]MUL_LOHI node, fall back to a separate MUL + MULH[SU] pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
      Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
      Hi = Lo.getValue(R: 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
      Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Split the operands' low halves by truncation if they weren't provided.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
    LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
    RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
  if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
      DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
    // The inputs are both zero-extended: a single unsigned half-width
    // multiply yields the entire product, and the second result (for the
    // *_LOHI opcodes) is zero.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Elt: Lo);
      Result.push_back(Elt: Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
        Result.push_back(Elt: Zero);
        Result.push_back(Elt: Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
      DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
    // The input values are both sign-extended: a single signed half-width
    // multiply suffices for a plain MUL.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Elt: Lo);
      Result.push_back(Elt: Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);

  // Split the operands' high halves by shift+truncate if not provided.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
    LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
    LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
    RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
    RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
  }

  if (!LH.getNode())
    return false;

  // Low partial product: LL * RL.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Elt: Lo);

  if (Opcode == ISD::MUL) {
    // For a plain MUL only the low OuterBitSize bits matter, so the cross
    // terms LL*RH and LH*RL can be folded into Hi without tracking carries.
    RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
    LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
    Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
    Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
    Result.push_back(Elt: Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
    Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
    Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
  };

  SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
  EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

  // Prefer the glued ADDC/ADDE carry chain when the target supports it,
  // otherwise use UADDO_CARRY with an explicit boolean carry value.
  bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(Op: ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(Opcode: ISD::ADDC, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::Glue), N1: Next,
                       N2: Merge(Lo, Hi));
  else
    Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
                       N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));

  SDValue Carry = Next.getValue(R: 1);
  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);

  // High partial product: LH * RH (signed for SMUL_LOHI).
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  // Fold the carry of the previous addition into the high part.
  if (UseGlue)
    Hi = DAG.getNode(Opcode: ISD::ADDE, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: MVT::Glue), N1: Hi, N2: Zero,
                     N3: Carry);
  else
    Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
                     N2: Zero, N3: Carry);

  Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Signed correction: if a high half is negative (SETLT vs zero), subtract
    // the zero-extended low half of the other operand from the running high
    // result.
    SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
                                  N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
    Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);

    NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
                          N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
    Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
  }

  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
  Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
  return true;
}
8049
8050bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8051 SelectionDAG &DAG, MulExpansionKind Kind,
8052 SDValue LL, SDValue LH, SDValue RL,
8053 SDValue RH) const {
8054 SmallVector<SDValue, 2> Result;
8055 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
8056 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
8057 DAG, Kind, LL, LH, RL, RH);
8058 if (Ok) {
8059 assert(Result.size() == 2);
8060 Lo = Result[0];
8061 Hi = Result[1];
8062 }
8063 return Ok;
8064}
8065
8066// Optimize unsigned division or remainder by constants for types twice as large
8067// as a legal VT.
8068//
8069// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8070// can be computed
8071// as:
8072// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
8073// Remainder = Sum % Constant
8074// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8075//
// For division, we can compute the remainder using the algorithm described
// above, subtract it from the dividend to get an exact multiple of Constant.
// Then multiply that exact multiple by the multiplicative inverse modulo
// (1 << BitWidth) to get the quotient.
8080
8081// If Constant is even, we can shift right the dividend and the divisor by the
8082// number of trailing zeros in Constant before applying the remainder algorithm.
8083// If we're after the quotient, we can subtract this value from the shifted
8084// dividend and multiply by the multiplicative inverse of the shifted divisor.
8085// If we want the remainder, we shift the value left by the number of trailing
8086// zeros and add the bits that were shifted out of the dividend.
/// Expand an unsigned divide/remainder by a constant (UDIV/UREM/UDIVREM) of a
/// type twice as wide as the legal type HiLoVT, using the "remainder by
/// summing digits" technique described in the comment above.  LL/LH optionally
/// supply the pre-split halves of the dividend; either both are set or
/// neither is.  On success the halves of the requested result(s) are appended
/// to \p Result (quotient Lo/Hi first for division, then remainder Lo/Hi).
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
                                            SmallVectorImpl<SDValue> &Result,
                                            EVT HiLoVT, SelectionDAG &DAG,
                                            SDValue LL, SDValue LH) const {
  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(ResNo: 0);

  // TODO: Support signed division/remainder.
  if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
    return false;
  assert(
      (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
      "Unexpected opcode");

  // Only constant divisors are handled.
  auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
  if (!CN)
    return false;

  APInt Divisor = CN->getAPIntValue();
  unsigned BitWidth = Divisor.getBitWidth();
  unsigned HBitWidth = BitWidth / 2;
  assert(VT.getScalarSizeInBits() == BitWidth &&
         HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");

  // Divisor needs to be less than (1 << HBitWidth).
  APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
  if (Divisor.uge(RHS: HalfMaxPlus1))
    return false;

  // We depend on the UREM by constant optimization in DAGCombiner that requires
  // high multiply.
  if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
      !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
    return false;

  // Don't expand if optimizing for size.
  if (DAG.shouldOptForSize())
    return false;

  // Early out for 0 or 1 divisors.
  if (Divisor.ule(RHS: 1))
    return false;

  // If the divisor is even, shift it until it becomes odd.
  unsigned TrailingZeros = 0;
  if (!Divisor[0]) {
    TrailingZeros = Divisor.countr_zero();
    Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
  }

  SDLoc dl(N);
  SDValue Sum;
  SDValue PartialRem;

  // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
  // then add in the carry.
  // TODO: If we can't split it in half, we might be able to split into 3 or
  // more pieces using a smaller bit width.
  if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
    assert(!LL == !LH && "Expected both input halves or no input halves!");
    if (!LL)
      std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);

    // Shift the input by the number of TrailingZeros in the divisor. The
    // shifted out bits will be added to the remainder later.
    if (TrailingZeros) {
      // Save the shifted off bits if we need the remainder.
      if (Opcode != ISD::UDIV) {
        APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
        PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
                                 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
      }

      // Shift the dividend right across both halves: the low half receives
      // the bits shifted in from the high half.
      LL = DAG.getNode(
          Opcode: ISD::OR, DL: dl, VT: HiLoVT,
          N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
                      N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
          N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
                      N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
                                                VT: HiLoVT, DL: dl)));
      LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
                       N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
    }

    // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
    EVT SetCCType =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
    if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
      SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
      Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
      Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
                        N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
    } else {
      Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
      // Unsigned overflow happened iff the sum wrapped below either addend.
      SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
      // If the boolean for the target is 0 or 1, we can add the setcc result
      // directly.
      if (getBooleanContents(Type: HiLoVT) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
        Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
      else
        Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
                              RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
      Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
    }
  }

  // If we didn't find a sum, we can't do the expansion.
  if (!Sum)
    return false;

  // Perform a HiLoVT urem on the Sum using truncated divisor.
  SDValue RemL =
      DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
                  N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
  SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);

  if (Opcode != ISD::UREM) {
    // Subtract the remainder from the shifted dividend.
    SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
    SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);

    Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);

    // Multiply by the multiplicative inverse of the divisor modulo
    // (1 << BitWidth).
    APInt MulFactor = Divisor.multiplicativeInverse();

    SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
                                   N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));

    // Split the quotient into low and high parts.
    SDValue QuotL, QuotH;
    std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
    Result.push_back(Elt: QuotL);
    Result.push_back(Elt: QuotH);
  }

  if (Opcode != ISD::UDIV) {
    // If we shifted the input, shift the remainder left and add the bits we
    // shifted off the input.
    if (TrailingZeros) {
      RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
                         N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
      RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
    }
    Result.push_back(Elt: RemL);
    // High half of the remainder is zero since the divisor fits in HiLoVT.
    Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
  }

  return true;
}
8239
8240// Check that (every element of) Z is undef or not an exact multiple of BW.
8241static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8242 return ISD::matchUnaryPredicate(
8243 Op: Z,
8244 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
8245 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8246}
8247
/// Expand ISD::VP_FSHL/VP_FSHR into vector-predicated shift/and/xor/sub/or
/// nodes, threading the lane mask and explicit vector length through every
/// emitted operation.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  SDValue X = Node->getOperand(Num: 0);
  SDValue Y = Node->getOperand(Num: 1);
  SDValue Z = Node->getOperand(Num: 2);
  // VP operands: lane mask and explicit vector length.
  SDValue Mask = Node->getOperand(Num: 3);
  SDValue VL = Node->getOperand(Num: 4);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
    ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
    InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
    ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
                      N4: VL);
    ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
                      N4: VL);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The split shift-by-one keeps every shift amount in [0, BW) even when
    // Z % BW == 0.
    SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
    if (isPowerOf2_32(Value: BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
                                 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
      InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
    } else {
      SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
      ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
      InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
    }

    SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
      SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
      ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
    } else {
      SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
      ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
      ShY = DAG.getNode(Opcode: ISD::VP_SRL, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
    }
  }
  return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
}
8304
/// Expand ISD::FSHL/FSHR (or their VP forms) into shifts and an OR, or into
/// the reverse-direction funnel shift when that is better supported.
SDValue TargetLowering::expandFunnelShift(SDNode *Node,
                                          SelectionDAG &DAG) const {
  // Vector-predicated funnel shifts have their own expansion.
  if (Node->isVPOpcode())
    return expandVPFunnelShift(Node, DAG);

  EVT VT = Node->getValueType(ResNo: 0);

  // For vectors, give up unless every piece of the expansion is available.
  if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
    return SDValue();

  SDValue X = Node->getOperand(Num: 0);
  SDValue Y = Node->getOperand(Num: 1);
  SDValue Z = Node->getOperand(Num: 2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
      Z = DAG.getNegative(Val: Z, DL, VT: ShVT);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      // Pre-shifting by one compensates for ~Z being (BW - 1 - Z) rather
      // than -Z.
      SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
        X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
      } else {
        X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
        Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
      }
      Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
    }
    return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
    SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
    ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
    InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
    ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    // The split shift-by-one keeps every shift amount in [0, BW) even when
    // Z % BW == 0.
    SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
    if (isPowerOf2_32(Value: BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
      ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
      InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
    }

    SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
      SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
      ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
      ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
      ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
    }
  }
  return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
}
8391
8392// TODO: Merge with expandFunnelShift.
/// Expand ISD::ROTL/ROTR into shifts and an OR, or into the
/// reverse-direction rotate when that is better supported.
/// \p AllowVectorOps permits the shift-based expansion for vectors even when
/// the component operations are not individually legal.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                                  SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);

  // If a rotate in the other direction is more supported, use it:
  // rotl(x, c) == rotr(x, -c) when the bit width is a power of two.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
    SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
    return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
  }

  // For vectors, only expand when the caller allows it or every piece of the
  // expansion is available.
  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
       !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
       !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
    return SDValue();

  // ShOpc shifts in the rotate direction, HsOpc recovers the wrapped bits.
  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(Value: EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
    SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
    SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
    ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
    SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
    HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    // The split shift-by-one keeps the second shift amount in [0, w) even
    // when c % w == 0.
    SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
    SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
    ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
    SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
    SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
    HsVal =
        DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
  }
  return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
}
8447
/// Expand carry-less multiplication (ISD::CLMUL / CLMULR / CLMULH) for targets
/// without a native instruction.
///
/// CLMUL produces the low BW bits of the 2*BW-bit carry-less product, CLMULH
/// the bits shifted right by BW, and CLMULR the bits shifted right by BW-1
/// (see the shift amounts chosen below).
///
/// \param Node the CLMUL/CLMULR/CLMULH node to expand.
/// \returns the expanded replacement value.
SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue X = Node->getOperand(Num: 0);
  SDValue Y = Node->getOperand(Num: 1);
  unsigned BW = VT.getScalarSizeInBits();
  unsigned Opcode = Node->getOpcode();

  // Scalarize if the vector multiplication is unlikely to work.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
    return DAG.UnrollVectorOp(N: Node);

  switch (Opcode) {
  case ISD::CLMUL: {
    // NOTE: If you change this expansion, please update the cost model
    // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
    // Intrinsic::clmul.

    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

    // Accumulate one partial product per bit of Y:
    //   Res ^= bit I of Y is set ? (X << I) : 0
    SDValue Res = DAG.getConstant(Val: 0, DL, VT);
    for (unsigned I = 0; I < BW; ++I) {
      SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: I, VT, DL);
      SDValue Mask = DAG.getConstant(Val: APInt::getOneBitSet(numBits: BW, BitNo: I), DL, VT);
      SDValue YMasked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Y, N2: Mask);

      // For targets with a fast bit test instruction (e.g., x86 BT) or without
      // multiply, use a shift-based expansion to avoid expensive MUL
      // instructions.
      SDValue Part;
      if (!hasBitTest(X: Y, Y: ShiftAmt) &&
          isOperationLegalOrCustom(
              Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
        // X * (Y & (1 << I)) is either 0 or X << I, so a single MUL forms the
        // partial product without a branch or select.
        Part = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: X, N2: YMasked);
      } else {
        // Canonical bit test: (Y & (1 << I)) != 0
        SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
        SDValue Cond = DAG.getSetCC(DL, VT: SetCCVT, LHS: YMasked, RHS: Zero, Cond: ISD::SETEQ);
        SDValue XShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShiftAmt);
        Part = DAG.getSelect(DL, VT, Cond, LHS: Zero, RHS: XShifted);
      }
      Res = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Res, N2: Part);
    }
    return Res;
  }
  case ISD::CLMULR:
    // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR:
    // take the top bit of the low half and all but the top bit of the high
    // half of the full 2*BW-bit product.
    if (isOperationLegalOrCustom(Op: ISD::CLMUL, VT) &&
        isOperationLegalOrCustom(Op: ISD::CLMULH, VT)) {
      SDValue Lo = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: X, N2: Y);
      SDValue Hi = DAG.getNode(Opcode: ISD::CLMULH, DL, VT, N1: X, N2: Y);
      Lo = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Lo,
                       N2: DAG.getShiftAmountConstant(Val: BW - 1, VT, DL));
      Hi = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Hi,
                       N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
      return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Lo, N2: Hi);
    }
    [[fallthrough]];
  case ISD::CLMULH: {
    EVT ExtVT = VT.changeElementType(
        Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 2 * BW));
    // For example, ExtVT = i64 based operations aren't legal on a 32-bit
    // target; use bitreverse-based lowering in this case:
    //   clmulr(x, y) = bitreverse(clmul(bitreverse(x), bitreverse(y)))
    // and clmulh is clmulr shifted right by one.
    if (!isOperationLegalOrCustom(Op: ISD::ZERO_EXTEND, VT: ExtVT) ||
        !isOperationLegalOrCustom(Op: ISD::SRL, VT: ExtVT)) {
      SDValue XRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: X);
      SDValue YRev = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: Y);
      SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT, N1: XRev, N2: YRev);
      SDValue Res = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT, Operand: ClMul);
      if (Opcode == ISD::CLMULH)
        Res = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Res,
                          N2: DAG.getShiftAmountConstant(Val: 1, VT, DL));
      return Res;
    }
    // Otherwise compute the full product in a double-width type and extract
    // the desired window of bits with a logical shift right.
    SDValue XExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: X);
    SDValue YExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: ExtVT, Operand: Y);
    SDValue ClMul = DAG.getNode(Opcode: ISD::CLMUL, DL, VT: ExtVT, N1: XExt, N2: YExt);
    unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
    SDValue HiBits = DAG.getNode(Opcode: ISD::SRL, DL, VT: ExtVT, N1: ClMul,
                                 N2: DAG.getShiftAmountConstant(Val: ShAmt, VT: ExtVT, DL));
    return DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: HiBits);
  }
  }
  llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
}
8534
/// Expand a double-register shift (ISD::SHL_PARTS / SRL_PARTS / SRA_PARTS)
/// into funnel shifts plus selects.
///
/// \param Node the *_PARTS node; operands are (Lo, Hi, ShAmt).
/// \param Lo [out] low half of the shifted result.
/// \param Hi [out] high half of the shifted result.
void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(Num: 0);
  SDValue ShOpHi = Node->getOperand(Num: 1);
  SDValue ShAmt = Node->getOperand(Num: 2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                  N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
  // Tmp1 is the value that fills the "emptied" half when the shift amount is
  // >= VTBits: the sign-extension of Hi for SRA, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
                                     N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
                       : DAG.getConstant(Val: 0, DL: dl, VT);

  // Tmp2: the half that combines bits from both inputs (via funnel shift).
  // Tmp3: the half shifted entirely within one input.
  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
    Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
                                N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
  SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
                              RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  } else {
    Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
    Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
  }
}
8586
/// Expand float to signed-int conversion without using any FP instructions,
/// by decomposing the float's bit pattern into sign/exponent/mantissa.
///
/// \param Node the FP_TO_SINT (or strict variant) node.
/// \param Result [out] the expanded integer result, set only on success.
/// \returns true if the node was expanded, false if the caller must lower it
/// some other way (unsupported types or strict-FP semantics).
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());

  // IEEE-754 single-precision field layout: 8 exponent bits at bit 23
  // (bias 127), 23 mantissa bits, sign at bit 31.
  SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
  SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
  SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
  SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
  SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
  SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);

  // Reinterpret the float's raw bits as an integer.
  SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
      N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
  SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);

  // Sign becomes all-ones (negative) or all-zeros via arithmetic shift of the
  // isolated sign bit, then sign-extended to the destination width.
  SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
                             N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
                             N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
  Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);

  // R = mantissa with the implicit leading 1 (bit 23) restored.
  SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
                          N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
                          N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));

  R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);

  // Scale the significand by the exponent: shift left if Exponent > 23,
  // otherwise shift right (at Exponent == 23 both shifts are by zero).
  R = DAG.getSelectCC(
      DL: dl, LHS: Exponent, RHS: ExponentLoBit,
      True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
                      DL: dl, VT: IntShVT)),
      False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
                  N2: DAG.getZExtOrTrunc(
                      Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
                      DL: dl, VT: IntShVT)),
      Cond: ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
                            N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);

  // Values with a negative unbiased exponent have magnitude < 1 -> result 0.
  Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
                           True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
  return true;
}
8657
/// Expand FP_TO_UINT (and STRICT_FP_TO_UINT) in terms of FP_TO_SINT.
///
/// \param Node the FP_TO_UINT / STRICT_FP_TO_UINT node.
/// \param Result [out] the expanded result, set only on success.
/// \param Chain [out] updated chain for the strict-FP case.
/// \returns true if the node was expanded.
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = SrcVT.getFltSemantics();
  APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
                           Ops: { Node->getOperand(Num: 0), Src });
      Chain = Result.getValue(R: 1);
    } else
      Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
    return false;

  // Cst holds the destination's sign-mask value (2^(DstBits-1)) as a float.
  SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
  SDValue Sel;

  // Sel = (Src < 2^(DstBits-1)), i.e. the value fits in the signed range.
  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
                       Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
    Chain = Sel.getValue(R: 1);
  } else {
    Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
                                   LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
                                   LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
                                   RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(Opcode: ISD::STRICT_FSUB, DL: dl, ResultTys: { SrcVT, MVT::Other },
                                Ops: { Chain, Src, FltOfs });
      SInt = DAG.getNode(Opcode: ISD::STRICT_FP_TO_SINT, DL: dl, ResultTys: { DstVT, MVT::Other },
                         Ops: { Val.getValue(R: 1), Val });
      Chain = SInt.getValue(R: 1);
    } else {
      SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
      SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
    }
    Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
                                Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
    False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
                        N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
    Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
    Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
  }
  return true;
}
8759
/// Expand UINT_TO_FP for i64 -> f64 using integer bit tricks (no integer to FP
/// conversion instruction required).
///
/// \param Node the UINT_TO_FP node.
/// \param Result [out] the expanded result, set only on success.
/// \param Chain [out] unused here; the strict-FP case is rejected up front.
/// \returns true if the node was expanded.
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain, SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);

  // If the input is known to be non-negative and SINT_TO_FP is legal then use
  // it.
  if (Node->getFlags().hasNonNeg() &&
      isOperationLegalOrCustom(Op: ISD::SINT_TO_FP, VT: SrcVT)) {
    Result =
        DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(Node), VT: DstVT, Operand: Node->getOperand(Num: 0));
    return true;
  }

  // The bit-twiddling expansion below only handles i64 -> f64.
  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit
  // operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
                           !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will
  // produce -0.0. This will be added to +0.0 and produce -0.0 which is
  // incorrect.
  //
  // The idea: OR-ing the low/high 32-bit halves into the mantissa field of
  // the doubles 2^52 and 2^84 yields (2^52 + lo) and (2^84 + hi * 2^32)
  // exactly; subtracting the constant (2^84 + 2^52) and adding cancels the
  // bias terms, leaving lo + hi * 2^32 == the original value.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
  SDValue HiShift = DAG.getShiftAmountConstant(Val: 32, VT: SrcVT, DL: dl);

  SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
  SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
  SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
  SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
  SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
  SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
  SDValue HiSub = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
  Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
  return true;
}
8818
8819SDValue
8820TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8821 SelectionDAG &DAG) const {
8822 unsigned Opcode = Node->getOpcode();
8823 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8824 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8825 "Wrong opcode");
8826
8827 if (Node->getFlags().hasNoNaNs()) {
8828 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8829 EVT VT = Node->getValueType(ResNo: 0);
8830 if ((!isCondCodeLegal(CC: Pred, VT: VT.getSimpleVT()) ||
8831 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)) &&
8832 VT.isVector())
8833 return SDValue();
8834 SDValue Op1 = Node->getOperand(Num: 0);
8835 SDValue Op2 = Node->getOperand(Num: 1);
8836 return DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred,
8837 Flags: Node->getFlags());
8838 }
8839
8840 return SDValue();
8841}
8842
/// Expand fminnum/fmaxnum by trying, in order: vector splitting, the
/// *_IEEE variants (with operand quieting), FMINIMUM/FMAXIMUM when NaNs are
/// provably absent, and finally a compare+select.
/// \returns the expanded value, or an empty SDValue if no strategy applies.
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
    return Expanded;

  EVT VT = Node->getValueType(ResNo: 0);
  if (VT.isScalableVector())
    report_fatal_error(
        reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  SDLoc dl(Node);
  unsigned NewOp =
      Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(Op: NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(Num: 0);
    SDValue Quiet1 = Node->getOperand(Num: 1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
        Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
                             Flags: Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
        Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
                             Flags: Node->getFlags());
      }
    }

    return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs() ||
      (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
       DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
      return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
                         N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
  }

  // Last resort: compare + select (requires the no-NaNs flag).
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8894
/// Expand fminimum/fmaximum (IEEE-754 2019 semantics: NaN propagates, and
/// -0.0 orders below +0.0) from a non-propagating min/max plus explicit NaN
/// and signed-zero fixups.
/// \returns the expanded value (or the result of unrolling a vector op).
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
                                                SelectionDAG &DAG) const {
  if (SDValue Expanded = expandVectorNaryOpBySplitting(Node: N, DAG))
    return Expanded;

  SDLoc DL(N);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  unsigned Opc = N->getOpcode();
  EVT VT = N->getValueType(ResNo: 0);
  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUM;
  SDNodeFlags Flags = N->getFlags();

  // First, implement comparison not propagating NaN. If no native fmin or fmax
  // available, use plain select with setcc instead.
  SDValue MinMax;
  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;

  // FIXME: We should probably define fminnum/fmaxnum variants with correct
  // signed zero behavior.
  bool MinMaxMustRespectOrderedZero = false;

  if (isOperationLegalOrCustom(Op: CompOpcIeee, VT)) {
    MinMax = DAG.getNode(Opcode: CompOpcIeee, DL, VT, N1: LHS, N2: RHS, Flags);
    MinMaxMustRespectOrderedZero = true;
  } else if (isOperationLegalOrCustom(Op: CompOpc, VT)) {
    MinMax = DAG.getNode(Opcode: CompOpc, DL, VT, N1: LHS, N2: RHS, Flags);
  } else {
    // No native min/max; need a select, so vectors without VSELECT must be
    // scalarized.
    if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
      return DAG.UnrollVectorOp(N);

    // NaN (if exists) will be propagated later, so orderness doesn't matter.
    SDValue Compare =
        DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: IsMax ? ISD::SETOGT : ISD::SETOLT);
    MinMax = DAG.getSelect(DL, VT, Cond: Compare, LHS, RHS, Flags);
  }

  // Propagate any NaN of both operands
  if (!N->getFlags().hasNoNaNs() &&
      (!DAG.isKnownNeverNaN(Op: RHS) || !DAG.isKnownNeverNaN(Op: LHS))) {
    ConstantFP *FPNaN = ConstantFP::get(Context&: *DAG.getContext(),
                                        V: APFloat::getNaN(Sem: VT.getFltSemantics()));
    // If either operand is unordered (NaN), yield a quiet NaN.
    MinMax = DAG.getSelect(DL, VT, Cond: DAG.getSetCC(DL, VT: CCVT, LHS, RHS, Cond: ISD::SETUO),
                           LHS: DAG.getConstantFP(V: *FPNaN, DL, VT), RHS: MinMax, Flags);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
      !DAG.isKnownNeverZeroFloat(Op: RHS) && !DAG.isKnownNeverZeroFloat(Op: LHS)) {
    // SETOEQ treats -0.0 == +0.0, so this fires for either zero.
    SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
                                  RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETOEQ);
    // For max prefer +0.0, for min prefer -0.0, whichever operand holds it.
    SDValue TestZero =
        DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
    SDValue LCmp = DAG.getSelect(
        DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHS, N2: TestZero), LHS,
        RHS: MinMax, Flags);
    SDValue RCmp = DAG.getSelect(
        DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: RHS, N2: TestZero), LHS: RHS,
        RHS: LCmp, Flags);
    MinMax = DAG.getSelect(DL, VT, Cond: IsZero, LHS: RCmp, RHS: MinMax, Flags);
  }

  return MinMax;
}
8961
/// Expand fminimumnum/fmaximumnum (IEEE-754 2019: NaN operands are ignored in
/// favor of the numeric operand, and -0.0 orders below +0.0) by trying, in
/// order: the *_IEEE variants, FMINIMUM/FMAXIMUM, FMINNUM/FMAXNUM, and
/// finally an explicit compare/select sequence with NaN and signed-zero
/// fixups.
SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  unsigned Opc = Node->getOpcode();
  EVT VT = Node->getValueType(ResNo: 0);
  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  bool IsMax = Opc == ISD::FMAXIMUMNUM;
  SDNodeFlags Flags = Node->getFlags();

  unsigned NewOp =
      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;

  if (isOperationLegalOrCustom(Op: NewOp, VT)) {
    if (!Flags.hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Op: LHS)) {
        LHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: LHS, Flags);
      }
      if (!DAG.isKnownNeverSNaN(Op: RHS)) {
        RHS = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL, VT, Operand: RHS, Flags);
      }
    }

    return DAG.getNode(Opcode: NewOp, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
  // same behaviors for all of other cases: +0.0 vs -0.0 included.
  if (Flags.hasNoNaNs() ||
      (DAG.isKnownNeverNaN(Op: LHS) && DAG.isKnownNeverNaN(Op: RHS))) {
    unsigned IEEE2019Op =
        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(Op: IEEE2019Op, VT))
      return DAG.getNode(Opcode: IEEE2019Op, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
  // either one for +0.0 vs -0.0.
  if ((Flags.hasNoNaNs() ||
       (DAG.isKnownNeverSNaN(Op: LHS) && DAG.isKnownNeverSNaN(Op: RHS))) &&
      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
       DAG.isKnownNeverZeroFloat(Op: RHS))) {
    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
    if (isOperationLegalOrCustom(Op: IEEE2008Op, VT))
      return DAG.getNode(Opcode: IEEE2008Op, DL, VT, N1: LHS, N2: RHS, Flags);
  }

  // Scalarize when the scalar op is available or the vector select is not.
  if (VT.isVector() &&
      (isOperationLegalOrCustomOrPromote(Op: Opc, VT: VT.getVectorElementType()) ||
       !isOperationLegalOrCustom(Op: ISD::VSELECT, VT)))
    return DAG.UnrollVectorOp(N: Node);

  // If only one operand is NaN, override it with another operand.
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: LHS)) {
    LHS = DAG.getSelectCC(DL, LHS, RHS: LHS, True: RHS, False: LHS, Cond: ISD::SETUO);
  }
  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(Op: RHS)) {
    RHS = DAG.getSelectCC(DL, LHS: RHS, RHS, True: LHS, False: RHS, Cond: ISD::SETUO);
  }

  // Always prefer RHS if equal.
  SDValue MinMax =
      DAG.getSelectCC(DL, LHS, RHS, True: LHS, False: RHS, Cond: IsMax ? ISD::SETGT : ISD::SETLT);

  // TODO: We need quiet sNaN if strictfp.

  // Fixup signed zero behavior.
  if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(Op: LHS) ||
      DAG.isKnownNeverZeroFloat(Op: RHS)) {
    return MinMax;
  }
  // For max the preferred zero is +0.0, for min it is -0.0.
  SDValue TestZero =
      DAG.getTargetConstant(Val: IsMax ? fcPosZero : fcNegZero, DL, VT: MVT::i32);
  SDValue IsZero = DAG.getSetCC(DL, VT: CCVT, LHS: MinMax,
                                RHS: DAG.getConstantFP(Val: 0.0, DL, VT), Cond: ISD::SETEQ);
  EVT IntVT = VT.changeTypeToInteger();
  EVT FloatVT = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
  SDValue LHSTrunc = LHS;
  // If neither the integer form nor IS_FPCLASS is usable at this type, test
  // the class on an f32-rounded copy instead (sign of zero survives FP_ROUND).
  if (!isTypeLegal(VT: IntVT) && !isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT)) {
    LHSTrunc = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: FloatVT, N1: LHS,
                           N2: DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true));
  }
  // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
  // we preferred RHS when generate MinMax, if the operands are equal.
  SDValue RetZero = DAG.getSelect(
      DL, VT, Cond: DAG.getNode(Opcode: ISD::IS_FPCLASS, DL, VT: CCVT, N1: LHSTrunc, N2: TestZero), LHS,
      RHS: MinMax, Flags);
  return DAG.getSelect(DL, VT, Cond: IsZero, LHS: RetZero, RHS: MinMax, Flags);
}
9054
9055/// Returns a true value if if this FPClassTest can be performed with an ordered
9056/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9057/// std::nullopt if it cannot be performed as a compare with 0.
9058static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9059 const fltSemantics &Semantics,
9060 const MachineFunction &MF) {
9061 FPClassTest OrderedMask = Test & ~fcNan;
9062 FPClassTest NanTest = Test & fcNan;
9063 bool IsOrdered = NanTest == fcNone;
9064 bool IsUnordered = NanTest == fcNan;
9065
9066 // Skip cases that are testing for only a qnan or snan.
9067 if (!IsOrdered && !IsUnordered)
9068 return std::nullopt;
9069
9070 if (OrderedMask == fcZero &&
9071 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
9072 return IsOrdered;
9073 if (OrderedMask == (fcZero | fcSubnormal) &&
9074 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
9075 return IsOrdered;
9076 return std::nullopt;
9077}
9078
9079SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
9080 const FPClassTest OrigTestMask,
9081 SDNodeFlags Flags, const SDLoc &DL,
9082 SelectionDAG &DAG) const {
9083 EVT OperandVT = Op.getValueType();
9084 assert(OperandVT.isFloatingPoint());
9085 FPClassTest Test = OrigTestMask;
9086
9087 // Degenerated cases.
9088 if (Test == fcNone)
9089 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
9090 if (Test == fcAllFlags)
9091 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
9092
9093 // PPC double double is a pair of doubles, of which the higher part determines
9094 // the value class.
9095 if (OperandVT == MVT::ppcf128) {
9096 Op = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::f64, N1: Op,
9097 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
9098 OperandVT = MVT::f64;
9099 }
9100
9101 // Floating-point type properties.
9102 EVT ScalarFloatVT = OperandVT.getScalarType();
9103 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
9104 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9105 bool IsF80 = (ScalarFloatVT == MVT::f80);
9106
9107 // Some checks can be implemented using float comparisons, if floating point
9108 // exceptions are ignored.
9109 if (Flags.hasNoFPExcept() &&
9110 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
9111 FPClassTest FPTestMask = Test;
9112 bool IsInvertedFP = false;
9113
9114 if (FPClassTest InvertedFPCheck =
9115 invertFPClassTestIfSimpler(Test: FPTestMask, UseFCmp: true)) {
9116 FPTestMask = InvertedFPCheck;
9117 IsInvertedFP = true;
9118 }
9119
9120 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9121 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9122
9123 // See if we can fold an | fcNan into an unordered compare.
9124 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9125
9126 // Can't fold the ordered check if we're only testing for snan or qnan
9127 // individually.
9128 if ((FPTestMask & fcNan) != fcNan)
9129 OrderedFPTestMask = FPTestMask;
9130
9131 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9132
9133 if (std::optional<bool> IsCmp0 =
9134 isFCmpEqualZero(Test: FPTestMask, Semantics, MF: DAG.getMachineFunction());
9135 IsCmp0 && (isCondCodeLegalOrCustom(
9136 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9137 VT: OperandVT.getScalarType().getSimpleVT()))) {
9138
9139 // If denormals could be implicitly treated as 0, this is not equivalent
9140 // to a compare with 0 since it will also be true for denormals.
9141 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
9142 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
9143 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9144 }
9145
9146 if (FPTestMask == fcNan &&
9147 isCondCodeLegalOrCustom(CC: IsInvertedFP ? ISD::SETO : ISD::SETUO,
9148 VT: OperandVT.getScalarType().getSimpleVT()))
9149 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
9150 Cond: IsInvertedFP ? ISD::SETO : ISD::SETUO);
9151
9152 bool IsOrderedInf = FPTestMask == fcInf;
9153 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9154 isCondCodeLegalOrCustom(CC: IsOrderedInf ? OrderedCmpOpcode
9155 : UnorderedCmpOpcode,
9156 VT: OperandVT.getScalarType().getSimpleVT()) &&
9157 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType()) &&
9158 (isOperationLegal(Op: ISD::ConstantFP, VT: OperandVT.getScalarType()) ||
9159 (OperandVT.isVector() &&
9160 isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: OperandVT)))) {
9161 // isinf(x) --> fabs(x) == inf
9162 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9163 SDValue Inf =
9164 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9165 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
9166 Cond: IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9167 }
9168
9169 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9170 isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedCmpOpcode
9171 : UnorderedCmpOpcode,
9172 VT: OperandVT.getSimpleVT())) {
9173 // isposinf(x) --> x == inf
9174 // isneginf(x) --> x == -inf
9175 // isposinf(x) || nan --> x u== inf
9176 // isneginf(x) || nan --> x u== -inf
9177
9178 SDValue Inf = DAG.getConstantFP(
9179 Val: APFloat::getInf(Sem: Semantics, Negative: OrderedFPTestMask == fcNegInf), DL,
9180 VT: OperandVT);
9181 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Inf,
9182 Cond: IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9183 }
9184
9185 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9186 // TODO: Could handle ordered case, but it produces worse code for
9187 // x86. Maybe handle ordered if fabs is free?
9188
9189 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9190 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9191
9192 if (isCondCodeLegalOrCustom(CC: IsOrdered ? OrderedOp : UnorderedOp,
9193 VT: OperandVT.getScalarType().getSimpleVT())) {
9194 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9195
9196 // TODO: Maybe only makes sense if fabs is free. Integer test of
9197 // exponent bits seems better for x86.
9198 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9199 SDValue SmallestNormal = DAG.getConstantFP(
9200 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9201 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal,
9202 Cond: IsOrdered ? OrderedOp : UnorderedOp);
9203 }
9204 }
9205
9206 if (FPTestMask == fcNormal) {
9207 // TODO: Handle unordered
9208 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9209 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9210
9211 if (isCondCodeLegalOrCustom(CC: IsFiniteOp,
9212 VT: OperandVT.getScalarType().getSimpleVT()) &&
9213 isCondCodeLegalOrCustom(CC: IsNormalOp,
9214 VT: OperandVT.getScalarType().getSimpleVT()) &&
9215 isFAbsFree(VT: OperandVT)) {
9216 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9217 SDValue Inf =
9218 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
9219 SDValue SmallestNormal = DAG.getConstantFP(
9220 Val: APFloat::getSmallestNormalized(Sem: Semantics), DL, VT: OperandVT);
9221
9222 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
9223 SDValue IsFinite = DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf, Cond: IsFiniteOp);
9224 SDValue IsNormal =
9225 DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: SmallestNormal, Cond: IsNormalOp);
9226 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9227 return DAG.getNode(Opcode: LogicOp, DL, VT: ResultVT, N1: IsFinite, N2: IsNormal);
9228 }
9229 }
9230 }
9231
9232 // Some checks may be represented as inversion of simpler check, for example
9233 // "inf|normal|subnormal|zero" => !"nan".
9234 bool IsInverted = false;
9235
9236 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, UseFCmp: false)) {
9237 Test = InvertedCheck;
9238 IsInverted = true;
9239 }
9240
9241 // In the general case use integer operations.
9242 unsigned BitSize = OperandVT.getScalarSizeInBits();
9243 EVT IntVT = OperandVT.changeElementType(
9244 Context&: *DAG.getContext(), EltVT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize));
9245 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
9246
9247 // Various masks.
9248 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
9249 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
9250 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
9251 const unsigned ExplicitIntBitInF80 = 63;
9252 APInt ExpMask = Inf;
9253 if (IsF80)
9254 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
9255 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
9256 APInt QNaNBitMask =
9257 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
9258 APInt InversionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
9259
9260 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
9261 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
9262 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
9263 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
9264 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
9265 SDValue ResultInversionMask = DAG.getConstant(Val: InversionMask, DL, VT: ResultVT);
9266
9267 SDValue Res;
9268 const auto appendResult = [&](SDValue PartialRes) {
9269 if (PartialRes) {
9270 if (Res)
9271 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
9272 else
9273 Res = PartialRes;
9274 }
9275 };
9276
9277 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9278 const auto getIntBitIsSet = [&]() -> SDValue {
9279 if (!IntBitIsSetV) {
9280 APInt IntBitMask(BitSize, 0);
9281 IntBitMask.setBit(ExplicitIntBitInF80);
9282 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
9283 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
9284 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
9285 }
9286 return IntBitIsSetV;
9287 };
9288
9289 // Split the value into sign bit and absolute value.
9290 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
9291 SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
9292 RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
9293
9294 // Tests that involve more than one class should be processed first.
9295 SDValue PartialRes;
9296
9297 if (IsF80)
9298 ; // Detect finite numbers of f80 by checking individual classes because
9299 // they have different settings of the explicit integer bit.
9300 else if ((Test & fcFinite) == fcFinite) {
9301 // finite(V) ==> abs(V) < exp_mask
9302 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9303 Test &= ~fcFinite;
9304 } else if ((Test & fcFinite) == fcPosFinite) {
9305 // finite(V) && V > 0 ==> V < exp_mask
9306 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
9307 Test &= ~fcPosFinite;
9308 } else if ((Test & fcFinite) == fcNegFinite) {
9309 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9310 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
9311 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9312 Test &= ~fcNegFinite;
9313 }
9314 appendResult(PartialRes);
9315
9316 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9317 // fcZero | fcSubnormal => test all exponent bits are 0
9318 // TODO: Handle sign bit specific cases
9319 if (PartialCheck == (fcZero | fcSubnormal)) {
9320 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
9321 SDValue ExpIsZero =
9322 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9323 appendResult(ExpIsZero);
9324 Test &= ~PartialCheck & fcAllFlags;
9325 }
9326 }
9327
9328 // Check for individual classes.
9329
9330 if (unsigned PartialCheck = Test & fcZero) {
9331 if (PartialCheck == fcPosZero)
9332 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
9333 else if (PartialCheck == fcZero)
9334 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
9335 else // ISD::fcNegZero
9336 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
9337 appendResult(PartialRes);
9338 }
9339
9340 if (unsigned PartialCheck = Test & fcSubnormal) {
9341 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9342 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9343 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9344 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
9345 SDValue VMinusOneV =
9346 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
9347 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
9348 if (PartialCheck == fcNegSubnormal)
9349 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9350 appendResult(PartialRes);
9351 }
9352
9353 if (unsigned PartialCheck = Test & fcInf) {
9354 if (PartialCheck == fcPosInf)
9355 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
9356 else if (PartialCheck == fcInf)
9357 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
9358 else { // ISD::fcNegInf
9359 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
9360 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
9361 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
9362 }
9363 appendResult(PartialRes);
9364 }
9365
9366 if (unsigned PartialCheck = Test & fcNan) {
9367 APInt InfWithQnanBit = Inf | QNaNBitMask;
9368 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
9369 if (PartialCheck == fcNan) {
9370 // isnan(V) ==> abs(V) > int(inf)
9371 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9372 if (IsF80) {
9373 // Recognize unsupported values as NaNs for compatibility with glibc.
9374 // In them (exp(V)==0) == int_bit.
9375 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
9376 SDValue ExpIsZero =
9377 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
9378 SDValue IsPseudo =
9379 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
9380 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
9381 }
9382 } else if (PartialCheck == fcQNan) {
9383 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9384 PartialRes =
9385 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
9386 } else { // ISD::fcSNan
9387 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9388 // abs(V) < (unsigned(Inf) | quiet_bit)
9389 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
9390 SDValue IsNotQnan =
9391 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
9392 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
9393 }
9394 appendResult(PartialRes);
9395 }
9396
9397 if (unsigned PartialCheck = Test & fcNormal) {
9398 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9399 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
9400 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
9401 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
9402 APInt ExpLimit = ExpMask - ExpLSB;
9403 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
9404 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
9405 if (PartialCheck == fcNegNormal)
9406 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
9407 else if (PartialCheck == fcPosNormal) {
9408 SDValue PosSignV =
9409 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInversionMask);
9410 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
9411 }
9412 if (IsF80)
9413 PartialRes =
9414 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
9415 appendResult(PartialRes);
9416 }
9417
9418 if (!Res)
9419 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
9420 if (IsInverted)
9421 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInversionMask);
9422 return Res;
9423}
9424
9425// Only expand vector types if we have the appropriate vector bit operations.
9426static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9427 assert(VT.isVector() && "Expected vector type");
9428 unsigned Len = VT.getScalarSizeInBits();
9429 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
9430 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
9431 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
9432 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
9433 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
9434}
9435
/// Expand ISD::CTPOP into shifts, masks and adds using the classic parallel
/// ("SWAR") bit-counting algorithm. Vector types are only expanded when the
/// required vector bit operations are available (see canExpandVectorCTPOP).
/// \returns the expanded value, or an empty SDValue when the element width is
/// irregular (not a multiple of 8 bits, or wider than 128 bits).
SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  // Byte-splat constants used as the SWAR masks.
  SDValue Mask55 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
  SDValue Mask33 =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
  SDValue Mask0F =
      DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  // After this step each 2-bit field holds the popcount of those 2 bits.
  Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                   N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
                                   N2: Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  // Now each 4-bit field holds the popcount of those 4 bits.
  Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
                   N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                                   N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
                                   N2: Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  // Now each byte holds the popcount of that byte.
  Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                   N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                   N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                   N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
                   N2: Mask0F);

  // An 8-bit element is finished: its single byte already is the result.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
                       N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
                                       N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
                                                       N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
                       N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  // The multiply by the byte-splat 0x01 sums every byte count into the top
  // byte; if MUL is not available (even after promotion), emulate the sum
  // with a shift/add ladder instead.
  SDValue V;
  if (isOperationLegalOrCustomOrPromote(
          Op: ISD::MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
    V = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01);
  } else {
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
      V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: V,
                      N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: ShiftC));
    }
  }
  return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
}
9512
9513SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9514 SDLoc dl(Node);
9515 EVT VT = Node->getValueType(ResNo: 0);
9516 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9517 SDValue Op = Node->getOperand(Num: 0);
9518 SDValue Mask = Node->getOperand(Num: 1);
9519 SDValue VL = Node->getOperand(Num: 2);
9520 unsigned Len = VT.getScalarSizeInBits();
9521 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9522
9523 // TODO: Add support for irregular type lengths.
9524 if (!(Len <= 128 && Len % 8 == 0))
9525 return SDValue();
9526
9527 // This is same algorithm of expandCTPOP from
9528 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9529 SDValue Mask55 =
9530 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
9531 SDValue Mask33 =
9532 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
9533 SDValue Mask0F =
9534 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
9535
9536 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9537
9538 // v = v - ((v >> 1) & 0x55555555...)
9539 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9540 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9541 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9542 N2: Mask55, N3: Mask, N4: VL);
9543 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
9544
9545 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9546 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
9547 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
9548 N1: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op,
9549 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
9550 N2: Mask33, N3: Mask, N4: VL);
9551 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
9552
9553 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9554 Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
9555 N3: Mask, N4: VL),
9556 Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
9557 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
9558
9559 if (Len <= 8)
9560 return Op;
9561
9562 // v = (v * 0x01010101...) >> (Len - 8)
9563 SDValue V;
9564 if (isOperationLegalOrCustomOrPromote(
9565 Op: ISD::VP_MUL, VT: getTypeToTransformTo(Context&: *DAG.getContext(), VT))) {
9566 SDValue Mask01 =
9567 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
9568 V = DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL);
9569 } else {
9570 V = Op;
9571 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9572 SDValue ShiftC = DAG.getShiftAmountConstant(Val: Shift, VT, DL: dl);
9573 V = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: V,
9574 N2: DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: V, N2: ShiftC, N3: Mask, N4: VL),
9575 N3: Mask, N4: VL);
9576 }
9577 }
9578 return DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: V, N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT),
9579 N3: Mask, N4: VL);
9580}
9581
/// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF. Prefers a directly supported
/// CTLZ variant; otherwise smears the highest set bit downward and counts
/// the set bits of the complement via CTPOP.
/// \returns the expanded value, or an empty SDValue when a vector type lacks
/// the required bit operations.
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
    return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    // Select the bit width (the defined CTLZ(0) result) when the source is 0.
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
    return SDValue();

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
  // Shift amounts double each iteration: 1, 2, 4, ... up to half the width.
  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
    SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
    Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
                     N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
  }
  // All bits at and below the highest set bit are now 1; the complement has
  // exactly CTLZ(x) set bits.
  Op = DAG.getNOT(DL: dl, Val: Op, VT);
  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
}
9631
9632SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9633 SDLoc dl(Node);
9634 EVT VT = Node->getValueType(ResNo: 0);
9635 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9636 SDValue Op = Node->getOperand(Num: 0);
9637 SDValue Mask = Node->getOperand(Num: 1);
9638 SDValue VL = Node->getOperand(Num: 2);
9639 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9640
9641 // do this:
9642 // x = x | (x >> 1);
9643 // x = x | (x >> 2);
9644 // ...
9645 // x = x | (x >>16);
9646 // x = x | (x >>32); // for 64-bit input
9647 // return popcount(~x);
9648 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9649 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
9650 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
9651 N2: DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
9652 N4: VL);
9653 }
9654 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getAllOnesConstant(DL: dl, VT),
9655 N3: Mask, N4: VL);
9656 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
9657}
9658
9659SDValue TargetLowering::expandCTLS(SDNode *Node, SelectionDAG &DAG) const {
9660 SDLoc dl(Node);
9661 EVT VT = Node->getValueType(ResNo: 0);
9662 SDValue Op = DAG.getFreeze(V: Node->getOperand(Num: 0));
9663 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9664
9665 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
9666 // This transforms the sign bits into leading zeros that can be counted.
9667 SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: NumBitsPerElt - 1, VT, DL: dl);
9668 SDValue SignBit = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Op, N2: ShiftAmt);
9669 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: SignBit);
9670 SDValue Shl =
9671 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
9672 SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Shl, N2: DAG.getConstant(Val: 1, DL: dl, VT));
9673 return DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Or);
9674}
9675
/// Lower CTTZ via a De Bruijn multiply and a constant-pool table lookup:
/// isolate the lowest set bit (x & -x), multiply by a De Bruijn constant so
/// the top log2(BitWidth) bits uniquely identify the bit position, and use
/// them to index a byte table of positions.
/// \returns the lookup sequence, or an empty SDValue when BitWidth is not
/// 32/64 or the target does not custom-lower ISD::ConstantPool.
SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
                                        const SDLoc &DL, EVT VT, SDValue Op,
                                        unsigned BitWidth) const {
  if (BitWidth != 32 && BitWidth != 64)
    return SDValue();

  // Only worthwhile when the target customizes constant-pool lowering.
  const DataLayout &TD = DAG.getDataLayout();
  if (!isOperationCustom(Op: ISD::ConstantPool, VT: getPointerTy(DL: TD)))
    return SDValue();

  // De Bruijn sequences for 32- and 64-bit words.
  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
                                  : APInt(64, 0x0218A392CD3D5DBFULL);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
  unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
  // Lookup = ((Op & -Op) * DeBruijn) >> (BitWidth - log2(BitWidth))
  SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
  SDValue Lookup = DAG.getNode(
      Opcode: ISD::SRL, DL, VT,
      N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
                      N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
      N2: DAG.getShiftAmountConstant(Val: ShiftAmt, VT, DL));
  Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));

  // Build the inverse mapping: for bit position i, the multiply/shift above
  // yields Table index (DeBruijn << i) >> ShiftAmt, so store i there.
  SmallVector<uint8_t> Table(BitWidth, 0);
  for (unsigned i = 0; i < BitWidth; i++) {
    APInt Shl = DeBruijn.shl(shiftAmt: i);
    APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
    Table[Lshr.getZExtValue()] = i;
  }

  // Create a ConstantArray in Constant Pool
  auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
  SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
                                      Align: TD.getPrefTypeAlign(Ty: CA->getType()));
  // Zero-extending byte load of Table[Lookup].
  SDValue ExtLoad = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl: DL, VT, Chain: DAG.getEntryNode(),
                                   Ptr: DAG.getMemBasePlusOffset(Base: CPIdx, Offset: Lookup, DL),
                                   PtrInfo, MemVT: MVT::i8);
  // ZERO_UNDEF does not need to handle a zero source.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
    return ExtLoad;

  // Plain CTTZ: select the bit width when the source is zero.
  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
  SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
  return DAG.getSelect(DL, VT, Cond: SrcIsZero,
                       LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
}
9723
/// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF. Prefers a directly supported
/// CTTZ variant, then a De Bruijn table lookup, and otherwise counts bits in
/// ~x & (x - 1) with CTPOP (or CTLZ when only that is legal).
/// \returns the expanded value, or an empty SDValue when a vector type lacks
/// the required bit operations.
SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  SDValue Op = Node->getOperand(Num: 0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
    return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
    // Select the bit width (the defined CTTZ(0) result) when the source is 0.
    return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
                         LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  // This includes the operations needed to expand CTPOP if it isn't supported.
  if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
                         !canExpandVectorCTPOP(TLI: *this, VT)) ||
                        !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
    return SDValue();

  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
  // to be expanded or converted to a libcall.
  if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(Op: ISD::CTPOP, VT) &&
      !isOperationLegal(Op: ISD::CTLZ, VT))
    if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
      return V;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
  // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
  SDValue Tmp = DAG.getNode(
      Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
      N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
                       N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
  }

  return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
}
9780
9781SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9782 SDValue Op = Node->getOperand(Num: 0);
9783 SDValue Mask = Node->getOperand(Num: 1);
9784 SDValue VL = Node->getOperand(Num: 2);
9785 SDLoc dl(Node);
9786 EVT VT = Node->getValueType(ResNo: 0);
9787
9788 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9789 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
9790 N2: DAG.getAllOnesConstant(DL: dl, VT), N3: Mask, N4: VL);
9791 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
9792 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
9793 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
9794 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
9795}
9796
/// Expand ISD::VP_CTTZ_ELTS: the index of the first active (nonzero) element,
/// computed as the unsigned-min reduction of a step vector where inactive
/// lanes are replaced by EVL (so an all-inactive input yields EVL).
SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
                                             SelectionDAG &DAG) const {
  // %cond = to_bool_vec %source
  // %splat = splat /*val=*/VL
  // %tz = step_vector
  // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
  // %r = vp.reduce.umin %v
  SDLoc DL(N);
  SDValue Source = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SrcVT = Source.getValueType();
  EVT ResVT = N->getValueType(ResNo: 0);
  EVT ResVecVT =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: ResVT, EC: SrcVT.getVectorElementCount());

  // Convert to boolean vector.
  // Non-i1 sources are compared against zero lane-wise (SETNE).
  if (SrcVT.getScalarType() != MVT::i1) {
    SDValue AllZero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
                             EC: SrcVT.getVectorElementCount());
    Source = DAG.getNode(Opcode: ISD::VP_SETCC, DL, VT: SrcVT, N1: Source, N2: AllZero,
                         N3: DAG.getCondCode(Cond: ISD::SETNE), N4: Mask, N5: EVL);
  }

  // Inactive lanes get the EVL value so they never win the umin reduction.
  SDValue ExtEVL = DAG.getZExtOrTrunc(Op: EVL, DL, VT: ResVT);
  SDValue Splat = DAG.getSplat(VT: ResVecVT, DL, Op: ExtEVL);
  SDValue StepVec = DAG.getStepVector(DL, ResVT: ResVecVT);
  SDValue Select =
      DAG.getNode(Opcode: ISD::VP_SELECT, DL, VT: ResVecVT, N1: Source, N2: StepVec, N3: Splat, N4: EVL);
  return DAG.getNode(Opcode: ISD::VP_REDUCE_UMIN, DL, VT: ResVT, N1: ExtEVL, N2: Select, N3: Mask, N4: EVL);
}
9829
// Expand a "find last active lane" node: build a step vector <0, 1, 2, ...>,
// zero out the lanes whose mask bit is false, and take VECREDUCE_UMAX of the
// result. The largest surviving step value is the index of the last active
// lane (0 if no lane is active).
SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Mask = N->getOperand(Num: 0);
  EVT MaskVT = Mask.getValueType();
  EVT BoolVT = MaskVT.getScalarType();

  // Find a suitable type for a stepvector: the element type must be wide
  // enough to hold every possible lane index (for scalable vectors that
  // depends on the vscale range of the current function).
  ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
  if (MaskVT.isScalableVector())
    VScaleRange = getVScaleRange(F: &DAG.getMachineFunction().getFunction(), BitWidth: 64);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  uint64_t EltWidth = TLI.getBitWidthForCttzElements(
      RetTy: BoolVT.getTypeForEVT(Context&: *DAG.getContext()), EC: MaskVT.getVectorElementCount(),
      /*ZeroIsPoison=*/true, VScaleRange: &VScaleRange);
  // If the step vector element type is smaller than the mask element type,
  // use the mask type directly to avoid widening issues.
  EltWidth = std::max(a: EltWidth, b: BoolVT.getFixedSizeInBits());
  EVT StepVT = MVT::getIntegerVT(BitWidth: EltWidth);
  EVT StepVecVT = MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: StepVT);

  // If promotion or widening is required to make the type legal, do it here.
  // Promotion of integers within LegalizeVectorOps is looking for types of
  // the same size but with a smaller number of larger elements, not the usual
  // larger size with the same number of larger elements.
  TargetLowering::LegalizeTypeAction TypeAction =
      TLI.getTypeAction(VT: StepVecVT.getSimpleVT());
  SDValue StepVec;
  if (TypeAction == TargetLowering::TypePromoteInteger) {
    // Promotion keeps the element count; just build the step vector in the
    // promoted type.
    StepVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    StepVT = StepVecVT.getVectorElementType();
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  } else if (TypeAction == TargetLowering::TypeWidenVector) {
    // For widening, the element count changes. Create a step vector with only
    // the original elements valid and zeros for padding. Also widen the mask.
    EVT WideVecVT = TLI.getTypeToTransformTo(Context&: *DAG.getContext(), VT: StepVecVT);
    unsigned WideNumElts = WideVecVT.getVectorNumElements();

    // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
    SDValue OrigStepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
    SDValue UndefStep = DAG.getPOISON(VT: WideVecVT);
    StepVec = DAG.getInsertSubvector(DL, Vec: UndefStep, SubVec: OrigStepVec, Idx: 0);

    // Widen mask: pad with zeros so the padding lanes never win the
    // umax reduction (the select below maps them to zero).
    EVT WideMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: BoolVT, NumElements: WideNumElts);
    SDValue ZeroMask = DAG.getConstant(Val: 0, DL, VT: WideMaskVT);
    Mask = DAG.getInsertSubvector(DL, Vec: ZeroMask, SubVec: Mask, Idx: 0);

    StepVecVT = WideVecVT;
    StepVT = WideVecVT.getVectorElementType();
  } else {
    // Type is already legal (or handled later); build the step vector as-is.
    StepVec = DAG.getStepVector(DL, ResVT: StepVecVT);
  }

  // Zero out lanes with inactive elements, then find the highest remaining
  // value from the stepvector.
  SDValue Zeroes = DAG.getConstant(Val: 0, DL, VT: StepVecVT);
  SDValue ActiveElts = DAG.getSelect(DL, VT: StepVecVT, Cond: Mask, LHS: StepVec, RHS: Zeroes);
  SDValue HighestIdx = DAG.getNode(Opcode: ISD::VECREDUCE_UMAX, DL, VT: StepVT, Operand: ActiveElts);
  // The reduction was done in StepVT; adjust to the node's result type.
  return DAG.getZExtOrTrunc(Op: HighestIdx, DL, VT: N->getValueType(ResNo: 0));
}
9891
9892SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9893 bool IsNegative) const {
9894 SDLoc dl(N);
9895 EVT VT = N->getValueType(ResNo: 0);
9896 SDValue Op = N->getOperand(Num: 0);
9897
9898 // abs(x) -> smax(x,sub(0,x))
9899 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9900 isOperationLegal(Op: ISD::SMAX, VT)) {
9901 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9902 Op = DAG.getFreeze(V: Op);
9903 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
9904 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9905 }
9906
9907 // abs(x) -> umin(x,sub(0,x))
9908 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9909 isOperationLegal(Op: ISD::UMIN, VT)) {
9910 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9911 Op = DAG.getFreeze(V: Op);
9912 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
9913 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9914 }
9915
9916 // 0 - abs(x) -> smin(x, sub(0,x))
9917 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
9918 isOperationLegal(Op: ISD::SMIN, VT)) {
9919 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
9920 Op = DAG.getFreeze(V: Op);
9921 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
9922 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
9923 }
9924
9925 // Only expand vector types if we have the appropriate vector operations.
9926 if (VT.isVector() &&
9927 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
9928 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
9929 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
9930 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
9931 return SDValue();
9932
9933 Op = DAG.getFreeze(V: Op);
9934 SDValue Shift = DAG.getNode(
9935 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
9936 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
9937 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
9938
9939 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9940 if (!IsNegative)
9941 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
9942
9943 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9944 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
9945}
9946
// Expand ISD::ABDS / ISD::ABDU (signed/unsigned absolute difference) using
// whichever legal building blocks the target provides, falling back to a
// compare + select of the two subtraction orders. Operands are frozen before
// any expansion that uses them more than once.
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  bool IsSigned = N->getOpcode() == ISD::ABDS;

  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
  if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
  }

  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
  // (one of the two saturating subtractions is zero, the other is the
  // difference, so OR combines them)
  if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT)) {
    LHS = DAG.getFreeze(V: LHS);
    RHS = DAG.getFreeze(V: RHS);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
                       N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
  }

  // If the subtract doesn't overflow then just use abs(sub())
  // NOTE: don't fold this for undef/poison reasons, but getFreeze can help.
  bool IsNonNegative = DAG.SignBitIsZero(Op: LHS) && DAG.SignBitIsZero(Op: RHS);

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: LHS, N1: RHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS));

  if (DAG.willNotOverflowSub(IsSigned: IsSigned || IsNonNegative, N0: RHS, N1: LHS))
    return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT,
                       Operand: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));

  EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);

  // Branchless expansion iff cmp result is allbits:
  // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
  // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
  if (CCVT == VT && getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS);
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Diff, N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Cmp, N2: Xor);
  }

  // Similar to the branchless expansion, if we don't prefer selects, use the
  // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
  // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
  // rhs), uof(lhs, rhs)), uof(lhs, rhs))
  if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
      !preferSelectsOverBooleanArithmetic(VT)) {
    SDValue USubO =
        DAG.getNode(Opcode: ISD::USUBO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {LHS, RHS});
    // Sign-extended overflow bit; shadows the setcc-based Cmp above, which
    // is unused on this path.
    SDValue Cmp = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT, Operand: USubO.getValue(R: 1));
    SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: USubO.getValue(R: 0), N2: Cmp);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Cmp);
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N);

  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
  return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
                       RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
}
10024
// Expand the four averaging nodes (AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU)
// without requiring a legal double-width type: prefer add+shift when the
// operands are known to have a spare top bit, then extend+truncate, then an
// overflow-bit trick for illegal scalar AVGFLOORU, and finally the generic
// and/or + xor + shift identity.
SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);

  // Derive the building-block opcodes from the AVG variant:
  //   floor -> ADD/AND, ceil -> SUB/OR; signed -> SRA/SEXT, unsigned -> SRL/ZEXT.
  unsigned Opc = N->getOpcode();
  bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
  bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
  unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
  unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
  unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
  unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
          Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
         "Unknown AVG node");

  // If the operands are already extended, we can add+shift.
  // (>= 2 sign bits / >= 1 leading zero means LHS+RHS cannot overflow.)
  bool IsExt =
      (IsSigned && DAG.ComputeNumSignBits(Op: LHS) >= 2 &&
       DAG.ComputeNumSignBits(Op: RHS) >= 2) ||
      (!IsSigned && DAG.computeKnownBits(Op: LHS).countMinLeadingZeros() >= 1 &&
       DAG.computeKnownBits(Op: RHS).countMinLeadingZeros() >= 1);
  if (IsExt) {
    SDValue Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: LHS, N2: RHS);
    if (!IsFloor)
      // Ceiling variants round up: add 1 before the halving shift.
      Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Sum, N2: DAG.getConstant(Val: 1, DL: dl, VT));
    return DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Sum,
                       N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  }

  // For scalars, see if we can efficiently extend/truncate to use add+shift.
  if (VT.isScalarInteger()) {
    unsigned BW = VT.getScalarSizeInBits();
    EVT ExtVT = VT.getIntegerVT(Context&: *DAG.getContext(), BitWidth: 2 * BW);
    if (isTypeLegal(VT: ExtVT) && isTruncateFree(FromVT: ExtVT, ToVT: VT)) {
      LHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: LHS);
      RHS = DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ExtVT, Operand: RHS);
      SDValue Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: LHS, N2: RHS);
      if (!IsFloor)
        Avg = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ExtVT, N1: Avg,
                          N2: DAG.getConstant(Val: 1, DL: dl, VT: ExtVT));
      // Just use SRL as we will be truncating away the extended sign bits.
      Avg = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ExtVT, N1: Avg,
                        N2: DAG.getShiftAmountConstant(Val: 1, VT: ExtVT, DL: dl));
      return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Avg);
    }
  }

  // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
  // The carry from UADDO is exactly the bit shifted out of the sum.
  if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
    SDValue UAddWithOverflow =
        DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: MVT::i1), Ops: {RHS, LHS});

    SDValue Sum = UAddWithOverflow.getValue(R: 0);
    SDValue Overflow = UAddWithOverflow.getValue(R: 1);

    // Right shift the sum by 1
    SDValue LShrVal = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Sum,
                                  N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));

    // ANY_EXTEND is fine: only the bit moved to the MSB position is kept.
    SDValue ZeroExtOverflow = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Overflow);
    SDValue OverflowShl = DAG.getNode(
        Opcode: ISD::SHL, DL: dl, VT, N1: ZeroExtOverflow,
        N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));

    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: LShrVal, N2: OverflowShl);
  }

  // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
  // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
  // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
  // Freeze both operands: each appears twice below.
  LHS = DAG.getFreeze(V: LHS);
  RHS = DAG.getFreeze(V: RHS);
  SDValue Sign = DAG.getNode(Opcode: SignOpc, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
  SDValue Shift =
      DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: Xor, N2: DAG.getShiftAmountConstant(Val: 1, VT, DL: dl));
  return DAG.getNode(Opcode: SumOpc, DL: dl, VT, N1: Sign, N2: Shift);
}
10106
// Expand ISD::BSWAP into rotates or shift/mask/or sequences. Only simple
// types with i16/i32/i64 scalar elements are handled; anything else returns
// a null SDValue so the caller can pick a different strategy.
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
    return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
  case MVT::i32:
    // This is meant for ARM specifically, which has ROTR but no ROTL.
    if (isOperationLegalOrCustom(Op: ISD::ROTR, VT)) {
      SDValue Mask = DAG.getConstant(Val: 0x00FF00FF, DL: dl, VT);
      // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
      SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask);
      SDValue Rotr =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: And, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
      // Note: rotl 8 is expressed as rotr 24 since only ROTR is available.
      SDValue Rotl =
          DAG.getNode(Opcode: ISD::ROTR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
      SDValue And2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Rotl, N2: Mask);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Rotr, N2: And2);
    }
    // Generic i32 path: move each byte into place with shift/mask, then OR
    // the four partial results together.
    Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
  case MVT::i64:
    // Same byte-by-byte scheme for i64: TmpN holds the byte destined for
    // position N (1 = lowest), combined as a balanced OR tree at the end.
    Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
    Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
    Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
    Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
    Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
    Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
    Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
    return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
  }
}
10176
// Vector-predicated version of expandBSWAP: the same shift/mask/or byte
// permutation, but every node carries the Mask and EVL operands. Unlike
// expandBSWAP there is no rotate shortcut for i16/i32 here.
SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  // Only i16/i32/i64 element types are handled; others return a null value.
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Swap the two bytes: (x << 8) | (x >> 8).
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i32:
    // Move each of the four bytes into place, then OR the partial results.
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
  case MVT::i64:
    // TmpN holds the byte destined for position N (1 = lowest); the eight
    // partial results are combined with a balanced OR tree.
    Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
                       N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
                       N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp1 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
    Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
    Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
    Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
    return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
  }
}
10252
// Expand ISD::BITREVERSE. For power-of-2 element sizes >= 8 bits, byte-swap
// first and then swap nibbles, bit-pairs and adjacent bits with mask+shift+or
// (constant number of steps). Otherwise fall back to moving every bit into
// place individually (Sz shift/and/or triples).
SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
    Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
    return Tmp;
  }

  // Generic fallback: for each bit I of the input, shift it to its mirrored
  // position J = Sz-1-I, isolate it with a one-bit mask, and OR it into the
  // accumulated result.
  Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
    else
      Tmp2 =
          DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));

    APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
    Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
    Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
  }

  return Tmp;
}
10313
// Vector-predicated version of expandBITREVERSE: same BSWAP + nibble/pair/bit
// swap scheme, with Mask and EVL threaded through every node. There is no
// bit-by-bit fallback here, so non-power-of-2 or sub-byte element sizes
// return a null SDValue.
SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  assert(N->getOpcode() == ISD::VP_BITREVERSE);

  SDLoc dl(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue Op = N->getOperand(Num: 0);
  SDValue Mask = N->getOperand(Num: 1);
  SDValue EVL = N->getOperand(Num: 2);
  EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
  if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
    APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
    APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);

    // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);

    // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(Opcode: ISD::VP_SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
                       N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
                       N3: Mask, N4: EVL);
    Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
                       N3: Mask, N4: EVL);
    Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
    return Tmp;
  }
  // No per-bit fallback for VP nodes; signal "not expanded".
  return SDValue();
}
10375
/// Turn a vector load into per-element scalar loads (or, when the element
/// type is not byte-sized, a single wide integer load decomposed with
/// shift/mask/truncate), then rebuild the vector with BUILD_VECTOR.
/// Returns the loaded value and the combined output chain.
/// Reports a fatal error for scalable vectors, which cannot be scalarized.
std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(ResNo: 0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!SrcEltVT.isByteSized()) {
    // LoadVT covers the full store size (rounded up to bytes); SrcIntVT is
    // the exact bit width of the in-memory vector.
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
                       PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getBaseAlign(),
                       MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // On big-endian targets element 0 occupies the most significant bits,
      // so index from the top of the integer.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(
          Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(), VT: LoadVT, DL: SL);
      SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
      SDValue Elt =
          DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
      SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);

      // Re-apply the original load's extension semantics to each element.
      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
        Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
      }

      Vals.push_back(Elt: Scalar);
    }

    SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
    return std::make_pair(x&: Value, y: Load.getValue(R: 1));
  }

  // Byte-sized elements: emit one scalar (ext)load per element at
  // consecutive byte offsets.
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad = DAG.getExtLoad(
        ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride), MemVT: SrcEltVT,
        Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));

    Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
    LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
  }

  // Merge the per-element chains; the loads may execute in any order.
  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: LoadChains);
  SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);

  return std::make_pair(x&: Value, y&: NewChain);
}
10464
/// Turn a vector store into per-element truncating stores (or, when the
/// memory element type is not byte-sized, pack all elements into one wide
/// integer with shift/or and emit a single store). Returns the new chain.
/// Reports a fatal error for scalable vectors, which cannot be scalarized.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error(reason: "Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);

    // Accumulate each truncated element into the integer via SHL + OR.
    SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);
      SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
      SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
      // On big-endian targets element 0 goes in the most significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
      SDValue ShiftedElt =
          DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
      CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
    }

    return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
                        Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
                        AAInfo: ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getExtractVectorElt(DL: SL, VT: RegSclVT, Vec: Value, Idx);

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
        SVT: MemSclVT, Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
        AAInfo: ST->getAAInfo());

    Stores.push_back(Elt: Store);
  }

  // Merge the per-element store chains; ordering between them is irrelevant.
  return DAG.getNode(Opcode: ISD::TokenFactor, DL: SL, VT: MVT::Other, Ops: Stores);
}
10539
/// Expand an unaligned load for targets that cannot perform it natively.
/// FP/vector loads are rewritten as an integer load of the same size (when
/// legal), scalarized, or spilled through an aligned stack slot; integer
/// loads are split into two half-width loads combined with SHL/OR.
/// Returns {loaded value, output chain}. Only unindexed loads are supported.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(ResNo: 0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
    if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
                                    MMO: LD->getMemOperand());
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);

      return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is handled by the final
    // extending load below.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);

    // Do all but one copies using the full register width.
    // Note: loop starts at 1, so exactly NumRegs - 1 full-width copies.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
          Alignment: LD->getBaseAlign(), MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(Elt: DAG.getStore(
          Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
    }

    // The last copy may be partial.  Do an extending load.
    EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
                                  BitWidth: 8 * (LoadedBytes - Offset));
    SDValue Load = DAG.getExtLoad(
        ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
        PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT, Alignment: LD->getBaseAlign(),
        MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
    // Follow the load with a store to the stack slot.  Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
                          MemVT: LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(x&: Load, y&: TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one.  This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits/2);
  NumBits >>= 1;

  Align Alignment = LD->getBaseAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts.  The low half is always zero-extended; the
  // high half carries the original load's extension semantics.  Endianness
  // determines which half sits at the lower address.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
    Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
                        MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
                        AAInfo: LD->getAAInfo());
  }

  // aggregate the two parts: Result = (Hi << NumBits) | Lo.
  SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
  SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
  Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);

  SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Lo.getValue(R: 1),
                           N2: Hi.getValue(R: 1));

  return std::make_pair(x&: Result, y&: TF);
}
10687
/// Expand an unaligned store for targets that cannot perform it natively.
/// FP/vector stores become a bitcast + (misaligned) integer store (when
/// legal), a scalarized store, or a spill through an aligned stack slot;
/// integer stores are split into two half-width truncating stores.
/// Returns the new chain. Only unindexed stores are supported.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getBaseAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
    if (isTypeLegal(VT: intVT)) {
      if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
      Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
                            Alignment, MMOFlags: ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        Context&: *DAG.getContext(),
        VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Round up so a trailing partial register is handled by the final
    // truncating store below.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    // Note: loop starts at 1, so exactly NumRegs - 1 full-width copies.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
      // Store it to the final location.  Remember the store.
      Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
                                     PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
                                     Alignment: ST->getBaseAlign(),
                                     MMOFlags: ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
    }

    // The last store may be partial.  Do a truncating store.  On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);

    Stores.push_back(Elt: DAG.getTruncStore(
        Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
        PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
        Alignment: ST->getBaseAlign(), MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
  SDValue Lo = Val;
  // If Val is a constant, replace the upper bits with 0. The SRL will constant
  // fold and not use the upper bits. A smaller constant may be easier to
  // materialize.
  if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
    Lo = DAG.getNode(
        Opcode: ISD::AND, DL: dl, VT, N1: Lo,
        N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
                        VT));
  SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);

  // Store the two parts; endianness picks which half goes at the low address.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
                             MMOFlags: ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
      MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());

  SDValue Result =
      DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store1, N2: Store2);
  return Result;
}
10819
/// Compute the address following a masked memory access.  For normal
/// (non-compressed) memory the increment is simply the store size of
/// \p DataVT; for compressed memory it is the number of set mask bits
/// (popcount / vector reduce-add) scaled by the element size in bytes.
/// Returns Addr + Increment in the address type.
SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    // Incrementing the pointer according to number of '1's in the mask.
    if (DataVT.isScalableVector()) {
      // Scalable vectors can't be bitcast to a single integer, so widen the
      // mask elements and sum them instead.
      EVT MaskExtVT = MaskVT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i32);
      SDValue MaskExt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MaskExtVT, Operand: Mask);
      Increment = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: MVT::i32, Operand: MaskExt);
    } else {
      // Fixed-width: bitcast the mask to an integer and count set bits.
      EVT MaskIntVT =
          EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
      SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
      // Widen narrow masks so CTPOP operates on at least 32 bits.
      if (MaskIntVT.getSizeInBits() < 32) {
        MaskInIntReg =
            DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: MVT::i32, Operand: MaskInIntReg);
        MaskIntVT = MVT::i32;
      }
      Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
    }
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
                                    VT: AddrVT);
    Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
    Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
  } else
    Increment = DAG.getTypeSize(DL, VT: AddrVT, TS: DataVT.getStoreSize());

  return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
}
10857
/// Clamp a dynamic index used to address a sub-vector of \p SubEC elements
/// within a vector of type \p VecVT so that the whole sub-vector stays in
/// bounds.  Uses AND for power-of-two single-element cases, and UMIN against
/// the largest valid start index otherwise (with a vscale-based bound when
/// indexing fixed-width sub-vectors inside a scalable vector).
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       ElementCount SubEC) {
  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
         "Cannot index a scalable vector within a fixed-width vector");

  unsigned NElts = VecVT.getVectorMinNumElements();
  unsigned NumSubElts = SubEC.getKnownMinValue();
  EVT IdxVT = Idx.getValueType();

  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    // Bound = vscale * NElts - NumSubElts, i.e. the last valid start index.
    SDValue VS =
        DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
    // USUBSAT guards against underflow when the sub-vector could be larger
    // than the minimum vector size.
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
                              N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
    return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
  }
  // Single element in a power-of-two vector: a simple mask suffices.
  if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
                       N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
  }
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
                     N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
}
10891
10892SDValue
10893TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
10894 EVT VecVT, SDValue Index,
10895 const SDNodeFlags PtrArithFlags) const {
10896 return getVectorSubVecPointer(
10897 DAG, VecPtr, VecVT,
10898 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
10899 Index, PtrArithFlags);
10900}
10901
/// Compute the in-memory address of the sub-vector starting at element
/// \p Index of the vector pointed to by \p VecPtr.  The index is clamped so
/// the sub-vector stays in bounds, scaled by vscale for scalable sub-vectors,
/// and multiplied by the element size before being added to the pointer.
SDValue
TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
                                       EVT VecVT, EVT SubVecVT, SDValue Index,
                                       const SDNodeFlags PtrArithFlags) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");
  assert(SubVecVT.getVectorElementType() == EltVT &&
         "Sub-vector must be a vector with matching element type");
  // Keep the whole sub-vector inside the containing vector.
  Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
                                  SubEC: SubVecVT.getVectorElementCount());

  EVT IdxVT = Index.getValueType();
  // Scalable sub-vectors step by vscale elements per unit of Index.
  if (SubVecVT.isScalableVector())
    Index =
        DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                    N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));

  Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
                      N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
  return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl, Flags: PtrArithFlags);
}
10931
10932//===----------------------------------------------------------------------===//
10933// Implementation of Emulated TLS Model
10934//===----------------------------------------------------------------------===//
10935
/// Lower a TLS global address under the emulated TLS model by calling the
/// runtime helper __emutls_get_address on the corresponding "__emutls_v.xyz"
/// control variable.  Returns the address produced by the call.
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
  SDLoc dl(GA);

  ArgListTy Args;
  // Look through casts/aliases so the name matches the defining global.
  const GlobalValue *GV =
      cast<GlobalValue>(Val: GA->getGlobal()->stripPointerCastsAndAliases());
  SmallString<32> NameString("__emutls_v.");
  NameString += GV->getName();
  StringRef EmuTlsVarName(NameString);
  // The "__emutls_v.xyz" control variable must have been created earlier.
  const GlobalVariable *EmuTlsVar =
      GV->getParent()->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Args.emplace_back(args: DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT), args&: VoidPtrType);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At last for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
10972
10973SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10974 SelectionDAG &DAG) const {
10975 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10976 if (!isCtlzFast())
10977 return SDValue();
10978 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
10979 SDLoc dl(Op);
10980 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
10981 EVT VT = Op.getOperand(i: 0).getValueType();
10982 SDValue Zext = Op.getOperand(i: 0);
10983 if (VT.bitsLT(VT: MVT::i32)) {
10984 VT = MVT::i32;
10985 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
10986 }
10987 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
10988 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
10989 SDValue Scc = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Clz,
10990 N2: DAG.getConstant(Val: Log2b, DL: dl, VT: MVT::i32));
10991 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Scc);
10992 }
10993 return SDValue();
10994}
10995
/// Expand an integer SMIN/SMAX/UMIN/UMAX node.  Tries several cheaper
/// forms first (opposite-signedness op when sign bits are known zero,
/// sub/add with USUBSAT, SETCC reuse), falling back to SETCC + SELECT.
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  EVT VT = Op0.getValueType();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
  unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(MinMaxOpc: Opcode);
  if (isOperationLegal(Op: AltOpcode, VT) && DAG.SignBitIsZero(Op: Op0) &&
      DAG.SignBitIsZero(Op: Op1))
    return DAG.getNode(Opcode: AltOpcode, DL, VT, N1: Op0, N2: Op1);

  // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
  if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
      getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
    // Freeze so both uses of Op0 observe the same value.
    Op0 = DAG.getFreeze(V: Op0);
    SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
  }

  // umin(x,y) -> sub(x,usubsat(x,y))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  // TODO: Missing freeze(Op0)?
  if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
      isOperationLegal(Op: ISD::USUBSAT, VT)) {
    return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
                       N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  // Attempt to find an existing SETCC node that we can reuse.
  // TODO: Do we need a generic doesSETCCNodeExist?
  // TODO: Missing freeze(Op0)/freeze(Op1)?
  auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
                         ISD::CondCode PrefCommuteCC,
                         ISD::CondCode AltCommuteCC) {
    SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
    // Prefer a condition code for which a SETCC already exists in the DAG,
    // checking the non-commuted forms first.
    for (ISD::CondCode CC : {PrefCC, AltCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
      }
    }
    // Commuted forms select the operands in the opposite order.
    for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
      if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
                            Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
        SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
        return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
      }
    }
    // No reusable SETCC: build the preferred comparison from scratch.
    SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
    return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
  };

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  //                      -> Y = (A < B) ? B : A
  //                      -> Y = (A >= B) ? A : B
  //                      -> Y = (A <= B) ? B : A
  switch (Opcode) {
  case ISD::SMAX:
    return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
  case ISD::SMIN:
    return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
  case ISD::UMAX:
    return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
  case ISD::UMIN:
    return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
  }

  llvm_unreachable("How did we get here?");
}
11082
// Expand a saturating add/sub node ([US]{ADD,SUB}SAT). Cheap min/max-based
// forms are tried first when the corresponding ops are legal; otherwise the
// node is lowered via the matching overflow-reporting op ([US]{ADD,SUB}O)
// plus a select (or boolean-mask arithmetic) that clamps on overflow.
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
  }

  // usub.sat(a, 1) -> sub(a, zext(a != 0))
  if (Opcode == ISD::USUBSAT && isOneOrOneSplat(V: RHS)) {
    // Freeze LHS so the compare and the subtract observe the same value.
    LHS = DAG.getFreeze(V: LHS);
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
    SDValue IsNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETNE);
    SDValue Subtrahend = DAG.getBoolExtOrTrunc(Op: IsNonZero, SL: dl, VT, OpVT: BoolVT);
    // Mask down to bit 0 in case the boolean was sign-extended (-1 for true).
    Subtrahend =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Subtrahend, N2: DAG.getConstant(Val: 1, DL: dl, VT));
    return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: Subtrahend);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
    SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
    return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
  }

  // Map the saturating opcode onto its overflow-reporting counterpart.
  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(N: Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  // Value 0 is the wrapped sum/difference, value 1 the overflow flag.
  SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
  SDValue SumDiff = Result.getValue(R: 0);
  SDValue Overflow = Result.getValue(R: 1);
  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // Booleans are all-ones when true, so the overflow flag sign-extends
      // directly into a saturation mask: (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
      SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
      return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
  }

  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
    APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
    APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);

    KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
    KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);

    // If either of the operand signs are known, then they are guaranteed to
    // only saturate in one direction. If non-negative they will saturate
    // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
    //
    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
    // sign of 'y' has to be flipped.

    bool LHSIsNonNegative = KnownLHS.isNonNegative();
    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
                                                   : KnownRHS.isNegative();
    if (LHSIsNonNegative || RHSIsNonNegative) {
      SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
    }

    bool LHSIsNegative = KnownLHS.isNegative();
    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
                                                : KnownRHS.isNonNegative();
    if (LHSIsNegative || RHSIsNegative) {
      SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
    }
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  // On overflow the wrapped result's sign bit indicates the saturation
  // direction; broadcasting it (SRA by BW-1) and XORing with SIGNED_MIN
  // yields SIGNED_MAX or SIGNED_MIN as appropriate.
  APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
  SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
  SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
                              N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
  Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
  return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
}
11210
11211SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
11212 unsigned Opcode = Node->getOpcode();
11213 SDValue LHS = Node->getOperand(Num: 0);
11214 SDValue RHS = Node->getOperand(Num: 1);
11215 EVT VT = LHS.getValueType();
11216 EVT ResVT = Node->getValueType(ResNo: 0);
11217 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11218 SDLoc dl(Node);
11219
11220 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11221 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11222 SDValue IsLT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: LTPredicate);
11223 SDValue IsGT = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS, Cond: GTPredicate);
11224
11225 // We can't perform arithmetic on i1 values. Extending them would
11226 // probably result in worse codegen, so let's just use two selects instead.
11227 // Some targets are also just better off using selects rather than subtraction
11228 // because one of the conditions can be merged with one of the selects.
11229 // And finally, if we don't know the contents of high bits of a boolean value
11230 // we can't perform any arithmetic either.
11231 if (preferSelectsOverBooleanArithmetic(VT) ||
11232 BoolVT.getScalarSizeInBits() == 1 ||
11233 getBooleanContents(Type: BoolVT) == UndefinedBooleanContent) {
11234 SDValue SelectZeroOrOne =
11235 DAG.getSelect(DL: dl, VT: ResVT, Cond: IsGT, LHS: DAG.getConstant(Val: 1, DL: dl, VT: ResVT),
11236 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ResVT));
11237 return DAG.getSelect(DL: dl, VT: ResVT, Cond: IsLT, LHS: DAG.getAllOnesConstant(DL: dl, VT: ResVT),
11238 RHS: SelectZeroOrOne);
11239 }
11240
11241 if (getBooleanContents(Type: BoolVT) == ZeroOrNegativeOneBooleanContent)
11242 std::swap(a&: IsGT, b&: IsLT);
11243 return DAG.getSExtOrTrunc(Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: BoolVT, N1: IsGT, N2: IsLT), DL: dl,
11244 VT: ResVT);
11245}
11246
11247SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
11248 unsigned Opcode = Node->getOpcode();
11249 bool IsSigned = Opcode == ISD::SSHLSAT;
11250 SDValue LHS = Node->getOperand(Num: 0);
11251 SDValue RHS = Node->getOperand(Num: 1);
11252 EVT VT = LHS.getValueType();
11253 SDLoc dl(Node);
11254
11255 assert((Node->getOpcode() == ISD::SSHLSAT ||
11256 Node->getOpcode() == ISD::USHLSAT) &&
11257 "Expected a SHLSAT opcode");
11258 assert(VT.isInteger() && "Expected operands to be integers");
11259
11260 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
11261 return DAG.UnrollVectorOp(N: Node);
11262
11263 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11264
11265 unsigned BW = VT.getScalarSizeInBits();
11266 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
11267 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
11268 SDValue Orig =
11269 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
11270
11271 SDValue SatVal;
11272 if (IsSigned) {
11273 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
11274 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
11275 SDValue Cond =
11276 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
11277 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
11278 } else {
11279 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
11280 }
11281 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
11282 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
11283}
11284
11285void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11286 bool Signed, SDValue &Lo, SDValue &Hi,
11287 SDValue LHS, SDValue RHS,
11288 SDValue HiLHS, SDValue HiRHS) const {
11289 EVT VT = LHS.getValueType();
11290 assert(RHS.getValueType() == VT && "Mismatching operand types");
11291
11292 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11293 assert((!Signed || !HiLHS) &&
11294 "Signed flag should only be set when HiLHS and RiRHS are null");
11295
11296 // We'll expand the multiplication by brute force because we have no other
11297 // options. This is a trivially-generalized version of the code from
11298 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11299 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11300 // sign bits while calculating the Hi half.
11301 unsigned Bits = VT.getSizeInBits();
11302 unsigned HalfBits = Bits / 2;
11303 SDValue Mask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
11304 SDValue LL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LHS, N2: Mask);
11305 SDValue RL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RHS, N2: Mask);
11306
11307 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RL);
11308 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
11309
11310 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
11311 // This is always an unsigned shift.
11312 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
11313
11314 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11315 SDValue LH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: LHS, N2: Shift);
11316 SDValue RH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: RHS, N2: Shift);
11317
11318 SDValue U =
11319 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RL), N2: TH);
11320 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
11321 SDValue UH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: U, N2: Shift);
11322
11323 SDValue V =
11324 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LL, N2: RH), N2: UL);
11325 SDValue VH = DAG.getNode(Opcode: ShiftOpc, DL: dl, VT, N1: V, N2: Shift);
11326
11327 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
11328 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
11329
11330 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LH, N2: RH),
11331 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
11332
11333 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11334 // the products to Hi.
11335 if (HiLHS) {
11336 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Hi,
11337 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
11338 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: HiRHS, N2: LHS),
11339 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RHS, N2: HiLHS)));
11340 }
11341}
11342
// Produce the full (Lo, Hi) result of LHS * RHS when the double-width type is
// not legal: prefer a __mul* libcall of twice the width, falling back to
// manual expansion via forceExpandMultiply when no suitable libcall exists.
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
                                        bool Signed, const SDValue LHS,
                                        const SDValue RHS, SDValue &Lo,
                                        SDValue &Hi) const {
  EVT VT = LHS.getValueType();
  assert(RHS.getValueType() == VT && "Mismatching operand types");
  EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
  // We can fall back to a libcall with an illegal type for the MUL if we
  // have a libcall big enough.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (WideVT == MVT::i16)
    LC = RTLIB::MUL_I16;
  else if (WideVT == MVT::i32)
    LC = RTLIB::MUL_I32;
  else if (WideVT == MVT::i64)
    LC = RTLIB::MUL_I64;
  else if (WideVT == MVT::i128)
    LC = RTLIB::MUL_I128;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
  if (LibcallImpl == RTLIB::Unsupported) {
    // No usable libcall for this width: expand by hand.
    forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
    return;
  }

  // Build the upper halves of the wide operands for the libcall.
  SDValue HiLHS, HiRHS;
  if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of low
    // part.
    unsigned LoSize = VT.getFixedSizeInBits();
    SDValue Shift = DAG.getShiftAmountConstant(Val: LoSize - 1, VT, DL: dl);
    HiLHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: LHS, N2: Shift);
    HiRHS = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: RHS, N2: Shift);
  } else {
    // Unsigned: zero-extend into the upper halves.
    HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
    HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
  }

  // Attempt a libcall.
  SDValue Ret;
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setIsSigned(Signed);
  CallOptions.setIsPostTypeLegalization(true);
  if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
    // Halves of WideVT are packed into registers in different order
    // depending on platform endianness. This is usually handled by
    // the C calling convention, but we can't defer to it in
    // the legalizer.
    SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
    Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
  } else {
    SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
    Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
  }
  assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
         "Ret value is a collection of constituent nodes holding result.");
  if (DAG.getDataLayout().isLittleEndian()) {
    // Same as above.
    Lo = Ret.getOperand(i: 0);
    Hi = Ret.getOperand(i: 1);
  } else {
    Lo = Ret.getOperand(i: 1);
    Hi = Ret.getOperand(i: 0);
  }
}
11408
// Expand a fixed-point multiply ([US]MULFIX[SAT]) by computing the
// double-width product (via MUL_LOHI, MULH*, a widened MUL, or full manual
// expansion), shifting the compile-time scale back out, and clamping the
// result when the saturating variants overflow.
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  EVT VT = LHS.getValueType();
  // Operand 2 is the constant scale: the number of fractional bits.
  unsigned Scale = Node->getConstantOperandVal(Num: 2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
        return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
    } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
      // Saturating signed multiply with no scale: use SMULO's overflow bit
      // to choose between the product and a saturation constant.
      SDValue Result =
          DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
      SDValue Product = Result.getValue(R: 0);
      SDValue Overflow = Result.getValue(R: 1);
      SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);

      APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
      SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
      SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
      // Xor the inputs, if resulting sign bit is 0 the product will be
      // positive, else negative.
      SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
      SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
      Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
    } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
      // Saturating unsigned multiply with no scale: clamp to max on overflow.
      SDValue Result =
          DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
      SDValue Product = Result.getValue(R: 0);
      SDValue Overflow = Result.getValue(R: 1);

      APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
      SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
      return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VTSize * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());
  if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
    SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
    Lo = Result.getValue(R: 0);
    Hi = Result.getValue(R: 1);
  } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
    Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
    Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
  } else if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
    // Try for a multiplication using a wider type.
    unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue LHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: LHS);
    SDValue RHSExt = DAG.getNode(Opcode: Ext, DL: dl, VT: WideVT, Operand: RHS);
    SDValue Res = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHSExt, N2: RHSExt);
    Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Res);
    SDValue Shifted =
        DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: WideVT, N1: Res,
                    N2: DAG.getShiftAmountConstant(Val: VTSize, VT: WideVT, DL: dl));
    Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Shifted);
  } else if (VT.isVector()) {
    // No legal way to compute a vector double-width product; give up and let
    // the caller handle it.
    return SDValue();
  } else {
    forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
                               N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
    SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
                                      DL: dl, VT);
    Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
                             True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
                             Cond: ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
  SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);

  if (Scale == 0) {
    // With no fractional bits the entire product must fit in Lo; overflow is
    // detected by comparing Hi against Lo's sign extension.
    SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
                               N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
    SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
    // Saturated to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
                                               Cond: ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
  }

  // We handled Scale==0 above so all the bits to examine is in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > ((1 << (Scale - 1)) - 1))
  SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
                                    DL: dl, VT);
  Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1)))
  SDValue HighMask =
      DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
                      DL: dl, VT);
  Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
  return Result;
}
11565
// Expand a fixed-point divide ([US]DIVFIX[SAT]) in the operand type by
// pre-shifting the operands to absorb the scale (LHS up / RHS down), then
// emitting a regular division rounded towards negative infinity for the
// signed case. Returns SDValue() if there is not enough headroom to do the
// division without widening.
SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
                            : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  // Prefer shifting the LHS up; shift the RHS down only for what remains.
  unsigned LHSShift = std::min(a: LHSLead, b: Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  if (LHSShift)
    LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
                      N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
  if (RHSShift)
    RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
                      N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
                         VTList: DAG.getVTList(VT1: VT, VT2: VT),
                         N1: LHS, N2: RHS);
      Rem = Quot.getValue(R: 1);
      Quot = Quot.getValue(R: 0);
    } else {
      Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
                         N1: LHS, N2: RHS);
      Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
                        N1: LHS, N2: RHS);
    }
    SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
    SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
    // The quotient is negative exactly when the operand signs differ.
    SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
    SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
                               N2: DAG.getConstant(Val: 1, DL: dl, VT));
    Quot = DAG.getSelect(DL: dl, VT,
                         Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
                         LHS: Sub1, RHS: Quot);
  } else
    Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
                       N1: LHS, N2: RHS);

  return Quot;
}
11651
11652void TargetLowering::expandUADDSUBO(
11653 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11654 SDLoc dl(Node);
11655 SDValue LHS = Node->getOperand(Num: 0);
11656 SDValue RHS = Node->getOperand(Num: 1);
11657 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11658
11659 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11660 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11661 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
11662 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
11663 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
11664 Ops: { LHS, RHS, CarryIn });
11665 Result = SDValue(NodeCarry.getNode(), 0);
11666 Overflow = SDValue(NodeCarry.getNode(), 1);
11667 return;
11668 }
11669
11670 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
11671 VT: LHS.getValueType(), N1: LHS, N2: RHS);
11672
11673 EVT ResultType = Node->getValueType(ResNo: 1);
11674 EVT SetCCType = getSetCCResultType(
11675 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
11676 SDValue SetCC;
11677 if (IsAdd && isOneConstant(V: RHS)) {
11678 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11679 // the live range of X. We assume comparing with 0 is cheap.
11680 // The general case (X + C) < C is not necessarily beneficial. Although we
11681 // reduce the live range of X, we may introduce the materialization of
11682 // constant C.
11683 SetCC =
11684 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
11685 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
11686 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
11687 // Special case: uaddo X, -1 overflows if X != 0.
11688 SetCC =
11689 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
11690 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
11691 } else {
11692 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11693 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
11694 }
11695 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
11696}
11697
/// Expand signed overflow arithmetic (ISD::SADDO / ISD::SSUBO) into a plain
/// ADD/SUB plus an explicitly computed overflow flag.
///
/// \p Result receives the (modular, possibly wrapped) arithmetic result and
/// \p Overflow receives a boolean of the node's second result type which is
/// true iff signed overflow occurred.
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  // The value result is just the ordinary wrapping add/sub.
  Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
                       VT: LHS.getValueType(), N1: LHS, N2: RHS);

  EVT ResultType = Node->getValueType(ResNo: 1);
  EVT OType = getSetCCResultType(
      DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  // The saturating form differs from the wrapping form exactly when the
  // operation overflowed.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(Op: OpcSat, VT: LHS.getValueType())) {
    SDValue Sat = DAG.getNode(Opcode: OpcSat, DL: dl, VT: LHS.getValueType(), N1: LHS, N2: RHS);
    SDValue SetCC = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: Sat, Cond: ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(DL: dl, VT: OType, LHS: Result, RHS: LHS, Cond: ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(DL: dl, VT: OType, LHS: RHS, RHS: Zero, Cond: IsAdd ? ISD::SETLT : ISD::SETGT);

  // Overflow iff exactly one of the two conditions above holds.
  Overflow = DAG.getBoolExtOrTrunc(
      Op: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OType, N1: ConditionRHS, N2: ResultLowerThanLHS), SL: dl,
      VT: ResultType, OpVT: ResultType);
}
11737
/// Expand ISD::[US]MULO into a multiply plus an explicit overflow check.
///
/// On success, \p Result receives the low half of the product and \p Overflow
/// a boolean (of the node's second result type) that is set iff the full
/// product does not fit in the result type. Returns false only for vector
/// types for which no legal strategy exists (scalars always succeed via
/// forceExpandWideMUL).
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(N: RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(Val: C.logBase2(), VT, DL: dl);
      Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: ShiftAmt);
      // Overflow iff shifting the product back does not recover LHS.
      Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT,
                              LHS: DAG.getNode(Opcode: UseArithShift ? ISD::SRA : ISD::SRL,
                                          DL: dl, VT, N1: Result, N2: ShiftAmt),
                              RHS: LHS, Cond: ISD::SETNE);
      return true;
    }
  }

  EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT, EC: VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  // Strategy table, indexed by signedness:
  // { high-half multiply, lo/hi pair multiply, extension to double width }.
  static const unsigned Ops[2][3] =
    { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
      { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Op: Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
    TopHalf = DAG.getNode(Opcode: Ops[isSigned][0], DL: dl, VT, N1: LHS, N2: RHS);
  } else if (isOperationLegalOrCustom(Op: Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Opcode: Ops[isSigned][1], DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS,
                             N2: RHS);
    TopHalf = BottomHalf.getValue(R: 1);
  } else if (isTypeLegal(VT: WideVT)) {
    // Multiply in the double-width type and split the product into halves.
    LHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: LHS);
    RHS = DAG.getNode(Opcode: Ops[isSigned][2], DL: dl, VT: WideVT, Operand: RHS);
    SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: LHS, N2: RHS);
    BottomHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits(), VT: WideVT, DL: dl);
    TopHalf = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT,
                          Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Mul, N2: ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // Scalar fallback: force a full wide-multiply expansion.
    forceExpandWideMUL(DAG, dl, Signed: isSigned, LHS, RHS, Lo&: BottomHalf, Hi&: TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
    // Signed overflow iff the top half is not the sign-extension of the
    // bottom half's sign bit.
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        Val: VT.getScalarSizeInBits() - 1, VT: BottomHalf.getValueType(), DL: dl);
    SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: BottomHalf, N2: ShiftAmt);
    Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf, RHS: Sign, Cond: ISD::SETNE);
  } else {
    // Unsigned overflow iff any product bit landed in the top half.
    Overflow = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: TopHalf,
                            RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(ResNo: 1);
  if (RType.bitsLT(VT: Overflow.getValueType()))
    Overflow = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: RType, Operand: Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}
11817
/// Expand an ISD::VECREDUCE_* node into scalar operations.
///
/// First tries to repeatedly halve the vector, combining the halves with the
/// reduction's scalar base opcode while that remains legal/custom; falls back
/// to extracting every element and chaining the base opcode. Aborts for
/// scalable vectors, whose element count is unknown at compile time.
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
  SDValue Op = Node->getOperand(Num: 0);
  EVT VT = Op.getValueType();

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorElementCount().isKnownMultipleOf(RHS: 2)) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(Context&: *DAG.getContext());
      if (!isOperationLegalOrCustom(Op: BaseOpcode, VT: HalfVT))
        break;

      // Combine the low and high halves element-wise with the base opcode.
      SDValue Lo, Hi;
      std::tie(args&: Lo, args&: Hi) = DAG.SplitVector(N: Op, DL: dl);
      Op = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: HalfVT, N1: Lo, N2: Hi, Flags: Node->getFlags());
      VT = HalfVT;

      // Stop if splitting is enough to make the reduction legal.
      if (isOperationLegalOrCustom(Op: Node->getOpcode(), VT: HalfVT))
        return DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Op,
                           Flags: Node->getFlags());
    }
  }

  if (VT.isScalableVector())
    reportFatalInternalError(
        reason: "Expanding reductions for scalable vectors is undefined.");

  // Fully scalarize: extract every element and fold them left-to-right.
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Args&: Ops, Start: 0, Count: NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags: Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(ResNo: 0))
    Res = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: Node->getValueType(ResNo: 0), Operand: Res);
  return Res;
}
11862
11863SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11864 SDLoc dl(Node);
11865 SDValue AccOp = Node->getOperand(Num: 0);
11866 SDValue VecOp = Node->getOperand(Num: 1);
11867 SDNodeFlags Flags = Node->getFlags();
11868
11869 EVT VT = VecOp.getValueType();
11870 EVT EltVT = VT.getVectorElementType();
11871
11872 if (VT.isScalableVector())
11873 report_fatal_error(
11874 reason: "Expanding reductions for scalable vectors is undefined.");
11875
11876 unsigned NumElts = VT.getVectorNumElements();
11877
11878 SmallVector<SDValue, 8> Ops;
11879 DAG.ExtractVectorElements(Op: VecOp, Args&: Ops, Start: 0, Count: NumElts);
11880
11881 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(VecReduceOpcode: Node->getOpcode());
11882
11883 SDValue Res = AccOp;
11884 for (unsigned i = 0; i < NumElts; i++)
11885 Res = DAG.getNode(Opcode: BaseOpcode, DL: dl, VT: EltVT, N1: Res, N2: Ops[i], Flags);
11886
11887 return Res;
11888}
11889
/// Expand ISD::[SU]REM in terms of other legal/custom operations.
///
/// Prefers a combined DIVREM node (taking its remainder result), then falls
/// back to the identity X % Y == X - (X / Y) * Y when only division is
/// available. Returns false if neither strategy applies, leaving \p Result
/// untouched.
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(Num: 0);
  SDValue Divisor = Node->getOperand(Num: 1);
  if (isOperationLegalOrCustom(Op: DivRemOpc, VT)) {
    // Result 1 of DIVREM is the remainder.
    SDVTList VTs = DAG.getVTList(VT1: VT, VT2: VT);
    Result = DAG.getNode(Opcode: DivRemOpc, DL: dl, VTList: VTs, N1: Dividend, N2: Divisor).getValue(R: 1);
    return true;
  }
  if (isOperationLegalOrCustom(Op: DivOpc, VT)) {
    // X % Y -> X-X/Y*Y
    SDValue Divide = DAG.getNode(Opcode: DivOpc, DL: dl, VT, N1: Dividend, N2: Divisor);
    SDValue Mul = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Divide, N2: Divisor);
    Result = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Mul);
    return true;
  }
  return false;
}
11913
/// Expand ISD::FP_TO_[SU]INT_SAT: a float-to-int conversion that saturates
/// out-of-range inputs to the min/max of a SatVT-wide integer (given as a VT
/// operand) and maps NaN to zero, instead of being undefined like plain
/// FP_TO_[SU]INT.
SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(Num: 0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);

  EVT SatVT = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(numBits: SatWidth).sext(width: DstWidth);
    MaxInt = APInt::getSignedMaxValue(numBits: SatWidth).sext(width: DstWidth);
  } else {
    MinInt = APInt::getMinValue(numBits: SatWidth).zext(width: DstWidth);
    MaxInt = APInt::getMaxValue(numBits: SatWidth).zext(width: DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: MVT::f32, Operand: Src);
    SrcVT = Src.getValueType();
  }

  // Convert the integer bounds to FP; rounding toward zero keeps the FP
  // bounds inside the representable integer range even when inexact.
  const fltSemantics &Sem = SrcVT.getFltSemantics();
  APFloat MinFloat(Sem);
  APFloat MaxFloat(Sem);

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(Input: MinInt, IsSigned, RM: APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(Input: MaxInt, IsSigned, RM: APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(Val: MinFloat, DL: dl, VT: SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(Val: MaxFloat, DL: dl, VT: SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(Op: ISD::FMINNUM, VT: SrcVT) &&
                     isOperationLegal(Op: ISD::FMAXNUM, VT: SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(Opcode: ISD::FMAXNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(Opcode: ISD::FMINNUM, DL: dl, VT: SrcVT, N1: Clamped, N2: MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  DL: dl, VT: DstVT, Operand: Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
    EVT SetCCVT =
        getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
    // SETUO (unordered) is true exactly when Src is NaN.
    SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
    return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(Val: MinInt, DL: dl, VT: DstVT);
  SDValue MaxIntNode = DAG.getConstant(Val: MaxInt, DL: dl, VT: DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(Opcode: IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, DL: dl, VT: DstVT, Operand: Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MinFloatNode, Cond: ISD::SETULT);
  Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: ULT, LHS: MinIntNode, RHS: Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: MaxFloatNode, Cond: ISD::SETOGT);
  Select = DAG.getSelect(DL: dl, VT: DstVT, Cond: OGT, LHS: MaxIntNode, RHS: Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(Val: 0, DL: dl, VT: DstVT);
  SDValue IsNan = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Src, Cond: ISD::CondCode::SETUO);
  return DAG.getSelect(DL: dl, VT: DstVT, Cond: IsNan, LHS: ZeroInt, RHS: Select);
}
12023
/// Round \p Op (of type \p OperandVT) to the narrower \p ResultVT using
/// round-to-odd ("von Neumann rounding"): if the narrowing is inexact, force
/// the result's least-significant mantissa bit to 1.
///
/// This makes a subsequent second rounding step safe — see the double-rounding
/// reference cited in the body. Returns the narrowed value, produced via an
/// integer bitcast of ResultVT.
SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
                                                const SDLoc &dl,
                                                SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  // Same-width "narrowing" is a no-op.
  if (OperandVT.getScalarType() == ResultVT.getScalarType())
    return Op;
  EVT ResultIntVT = ResultVT.changeTypeToInteger();
  // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
  // can induce double-rounding which may alter the results. We can
  // correct for this using a trick explained in: Boldo, Sylvie, and
  // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
  // World Congress. 2005.
  SDValue Narrow = DAG.getFPExtendOrRound(Op, DL: dl, VT: ResultVT);
  // Re-extend to the wide type so we can compare with the original and see
  // whether (and in which direction) rounding occurred.
  SDValue NarrowAsWide = DAG.getFPExtendOrRound(Op: Narrow, DL: dl, VT: OperandVT);

  // We can keep the narrow value as-is if narrowing was exact (no
  // rounding error), the wide value was NaN (the narrow value is also
  // NaN and should be preserved) or if we rounded to the odd value.
  SDValue NarrowBits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultIntVT, Operand: Narrow);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResultIntVT);
  SDValue NegativeOne = DAG.getAllOnesConstant(DL: dl, VT: ResultIntVT);
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: One);
  EVT ResultIntVTCCVT = getSetCCResultType(
      DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: And.getValueType());
  SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: ResultIntVT);
  // The result is already odd so we don't need to do anything.
  SDValue AlreadyOdd = DAG.getSetCC(DL: dl, VT: ResultIntVTCCVT, LHS: And, RHS: Zero, Cond: ISD::SETNE);

  EVT WideSetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                                       VT: Op.getValueType());
  // We keep results which are exact, odd or NaN.
  // SETUEQ is true for equal-or-unordered, i.e. exact round-trip or NaN.
  SDValue KeepNarrow =
      DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: Op, RHS: NarrowAsWide, Cond: ISD::SETUEQ);
  KeepNarrow = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: WideSetCCVT, N1: KeepNarrow, N2: AlreadyOdd);
  // We morally performed a round-down if AbsNarrow is smaller than
  // AbsWide.
  SDValue AbsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: Op);
  SDValue AbsNarrowAsWide = DAG.getNode(Opcode: ISD::FABS, DL: dl, VT: OperandVT, Operand: NarrowAsWide);
  SDValue NarrowIsRd =
      DAG.getSetCC(DL: dl, VT: WideSetCCVT, LHS: AbsWide, RHS: AbsNarrowAsWide, Cond: ISD::SETOGT);
  // If the narrow value is odd or exact, pick it.
  // Otherwise, narrow is even and corresponds to either the rounded-up
  // or rounded-down value. If narrow is the rounded-down value, we want
  // the rounded-up value as it will be odd.
  // Nudging the integer bit-pattern by +/-1 moves to the adjacent float.
  SDValue Adjust = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: NarrowIsRd, LHS: One, RHS: NegativeOne);
  SDValue Adjusted = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResultIntVT, N1: NarrowBits, N2: Adjust);
  Op = DAG.getSelect(DL: dl, VT: ResultIntVT, Cond: KeepNarrow, LHS: NarrowBits, RHS: Adjusted);
  return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ResultVT, Operand: Op);
}
12073
/// Expand an ISD::FP_ROUND node. Currently only handles rounding to bf16
/// (via round-to-odd through f32 plus integer round-to-nearest-even on the
/// bit pattern); returns an empty SDValue for all other targets types so the
/// caller can fall back to default handling.
SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
  SDValue Op = Node->getOperand(Num: 0);
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc dl(Node);
  if (VT.getScalarType() == MVT::bf16) {
    // Operand 1 == 1 means the FP_ROUND is "truncating" (the value is known
    // exactly representable), so a direct conversion node suffices.
    if (Node->getConstantOperandVal(Num: 1) == 1) {
      return DAG.getNode(Opcode: ISD::FP_TO_BF16, DL: dl, VT, Operand: Node->getOperand(Num: 0));
    }
    EVT OperandVT = Op.getValueType();
    // SETUO (unordered) detects NaN inputs; checked against the original
    // operand before any narrowing.
    SDValue IsNaN = DAG.getSetCC(
        DL: dl,
        VT: getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: OperandVT),
        LHS: Op, RHS: Op, Cond: ISD::SETUO);

    // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
    // can induce double-rounding which may alter the results. We can
    // correct for this using a trick explained in: Boldo, Sylvie, and
    // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
    // World Congress. 2005.
    EVT F32 = VT.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::f32);
    EVT I32 = F32.changeTypeToInteger();
    Op = expandRoundInexactToOdd(ResultVT: F32, Op, dl, DAG);
    Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);

    // Conversions should set NaN's quiet bit. This also prevents NaNs from
    // turning into infinities.
    SDValue NaN =
        DAG.getNode(Opcode: ISD::OR, DL: dl, VT: I32, N1: Op, N2: DAG.getConstant(Val: 0x400000, DL: dl, VT: I32));

    // Factor in the contribution of the low 16 bits.
    // Adding 0x7fff plus the f32 bit-pattern's bit 16 implements
    // round-to-nearest-even on the truncated bf16 value.
    SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: I32);
    SDValue Lsb = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
                              N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
    Lsb = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: I32, N1: Lsb, N2: One);
    SDValue RoundingBias =
        DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: DAG.getConstant(Val: 0x7fff, DL: dl, VT: I32), N2: Lsb);
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: I32, N1: Op, N2: RoundingBias);

    // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
    // 0x80000000.
    Op = DAG.getSelect(DL: dl, VT: I32, Cond: IsNaN, LHS: NaN, RHS: Add);

    // Now that we have rounded, shift the bits into position.
    Op = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: I32, N1: Op,
                     N2: DAG.getShiftAmountConstant(Val: 16, VT: I32, DL: dl));
    Op = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: I32, Operand: Op);
    EVT I16 = I32.changeElementType(Context&: *DAG.getContext(), EltVT: MVT::i16);
    Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: I16, Operand: Op);
    return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Op);
  }
  // Not a case this expansion handles; let the caller decide.
  return SDValue();
}
12127
/// Expand ISD::VECTOR_SPLICE_LEFT/RIGHT through a stack temporary.
///
/// Stores V1 and V2 contiguously on the stack (as if CONCAT_VECTORS), then
/// loads one vector's worth of elements starting at an offset derived from
/// the (runtime) Offset operand. Used for scalable vectors and for fixed
/// vectors with non-constant offsets; constant fixed-length splices are
/// expected to use VECTOR_SHUFFLE instead (see the assert).
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
          Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
         "Unexpected opcode!");
  assert((Node->getValueType(0).isScalableVector() ||
          !isa<ConstantSDNode>(Node->getOperand(2))) &&
         "Fixed length vector types with constant offsets expected to use "
         "SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(ResNo: 0);
  SDValue V1 = Node->getOperand(Num: 0);
  SDValue V2 = Node->getOperand(Num: 1);
  SDValue Offset = Node->getOperand(Num: 2);
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  if (VECTOR_SPLICE_LEFT)
  //    Ptr = Ptr + (Offset * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot sized for both input vectors back to back.
  EVT MemVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: VT.getVectorElementType(),
                               EC: VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(Bytes: MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(Chain: DAG.getEntryNode(), dl: DL, Val: V1, Ptr: StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue VTBytes = DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getStoreSize());
  SDValue StackPtr2 = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: VTBytes);
  SDValue StoreV2 = DAG.getStore(Chain: StoreV1, dl: DL, Val: V2, Ptr: StackPtr2, PtrInfo);

  // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
  SDValue EltByteSize =
      DAG.getTypeSize(DL, VT: PtrVT, TS: VT.getVectorElementType().getStoreSize());
  Offset = DAG.getZExtOrTrunc(Op: Offset, DL, VT: PtrVT);
  SDValue TrailingBytes = DAG.getNode(Opcode: ISD::MUL, DL, VT: PtrVT, N1: Offset, N2: EltByteSize);

  // Clamp the byte offset to the size of one vector.
  TrailingBytes = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PtrVT, N1: TrailingBytes, N2: VTBytes);

  if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
    StackPtr = DAG.getMemBasePlusOffset(Base: StackPtr, Offset: TrailingBytes, DL);
  else
    StackPtr = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: StackPtr2, N2: TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, dl: DL, Chain: StoreV2, Ptr: StackPtr,
                     PtrInfo: MachinePointerInfo::getUnknownStack(MF));
}
12188
/// Expand ISD::VECTOR_COMPRESS through a stack temporary.
///
/// Iterates over all lanes, storing each vector element to the next free
/// stack slot; the output position only advances for lanes whose mask bit is
/// set, so masked-in elements end up packed at the front. If a passthru
/// vector is given, it is stored first so unwritten slots keep passthru
/// values, and the slot just past the packed elements is patched afterwards
/// (it may have been clobbered by a masked-off lane's store).
SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(Num: 0);
  SDValue Mask = Node->getOperand(Num: 1);
  SDValue Passthru = Node->getOperand(Num: 2);

  EVT VecVT = Vec.getValueType();
  EVT ScalarVT = VecVT.getScalarType();
  EVT MaskVT = Mask.getValueType();
  EVT MaskScalarVT = MaskVT.getScalarType();

  // Needs to be handled by targets that have scalable vector types.
  if (VecVT.isScalableVector())
    report_fatal_error(reason: "Cannot expand masked_compress for scalable vectors.");

  SDValue StackPtr = DAG.CreateStackTemporary(
      Bytes: VecVT.getStoreSize(), Alignment: DAG.getReducedAlign(VT: VecVT, /*UseABI=*/false));
  int FI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI);

  MVT PositionVT = getVectorIdxTy(DL: DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  // Running index of the next slot to write in the output vector.
  SDValue OutPos = DAG.getConstant(Val: 0, DL, VT: PositionVT);

  bool HasPassthru = !Passthru.isUndef();

  // If we have a passthru vector, store it on the stack, overwrite the matching
  // positions and then re-write the last element that was potentially
  // overwritten even though mask[i] = false.
  if (HasPassthru)
    Chain = DAG.getStore(Chain, dl: DL, Val: Passthru, Ptr: StackPtr, PtrInfo);

  SDValue LastWriteVal;
  APInt PassthruSplatVal;
  bool IsSplatPassthru =
      ISD::isConstantSplatVector(N: Passthru.getNode(), SplatValue&: PassthruSplatVal);

  if (IsSplatPassthru) {
    // As we do not know which position we wrote to last, we cannot simply
    // access that index from the passthru vector. So we first check if passthru
    // is a splat vector, to use any element ...
    LastWriteVal = DAG.getConstant(Val: PassthruSplatVal, DL, VT: ScalarVT);
  } else if (HasPassthru) {
    // ... if it is not a splat vector, we need to get the passthru value at
    // position = popcount(mask) and re-load it from the stack before it is
    // overwritten in the loop below.
    EVT PopcountVT = ScalarVT.changeTypeToInteger();
    // popcount(mask) == vecreduce_add over the mask's i1 lanes (zero-extended
    // so the lanes sum rather than XOR).
    SDValue Popcount = DAG.getNode(
        Opcode: ISD::TRUNCATE, DL,
        VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: MVT::i1), Operand: Mask);
    Popcount = DAG.getNode(
        Opcode: ISD::ZERO_EXTEND, DL,
        VT: MaskVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: PopcountVT),
        Operand: Popcount);
    Popcount = DAG.getNode(Opcode: ISD::VECREDUCE_ADD, DL, VT: PopcountVT, Operand: Popcount);
    SDValue LastElmtPtr =
        getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: Popcount);
    LastWriteVal = DAG.getLoad(
        VT: ScalarVT, dl: DL, Chain, Ptr: LastElmtPtr,
        PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
    Chain = LastWriteVal.getValue(R: 1);
  }

  unsigned NumElms = VecVT.getVectorNumElements();
  for (unsigned I = 0; I < NumElms; I++) {
    // Unconditionally store lane I at the current output position; whether it
    // "sticks" depends on whether OutPos advances below.
    SDValue ValI = DAG.getExtractVectorElt(DL, VT: ScalarVT, Vec, Idx: I);
    SDValue OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);
    Chain = DAG.getStore(
        Chain, dl: DL, Val: ValI, Ptr: OutPtr,
        PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));

    // Get the mask value and add it to the current output position. This
    // either increments by 1 if MaskI is true or adds 0 otherwise.
    // Freeze in case we have poison/undef mask entries.
    SDValue MaskI = DAG.getExtractVectorElt(DL, VT: MaskScalarVT, Vec: Mask, Idx: I);
    MaskI = DAG.getFreeze(V: MaskI);
    MaskI = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i1, Operand: MaskI);
    MaskI = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PositionVT, Operand: MaskI);
    OutPos = DAG.getNode(Opcode: ISD::ADD, DL, VT: PositionVT, N1: OutPos, N2: MaskI);

    if (HasPassthru && I == NumElms - 1) {
      SDValue EndOfVector =
          DAG.getConstant(Val: VecVT.getVectorNumElements() - 1, DL, VT: PositionVT);
      SDValue AllLanesSelected =
          DAG.getSetCC(DL, VT: MVT::i1, LHS: OutPos, RHS: EndOfVector, Cond: ISD::CondCode::SETUGT);
      // Clamp OutPos so the fix-up store below stays in bounds.
      OutPos = DAG.getNode(Opcode: ISD::UMIN, DL, VT: PositionVT, N1: OutPos, N2: EndOfVector);
      OutPtr = getVectorElementPointer(DAG, VecPtr: StackPtr, VecVT, Index: OutPos);

      // Re-write the last ValI if all lanes were selected. Otherwise,
      // overwrite the last written slot with the passthru value.
      LastWriteVal = DAG.getSelect(DL, VT: ScalarVT, Cond: AllLanesSelected, LHS: ValI,
                                   RHS: LastWriteVal, Flags: SDNodeFlags::Unpredictable);
      Chain = DAG.getStore(
          Chain, dl: DL, Val: LastWriteVal, Ptr: OutPtr,
          PtrInfo: MachinePointerInfo::getUnknownStack(MF&: DAG.getMachineFunction()));
    }
  }

  // Reload the compressed vector from the stack.
  return DAG.getLoad(VT: VecVT, dl: DL, Chain, Ptr: StackPtr, PtrInfo);
}
12291
/// Expand ISD::PARTIAL_REDUCE_[US]MLA / PARTIAL_REDUCE_FMLA:
/// Acc + partial-reduce(MulLHS * MulRHS), where the multiply operands are
/// wider vectors that get extended to the accumulator's element type, and the
/// product is folded into the accumulator one Acc-sized subvector at a time.
SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  SDValue Acc = N->getOperand(Num: 0);
  SDValue MulLHS = N->getOperand(Num: 1);
  SDValue MulRHS = N->getOperand(Num: 2);
  EVT AccVT = Acc.getValueType();
  EVT MulOpVT = MulLHS.getValueType();

  // Multiply operands extended to the accumulator's element type (same
  // element count as the multiply operands).
  EVT ExtMulOpVT =
      EVT::getVectorVT(Context&: *DAG.getContext(), VT: AccVT.getVectorElementType(),
                       EC: MulOpVT.getVectorElementCount());

  // Pick the extension matching the node's signedness/FP-ness.
  unsigned ExtOpcLHS, ExtOpcRHS;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::PARTIAL_REDUCE_UMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_SMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
    break;
  case ISD::PARTIAL_REDUCE_FMLA:
    ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
    break;
  }

  if (ExtMulOpVT != MulOpVT) {
    MulLHS = DAG.getNode(Opcode: ExtOpcLHS, DL, VT: ExtMulOpVT, Operand: MulLHS);
    MulRHS = DAG.getNode(Opcode: ExtOpcRHS, DL, VT: ExtMulOpVT, Operand: MulRHS);
  }
  // Skip the multiply entirely when RHS is a splat of 1.
  SDValue Input = MulLHS;
  if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
    if (!llvm::isOneOrOneSplatFP(V: MulRHS))
      Input = DAG.getNode(Opcode: ISD::FMUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
  } else if (!llvm::isOneOrOneSplat(V: MulRHS)) {
    Input = DAG.getNode(Opcode: ISD::MUL, DL, VT: ExtMulOpVT, N1: MulLHS, N2: MulRHS);
  }

  unsigned Stride = AccVT.getVectorMinNumElements();
  unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;

  // Collect all of the subvectors
  std::deque<SDValue> Subvectors = {Acc};
  for (unsigned I = 0; I < ScaleFactor; I++)
    Subvectors.push_back(x: DAG.getExtractSubvector(DL, VT: AccVT, Vec: Input, Idx: I * Stride));

  unsigned FlatNode =
      N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;

  // Flatten the subvector tree: pairwise-add from the front, appending sums
  // at the back, until a single vector remains.
  while (Subvectors.size() > 1) {
    Subvectors.push_back(
        x: DAG.getNode(Opcode: FlatNode, DL, VT: AccVT, Ops: {Subvectors[0], Subvectors[1]}));
    Subvectors.pop_front();
    Subvectors.pop_front();
  }

  assert(Subvectors.size() == 1 &&
         "There should only be one subvector after tree flattening");

  return Subvectors[0];
}
12356
/// Given a store node \p StoreNode, return true if it is safe to fold that node
/// into \p FPNode, which expands to a library call with output pointers.
///
/// Walks the store's operand graph (excluding the direct FPNode use) to prove
/// the fold would create neither a cycle through FPNode nor a call nested
/// inside another call sequence. CALLSEQ_END nodes are deferred rather than
/// traversed so the final hasPredecessorHelper check can still look through
/// them. Conservatively returns false if the walk exceeds the configured
/// step budget.
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
                                                  SDNode *FPNode) {
  SmallVector<const SDNode *, 8> Worklist;
  SmallVector<const SDNode *, 8> DeferredNodes;
  SmallPtrSet<const SDNode *, 16> Visited;

  // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
  for (SDValue Op : StoreNode->ops())
    if (Op.getNode() != FPNode)
      Worklist.push_back(Elt: Op.getNode());

  unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
  while (!Worklist.empty()) {
    const SDNode *Node = Worklist.pop_back_val();
    auto [_, Inserted] = Visited.insert(Ptr: Node);
    if (!Inserted)
      continue;

    // Give up (conservatively) once the search budget is exhausted.
    if (MaxSteps > 0 && Visited.size() >= MaxSteps)
      return false;

    // Reached the FPNode (would result in a cycle).
    // OR Reached CALLSEQ_START (would result in nested call sequences).
    if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
      return false;

    if (Node->getOpcode() == ISD::CALLSEQ_END) {
      // Defer looking into call sequences (so we can check we're outside one).
      // We still need to look through these for the predecessor check.
      DeferredNodes.push_back(Elt: Node);
      continue;
    }

    for (SDValue Op : Node->ops())
      Worklist.push_back(Elt: Op.getNode());
  }

  // True if we're outside a call sequence and don't have the FPNode as a
  // predecessor. No cycles or nested call sequences possible.
  return !SDNode::hasPredecessorHelper(N: FPNode, Visited, Worklist&: DeferredNodes,
                                       MaxSteps);
}
12401
/// Expand \p Node, which produces multiple result values, into a call to the
/// library function \p LC. The callee writes its results through output
/// pointers; \p CallRetResNo, if set, names the one result that instead comes
/// back as the call's return value. On success the expanded values are
/// appended to \p Results in result-number order and true is returned; false
/// is returned when the libcall is unknown or unsupported on this target.
bool TargetLowering::expandMultipleResultFPLibCall(
    SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
    SmallVectorImpl<SDValue> &Results,
    std::optional<unsigned> CallRetResNo) const {
  if (LC == RTLIB::UNKNOWN_LIBCALL)
    return false;

  RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(Call: LC);
  if (LibcallImpl == RTLIB::Unsupported)
    return false;

  LLVMContext &Ctx = *DAG.getContext();
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned NumResults = Node->getNumValues();

  // Find users of the node that store the results (and share input chains). The
  // destination pointers can be used instead of creating stack allocations.
  // ResultStores[ResNo] stays null when no suitable store was found for that
  // result.
  SDValue StoresInChain;
  SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
  for (SDNode *User : Node->users()) {
    if (!ISD::isNormalStore(N: User))
      continue;
    auto *ST = cast<StoreSDNode>(Val: User);
    SDValue StoreValue = ST->getValue();
    unsigned ResNo = StoreValue.getResNo();
    // Ensure the store corresponds to an output pointer.
    if (CallRetResNo == ResNo)
      continue;
    // Ensure the store to the default address space and not atomic or volatile.
    if (!ST->isSimple() || ST->getAddressSpace() != 0)
      continue;
    // Ensure all store chains are the same (so they don't alias).
    if (StoresInChain && ST->getChain() != StoresInChain)
      continue;
    // Ensure the store is properly aligned.
    Type *StoreType = StoreValue.getValueType().getTypeForEVT(Context&: Ctx);
    if (ST->getAlign() <
        DAG.getDataLayout().getABITypeAlign(Ty: StoreType->getScalarType()))
      continue;
    // Avoid:
    // 1. Creating cyclic dependencies.
    // 2. Expanding the node to a call within a call sequence.
    if (!canFoldStoreIntoLibCallOutputPointers(StoreNode: ST, FPNode: Node))
      continue;
    ResultStores[ResNo] = ST;
    StoresInChain = ST->getChain();
  }

  ArgListTy Args;

  // Pass the arguments.
  for (const SDValue &Op : Node->op_values()) {
    EVT ArgVT = Op.getValueType();
    Type *ArgTy = ArgVT.getTypeForEVT(Context&: Ctx);
    Args.emplace_back(args: Op, args&: ArgTy);
  }

  // Pass the output pointers. Reuse a matched store's destination when
  // available; otherwise fall back to a fresh stack temporary for that result.
  SmallVector<SDValue, 2> ResultPtrs(NumResults);
  Type *PointerTy = PointerType::getUnqual(C&: Ctx);
  for (auto [ResNo, ST] : llvm::enumerate(First&: ResultStores)) {
    if (ResNo == CallRetResNo)
      continue;
    EVT ResVT = Node->getValueType(ResNo);
    SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(VT: ResVT);
    ResultPtrs[ResNo] = ResultPtr;
    Args.emplace_back(args&: ResultPtr, args&: PointerTy);
  }

  SDLoc DL(Node);

  if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(Impl: LibcallImpl)) {
    // Pass the vector mask (if required). An all-true mask is used since the
    // whole vector is being computed.
    EVT MaskVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
    SDValue Mask = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT);
    Args.emplace_back(args&: Mask, args: MaskVT.getTypeForEVT(Context&: Ctx));
  }

  // The call returns void unless one result is designated to come back as the
  // call's return value.
  Type *RetType = CallRetResNo.has_value()
                      ? Node->getValueType(ResNo: *CallRetResNo).getTypeForEVT(Context&: Ctx)
                      : Type::getVoidTy(C&: Ctx);
  SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl: LibcallImpl, VT: getPointerTy(DL: DAG.getDataLayout()));
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
      CC: getLibcallImplCallingConv(Call: LibcallImpl), ResultType: RetType, Target: Callee, ArgsList: std::move(Args));

  auto [Call, CallChain] = LowerCallTo(CLI);

  // Load each result back from its output pointer, chained after the call.
  for (auto [ResNo, ResultPtr] : llvm::enumerate(First&: ResultPtrs)) {
    if (ResNo == CallRetResNo) {
      Results.push_back(Elt: Call);
      continue;
    }
    MachinePointerInfo PtrInfo;
    SDValue LoadResult = DAG.getLoad(VT: Node->getValueType(ResNo), dl: DL, Chain: CallChain,
                                     Ptr: ResultPtr, PtrInfo);
    SDValue OutChain = LoadResult.getValue(R: 1);

    if (StoreSDNode *ST = ResultStores[ResNo]) {
      // Replace store with the library call. Users of the store's chain now
      // depend on the load's output chain instead.
      DAG.ReplaceAllUsesOfValueWith(From: SDValue(ST, 0), To: OutChain);
      PtrInfo = ST->getPointerInfo();
    } else {
      // The pointer is a stack temporary created above.
      PtrInfo = MachinePointerInfo::getFixedStack(
          MF&: DAG.getMachineFunction(),
          FI: cast<FrameIndexSDNode>(Val&: ResultPtr)->getIndex());
    }

    Results.push_back(Elt: LoadResult);
  }

  return true;
}
12517
/// Legalize the condition code \p CC for a comparison of \p LHS and \p RHS.
/// Returns true if the caller must rebuild the node: either with the updated
/// \p CC (operands possibly swapped, \p NeedInvert possibly set, meaning the
/// produced value must additionally be logically inverted), or -- when \p RHS
/// and \p CC have been cleared to SDValue() -- with \p LHS already holding the
/// fully expanded result. Returns false when the condition code is already
/// legal. \p Mask / \p EVL select the VP (vector-predicated) form; \p Chain
/// and \p IsSignaling carry the strict-FP variants.
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (getCondCodeAction(CC: CCCode, VT: OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    // First try swapping the operands, which may make the condition legal.
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(Operation: CCCode);
    if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      std::swap(a&: LHS, b&: RHS);
      CC = DAG.getCondCode(Cond: InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(Operation: CCCode, Type: OpVT);
    if (!isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(Operation: InvCC);
      NeedSwap = true;
    }
    if (isCondCodeLegalOrCustom(CC: InvCC, VT: OpVT)) {
      CC = DAG.getCondCode(Cond: InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(a&: LHS, b&: RHS);
      return true;
    }

    // Special case: expand i1 comparisons using logical operations.
    if (OpVT == MVT::i1) {
      SDValue Ret;
      switch (CCCode) {
      default:
        llvm_unreachable("Unknown integer setcc!");
      case ISD::SETEQ: // X == Y --> ~(X ^ Y)
        Ret = DAG.getNOT(DL: dl, Val: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS),
                         VT: MVT::i1);
        break;
      case ISD::SETNE: // X != Y --> (X ^ Y)
        Ret = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: MVT::i1, N1: LHS, N2: RHS);
        break;
      case ISD::SETGT:  // X >s Y --> X == 0 & Y == 1 --> ~X & Y
      case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
        Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: RHS,
                          N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
        break;
      case ISD::SETLT:  // X <s Y --> X == 1 & Y == 0 --> ~Y & X
      case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
        Ret = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i1, N1: LHS,
                          N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
        break;
      case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
      case ISD::SETGE:  // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
        Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: RHS,
                          N2: DAG.getNOT(DL: dl, Val: LHS, VT: MVT::i1));
        break;
      case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
      case ISD::SETLE:  // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
        Ret = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: MVT::i1, N1: LHS,
                          N2: DAG.getNOT(DL: dl, Val: RHS, VT: MVT::i1));
        break;
      }

      // Signal the fully-expanded form: LHS carries the result, RHS/CC are
      // cleared.
      LHS = DAG.getZExtOrTrunc(Op: Ret, DL: dl, VT);
      RHS = SDValue();
      CC = SDValue();
      return true;
    }

    // Otherwise expand into two comparisons (CC1, CC2) combined with Opc.
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (isCondCodeLegal(CC: ISD::SETUNE, VT: OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!isCondCodeLegal(CC: CC2, VT: OpVT) && (isCondCodeLegal(CC: ISD::SETOGT, VT: OpVT) ||
                                            isCondCodeLegal(CC: ISD::SETOLT, VT: OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unorder operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS, Cond: CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(DL: dl, VT, LHS, RHS: LHS, Cond: CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(DL: dl, VT, LHS: RHS, RHS, Cond: CC2, Mask, EVL);
      }
    }
    // For strict-FP nodes, merge the two output chains.
    if (Chain)
      Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: SetCC1.getValue(R: 1),
                          N2: SetCC2.getValue(R: 1));
    if (IsNonVP)
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opcode: Opc, DL: dl, VT, N1: SetCC1, N2: SetCC2, N3: Mask, N4: EVL);
    }
    // Fully expanded: LHS holds the result, RHS/CC are cleared.
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
12710
12711SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12712 SelectionDAG &DAG) const {
12713 EVT VT = Node->getValueType(ResNo: 0);
12714 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12715 // split into two equal parts.
12716 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(RHS: 2))
12717 return SDValue();
12718
12719 // Restrict expansion to cases where both parts can be concatenated.
12720 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12721 if (LoVT != HiVT || !isTypeLegal(VT: LoVT))
12722 return SDValue();
12723
12724 SDLoc DL(Node);
12725 unsigned Opcode = Node->getOpcode();
12726
12727 // Don't expand if the result is likely to be unrolled anyway.
12728 if (!isOperationLegalOrCustomOrPromote(Op: Opcode, VT: LoVT))
12729 return SDValue();
12730
12731 SmallVector<SDValue, 4> LoOps, HiOps;
12732 for (const SDValue &V : Node->op_values()) {
12733 auto [Lo, Hi] = DAG.SplitVector(N: V, DL, LoVT, HiVT);
12734 LoOps.push_back(Elt: Lo);
12735 HiOps.push_back(Elt: Hi);
12736 }
12737
12738 SDValue SplitOpLo = DAG.getNode(Opcode, DL, VT: LoVT, Ops: LoOps);
12739 SDValue SplitOpHi = DAG.getNode(Opcode, DL, VT: HiVT, Ops: HiOps);
12740 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: SplitOpLo, N2: SplitOpHi);
12741}
12742
/// Replace an extract of one element from a loaded vector with a narrow
/// scalar load of just that element. \p InVecVT is the loaded vector type,
/// \p EltNo the (possibly non-constant) element index, \p ResultVT the type
/// the extracted value must have, and \p OriginalLoad must be a simple
/// (non-atomic, non-volatile) load. Returns the new scalar load (extended,
/// truncated, or bitcast to \p ResultVT as needed), or SDValue() when the
/// transform is not legal or not profitable.
SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
                                                     const SDLoc &DL,
                                                     EVT InVecVT, SDValue EltNo,
                                                     LoadSDNode *OriginalLoad,
                                                     SelectionDAG &DAG) const {
  assert(OriginalLoad->isSimple());

  EVT VecEltVT = InVecVT.getVectorElementType();

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltVT.isByteSized())
    return SDValue();

  ISD::LoadExtType ExtTy =
      ResultVT.bitsGT(VT: VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
  if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: VecEltVT))
    return SDValue();

  // Compute the pointer info and alignment of the narrow load. With a
  // constant index the exact byte offset is known; otherwise only the element
  // size bounds the alignment.
  std::optional<unsigned> ByteOffset;
  Align Alignment = OriginalLoad->getAlign();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(Val&: EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
    MPI = OriginalLoad->getPointerInfo().getWithOffset(O: *ByteOffset);
    Alignment = commonAlignment(A: Alignment, Offset: *ByteOffset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
    Alignment = commonAlignment(A: Alignment, Offset: VecEltVT.getSizeInBits() / 8);
  }

  // Let the target veto narrowing this load.
  if (!shouldReduceLoadWidth(Load: OriginalLoad, ExtTy, NewVT: VecEltVT, ByteOffset))
    return SDValue();

  // The narrow access must be both allowed and fast at this alignment.
  unsigned IsFast = 0;
  if (!allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: VecEltVT,
                          AddrSpace: OriginalLoad->getAddressSpace(), Alignment,
                          Flags: OriginalLoad->getMemOperand()->getFlags(), Fast: &IsFast) ||
      !IsFast)
    return SDValue();

  // The original DAG loaded the entire vector from memory, so arithmetic
  // within it must be inbounds.
  SDValue NewPtr = getInboundsVectorElementPointer(
      DAG, VecPtr: OriginalLoad->getBasePtr(), VecVT: InVecVT, Index: EltNo);

  // We are replacing a vector load with a scalar load. The new load must have
  // identical memory op ordering to the original.
  SDValue Load;
  if (ResultVT.bitsGT(VT: VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: ResultVT, MemVT: VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, dl: DL, VT: ResultVT, Chain: OriginalLoad->getChain(),
                          Ptr: NewPtr, PtrInfo: MPI, MemVT: VecEltVT, Alignment,
                          MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
                          AAInfo: OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
  } else {
    // The result type is narrower or the same width as the vector element
    Load = DAG.getLoad(VT: VecEltVT, dl: DL, Chain: OriginalLoad->getChain(), Ptr: NewPtr, PtrInfo: MPI,
                       Alignment, MMOFlags: OriginalLoad->getMemOperand()->getFlags(),
                       AAInfo: OriginalLoad->getAAInfo());
    DAG.makeEquivalentMemoryOrdering(OldLoad: OriginalLoad, NewMemOp: Load);
    if (ResultVT.bitsLT(VT: VecEltVT))
      Load = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: ResultVT, Operand: Load);
    else
      Load = DAG.getBitcast(VT: ResultVT, V: Load);
  }

  return Load;
}
12821